diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8a6f2e2..e1a0a02 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,6 +14,7 @@ permissions: env: PYTHONUNBUFFERED: 1 + MISE_PYTHON_GITHUB_ATTESTATIONS: false jobs: ci: diff --git a/gitops_server/utils/git.py b/gitops_server/utils/git.py index 7fcfea5..f5370d1 100644 --- a/gitops_server/utils/git.py +++ b/gitops_server/utils/git.py @@ -16,7 +16,17 @@ REPO_CACHE_DIR = Path("/tmp/gitops/repocache") REPO_CACHE: dict[str, Path] = {} -repo_lock = asyncio.Semaphore(1) +_cache_locks: dict[str, asyncio.Lock] = {} +_cache_locks_guard = asyncio.Lock() + + +async def _get_cache_lock(git_repo_url: str) -> asyncio.Lock: + async with _cache_locks_guard: + lock = _cache_locks.get(git_repo_url) + if lock is None: + lock = asyncio.Lock() + _cache_locks[git_repo_url] = lock + return lock async def clone_repo(git_repo_url: str, path: str, sha: str | None = None, branch: str | None = None) -> None: @@ -57,22 +67,25 @@ async def temp_repo(git_repo_url: str, ref: str | None) -> AsyncGenerator[str, N with tracer.start_as_current_span("checkout_temp_repo"): cache_path = REPO_CACHE_DIR / git_repo_url.split("/")[-1].split(".")[0] + cache_lock = await _get_cache_lock(git_repo_url) - # Prep the repo cache - if not (cache_path / ".git").exists(): - logger.info("Repo %s not in cache, cloning", git_repo_url) - async with repo_lock: - if cache_path.exists(): - await run(f"rm -rf {cache_path}", suppress_errors=True) - if not cache_path.exists(): + with tempfile.TemporaryDirectory() as temporary_folder_path: + # Serialize cache init and reads against the shared cache so a + # concurrent clone/fetch can't leak a half-written .git/index.lock + # into the copy. + async with cache_lock: + if not (cache_path / ".git").exists(): + logger.info("Repo %s not in cache, cloning", git_repo_url) + if cache_path.exists(): + await run(f"rm -rf {cache_path}", suppress_errors=True) cache_path.mkdir(parents=True) - REPO_CACHE[git_repo_url] = cache_path - await clone_repo(git_repo_url, path=str(cache_path)) + REPO_CACHE[git_repo_url] = cache_path + await clone_repo(git_repo_url, path=str(cache_path)) - # Copy the repo cache to a temporary folder - with tempfile.TemporaryDirectory() as temporary_folder_path: - await run(f"rm -rf {temporary_folder_path}", suppress_errors=True) - await run(f"cp -r {cache_path} {temporary_folder_path}") + await run(f"rm -rf {temporary_folder_path}", suppress_errors=True) + await run(f"cp -r {cache_path} {temporary_folder_path}") + + await run(f"rm -f {temporary_folder_path}/.git/index.lock", suppress_errors=True) await run(f"cd {temporary_folder_path};git fetch; git checkout {ref}") yield temporary_folder_path