Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ repos:
boto3>=1.28.0,
click>=8.0.0,
'fastapi[standard]>=0.109.1',
gitpython>=3.1.0,
httpx,
loguru>=0.7.0,
pathspec>=0.12.1,
Expand Down Expand Up @@ -144,6 +145,7 @@ repos:
boto3>=1.28.0,
click>=8.0.0,
'fastapi[standard]>=0.109.1',
gitpython>=3.1.0,
httpx,
loguru>=0.7.0,
pathspec>=0.12.1,
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ COPY src/ ./src/

RUN set -eux; \
pip install --no-cache-dir --upgrade pip; \
pip install --no-cache-dir --timeout 1000 .[server]
pip install --no-cache-dir --timeout 1000 .[server,mcp]

# Stage 2: Runtime image
FROM python:3.13.5-slim@sha256:4c2cf9917bd1cbacc5e9b07320025bdb7cdf2df7b0ceaccb55e9dd7e30987419
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ readme = {file = "README.md", content-type = "text/markdown" }
requires-python = ">= 3.8"
dependencies = [
"click>=8.0.0",
"gitpython>=3.1.0",
"httpx",
"loguru>=0.7.0",
"pathspec>=0.12.1",
Expand Down
113 changes: 83 additions & 30 deletions src/gitingest/clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@
from pathlib import Path
from typing import TYPE_CHECKING

import git

from gitingest.config import DEFAULT_TIMEOUT
from gitingest.utils.git_utils import (
check_repo_exists,
checkout_partial_clone,
create_git_auth_header,
create_git_command,
create_git_repo,
ensure_git_installed,
git_auth_context,
is_github_host,
resolve_commit,
run_command,
)
from gitingest.utils.logging_config import get_logger
from gitingest.utils.os_utils import ensure_directory_exists_or_create
Expand Down Expand Up @@ -46,6 +47,8 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
------
ValueError
If the repository is not found, if the provided URL is invalid, or if the token format is invalid.
RuntimeError
If Git operations fail during the cloning process.

"""
# Extract and validate query parameters
Expand Down Expand Up @@ -83,41 +86,91 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
commit = await resolve_commit(config, token=token)
logger.debug("Resolved commit", extra={"commit": commit})

clone_cmd = ["git"]
if token and is_github_host(url):
clone_cmd += ["-c", create_git_auth_header(token, url=url)]

clone_cmd += ["clone", "--single-branch", "--no-checkout", "--depth=1"]
if partial_clone:
clone_cmd += ["--filter=blob:none", "--sparse"]

clone_cmd += [url, local_path]

# Clone the repository
logger.info("Executing git clone command", extra={"command": " ".join([*clone_cmd[:-1], "<url>", local_path])})
await run_command(*clone_cmd)
logger.info("Git clone completed successfully")
# Clone the repository using GitPython with proper authentication
logger.info("Executing git clone operation", extra={"url": "<redacted>", "local_path": local_path})
try:
clone_kwargs = {
"single_branch": True,
"no_checkout": True,
"depth": 1,
}

with git_auth_context(url, token) as (git_cmd, auth_url):
if partial_clone:
# For partial clones, use git.Git() with filter and sparse options
cmd_args = ["--single-branch", "--no-checkout", "--depth=1"]
cmd_args.extend(["--filter=blob:none", "--sparse"])
cmd_args.extend([auth_url, local_path])
git_cmd.clone(*cmd_args)
elif token and is_github_host(url):
# For authenticated GitHub repos, use git_cmd with auth URL
cmd_args = ["--single-branch", "--no-checkout", "--depth=1", auth_url, local_path]
git_cmd.clone(*cmd_args)
else:
# For non-authenticated repos, use the standard GitPython method
git.Repo.clone_from(url, local_path, **clone_kwargs)

logger.info("Git clone completed successfully")
except git.GitCommandError as exc:
msg = f"Git clone failed: {exc}"
raise RuntimeError(msg) from exc

# Checkout the subpath if it is a partial clone
if partial_clone:
logger.info("Setting up partial clone for subpath", extra={"subpath": config.subpath})
await checkout_partial_clone(config, token=token)
logger.debug("Partial clone setup completed")

git = create_git_command(["git"], local_path, url, token)
# Perform post-clone operations
await _perform_post_clone_operations(config, local_path, url, token, commit)

# Ensure the commit is locally available
logger.debug("Fetching specific commit", extra={"commit": commit})
await run_command(*git, "fetch", "--depth=1", "origin", commit)
logger.info("Git clone operation completed successfully", extra={"local_path": local_path})

# Write the work-tree at that commit
logger.info("Checking out commit", extra={"commit": commit})
await run_command(*git, "checkout", commit)

# Update submodules
if config.include_submodules:
logger.info("Updating submodules")
await run_command(*git, "submodule", "update", "--init", "--recursive", "--depth=1")
logger.debug("Submodules updated successfully")
async def _perform_post_clone_operations(
    config: CloneConfig,
    local_path: str,
    url: str,
    token: str | None,
    commit: str,
) -> None:
    """Perform post-clone operations like fetching, checkout, and submodule updates.

    The GitPython calls used here are synchronous, so the whole sequence is
    executed in the event loop's default executor. This keeps the coroutine
    from stalling other tasks while Git talks to the network.

    Parameters
    ----------
    config : CloneConfig
        The configuration for cloning the repository.
    local_path : str
        The local path where the repository was cloned.
    url : str
        The repository URL.
    token : str | None
        GitHub personal access token (PAT) for accessing private repositories.
    commit : str
        The commit SHA to checkout.

    Raises
    ------
    RuntimeError
        If any Git operation fails.

    """
    # Function-scope import so this block does not depend on the module's import list.
    import asyncio

    def _run_git_steps() -> None:
        """Run fetch, checkout and the optional submodule update, in that order."""
        repo = create_git_repo(local_path, url, token)

        # Ensure the commit is locally available
        logger.debug("Fetching specific commit", extra={"commit": commit})
        repo.git.fetch("--depth=1", "origin", commit)

        # Write the work-tree at that commit
        logger.info("Checking out commit", extra={"commit": commit})
        repo.git.checkout(commit)

        # Update submodules
        if config.include_submodules:
            logger.info("Updating submodules")
            repo.git.submodule("update", "--init", "--recursive", "--depth=1")
            logger.debug("Submodules updated successfully")

    try:
        # run_in_executor (rather than asyncio.to_thread) keeps Python 3.8 compatibility.
        # Exceptions raised in the worker thread are re-raised here when awaited.
        await asyncio.get_running_loop().run_in_executor(None, _run_git_steps)
    except git.GitCommandError as exc:
        msg = f"Git operation failed: {exc}"
        raise RuntimeError(msg) from exc
Loading
Loading