Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/deploy-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,9 @@ jobs:
comment-tag: 'pr-preview'
create-if-not-exists: 'true'
message: |
⚙️ Preview environment for PR #${{ env.PR_ID }} is available at:
https://pr-${{ env.PR_ID }}.${{ env.APP_NAME }}.coderamp.dev/
🌐 [Preview environment](https://pr-${{ env.PR_ID }}.${{ env.APP_NAME }}.coderamp.dev/) for PR #${{ env.PR_ID }}

📊 [Log viewer](https://app.datadoghq.eu/logs?query=kube_namespace%3Aprs-gitingest%20version%3Apr-${{ env.PR_ID }})

remove-pr-env:
if: >-
Expand Down
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ repos:
click>=8.0.0,
'fastapi[standard]>=0.109.1',
httpx,
loguru>=0.7.0,
pathspec>=0.12.1,
prometheus-client,
pydantic,
Expand All @@ -144,6 +145,7 @@ repos:
click>=8.0.0,
'fastapi[standard]>=0.109.1',
httpx,
loguru>=0.7.0,
pathspec>=0.12.1,
prometheus-client,
pydantic,
Expand Down
3 changes: 1 addition & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,7 @@ If you ever get stuck, reach out on [Discord](https://discord.com/invite/zerRaGK
9. **Run the local server** to sanity-check:

```bash
cd src
uvicorn server.main:app
python -m server
```

Open [http://localhost:8000](http://localhost:8000) to confirm everything works.
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,4 @@ USER appuser

EXPOSE 8000
EXPOSE 9090
CMD ["python", "-m", "uvicorn", "server.main:app", "--host", "0.0.0.0", "--port", "8000"]
CMD ["python", "-m", "server"]
89 changes: 47 additions & 42 deletions compose.yml
Original file line number Diff line number Diff line change
@@ -1,27 +1,45 @@
# Common base configuration for all services
x-base-environment: &base-environment
# Python Configuration
PYTHONUNBUFFERED: "1"
PYTHONDONTWRITEBYTECODE: "1"
# Host Configuration
ALLOWED_HOSTS: ${ALLOWED_HOSTS:-gitingest.com,*.gitingest.com,localhost,127.0.0.1}
# Metrics Configuration
GITINGEST_METRICS_ENABLED: ${GITINGEST_METRICS_ENABLED:-true}
GITINGEST_METRICS_HOST: ${GITINGEST_METRICS_HOST:-0.0.0.0}
GITINGEST_METRICS_PORT: ${GITINGEST_METRICS_PORT:-9090}
# Sentry Configuration
GITINGEST_SENTRY_ENABLED: ${GITINGEST_SENTRY_ENABLED:-false}
GITINGEST_SENTRY_DSN: ${GITINGEST_SENTRY_DSN:-}
GITINGEST_SENTRY_TRACES_SAMPLE_RATE: ${GITINGEST_SENTRY_TRACES_SAMPLE_RATE:-1.0}
GITINGEST_SENTRY_PROFILE_SESSION_SAMPLE_RATE: ${GITINGEST_SENTRY_PROFILE_SESSION_SAMPLE_RATE:-1.0}
GITINGEST_SENTRY_PROFILE_LIFECYCLE: ${GITINGEST_SENTRY_PROFILE_LIFECYCLE:-trace}
GITINGEST_SENTRY_SEND_DEFAULT_PII: ${GITINGEST_SENTRY_SEND_DEFAULT_PII:-true}

x-prod-environment: &prod-environment
GITINGEST_SENTRY_ENVIRONMENT: ${GITINGEST_SENTRY_ENVIRONMENT:-production}

x-dev-environment: &dev-environment
DEBUG: "true"
LOG_LEVEL: "debug"
RELOAD: "true"
GITINGEST_SENTRY_ENVIRONMENT: ${GITINGEST_SENTRY_ENVIRONMENT:-development}
# S3 Configuration for development
S3_ENABLED: "true"
S3_ENDPOINT: http://minio:9000
S3_ACCESS_KEY: ${S3_ACCESS_KEY:-gitingest}
S3_SECRET_KEY: ${S3_SECRET_KEY:-gitingest123}
S3_BUCKET_NAME: ${S3_BUCKET_NAME:-gitingest-bucket}
S3_REGION: ${S3_REGION:-us-east-1}
S3_DIRECTORY_PREFIX: ${S3_DIRECTORY_PREFIX:-dev}
S3_ALIAS_HOST: ${S3_ALIAS_HOST:-http://127.0.0.1:9000/${S3_BUCKET_NAME:-gitingest-bucket}}

x-app-base: &app-base
ports:
- "${APP_WEB_BIND:-8000}:8000" # Main application port
- "${GITINGEST_METRICS_HOST:-127.0.0.1}:${GITINGEST_METRICS_PORT:-9090}:9090" # Metrics port
environment:
# Python Configuration
- PYTHONUNBUFFERED=1
- PYTHONDONTWRITEBYTECODE=1
# Host Configuration
- ALLOWED_HOSTS=${ALLOWED_HOSTS:-gitingest.com,*.gitingest.com,localhost,127.0.0.1}
# Metrics Configuration
- GITINGEST_METRICS_ENABLED=${GITINGEST_METRICS_ENABLED:-true}
- GITINGEST_METRICS_HOST=${GITINGEST_METRICS_HOST:-127.0.0.1}
- GITINGEST_METRICS_PORT=${GITINGEST_METRICS_PORT:-9090}
# Sentry Configuration
- GITINGEST_SENTRY_ENABLED=${GITINGEST_SENTRY_ENABLED:-false}
- GITINGEST_SENTRY_DSN=${GITINGEST_SENTRY_DSN:-}
- GITINGEST_SENTRY_TRACES_SAMPLE_RATE=${GITINGEST_SENTRY_TRACES_SAMPLE_RATE:-1.0}
- GITINGEST_SENTRY_PROFILE_SESSION_SAMPLE_RATE=${GITINGEST_SENTRY_PROFILE_SESSION_SAMPLE_RATE:-1.0}
- GITINGEST_SENTRY_PROFILE_LIFECYCLE=${GITINGEST_SENTRY_PROFILE_LIFECYCLE:-trace}
- GITINGEST_SENTRY_SEND_DEFAULT_PII=${GITINGEST_SENTRY_SEND_DEFAULT_PII:-true}
user: "1000:1000"
command: ["python", "-m", "uvicorn", "server.main:app", "--host", "0.0.0.0", "--port", "8000"]
command: ["python", "-m", "server"]

services:
# Production service configuration
Expand All @@ -31,7 +49,7 @@ services:
profiles:
- prod
environment:
- GITINGEST_SENTRY_ENVIRONMENT=${GITINGEST_SENTRY_ENVIRONMENT:-production}
<<: [*base-environment, *prod-environment]
restart: unless-stopped

# Development service configuration
Expand All @@ -43,24 +61,12 @@ services:
profiles:
- dev
environment:
- DEBUG=true
- GITINGEST_SENTRY_ENVIRONMENT=${GITINGEST_SENTRY_ENVIRONMENT:-development}
# S3 Configuration
- S3_ENABLED=true
- S3_ENDPOINT=http://minio:9000
- S3_ACCESS_KEY=${S3_ACCESS_KEY:-gitingest}
- S3_SECRET_KEY=${S3_SECRET_KEY:-gitingest123}
# Use lowercase bucket name to ensure compatibility with MinIO
- S3_BUCKET_NAME=${S3_BUCKET_NAME:-gitingest-bucket}
- S3_REGION=${S3_REGION:-us-east-1}
- S3_DIRECTORY_PREFIX=${S3_DIRECTORY_PREFIX:-dev}
# Public URL for S3 resources
- S3_ALIAS_HOST=${S3_ALIAS_HOST:-http://127.0.0.1:9000/${S3_BUCKET_NAME:-gitingest-bucket}}
<<: [*base-environment, *dev-environment]
volumes:
# Mount source code for live development
- ./src:/app:ro
# Use --reload flag for hot reloading during development
command: ["python", "-m", "uvicorn", "server.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
command: ["python", "-m", "server"]
depends_on:
minio-setup:
condition: service_completed_successfully
Expand All @@ -73,9 +79,9 @@ services:
ports:
- "9000:9000" # API port
- "9001:9001" # Console port
environment:
- MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin}
environment: &minio-environment
MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin}
MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin}
volumes:
- minio-data:/data
command: server /data --console-address ":9001"
Expand All @@ -96,11 +102,10 @@ services:
minio:
condition: service_healthy
environment:
- MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin}
- S3_ACCESS_KEY=${S3_ACCESS_KEY:-gitingest}
- S3_SECRET_KEY=${S3_SECRET_KEY:-gitingest123}
- S3_BUCKET_NAME=${S3_BUCKET_NAME:-gitingest-bucket}
<<: *minio-environment
S3_ACCESS_KEY: ${S3_ACCESS_KEY:-gitingest}
S3_SECRET_KEY: ${S3_SECRET_KEY:-gitingest123}
S3_BUCKET_NAME: ${S3_BUCKET_NAME:-gitingest-bucket}
volumes:
- ./.docker/minio/setup.sh:/setup.sh:ro
entrypoint: sh
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ requires-python = ">= 3.8"
dependencies = [
"click>=8.0.0",
"httpx",
"loguru>=0.7.0",
"pathspec>=0.12.1",
"pydantic",
"python-dotenv",
Expand Down Expand Up @@ -96,7 +97,6 @@ ignore = [ # https://docs.astral.sh/ruff/rules/...

# TODO: fix the following issues:
"TD003", # missing-todo-link, TODO: add issue links
"T201", # print, TODO: replace with logging
"S108", # hardcoded-temp-file, TODO: replace with tempfile
"BLE001", # blind-except, TODO: replace with specific exceptions
"FAST003", # fast-api-unused-path-parameter, TODO: fix
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ boto3>=1.28.0 # AWS SDK for S3 support
click>=8.0.0
fastapi[standard]>=0.109.1 # Minimum version: earlier releases are vulnerable to https://osv.dev/vulnerability/PYSEC-2024-38
httpx
loguru>=0.7.0
pathspec>=0.12.1
prometheus-client
pydantic
Expand Down
6 changes: 6 additions & 0 deletions src/gitingest/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
from gitingest.config import MAX_FILE_SIZE, OUTPUT_FILE_NAME
from gitingest.entrypoint import ingest_async

# Import logging configuration first to intercept all logging
from gitingest.utils.logging_config import get_logger

# Initialize logger for this module
logger = get_logger(__name__)


class _CLIArgs(TypedDict):
source: str
Expand Down
35 changes: 35 additions & 0 deletions src/gitingest/clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,16 @@
resolve_commit,
run_command,
)
from gitingest.utils.logging_config import get_logger
from gitingest.utils.os_utils import ensure_directory_exists_or_create
from gitingest.utils.timeout_wrapper import async_timeout

if TYPE_CHECKING:
from gitingest.schemas import CloneConfig

# Initialize logger for this module
logger = get_logger(__name__)


@async_timeout(DEFAULT_TIMEOUT)
async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
Expand Down Expand Up @@ -49,14 +53,35 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
local_path: str = config.local_path
partial_clone: bool = config.subpath != "/"

logger.info(
"Starting git clone operation",
extra={
"url": url,
"local_path": local_path,
"partial_clone": partial_clone,
"subpath": config.subpath,
"branch": config.branch,
"tag": config.tag,
"commit": config.commit,
"include_submodules": config.include_submodules,
},
)

logger.debug("Ensuring git is installed")
await ensure_git_installed()

logger.debug("Creating local directory", extra={"parent_path": str(Path(local_path).parent)})
await ensure_directory_exists_or_create(Path(local_path).parent)

logger.debug("Checking if repository exists", extra={"url": url})
if not await check_repo_exists(url, token=token):
logger.error("Repository not found", extra={"url": url})
msg = "Repository not found. Make sure it is public or that you have provided a valid token."
raise ValueError(msg)

logger.debug("Resolving commit reference")
commit = await resolve_commit(config, token=token)
logger.debug("Resolved commit", extra={"commit": commit})

clone_cmd = ["git"]
if token and is_github_host(url):
Expand All @@ -69,20 +94,30 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
clone_cmd += [url, local_path]

# Clone the repository
logger.info("Executing git clone command", extra={"command": " ".join([*clone_cmd[:-1], "<url>", local_path])})
await run_command(*clone_cmd)
logger.info("Git clone completed successfully")

# Checkout the subpath if it is a partial clone
if partial_clone:
logger.info("Setting up partial clone for subpath", extra={"subpath": config.subpath})
await checkout_partial_clone(config, token=token)
logger.debug("Partial clone setup completed")

git = create_git_command(["git"], local_path, url, token)

# Ensure the commit is locally available
logger.debug("Fetching specific commit", extra={"commit": commit})
await run_command(*git, "fetch", "--depth=1", "origin", commit)

# Write the work-tree at that commit
logger.info("Checking out commit", extra={"commit": commit})
await run_command(*git, "checkout", commit)

# Update submodules
if config.include_submodules:
logger.info("Updating submodules")
await run_command(*git, "submodule", "update", "--init", "--recursive", "--depth=1")
logger.debug("Submodules updated successfully")

logger.info("Git clone operation completed successfully", extra={"local_path": local_path})
Loading
Loading