Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ x-prod-environment: &prod-environment

x-dev-environment: &dev-environment
DEBUG: "true"
LOG_LEVEL: "debug"
LOG_LEVEL: "DEBUG"
RELOAD: "true"
GITINGEST_SENTRY_ENVIRONMENT: ${GITINGEST_SENTRY_ENVIRONMENT:-development}
# S3 Configuration for development
Expand Down
2 changes: 2 additions & 0 deletions src/server/query_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ async def _check_s3_cache(
user_name=cast("str", query.user_name),
repo_name=cast("str", query.repo_name),
commit=query.commit,
subpath=query.subpath,
include_patterns=query.include_patterns,
ignore_patterns=query.ignore_patterns,
)
Expand Down Expand Up @@ -168,6 +169,7 @@ def _store_digest_content(
user_name=cast("str", query.user_name),
repo_name=cast("str", query.repo_name),
commit=query.commit,
subpath=query.subpath,
include_patterns=query.include_patterns,
ignore_patterns=query.ignore_patterns,
)
Expand Down
10 changes: 7 additions & 3 deletions src/server/s3_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,15 @@ def generate_s3_file_path(
user_name: str,
repo_name: str,
commit: str,
subpath: str,
include_patterns: set[str] | None,
ignore_patterns: set[str],
) -> str:
"""Generate S3 file path with proper naming convention.

The file path is formatted as:
[<S3_DIRECTORY_PREFIX>/]ingest/<provider>/<repo-owner>/<repo-name>/<commit-ID>/
<exclude&include hash>/<owner>-<repo-name>.txt
<exclude&include hash>/<owner>-<repo-name>-<subpath-hash>.txt

If S3_DIRECTORY_PREFIX environment variable is set, it will be prefixed to the path.
The commit-ID is always included in the URL.
Expand All @@ -85,6 +86,8 @@ def generate_s3_file_path(
Repository name.
commit : str
Commit hash.
subpath : str
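Subpath within the repository. A truncated SHA-256 hash of this value is
appended to the file name so that different subpaths map to different files.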
include_patterns : set[str] | None
Set of patterns specifying which files to include.
ignore_patterns : set[str]
Expand All @@ -111,9 +114,10 @@ def generate_s3_file_path(
patterns_str = f"include:{sorted(include_patterns) if include_patterns else []}"
patterns_str += f"exclude:{sorted(ignore_patterns)}"
patterns_hash = hashlib.sha256(patterns_str.encode()).hexdigest()[:16]
subpath_hash = hashlib.sha256(subpath.encode()).hexdigest()[:16]

# Build the base path using hostname directly
base_path = f"ingest/{hostname}/{user_name}/{repo_name}/{commit}/{patterns_hash}/{user_name}-{repo_name}.txt"
file_name = f"{user_name}-{repo_name}-{subpath_hash}.txt"
base_path = f"ingest/{hostname}/{user_name}/{repo_name}/{commit}/{patterns_hash}/{file_name}"

# Check for S3_DIRECTORY_PREFIX environment variable
s3_directory_prefix = os.getenv("S3_DIRECTORY_PREFIX")
Expand Down
Loading