-
Notifications
You must be signed in to change notification settings - Fork 987
Refactor/gitingest structure #66
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
cyclotruc
merged 11 commits into
coderamp-labs:main
from
filipchristiansen:refactor/gitingest-structure
Dec 29, 2024
Merged
Changes from 9 commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
32808dd
chore: add pre-commit config, type hints, badges, and lint codebase
filipchristiansen d0a320f
Resolve error and fix remaining type hint violations
filipchristiansen 6268899
Fix absolute imports and mock paths in test_clone.py to resolve test …
filipchristiansen 2655278
Replace deprecated 'dotenv' with 'python-dotenv' in requirements.txt …
filipchristiansen 39f30a9
Merge branch 'main' into chore/precommit-lint-typehints
cyclotruc 086aba0
Refactor and enhance gitingest module for improved clarity, maintaina…
filipchristiansen 757a380
Merge branch 'main' into refactor/gitingest-structure
filipchristiansen 075b454
resolve merge conflicts
filipchristiansen 70ff34e
Refactor parse_url and parse_query for improved clarity and maintaina…
filipchristiansen cdf0c65
Update src/gitingest/ingest.py
filipchristiansen 1724720
Update src/process_query.py
filipchristiansen File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,79 +1,144 @@ | ||
| import asyncio | ||
| from typing import Any, Dict, Tuple | ||
| from dataclasses import dataclass | ||
| from typing import Optional, Tuple | ||
|
|
||
| from gitingest.utils import async_timeout | ||
| from gitingest.utils import AsyncTimeoutError, async_timeout | ||
|
|
||
| CLONE_TIMEOUT = 20 | ||
|
|
||
|
|
||
| @dataclass | ||
| class CloneConfig: | ||
| url: str | ||
| local_path: str | ||
| commit: Optional[str] = None | ||
| branch: Optional[str] = None | ||
|
|
||
|
|
||
| async def check_repo_exists(url: str) -> bool: | ||
| """ | ||
| Check if a repository exists at the given URL using an HTTP HEAD request. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| url : str | ||
| The URL of the repository. | ||
|
|
||
| Returns | ||
| ------- | ||
| bool | ||
| True if the repository exists, False otherwise. | ||
| """ | ||
| proc = await asyncio.create_subprocess_exec( | ||
| "curl", | ||
| "-I", | ||
| url, | ||
| stdout=asyncio.subprocess.PIPE, | ||
| stderr=asyncio.subprocess.PIPE, | ||
| ) | ||
| stdout, stderr = await proc.communicate() | ||
| stdout, _ = await proc.communicate() | ||
| if proc.returncode != 0: | ||
| return False | ||
| # Check if stdout contains "404" status code | ||
| stdout_str = stdout.decode() | ||
| return "HTTP/1.1 404" not in stdout_str and "HTTP/2 404" not in stdout_str | ||
|
|
||
|
|
||
| @async_timeout(CLONE_TIMEOUT) | ||
| async def clone_repo(query: Dict[str, Any]) -> Tuple[bytes, bytes]: | ||
| if not await check_repo_exists(query['url']): | ||
| raise ValueError("Repository not found, make sure it is public") | ||
| async def run_git_command(*args: str) -> Tuple[bytes, bytes]: | ||
| """ | ||
| Executes a git command asynchronously and captures its output. | ||
|
|
||
| if query['commit']: | ||
| proc = await asyncio.create_subprocess_exec( | ||
| "git", | ||
| "clone", | ||
| "--single-branch", | ||
| query['url'], | ||
| query['local_path'], | ||
| stdout=asyncio.subprocess.PIPE, | ||
| stderr=asyncio.subprocess.PIPE, | ||
| ) | ||
| stdout, stderr = await proc.communicate() | ||
|
|
||
| proc = await asyncio.create_subprocess_exec( | ||
| "git", | ||
| "-C", | ||
| query['local_path'], | ||
| "checkout", | ||
| query['branch'], | ||
| stdout=asyncio.subprocess.PIPE, | ||
| stderr=asyncio.subprocess.PIPE, | ||
| ) | ||
| stdout, stderr = await proc.communicate() | ||
| elif query['branch'] != 'main' and query['branch'] != 'master' and query['branch']: | ||
| proc = await asyncio.create_subprocess_exec( | ||
| "git", | ||
| "clone", | ||
| "--depth=1", | ||
| "--single-branch", | ||
| "--branch", | ||
| query['branch'], | ||
| query['url'], | ||
| query['local_path'], | ||
| stdout=asyncio.subprocess.PIPE, | ||
| stderr=asyncio.subprocess.PIPE, | ||
| ) | ||
| else: | ||
| proc = await asyncio.create_subprocess_exec( | ||
| "git", | ||
| "clone", | ||
| "--depth=1", | ||
| "--single-branch", | ||
| query['url'], | ||
| query['local_path'], | ||
| stdout=asyncio.subprocess.PIPE, | ||
| stderr=asyncio.subprocess.PIPE, | ||
| ) | ||
| Parameters | ||
| ---------- | ||
| *args : str | ||
| The git command and its arguments to execute. | ||
|
|
||
| Returns | ||
| ------- | ||
| Tuple[bytes, bytes] | ||
| A tuple containing the stdout and stderr of the git command. | ||
|
|
||
| Raises | ||
| ------ | ||
| RuntimeError | ||
| If the git command exits with a non-zero status. | ||
| """ | ||
| proc = await asyncio.create_subprocess_exec( | ||
| *args, | ||
| stdout=asyncio.subprocess.PIPE, | ||
| stderr=asyncio.subprocess.PIPE, | ||
| ) | ||
| stdout, stderr = await proc.communicate() | ||
| if proc.returncode != 0: | ||
| error_message = stderr.decode().strip() | ||
| raise RuntimeError(f"Git command failed: {' '.join(args)}\nError: {error_message}") | ||
|
|
||
| return stdout, stderr | ||
|
|
||
|
|
||
| @async_timeout(CLONE_TIMEOUT) | ||
| async def clone_repo(config: CloneConfig) -> Tuple[bytes, bytes]: | ||
| """ | ||
| Clones a repository to a local path based on the provided clone configuration. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| config : CloneConfig | ||
| A configuration object with the following attributes: | ||
| - url (str): The URL of the repository. | ||
| - local_path (str): The local path to clone the repository to. | ||
| - commit (Optional[str]): The specific commit hash to checkout. | ||
| - branch (Optional[str]): The branch to clone. Ignored when a commit is given. If omitted, or if it is 'main' or 'master' (compared case-insensitively), the remote's default branch is cloned. | ||
|
|
||
| Returns | ||
| ------- | ||
| Tuple[bytes, bytes] | ||
| A tuple containing the stdout and stderr of the git commands executed. | ||
|
|
||
| Raises | ||
| ------ | ||
| ValueError | ||
| If the repository does not exist or if 'url' or 'local_path' is empty. | ||
| RuntimeError | ||
| If any git command fails during execution. | ||
| AsyncTimeoutError | ||
| If the cloning process exceeds the specified timeout. | ||
| """ | ||
| # Extract and validate the configuration values | ||
| url: str = config.url | ||
| local_path: str = config.local_path | ||
| commit: Optional[str] = config.commit | ||
| branch: Optional[str] = config.branch | ||
|
|
||
| if not url: | ||
| raise ValueError("The 'url' parameter is required.") | ||
|
|
||
| if not local_path: | ||
| raise ValueError("The 'local_path' parameter is required.") | ||
|
|
||
| # Check if the repository exists | ||
| if not await check_repo_exists(url): | ||
| raise ValueError("Repository not found, make sure it is public") | ||
|
|
||
| try: | ||
| if commit: | ||
| # Scenario 1: Clone and checkout a specific commit | ||
| # Clone without --depth so the full history of the cloned branch is available for checkout | ||
| clone_cmd = ["git", "clone", "--single-branch", url, local_path] | ||
| await run_git_command(*clone_cmd) | ||
|
|
||
| # Checkout the specific commit | ||
| checkout_cmd = ["git", "-C", local_path, "checkout", commit] | ||
| return await run_git_command(*checkout_cmd) | ||
|
|
||
| if branch and branch.lower() not in ('main', 'master'): | ||
| # Scenario 2: Clone a specific branch with shallow depth | ||
| clone_cmd = ["git", "clone", "--depth=1", "--single-branch", "--branch", branch, url, local_path] | ||
| return await run_git_command(*clone_cmd) | ||
|
|
||
| # Scenario 3: Clone the default branch with shallow depth | ||
| clone_cmd = ["git", "clone", "--depth=1", "--single-branch", url, local_path] | ||
| return await run_git_command(*clone_cmd) | ||
|
|
||
| except (RuntimeError, asyncio.TimeoutError, AsyncTimeoutError): | ||
| raise # Re-raise the exception |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.