@dataclass
class CloneConfig:
    """
    Settings describing a single repository clone operation.

    Attributes
    ----------
    url : str
        The URL of the repository to clone.
    local_path : str
        The local path to clone the repository to.
    commit : Optional[str]
        The commit hash to check out after cloning; ``None`` when unset.
    branch : Optional[str]
        The branch to clone; ``None`` when unset.
    """

    # Required fields come first so positional construction keeps working.
    url: str
    local_path: str

    # Optional refinements of what to check out.
    commit: Optional[str] = None
    branch: Optional[str] = None
async def run_git_command(*args: str) -> Tuple[bytes, bytes]:
    """
    Execute a command asynchronously and capture its output.

    Parameters
    ----------
    *args : str
        The program (normally ``git``) followed by its arguments.

    Returns
    -------
    Tuple[bytes, bytes]
        A tuple containing the stdout and stderr of the command.

    Raises
    ------
    RuntimeError
        If the command exits with a non-zero status.
    """
    proc = await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        stdout, stderr = await proc.communicate()
    finally:
        # If the awaiting task was cancelled (e.g. by a timeout wrapper) the child
        # would otherwise keep running in the background; terminate and reap it.
        if proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # The process exited between the check and the kill.
            await proc.wait()

    if proc.returncode != 0:
        # Decode leniently: a non-UTF-8 byte in stderr must not hide the real failure.
        error_message = stderr.decode(errors="replace").strip()
        raise RuntimeError(f"Git command failed: {' '.join(args)}\nError: {error_message}")

    return stdout, stderr


@async_timeout(CLONE_TIMEOUT)
async def clone_repo(config: CloneConfig) -> Tuple[bytes, bytes]:
    """
    Clone a repository to a local path as described by ``config``.

    Parameters
    ----------
    config : CloneConfig
        The clone settings:
            - url (str): The URL of the repository.
            - local_path (str): The local path to clone the repository to.
            - commit (Optional[str]): The specific commit hash to check out.
            - branch (Optional[str]): The branch to clone. When empty, 'main' or
              'master', no ``--branch`` is passed and the remote's default branch
              is cloned.

    Returns
    -------
    Tuple[bytes, bytes]
        The stdout and stderr of the last git command executed.

    Raises
    ------
    ValueError
        If ``url`` or ``local_path`` is empty, or the repository does not exist.
    RuntimeError
        If any git command fails during execution.
    AsyncTimeoutError
        Expected from the ``async_timeout`` decorator when cloning exceeds
        ``CLONE_TIMEOUT`` seconds (decorator body not visible here - confirm).
    """
    url = config.url
    local_path = config.local_path
    commit = config.commit
    branch = config.branch

    if not url:
        raise ValueError("The 'url' parameter is required.")

    if not local_path:
        raise ValueError("The 'local_path' parameter is required.")

    if not await check_repo_exists(url):
        raise ValueError("Repository not found, make sure it is public")

    if commit:
        # Scenario 1: a full-history clone is needed so the commit can be checked out.
        await run_git_command("git", "clone", "--single-branch", url, local_path)
        return await run_git_command("git", "-C", local_path, "checkout", commit)

    # Branch names are case-sensitive in git, so compare exactly: a branch called
    # "Main" is not the conventional default and must be requested explicitly.
    if branch and branch not in ("main", "master"):
        # Scenario 2: shallow clone of an explicitly requested branch.
        return await run_git_command(
            "git", "clone", "--depth=1", "--single-branch", "--branch", branch, url, local_path
        )

    # Scenario 3: shallow clone of the remote's default branch.
    return await run_git_command("git", "clone", "--depth=1", "--single-branch", url, local_path)
include_patterns: Union[List[str], str, None] = None, exclude_patterns: Union[List[str], str, None] = None, output: Optional[str] = None, @@ -25,7 +25,16 @@ def ingest( ignore_patterns=exclude_patterns, ) if query['url']: - clone_result = clone_repo(query) + + # Extract relevant fields for CloneConfig + clone_config = CloneConfig( + url=query["url"], + local_path=query['local_path'], + commit=query.get('commit'), + branch=query.get('branch'), + ) + clone_result = clone_repo(clone_config) + if inspect.iscoroutine(clone_result): asyncio.run(clone_result) else: diff --git a/src/gitingest/ingest_from_query.py b/src/gitingest/ingest_from_query.py index 0080c25b..a9130a39 100644 --- a/src/gitingest/ingest_from_query.py +++ b/src/gitingest/ingest_from_query.py @@ -278,7 +278,7 @@ def create_file_content_string(files: List[Dict[str, Any]]) -> str: return output -def create_summary_string(query: Dict[str, Any], nodes: Dict[str, Any], files: List[Dict[str, Any]]) -> str: +def create_summary_string(query: Dict[str, Any], nodes: Dict[str, Any]) -> str: """Creates a summary string with file counts and content size.""" if "user_name" in query: summary = f"Repository: {query['user_name']}/{query['repo_name']}\n" @@ -297,12 +297,7 @@ def create_summary_string(query: Dict[str, Any], nodes: Dict[str, Any], files: L return summary -def create_tree_structure( - query: Dict[str, Any], - node: Dict[str, Any], - prefix: str = "", - is_last: bool = True, -) -> str: +def create_tree_structure(query: Dict[str, Any], node: Dict[str, Any], prefix: str = "", is_last: bool = True) -> str: """Creates a tree-like string representation of the file structure.""" tree = "" @@ -386,7 +381,7 @@ def ingest_directory(path: str, query: Dict[str, Any]) -> Tuple[str, str, str]: if not nodes: raise ValueError(f"No files found in {path}") files = extract_files_content(query=query, node=nodes, max_file_size=query['max_file_size']) - summary = create_summary_string(query, nodes, files) + summary = 
create_summary_string(query, nodes) tree = "Directory structure:\n" + create_tree_structure(query, nodes) files_content = create_file_content_string(files) diff --git a/src/gitingest/parse_query.py b/src/gitingest/parse_query.py index fcf8186b..fe7b01a9 100644 --- a/src/gitingest/parse_query.py +++ b/src/gitingest/parse_query.py @@ -1,30 +1,19 @@ import os +import string import uuid - -from urllib.parse import unquote from typing import Any, Dict, List, Optional, Union +from urllib.parse import unquote + from gitingest.ignore_patterns import DEFAULT_IGNORE_PATTERNS TMP_BASE_PATH = "../tmp" +HEX_DIGITS = set(string.hexdigits) def parse_url(url: str) -> Dict[str, Any]: - parsed = { - "user_name": None, - "repo_name": None, - "type": None, - "branch": None, - "commit": None, - "subpath": "/", - "local_path": None, - "url": None, - "slug": None, - "id": None, - } - url = url.split(" ")[0] url = unquote(url) # Decode URL-encoded characters - + if not url.startswith('https://'): url = 'https://' + url @@ -36,43 +25,63 @@ def parse_url(url: str) -> Dict[str, Any]: if len(path_parts) < 2: raise ValueError("Invalid repository URL. 
Please provide a valid Git repository URL.") - parsed["user_name"] = path_parts[0] - parsed["repo_name"] = path_parts[1] - - # Keep original URL format but with decoded components - parsed["url"] = f"https://{domain}/{parsed['user_name']}/{parsed['repo_name']}" - parsed['slug'] = f"{parsed['user_name']}-{parsed['repo_name']}" - parsed["id"] = str(uuid.uuid4()) - parsed["local_path"] = f"{TMP_BASE_PATH}/{parsed['id']}/{parsed['slug']}" - - if len(path_parts) > 3: - - parsed["type"] = path_parts[2] # Usually 'tree' or 'blob' - - # Find the commit hash or reconstruct the branch name - remaining_parts = path_parts[3:] - if remaining_parts[0] and len(remaining_parts[0]) == 40 and all(c in '0123456789abcdefABCDEF' for c in remaining_parts[0]): - parsed["commit"] = remaining_parts[0] - parsed["subpath"] = "/" + "/".join(remaining_parts[1:]) if len(remaining_parts) > 1 else "/" - else: - # Handle branch names with slashes and special characters - for i, part in enumerate(remaining_parts): - if part in ('tree', 'blob'): - # Found another type indicator, everything before this was the branch name - parsed["branch"] = "/".join(remaining_parts[:i]) - parsed["subpath"] = "/" + "/".join(remaining_parts[i+2:]) if len(remaining_parts) > i+2 else "/" - break - else: - # No additional type indicator found, assume everything is part of the branch name - parsed["branch"] = "/".join(remaining_parts) - parsed["subpath"] = "/" + user_name = path_parts[0] + repo_name = path_parts[1] + _id = str(uuid.uuid4()) + slug = f"{user_name}-{repo_name}" + + parsed = { + "user_name": user_name, + "repo_name": repo_name, + "type": None, + "branch": None, + "commit": None, + "subpath": "/", + "local_path": f"{TMP_BASE_PATH}/{_id}/{slug}", + # Keep original URL format but with decoded components + "url": f"https://{domain}/{user_name}/{repo_name}", + "slug": slug, + "id": _id, + } + + if len(path_parts) < 4: + return parsed + + parsed["type"] = path_parts[2] # Usually 'tree' or 'blob' + commit = 
def parse_patterns(pattern: Union[List[str], str]) -> List[str]:
    """
    Split, validate and normalize include/exclude patterns.

    Parameters
    ----------
    pattern : Union[List[str], str]
        A single string or a sequence of strings. Every string may itself hold
        several comma-separated patterns (e.g. ``"*.py, docs/"``).

    Returns
    -------
    List[str]
        The normalized patterns, in input order. Empty entries are dropped.

    Raises
    ------
    ValueError
        If any pattern contains a character outside the allowed set.
    """
    # Accept any sequence of strings (list, tuple, ...), not only ``list``.
    raw_patterns = [pattern] if isinstance(pattern, str) else list(pattern)

    # Split on commas so "a,b" is two patterns rather than one invalid pattern.
    patterns = [piece.strip() for raw in raw_patterns for piece in raw.split(",")]
    patterns = [p for p in patterns if p]

    for p in patterns:
        if not all(c.isalnum() or c in "-_./+*" for c in p):
            raise ValueError(
                f"Pattern '{p}' contains invalid characters. Only alphanumeric characters, dash (-), "
                "underscore (_), dot (.), forward slash (/), plus (+), and asterisk (*) are allowed."
            )

    return [normalize_pattern(p) for p in patterns]


def override_ignore_patterns(ignore_patterns: List[str], include_patterns: List[str]) -> List[str]:
    """
    Remove from ignore_patterns every pattern that is present in include_patterns.

    Parameters
    ----------
    ignore_patterns : List[str]
        The list of patterns to potentially remove from. It is not modified.
    include_patterns : List[str]
        The list of patterns to exclude from ignore_patterns.

    Returns
    -------
    List[str]
        A new list holding the remaining ignore patterns, without duplicates and
        in their original order.
    """
    included = set(include_patterns)
    # dict.fromkeys de-duplicates while keeping first-seen order, so the result is
    # deterministic (a plain set difference yields an arbitrary order).
    return [p for p in dict.fromkeys(ignore_patterns) if p not in included]


def parse_path(path: str) -> Dict[str, Any]:
    """
    Build the query dictionary for a local filesystem path.

    Parameters
    ----------
    path : str
        The local path to ingest.

    Returns
    -------
    Dict[str, Any]
        A dictionary with ``url`` set to None, the absolute ``local_path``, a
        ``slug`` made of the parent directory name and the base name of ``path``,
        a ``subpath`` of "/", and a freshly generated ``id``.
    """
    parent_name = os.path.basename(os.path.dirname(path))
    return {
        "url": None,
        "local_path": os.path.abspath(path),
        "slug": parent_name + "/" + os.path.basename(path),
        "subpath": "/",
        "id": str(uuid.uuid4()),
    }
@pytest.mark.asyncio
async def test_clone_repo_with_commit() -> None:
    """A configured commit triggers a full clone followed by a checkout."""
    clone_config = CloneConfig(
        url='https://github.com/user/repo',
        local_path='/tmp/repo',
        commit='a' * 40,  # Simulating a valid commit hash
        branch='main',
    )

    with patch('gitingest.clone.check_repo_exists', return_value=True) as mock_check:
        with patch('gitingest.clone.run_git_command', new_callable=AsyncMock) as mock_exec:
            # run_git_command is patched directly, so mimic its (stdout, stderr) result.
            mock_exec.return_value = (b'output', b'error')

            await clone_repo(clone_config)

            mock_check.assert_called_once_with(clone_config.url)
            assert mock_exec.call_count == 2  # Clone and checkout calls
            mock_exec.assert_any_call(
                'git', 'clone', '--single-branch', clone_config.url, clone_config.local_path
            )
            mock_exec.assert_any_call(
                'git', '-C', clone_config.local_path, 'checkout', clone_config.commit
            )


@pytest.mark.asyncio
async def test_clone_repo_without_commit() -> None:
    """Without a commit, a single shallow clone of the default branch is issued."""
    clone_config = CloneConfig(
        url='https://github.com/user/repo',
        local_path='/tmp/repo',
        commit=None,
        branch='main',
    )

    with patch('gitingest.clone.check_repo_exists', return_value=True) as mock_check:
        with patch('gitingest.clone.run_git_command', new_callable=AsyncMock) as mock_exec:
            mock_exec.return_value = (b'output', b'error')

            await clone_repo(clone_config)

            mock_check.assert_called_once_with(clone_config.url)
            # Only the clone call; 'main' does not add an explicit --branch.
            mock_exec.assert_called_once_with(
                'git', 'clone', '--depth=1', '--single-branch', clone_config.url, clone_config.local_path
            )


@pytest.mark.asyncio
async def test_clone_repo_nonexistent_repository() -> None:
    """A repository that fails the existence check raises ValueError."""
    clone_config = CloneConfig(
        url='https://github.com/user/nonexistent-repo',
        local_path='/tmp/repo',
        commit=None,
        branch='main',
    )
    with patch('gitingest.clone.check_repo_exists', return_value=False) as mock_check:
        with pytest.raises(ValueError, match="Repository not found"):
            await clone_repo(clone_config)
        mock_check.assert_called_once_with(clone_config.url)
def print_error(url: str, e: Exception, max_file_size: int, pattern_type: str, pattern: str) -> None:
    """
    Print a WARN log line for a failed query.

    Parameters
    ----------
    url : str
        The URL of the query that failed.
    e : Exception
        The exception to report.
    max_file_size : int
        The maximum file size of the query, forwarded to ``print_query``.
    pattern_type : str
        The pattern type of the query, forwarded to ``print_query``.
    pattern : str
        The pattern of the query, forwarded to ``print_query``.
    """
    print(f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<- {Colors.END}", end="")
    print_query(url, max_file_size, pattern_type, pattern)
    print(f" | {Colors.RED}{e}{Colors.END}")


def print_success(url: str, max_file_size: int, pattern_type: str, pattern: str, summary: str) -> None:
    """
    Print an INFO log line for a successful query, ending with the token estimate.

    Parameters
    ----------
    url : str
        The URL of the query that succeeded.
    max_file_size : int
        The maximum file size of the query, forwarded to ``print_query``.
    pattern_type : str
        The pattern type of the query, forwarded to ``print_query``.
    pattern : str
        The pattern of the query, forwarded to ``print_query``.
    summary : str
        The summary text; everything from "tokens:" in its "Estimated tokens:"
        line onwards is printed.
    """
    marker_pos = summary.find("Estimated tokens:")
    if marker_pos == -1:
        # A summary without a token estimate must not make a logging helper raise.
        estimated_tokens = "tokens: unknown"
    else:
        # Skip the word "Estimated " so the output starts at "tokens: ...".
        estimated_tokens = summary[marker_pos + len("Estimated ") :]

    print(f"{Colors.GREEN}INFO{Colors.END}: {Colors.GREEN}<- {Colors.END}", end="")
    print_query(url, max_file_size, pattern_type, pattern)
    print(f" | {Colors.PURPLE}{estimated_tokens}{Colors.END}")