From 246e2a3420ca94136458f01e70119123b31f104f Mon Sep 17 00:00:00 2001 From: ix-56h Date: Sun, 27 Jul 2025 16:21:57 +0200 Subject: [PATCH 1/3] remove logarithm conversion from the backend, handle 422http errors in the front, correctly convert kb to bits before processing the ingestion --- src/server/models.py | 8 ++-- src/server/query_processor.py | 14 +++--- src/server/routers/ingest.py | 6 +-- src/server/routers_utils.py | 2 +- src/server/server_config.py | 4 +- src/server/server_utils.py | 21 +-------- .../templates/components/git_form.jinja | 4 +- src/static/js/utils.js | 44 ++++++++++++++++--- 8 files changed, 57 insertions(+), 46 deletions(-) diff --git a/src/server/models.py b/src/server/models.py index a1aed314..533da611 100644 --- a/src/server/models.py +++ b/src/server/models.py @@ -3,14 +3,16 @@ from __future__ import annotations from enum import Enum -from typing import Union +from typing import TYPE_CHECKING, Union from pydantic import BaseModel, Field, field_validator from gitingest.utils.compat_func import removesuffix +from server.server_config import MAX_FILE_SIZE_KB # needed for type checking (pydantic) -from server.form_types import IntForm, OptStrForm, StrForm # noqa: TC001 (typing-only-first-party-import) +if TYPE_CHECKING: + from server.form_types import IntForm, OptStrForm, StrForm class PatternType(str, Enum): @@ -39,7 +41,7 @@ class IngestRequest(BaseModel): """ input_text: str = Field(..., description="Git repository URL or slug to ingest") - max_file_size: int = Field(..., ge=0, le=500, description="File size slider position (0-500)") + max_file_size: int = Field(..., ge=1, le=MAX_FILE_SIZE_KB, description="File size in KB") pattern_type: PatternType = Field(default=PatternType.EXCLUDE, description="Pattern type for file filtering") pattern: str = Field(default="", description="Glob/regex pattern for file filtering") token: str | None = Field(default=None, description="GitHub PAT for private repositories") diff --git 
a/src/server/query_processor.py b/src/server/query_processor.py index 88d7ff50..ecbb990f 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -13,12 +13,12 @@ from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType from server.s3_utils import generate_s3_file_path, is_s3_enabled, upload_to_s3 from server.server_config import MAX_DISPLAY_SIZE -from server.server_utils import Colors, log_slider_to_size +from server.server_utils import Colors async def process_query( input_text: str, - slider_position: int, + max_file_size: int, pattern_type: PatternType, pattern: str, token: str | None = None, @@ -32,8 +32,8 @@ async def process_query( ---------- input_text : str Input text provided by the user, typically a Git repository URL or slug. - slider_position : int - Position of the slider, representing the maximum file size in the query. + max_file_size : int + Max file size in KB to be included in the digest. pattern_type : PatternType Type of pattern to use (either "include" or "exclude") pattern : str @@ -55,8 +55,6 @@ async def process_query( if token: validate_github_token(token) - max_file_size = log_slider_to_size(slider_position) - try: query = await parse_remote_repo(input_text, token=token) except Exception as exc: @@ -65,7 +63,7 @@ async def process_query( return IngestErrorResponse(error=str(exc)) query.url = cast("str", query.url) - query.max_file_size = max_file_size + query.max_file_size = max_file_size * 1024 # Convert to bits since we currently use KB in higher levels query.ignore_patterns, query.include_patterns = process_patterns( exclude_patterns=pattern if pattern_type == PatternType.EXCLUDE else None, include_patterns=pattern if pattern_type == PatternType.INCLUDE else None, @@ -142,7 +140,7 @@ async def process_query( digest_url=digest_url, tree=tree, content=content, - default_max_file_size=slider_position, + default_max_file_size=max_file_size, pattern_type=pattern_type, 
pattern=pattern, ) diff --git a/src/server/routers/ingest.py b/src/server/routers/ingest.py index 42efefdf..ce9e6512 100644 --- a/src/server/routers/ingest.py +++ b/src/server/routers/ingest.py @@ -11,7 +11,7 @@ from server.models import IngestRequest from server.routers_utils import COMMON_INGEST_RESPONSES, _perform_ingestion from server.s3_utils import is_s3_enabled -from server.server_config import MAX_DISPLAY_SIZE +from server.server_config import DEFAULT_FILE_SIZE_KB from server.server_utils import limiter ingest_counter = Counter("gitingest_ingest_total", "Number of ingests", ["status", "url"]) @@ -58,7 +58,7 @@ async def api_ingest_get( request: Request, # noqa: ARG001 (unused-function-argument) # pylint: disable=unused-argument user: str, repository: str, - max_file_size: int = MAX_DISPLAY_SIZE, + max_file_size: int = DEFAULT_FILE_SIZE_KB, pattern_type: str = "exclude", pattern: str = "", token: str = "", @@ -74,7 +74,7 @@ async def api_ingest_get( - **repository** (`str`): GitHub repository name **Query Parameters** - - **max_file_size** (`int`, optional): Maximum file size to include in the digest (default: 50 KB) + - **max_file_size** (`int`, optional): Maximum file size in KB to include in the digest (default: 5120 KB) - **pattern_type** (`str`, optional): Type of pattern to use ("include" or "exclude", default: "exclude") - **pattern** (`str`, optional): Pattern to include or exclude in the query (default: "") - **token** (`str`, optional): GitHub personal access token for private repositories (default: "") diff --git a/src/server/routers_utils.py b/src/server/routers_utils.py index 83242e26..3eaf0e59 100644 --- a/src/server/routers_utils.py +++ b/src/server/routers_utils.py @@ -33,7 +33,7 @@ async def _perform_ingestion( result = await process_query( input_text=input_text, - slider_position=max_file_size, + max_file_size=max_file_size, pattern_type=pattern_type, pattern=pattern, token=token, diff --git a/src/server/server_config.py 
b/src/server/server_config.py index 0257db8b..d0b51c4d 100644 --- a/src/server/server_config.py +++ b/src/server/server_config.py @@ -10,8 +10,8 @@ DELETE_REPO_AFTER: int = 60 * 60 # In seconds (1 hour) # Slider configuration (if updated, update the logSliderToSize function in src/static/js/utils.js) -MAX_FILE_SIZE_KB: int = 100 * 1024 # 100 MB -MAX_SLIDER_POSITION: int = 500 # Maximum slider position +DEFAULT_FILE_SIZE_KB: int = 5 * 1024 # 5 MB +MAX_FILE_SIZE_KB: int = 100 * 1024 # 100 MB EXAMPLE_REPOS: list[dict[str, str]] = [ {"name": "Gitingest", "url": "https://github.com/coderamp-labs/gitingest"}, diff --git a/src/server/server_utils.py b/src/server/server_utils.py index b0371661..ee6f9eca 100644 --- a/src/server/server_utils.py +++ b/src/server/server_utils.py @@ -1,7 +1,6 @@ """Utility functions for the server.""" import asyncio -import math import shutil import time from contextlib import asynccontextmanager, suppress @@ -15,7 +14,7 @@ from slowapi.util import get_remote_address from gitingest.config import TMP_BASE_PATH -from server.server_config import DELETE_REPO_AFTER, MAX_FILE_SIZE_KB, MAX_SLIDER_POSITION +from server.server_config import DELETE_REPO_AFTER # Initialize a rate limiter limiter = Limiter(key_func=get_remote_address) @@ -161,24 +160,6 @@ def _append_line(path: Path, line: str) -> None: fp.write(f"{line}\n") -def log_slider_to_size(position: int) -> int: - """Convert a slider position to a file size in bytes using a logarithmic scale. - - Parameters - ---------- - position : int - Slider position ranging from 0 to 500. - - Returns - ------- - int - File size in bytes corresponding to the slider position. 
- - """ - maxv = math.log(MAX_FILE_SIZE_KB) - return round(math.exp(maxv * pow(position / MAX_SLIDER_POSITION, 1.5))) * 1024 - - ## Color printing utility class Colors: """ANSI color codes.""" diff --git a/src/server/templates/components/git_form.jinja b/src/server/templates/components/git_form.jinja index 8ea0821f..e2e7c91c 100644 --- a/src/server/templates/components/git_form.jinja +++ b/src/server/templates/components/git_form.jinja @@ -76,12 +76,12 @@ +
diff --git a/src/static/js/utils.js b/src/static/js/utils.js index 6370036b..ce19e95e 100644 --- a/src/static/js/utils.js +++ b/src/static/js/utils.js @@ -126,13 +126,13 @@ function collectFormData(form) { const json_data = {}; const inputText = form.querySelector('[name="input_text"]'); const token = form.querySelector('[name="token"]'); - const slider = document.getElementById('file_size'); + const hiddenInput = document.getElementById('max_file_size_kb'); const patternType = document.getElementById('pattern_type'); const pattern = document.getElementById('pattern'); if (inputText) {json_data.input_text = inputText.value;} if (token) {json_data.token = token.value;} - if (slider) {json_data.max_file_size = slider.value;} + if (hiddenInput) {json_data.max_file_size = hiddenInput.value;} if (patternType) {json_data.pattern_type = patternType.value;} if (pattern) {json_data.pattern = pattern.value;} @@ -206,6 +206,14 @@ function handleSubmit(event, showLoadingSpinner = false) { if (!form) {return;} + // Ensure hidden input is updated before collecting form data + const slider = document.getElementById('file_size'); + const hiddenInput = document.getElementById('max_file_size_kb'); + + if (slider && hiddenInput) { + hiddenInput.value = logSliderToSize(slider.value); + } + if (showLoadingSpinner) { showLoading(); } @@ -226,12 +234,32 @@ function handleSubmit(event, showLoadingSpinner = false) { headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(json_data) }) - .then((response) => response.json()) - .then( (data) => { - // Hide loading overlay + .then(async (response) => { + let data; + + try { + data = await response.json(); + } catch { + data = {}; + } setButtonLoadingState(submitButton, false); - // Handle error + if (!response.ok) { + // Show all error details if present + if (Array.isArray(data.detail)) { + const details = data.detail.map((d) => `
  • ${d.msg || JSON.stringify(d)}
  • `).join(''); + + showError(`
    Error(s):
      ${details}
    `); + + return; + } + // Other errors + showError(`
    ${data.error || JSON.stringify(data) || 'An error occurred.'}
    `); + + return; + } + + // Handle error in data if (data.error) { showError(`
    ${data.error}
    `); @@ -327,14 +355,16 @@ function logSliderToSize(position) { function initializeSlider() { const slider = document.getElementById('file_size'); const sizeValue = document.getElementById('size_value'); + const hiddenInput = document.getElementById('max_file_size_kb'); - if (!slider || !sizeValue) {return;} + if (!slider || !sizeValue || !hiddenInput) {return;} function updateSlider() { const value = logSliderToSize(slider.value); sizeValue.textContent = formatSize(value); slider.style.backgroundSize = `${(slider.value / slider.max) * 100}% 100%`; + hiddenInput.value = value; // Set hidden input to KB value } // Update on slider change From a50e80d73f944795ef8af1439bdec475441cf574 Mon Sep 17 00:00:00 2001 From: ix-56h Date: Sun, 27 Jul 2025 18:04:29 +0200 Subject: [PATCH 2/3] specify max_file_size is bytes in IngestionQuery --- src/gitingest/schemas/ingestion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gitingest/schemas/ingestion.py b/src/gitingest/schemas/ingestion.py index 92572aeb..21369075 100644 --- a/src/gitingest/schemas/ingestion.py +++ b/src/gitingest/schemas/ingestion.py @@ -41,7 +41,7 @@ class IngestionQuery(BaseModel): # pylint: disable=too-many-instance-attributes tag : str | None The tag of the repository. max_file_size : int - The maximum file size to ingest (default: 10 MB). + The maximum file size to ingest in bytes (default: 10 MB). ignore_patterns : set[str] The patterns to ignore (default: ``set()``). 
include_patterns : set[str] | None From 089072946791c8e6b835205370c19657beb5066e Mon Sep 17 00:00:00 2001 From: Zarial <39010759+ix-56h@users.noreply.github.com> Date: Sun, 27 Jul 2025 18:19:36 +0200 Subject: [PATCH 3/3] Update src/server/query_processor.py Co-authored-by: Nicolas Iragne --- src/server/query_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/query_processor.py b/src/server/query_processor.py index ecbb990f..172330ac 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -63,7 +63,7 @@ async def process_query( return IngestErrorResponse(error=str(exc)) query.url = cast("str", query.url) - query.max_file_size = max_file_size * 1024 # Convert to bits since we currently use KB in higher levels + query.max_file_size = max_file_size * 1024 # Convert to bytes since we currently use KB in higher levels query.ignore_patterns, query.include_patterns = process_patterns( exclude_patterns=pattern if pattern_type == PatternType.EXCLUDE else None, include_patterns=pattern if pattern_type == PatternType.INCLUDE else None,