Skip to content

Commit 3869aa3

Browse files
feat(web-ui): add private-GitHub ingestion via PAT (#286)
* feat(web-ui, backend): allow ingesting private GitHub repos with PAT authentication

  * Accept a GitHub personal access token (PAT) from the UI and forward it through
    - `git_form.jinja` → new “Private Repository” checkbox + PAT field
    - routers (`index.py`, `dynamic.py`) and `query_processor.py`
  * Propagate `token` throughout the ingestion stack
    - `gitingest.entrypoint.parse_query`
    - `query_parsing` (including `try_domains_for_user_and_repo`) so we can infer the host when the user enters a bare “user/repo” slug
  * Tests
    - Added `"token": ""` to the `form_data` dict in the tests in `tests/test_flow_integration.py`

  **Limitation:** This PR enables PAT-protected cloning **only for GitHub**; other hosts (GitLab, Gitea, etc.) remain public-only for now.

* help link to generate PAT
* pre-commit hooks

---------

Co-authored-by: cyclotruc <romain@coderamp.io>
1 parent c656635 commit 3869aa3

File tree

8 files changed

+176
-83
lines changed

8 files changed

+176
-83
lines changed

src/gitingest/entrypoint.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ async def ingest_async(
7373
from_web=False,
7474
include_patterns=include_patterns,
7575
ignore_patterns=exclude_patterns,
76+
token=token,
7677
)
7778

7879
if query.url:

src/gitingest/query_parsing.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ async def parse_query(
2929
from_web: bool,
3030
include_patterns: Optional[Union[str, Set[str]]] = None,
3131
ignore_patterns: Optional[Union[str, Set[str]]] = None,
32+
token: Optional[str] = None,
3233
) -> IngestionQuery:
3334
"""
3435
Parse the input source (URL or path) to extract relevant details for the query.
@@ -49,7 +50,10 @@ async def parse_query(
4950
Patterns to include, by default None. Can be a set of strings or a single string.
5051
ignore_patterns : Union[str, Set[str]], optional
5152
Patterns to ignore, by default None. Can be a set of strings or a single string.
52-
53+
token : str, optional
54+
GitHub personal-access token (PAT). Needed when *source* refers to a
55+
**private** repository. Can also be set via the ``GITHUB_TOKEN`` env var.
56+
Must start with 'github_pat_' or 'ghp_' for GitHub repositories.
5357
Returns
5458
-------
5559
IngestionQuery
@@ -59,7 +63,7 @@ async def parse_query(
5963
# Determine the parsing method based on the source type
6064
if from_web or urlparse(source).scheme in ("https", "http") or any(h in source for h in KNOWN_GIT_HOSTS):
6165
# We either have a full URL or a domain-less slug
62-
query = await _parse_remote_repo(source)
66+
query = await _parse_remote_repo(source, token=token)
6367
else:
6468
# Local path scenario
6569
query = _parse_local_dir_path(source)

src/server/query_processor.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Process a query by parsing input, cloning a repository, and generating a summary."""
22

33
from functools import partial
4+
from typing import Optional
45

56
from fastapi import Request
67
from starlette.templating import _TemplateResponse
@@ -19,6 +20,7 @@ async def process_query(
1920
pattern_type: str = "exclude",
2021
pattern: str = "",
2122
is_index: bool = False,
23+
token: Optional[str] = None,
2224
) -> _TemplateResponse:
2325
"""
2426
Process a query by parsing input, cloning a repository, and generating a summary.
@@ -40,6 +42,9 @@ async def process_query(
4042
Pattern to include or exclude in the query, depending on the pattern type.
4143
is_index : bool
4244
Flag indicating whether the request is for the index page (default is False).
45+
token : str, optional
46+
GitHub personal-access token (PAT). Needed when *input_text* refers to a
47+
**private** repository.
4348
4449
Returns
4550
-------
@@ -71,6 +76,7 @@ async def process_query(
7176
"default_file_size": slider_position,
7277
"pattern_type": pattern_type,
7378
"pattern": pattern,
79+
"token": token,
7480
}
7581

7682
try:
@@ -80,12 +86,13 @@ async def process_query(
8086
from_web=True,
8187
include_patterns=include_patterns,
8288
ignore_patterns=exclude_patterns,
89+
token=token,
8390
)
8491
if not query.url:
8592
raise ValueError("The 'url' parameter is required.")
8693

8794
clone_config = query.extract_clone_config()
88-
await clone_repo(clone_config)
95+
await clone_repo(clone_config, token=token)
8996
summary, tree, content = ingest_query(query)
9097
with open(f"{clone_config.local_path}.txt", "w", encoding="utf-8") as f:
9198
f.write(tree + "\n" + content)

src/server/routers/dynamic.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ async def process_catch_all(
5050
max_file_size: int = Form(...),
5151
pattern_type: str = Form(...),
5252
pattern: str = Form(...),
53+
token: str = Form(...),
5354
) -> HTMLResponse:
5455
"""
5556
Process the form submission with user input for query parameters.
@@ -69,18 +70,22 @@ async def process_catch_all(
6970
The type of pattern used for the query, specified by the user.
7071
pattern : str
7172
The pattern string used in the query, specified by the user.
72-
73+
token : str
74+
GitHub personal-access token (PAT). Needed when *input_text* refers to a
75+
**private** repository.
7376
Returns
7477
-------
7578
HTMLResponse
7679
An HTML response generated after processing the form input and query logic,
7780
which will be rendered and returned to the user.
7881
"""
82+
resolved_token = None if token == "" else token
7983
return await process_query(
8084
request,
8185
input_text,
8286
max_file_size,
8387
pattern_type,
8488
pattern,
8589
is_index=False,
90+
token=resolved_token,
8691
)

src/server/routers/index.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ async def index_post(
4747
max_file_size: int = Form(...),
4848
pattern_type: str = Form(...),
4949
pattern: str = Form(...),
50+
token: str = Form(...),
5051
) -> HTMLResponse:
5152
"""
5253
Process the form submission with user input for query parameters.
@@ -67,18 +68,22 @@ async def index_post(
6768
The type of pattern used for the query, specified by the user.
6869
pattern : str
6970
The pattern string used in the query, specified by the user.
70-
71+
token : str
72+
GitHub personal-access token (PAT). Needed when *input_text* refers to a
73+
**private** repository.
7174
Returns
7275
-------
7376
HTMLResponse
7477
An HTML response containing the results of processing the form input and query logic,
7578
which will be rendered and returned to the user.
7679
"""
80+
resolved_token = None if token == "" else token
7781
return await process_query(
7882
request,
7983
input_text,
8084
max_file_size,
8185
pattern_type,
8286
pattern,
8387
is_index=True,
88+
token=resolved_token,
8489
)

0 commit comments

Comments
 (0)