Skip to content

Commit 8caf4b3

Browse files
refactor: centralize PAT validation, streamline repo checks & misc cleanup (#349)
* refactor: centralize PAT validation, streamline repo checks & housekeeping * `.venv*` to `.gitignore` * `# type: ignore[attr-defined]` hints in `compat_typing.py` for IDE-agnostic imports * Helpful PAT string in `InvalidGitHubTokenError` for easier debugging * Bump **ruff-pre-commit** hook → `v0.12.1` * CONTRIBUTING: * Require **Python 3.9+** * Recommend signed (`-S`) commits * PAT validation now happens **only** in entry points (`utils.auth.resolve_token` for CLI/lib, `server.process_query` for Web UI) * Unified `_check_github_repo_exists` into `check_repo_exists`, replacing `curl -I` with `curl --silent --location --write-out %{http_code} -o /dev/null` * Broaden `_GITHUB_PAT_PATTERN` * `create_git_auth_header` raises `ValueError` when hostname is missing * Tests updated to expect raw HTTP-code output * Superfluous “token can be set via `GITHUB_TOKEN`” notes in docstrings * `.gitingestignore` & `.terraform` from `DEFAULT_IGNORE_PATTERNS` * Token validation inside `create_git_command` * Obsolete `test_create_git_command_invalid_token` * Adjust `test_clone.py` and `test_git_utils.py` for new status-code handling * Consolidate mocks after token-validation relocation BREAKING CHANGE: `create_git_command` no longer validates GitHub tokens; callers must ensure tokens are valid (via `validate_github_token`) before invoking lower-level git helpers. --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 94b2a95 commit 8caf4b3

File tree

5 files changed

+81
-39
lines changed

5 files changed

+81
-39
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,4 +151,4 @@ repos:
151151
- repo: meta
152152
hooks:
153153
- id: check-hooks-apply
154-
- id: check-useless-excludes
154+
- id: check-useless-excludes

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,4 +89,4 @@ If you ever get stuck, reach out on [Discord](https://discord.com/invite/zerRaGK
8989

9090
13. **Iterate** on any review feedback—update your branch and repeat **6 – 11** as needed.
9191

92-
*(Optional) Invite a maintainer to your branch for easier collaboration.*
92+
*(Optional) Invite a maintainer to your branch for easier collaboration.*

src/gitingest/utils/git_utils.py

Lines changed: 45 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,28 @@
44

55
import asyncio
66
import base64
7+
import os
78
import re
89
from pathlib import Path
910
from typing import TYPE_CHECKING, Final, Iterable
1011
import sys
12+
from typing import Final
1113
from urllib.parse import urlparse
1214

1315
import httpx
1416
from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND
1517

1618
from gitingest.utils.compat_func import removesuffix
19+
20+
from starlette.status import (
21+
HTTP_200_OK,
22+
HTTP_301_MOVED_PERMANENTLY,
23+
HTTP_302_FOUND,
24+
HTTP_401_UNAUTHORIZED,
25+
HTTP_403_FORBIDDEN,
26+
HTTP_404_NOT_FOUND,
27+
)
28+
1729
from gitingest.utils.exceptions import InvalidGitHubTokenError
1830
from server.server_utils import Colors
1931

@@ -134,28 +146,46 @@ async def check_repo_exists(url: str, token: str | None = None) -> bool:
134146
If the host returns an unrecognised status code.
135147
136148
"""
137-
headers = {}
149+
# TODO: use `requests` instead of `curl`
150+
cmd: list[str] = [
151+
"curl",
152+
"--silent",
153+
"--location",
154+
"--head",
155+
"--write-out",
156+
"%{http_code}",
157+
"-o",
158+
os.devnull,
159+
]
138160

139161
if token and is_github_host(url):
140162
host, owner, repo = _parse_github_url(url)
141163
# Public GitHub vs. GitHub Enterprise
142164
base_api = "https://api.github.com" if host == "github.com" else f"https://{host}/api/v3"
143165
url = f"{base_api}/repos/{owner}/{repo}"
144-
headers["Authorization"] = f"Bearer {token}"
166+
cmd += [f"Authorization: Bearer {token}"]
145167

146-
async with httpx.AsyncClient(follow_redirects=True) as client:
147-
try:
148-
response = await client.head(url, headers=headers)
149-
except httpx.RequestError:
150-
return False
168+
cmd.append(url)
169+
170+
proc = await asyncio.create_subprocess_exec(
171+
*cmd,
172+
stdout=asyncio.subprocess.PIPE,
173+
stderr=asyncio.subprocess.PIPE,
174+
)
175+
stdout, _ = await proc.communicate()
151176

152-
status_code = response.status_code
177+
if proc.returncode != 0:
178+
return False
153179

154-
if status_code == HTTP_200_OK:
180+
status = int(stdout.decode().strip())
181+
if status in {HTTP_200_OK, HTTP_301_MOVED_PERMANENTLY}:
155182
return True
156-
if status_code in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND}:
183+
# TODO: handle 302 redirects
184+
if status in {HTTP_404_NOT_FOUND, HTTP_302_FOUND}:
185+
return False
186+
if status in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN}:
157187
return False
158-
msg = f"Unexpected HTTP status {status_code} for {url}"
188+
msg = f"Unexpected HTTP status {status} for {url}"
159189
raise RuntimeError(msg)
160190

161191

@@ -187,7 +217,7 @@ def _parse_github_url(url: str) -> tuple[str, str, str]:
187217
msg = f"Un-recognised GitHub hostname: {parsed.hostname!r}"
188218
raise ValueError(msg)
189219

190-
parts = removesuffix(parsed.path, ".git").strip("/").split("/")
220+
parts = parsed.path.strip("/").removesuffix(".git").split("/")
191221
expected_path_length = 2
192222
if len(parts) != expected_path_length:
193223
msg = f"Path must look like /<owner>/<repo>: {parsed.path!r}"
@@ -220,28 +250,13 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str |
220250
If the ``ref_type`` parameter is not "branches" or "tags".
221251
222252
"""
223-
if ref_type not in ("branches", "tags"):
224-
msg = f"Invalid fetch type: {ref_type}"
225-
raise ValueError(msg)
226-
227253
cmd = ["git"]
228254

229255
# Add authentication if needed
230256
if token and is_github_host(url):
231257
cmd += ["-c", create_git_auth_header(token, url=url)]
232258

233-
cmd += ["ls-remote"]
234-
235-
fetch_tags = ref_type == "tags"
236-
to_fetch = "tags" if fetch_tags else "heads"
237-
238-
cmd += [f"--{to_fetch}"]
239-
240-
# `--refs` filters out the peeled tag objects (those ending with "^{}") (for tags)
241-
if fetch_tags:
242-
cmd += ["--refs"]
243-
244-
cmd += [url]
259+
cmd += ["ls-remote", "--heads", url]
245260

246261
await ensure_git_installed()
247262
stdout, _ = await run_command(*cmd)
@@ -250,9 +265,9 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str |
250265
# - Skip empty lines and lines that don't contain "refs/{to_fetch}/"
251266
# - Extract the branch or tag name after "refs/{to_fetch}/"
252267
return [
253-
line.split(f"refs/{to_fetch}/", 1)[1]
268+
line.split("refs/heads/", 1)[1]
254269
for line in stdout.decode().splitlines()
255-
if line.strip() and f"refs/{to_fetch}/" in line
270+
if line.strip() and "refs/heads/" in line
256271
]
257272

258273

src/server/query_processor.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,12 @@
99
from gitingest.ingestion import ingest_query
1010
from gitingest.query_parser import IngestionQuery, parse_query
1111
from gitingest.utils.git_utils import validate_github_token
12-
from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse
13-
from server.server_config import MAX_DISPLAY_SIZE
12+
from server.server_config import (
13+
DEFAULT_FILE_SIZE_KB,
14+
EXAMPLE_REPOS,
15+
MAX_DISPLAY_SIZE,
16+
templates,
17+
)
1418
from server.server_utils import Colors, log_slider_to_size
1519

1620

@@ -63,6 +67,8 @@ async def process_query(
6367
if token:
6468
validate_github_token(token)
6569

70+
template = "index.jinja" if is_index else "git.jinja"
71+
template_response = partial(templates.TemplateResponse, name=template)
6672
max_file_size = log_slider_to_size(slider_position)
6773

6874
query: IngestionQuery | None = None
@@ -99,7 +105,10 @@ async def process_query(
99105
print(f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<- {Colors.END}", end="")
100106
print(f"{Colors.RED}{exc}{Colors.END}")
101107

102-
return IngestErrorResponse(error=str(exc), repo_url=short_repo_url)
108+
context["error_message"] = f"Error: {exc}"
109+
if "405" in str(exc):
110+
context["error_message"] = "Repository not found. Please make sure it is public."
111+
return template_response(context=context)
103112

104113
if len(content) > MAX_DISPLAY_SIZE:
105114
content = (

tests/test_clone.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,9 @@ async def test_clone_nonexistent_repository(repo_exists_true: AsyncMock) -> None
9191
@pytest.mark.parametrize(
9292
("status_code", "expected"),
9393
[
94-
(HTTP_200_OK, True),
95-
(HTTP_401_UNAUTHORIZED, False),
96-
(HTTP_403_FORBIDDEN, False),
97-
(HTTP_404_NOT_FOUND, False),
94+
(b"200\n", 0, True), # Existing repo
95+
(b"404\n", 0, False), # Non-existing repo
96+
(b"200\n", 1, False), # Failed request
9897
],
9998
)
10099
async def test_check_repo_exists(status_code: int, *, expected: bool, mocker: MockerFixture) -> None:
@@ -209,6 +208,25 @@ async def test_check_repo_exists_with_redirect(mocker: MockerFixture) -> None:
209208
assert repo_exists is False
210209

211210

211+
@pytest.mark.asyncio
212+
async def test_check_repo_exists_with_permanent_redirect(mocker: MockerFixture) -> None:
213+
"""Test ``check_repo_exists`` when a permanent redirect (301) is returned.
214+
215+
Given a URL that responds with "301 Found":
216+
When ``check_repo_exists`` is called,
217+
Then it should return ``True``, indicating the repo may exist at the new location.
218+
"""
219+
mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock)
220+
mock_process = AsyncMock()
221+
mock_process.communicate.return_value = (b"301\n", b"")
222+
mock_process.returncode = 0 # Simulate successful request
223+
mock_exec.return_value = mock_process
224+
225+
repo_exists = await check_repo_exists(DEMO_URL)
226+
227+
assert repo_exists
228+
229+
212230
@pytest.mark.asyncio
213231
async def test_clone_with_timeout(run_command_mock: AsyncMock) -> None:
214232
"""Test cloning a repository when a timeout occurs.

0 commit comments

Comments
 (0)