Skip to content

Commit 95009bd

Browse files
test: add pytest-mock, introduce fixtures & type hints (#290)
* Added pytest-mock to dev dependencies and pre-commit hooks
* Introduced InvalidGitHubTokenError for clearer token-validation failures
* Refactored tests:
  * Replaced ad-hoc mocks with reusable fixtures
  * Parametrised URL/branch matrices to cut duplication
  * Added type hints throughout
* New coverage:
  * validate_github_token (happy & error paths)
  * create_git_command / create_git_auth_header
1 parent 3869aa3 commit 95009bd

File tree

10 files changed

+581
-464
lines changed

10 files changed

+581
-464
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ repos:
9999
"fastapi[standard]>=0.109.1",
100100
pydantic,
101101
pytest-asyncio,
102+
pytest-mock,
102103
python-dotenv,
103104
slowapi,
104105
starlette>=0.40.0,
@@ -117,6 +118,7 @@ repos:
117118
"fastapi[standard]>=0.109.1",
118119
pydantic,
119120
pytest-asyncio,
121+
pytest-mock,
120122
python-dotenv,
121123
slowapi,
122124
starlette>=0.40.0,

requirements-dev.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ pre-commit
55
pylint
66
pytest
77
pytest-asyncio
8+
pytest-mock

src/gitingest/utils/exceptions.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,13 @@ class InvalidNotebookError(Exception):
3535

3636
def __init__(self, message: str) -> None:
3737
super().__init__(message)
38+
39+
40+
class InvalidGitHubTokenError(ValueError):
41+
"""Exception raised when a GitHub Personal Access Token is malformed."""
42+
43+
def __init__(self) -> None:
44+
super().__init__(
45+
"Invalid GitHub token format. Token should start with 'github_pat_' or 'ghp_' "
46+
"followed by at least 36 characters of letters, numbers, and underscores."
47+
)

src/gitingest/utils/git_utils.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import re
66
from typing import List, Optional, Tuple
77

8+
from gitingest.utils.exceptions import InvalidGitHubTokenError
9+
810
GITHUB_PAT_PATTERN = r"^(?:github_pat_|ghp_)[A-Za-z0-9_]{36,}$"
911

1012

@@ -256,11 +258,8 @@ def validate_github_token(token: str) -> None:
256258
257259
Raises
258260
------
259-
ValueError
261+
InvalidGitHubTokenError
260262
If the token format is invalid
261263
"""
262264
if not re.match(GITHUB_PAT_PATTERN, token):
263-
raise ValueError(
264-
"Invalid GitHub token format. Token should start with 'github_pat_' or 'ghp_' "
265-
"followed by at least 36 characters of letters, numbers, and underscores."
266-
)
265+
raise InvalidGitHubTokenError()

tests/conftest.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,19 @@
77

88
import json
99
from pathlib import Path
10-
from typing import Any, Callable, Dict
10+
from typing import Any, Callable, Dict, List
11+
from unittest.mock import AsyncMock
1112

1213
import pytest
14+
from pytest_mock import MockerFixture
1315

1416
from gitingest.query_parsing import IngestionQuery
1517

1618
WriteNotebookFunc = Callable[[str, Dict[str, Any]], Path]
1719

20+
DEMO_URL = "https://github.com/user/repo"
21+
LOCAL_REPO_PATH = "/tmp/repo"
22+
1823

1924
@pytest.fixture
2025
def sample_query() -> IngestionQuery:
@@ -129,3 +134,51 @@ def _write_notebook(name: str, content: Dict[str, Any]) -> Path:
129134
return notebook_path
130135

131136
return _write_notebook
137+
138+
139+
@pytest.fixture
140+
def stub_branches(mocker: MockerFixture) -> Callable[[List[str]], None]:
141+
"""Return a function that stubs git branch discovery to *branches*."""
142+
143+
def _factory(branches: List[str]) -> None:
144+
mocker.patch(
145+
"gitingest.utils.git_utils.run_command",
146+
new_callable=AsyncMock,
147+
return_value=("\n".join(f"refs/heads/{b}" for b in branches).encode() + b"\n", b""),
148+
)
149+
mocker.patch(
150+
"gitingest.utils.git_utils.fetch_remote_branch_list",
151+
new_callable=AsyncMock,
152+
return_value=branches,
153+
)
154+
155+
return _factory
156+
157+
158+
@pytest.fixture
159+
def repo_exists_true(mocker: MockerFixture) -> AsyncMock:
160+
"""Patch `gitingest.cloning.check_repo_exists` to always return ``True``.
161+
162+
Many cloning-related tests assume that the remote repository exists. This fixture centralises
163+
that behaviour so individual tests no longer need to repeat the same ``mocker.patch`` call.
164+
The mock object is returned so that tests can make assertions on how it was used or override
165+
its behaviour when needed.
166+
"""
167+
return mocker.patch("gitingest.cloning.check_repo_exists", return_value=True)
168+
169+
170+
@pytest.fixture
171+
def run_command_mock(mocker: MockerFixture) -> AsyncMock:
172+
"""Patch `gitingest.cloning.run_command` with an ``AsyncMock``.
173+
174+
The mocked function returns a dummy process whose ``communicate`` method yields generic
175+
*stdout* / *stderr* bytes. Tests can still access / tweak the mock via the fixture argument.
176+
"""
177+
mock_exec = mocker.patch("gitingest.cloning.run_command", new_callable=AsyncMock)
178+
179+
# Provide a default dummy process so most tests don't have to create one.
180+
dummy_process = AsyncMock()
181+
dummy_process.communicate.return_value = (b"output", b"error")
182+
mock_exec.return_value = dummy_process
183+
184+
return mock_exec

tests/query_parser/test_git_host_agnostic.py

Lines changed: 46 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -5,91 +5,60 @@
55
Bitbucket, Gitea, and Codeberg, even if the host is omitted.
66
"""
77

8-
from typing import List
8+
from typing import List, Tuple
99

1010
import pytest
1111

1212
from gitingest.query_parsing import parse_query
1313

14+
# Repository matrix: (host, user, repo)
15+
_REPOS: List[Tuple[str, str, str]] = [
16+
("github.com", "tiangolo", "fastapi"),
17+
("gitlab.com", "gitlab-org", "gitlab-runner"),
18+
("bitbucket.org", "na-dna", "llm-knowledge-share"),
19+
("gitea.com", "xorm", "xorm"),
20+
("codeberg.org", "forgejo", "forgejo"),
21+
]
1422

15-
@pytest.mark.parametrize(
16-
"urls, expected_user, expected_repo, expected_url",
17-
[
18-
(
19-
[
20-
"https://github.com/tiangolo/fastapi",
21-
"github.com/tiangolo/fastapi",
22-
"tiangolo/fastapi",
23-
],
24-
"tiangolo",
25-
"fastapi",
26-
"https://github.com/tiangolo/fastapi",
27-
),
28-
(
29-
[
30-
"https://gitlab.com/gitlab-org/gitlab-runner",
31-
"gitlab.com/gitlab-org/gitlab-runner",
32-
"gitlab-org/gitlab-runner",
33-
],
34-
"gitlab-org",
35-
"gitlab-runner",
36-
"https://gitlab.com/gitlab-org/gitlab-runner",
37-
),
38-
(
39-
[
40-
"https://bitbucket.org/na-dna/llm-knowledge-share",
41-
"bitbucket.org/na-dna/llm-knowledge-share",
42-
"na-dna/llm-knowledge-share",
43-
],
44-
"na-dna",
45-
"llm-knowledge-share",
46-
"https://bitbucket.org/na-dna/llm-knowledge-share",
47-
),
48-
(
49-
[
50-
"https://gitea.com/xorm/xorm",
51-
"gitea.com/xorm/xorm",
52-
"xorm/xorm",
53-
],
54-
"xorm",
55-
"xorm",
56-
"https://gitea.com/xorm/xorm",
57-
),
58-
(
59-
[
60-
"https://codeberg.org/forgejo/forgejo",
61-
"codeberg.org/forgejo/forgejo",
62-
"forgejo/forgejo",
63-
],
64-
"forgejo",
65-
"forgejo",
66-
"https://codeberg.org/forgejo/forgejo",
67-
),
68-
],
69-
)
23+
24+
# Generate cartesian product of repository tuples with URL variants.
25+
@pytest.mark.parametrize("host, user, repo", _REPOS, ids=[f"{h}:{u}/{r}" for h, u, r in _REPOS])
26+
@pytest.mark.parametrize("variant", ["full", "noscheme", "slug"])
7027
@pytest.mark.asyncio
7128
async def test_parse_query_without_host(
72-
urls: List[str],
73-
expected_user: str,
74-
expected_repo: str,
75-
expected_url: str,
29+
host: str,
30+
user: str,
31+
repo: str,
32+
variant: str,
7633
) -> None:
77-
"""
78-
Test `parse_query` for Git host agnosticism.
34+
"""Verify that `parse_query` handles URLs, host-omitted URLs and raw slugs."""
35+
36+
# Build the input URL based on the selected variant
37+
if variant == "full":
38+
url = f"https://{host}/{user}/{repo}"
39+
elif variant == "noscheme":
40+
url = f"{host}/{user}/{repo}"
41+
else: # "slug"
42+
url = f"{user}/{repo}"
43+
44+
expected_url = f"https://{host}/{user}/{repo}"
45+
46+
query = await parse_query(url, max_file_size=50, from_web=True)
47+
48+
# Compare against the canonical dict while ignoring unpredictable fields.
49+
actual = query.model_dump(exclude={"id", "local_path", "ignore_patterns"})
7950

80-
Given multiple URL variations for the same user/repo on different Git hosts (with or without host names):
81-
When `parse_query` is called with each variation,
82-
Then the parser should correctly identify the user, repo, canonical URL, and other default fields.
83-
"""
84-
for url in urls:
85-
query = await parse_query(url, max_file_size=50, from_web=True)
51+
expected = {
52+
"user_name": user,
53+
"repo_name": repo,
54+
"url": expected_url,
55+
"slug": f"{user}-{repo}",
56+
"subpath": "/",
57+
"type": None,
58+
"branch": None,
59+
"commit": None,
60+
"max_file_size": 50,
61+
"include_patterns": None,
62+
}
8663

87-
assert query.user_name == expected_user
88-
assert query.repo_name == expected_repo
89-
assert query.url == expected_url
90-
assert query.slug == f"{expected_user}-{expected_repo}"
91-
assert query.id is not None
92-
assert query.subpath == "/"
93-
assert query.branch is None
94-
assert query.commit is None
95-
assert query.type is None
64+
assert actual == expected

0 commit comments

Comments
 (0)