|
5 | 5 | including filtering patterns and subpaths. |
6 | 6 | """ |
7 | 7 |
|
| 8 | +import re |
8 | 9 | from pathlib import Path |
| 10 | +from typing import Set, TypedDict |
| 11 | + |
| 12 | +import pytest |
9 | 13 |
|
10 | 14 | from gitingest.ingestion import ingest_query |
11 | 15 | from gitingest.query_parsing import IngestionQuery |
@@ -42,5 +46,187 @@ def test_run_ingest_query(temp_directory: Path, sample_query: IngestionQuery) -> |
42 | 46 | # TODO: Additional tests: |
43 | 47 | # - Multiple include patterns, e.g. ["*.txt", "*.py"] or ["/src/*", "*.txt"]. |
44 | 48 | # - Edge cases with weird file names or deep subdirectory structures. |
45 | | -# TODO : def test_include_txt_pattern |
46 | 49 | # TODO : def test_include_nonexistent_extension |
| 50 | + |
| 51 | + |
class PatternScenario(TypedDict):
    """One parametrized case for `test_include_ignore_patterns`: query patterns plus expected results."""

    # Patterns assigned to the query's `include_patterns`; an empty set is passed on as None.
    include_patterns: Set[str]
    # Patterns assigned to the query's `ignore_patterns`; an empty set is passed on as None.
    ignore_patterns: Set[str]
    # Number expected on the "Files analyzed: N" line of the summary.
    expected_num_files: int
    # Substrings (file paths) that must each appear in the returned content.
    expected_content: Set[str]
    # Substrings (directory names with a trailing slash) that must each appear in the returned structure.
    expected_structure: Set[str]
    # Substrings (directory names with a trailing slash) that must not appear in the returned structure.
    expected_not_structure: Set[str]
| 59 | + |
| 60 | + |
@pytest.mark.parametrize(
    "pattern_scenario",
    [
        pytest.param(
            PatternScenario(
                include_patterns={"file2.py", "dir2/file_dir2.txt"},
                ignore_patterns=set(),
                expected_num_files=2,
                expected_content={"file2.py", "dir2/file_dir2.txt"},
                expected_structure={"test_repo/", "dir2/"},
                expected_not_structure={"src/", "subdir/", "dir1/"},
            ),
            id="include-explicit-files",
        ),
        pytest.param(
            PatternScenario(
                include_patterns={
                    "file1.txt",
                    "file2.py",
                    "file_dir1.txt",
                    "*/file_dir2.txt",
                },
                ignore_patterns=set(),
                expected_num_files=3,
                expected_content={"file1.txt", "file2.py", "dir2/file_dir2.txt"},
                expected_structure={"test_repo/", "dir2/"},
                expected_not_structure={"src/", "subdir/", "dir1/"},
            ),
            id="include-wildcard-directory",
        ),
        pytest.param(
            PatternScenario(
                include_patterns={"*.py"},
                ignore_patterns=set(),
                expected_num_files=3,
                expected_content={
                    "file2.py",
                    "src/subfile2.py",
                    "src/subdir/file_subdir.py",
                },
                expected_structure={"test_repo/", "src/", "subdir/"},
                expected_not_structure={"dir1/", "dir2/"},
            ),
            id="include-wildcard-files",
        ),
        pytest.param(
            PatternScenario(
                include_patterns={"**/file_dir2.txt", "src/**/*.py"},
                ignore_patterns=set(),
                expected_num_files=2,
                expected_content={
                    "dir2/file_dir2.txt",
                    "src/subdir/file_subdir.py",
                },
                expected_structure={"test_repo/", "dir2/", "src/", "subdir/"},
                expected_not_structure={"dir1/"},
            ),
            id="include-recursive-wildcard",
        ),
        pytest.param(
            PatternScenario(
                include_patterns=set(),
                ignore_patterns={"file2.py", "dir2/file_dir2.txt"},
                expected_num_files=6,
                expected_content={
                    "file1.txt",
                    "src/subfile1.txt",
                    "src/subfile2.py",
                    "src/subdir/file_subdir.txt",
                    "src/subdir/file_subdir.py",
                    "dir1/file_dir1.txt",
                },
                expected_structure={"test_repo/", "src/", "subdir/", "dir1/"},
                expected_not_structure={"dir2/"},
            ),
            id="exclude-explicit-files",
        ),
        pytest.param(
            PatternScenario(
                include_patterns=set(),
                ignore_patterns={"file1.txt", "file2.py", "*/file_dir1.txt"},
                expected_num_files=5,
                expected_content={
                    "src/subfile1.txt",
                    "src/subfile2.py",
                    "src/subdir/file_subdir.txt",
                    "src/subdir/file_subdir.py",
                    "dir2/file_dir2.txt",
                },
                expected_structure={"test_repo/", "src/", "subdir/", "dir2/"},
                expected_not_structure={"dir1/"},
            ),
            id="exclude-wildcard-directory",
        ),
        pytest.param(
            PatternScenario(
                include_patterns=set(),
                ignore_patterns={"src/**/*.py"},
                expected_num_files=7,
                expected_content={
                    "file1.txt",
                    "file2.py",
                    "src/subfile1.txt",
                    "src/subfile2.py",
                    "src/subdir/file_subdir.txt",
                    "dir1/file_dir1.txt",
                    "dir2/file_dir2.txt",
                },
                expected_structure={
                    "test_repo/",
                    "dir1/",
                    "dir2/",
                    "src/",
                    "subdir/",
                },
                expected_not_structure=set(),
            ),
            id="exclude-recursive-wildcard",
        ),
    ],
)
def test_include_ignore_patterns(
    temp_directory: Path,
    sample_query: IngestionQuery,
    pattern_scenario: PatternScenario,
) -> None:
    """
    Test `ingest_query` to ensure included paths are kept and ignored paths are dropped.

    Given a directory with .txt and .py files, and a set of include patterns or a set of ignore patterns:
    When `ingest_query` is invoked,
    Then the summary should report the expected number of analyzed files, the content should mention every
    expected file, and the structure should mention the expected directories and none of the excluded ones.
    """
    sample_query.local_path = temp_directory
    sample_query.subpath = "/"
    sample_query.type = None
    # An empty pattern set is falsy, so the query receives None instead of an empty set.
    sample_query.include_patterns = pattern_scenario["include_patterns"] or None
    sample_query.ignore_patterns = pattern_scenario["ignore_patterns"] or None

    summary, structure, content = ingest_query(sample_query)

    assert "Repository: test_user/test_repo" in summary

    # Bind the match before asserting so the name never depends on an `assert` being executed.
    num_files_match = re.search(r"^Files analyzed: (\d+)$", summary, re.MULTILINE)
    assert num_files_match is not None, "summary has no 'Files analyzed: N' line"
    assert int(num_files_match.group(1)) == pattern_scenario["expected_num_files"]

    # All checks below are substring checks; offending items are collected so a failure lists every one.
    missing_content = sorted(item for item in pattern_scenario["expected_content"] if item not in content)
    assert not missing_content, f"expected files missing from content: {missing_content}"

    missing_dirs = sorted(item for item in pattern_scenario["expected_structure"] if item not in structure)
    assert not missing_dirs, f"expected directories missing from structure: {missing_dirs}"

    unexpected_dirs = sorted(item for item in pattern_scenario["expected_not_structure"] if item in structure)
    assert not unexpected_dirs, f"unexpected directories present in structure: {unexpected_dirs}"
0 commit comments