Skip to content

Commit 650a2dd

Browse files
committed
feat: add GitRepository Type
1 parent bfb4381 commit 650a2dd

File tree

4 files changed

+123
-62
lines changed

4 files changed

+123
-62
lines changed

src/gitingest/ingestion.py

Lines changed: 35 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@
66
from typing import TYPE_CHECKING
77

88
from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES
9-
from gitingest.output_formatter import DefaultFormatter, DebugFormatter, SummaryFormatter
10-
from gitingest.schemas import FileSystemNode, FileSystemStats, Context
11-
from gitingest.schemas.filesystem import FileSystemDirectory, FileSystemFile, FileSystemSymlink
9+
from gitingest.schemas import Context, FileSystemNode, FileSystemStats
10+
from gitingest.schemas.filesystem import FileSystemDirectory, FileSystemFile, FileSystemSymlink, GitRepository
1211
from gitingest.utils.ingestion_utils import _should_exclude, _should_include
1312
from gitingest.utils.logging_config import get_logger
1413

@@ -19,6 +18,11 @@
1918
logger = get_logger(__name__)
2019

2120

21+
def _is_git_repository(path: Path) -> bool:
22+
"""Check if a directory contains a .git folder."""
23+
return (path / ".git").exists()
24+
25+
2226
def ingest_query(query: IngestionQuery) -> Context:
2327
"""Run the ingestion process for a parsed query.
2428
@@ -90,17 +94,19 @@ def ingest_query(query: IngestionQuery) -> Context:
9094
)
9195
return Context([file_node], query)
9296

93-
# root_node = FileSystemNode(
94-
# name=path.name,
95-
# type=FileSystemNodeType.DIRECTORY,
96-
# path_str=str(path.relative_to(query.local_path)),
97-
# path=path,
98-
# )
99-
root_node = FileSystemDirectory(
100-
name=path.name,
101-
path_str=str(path.relative_to(query.local_path)),
102-
path=path,
103-
)
97+
# Check if this is a git repository and create appropriate node type
98+
if _is_git_repository(path):
99+
root_node = GitRepository(
100+
name=path.name,
101+
path_str=str(path.relative_to(query.local_path)),
102+
path=path,
103+
)
104+
else:
105+
root_node = FileSystemDirectory(
106+
name=path.name,
107+
path_str=str(path.relative_to(query.local_path)),
108+
path=path,
109+
)
104110

105111
stats = FileSystemStats()
106112

@@ -161,12 +167,21 @@ def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystem
161167
continue
162168
_process_file(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
163169
elif sub_path.is_dir():
164-
child_directory_node = FileSystemDirectory(
165-
name=sub_path.name,
166-
path_str=str(sub_path.relative_to(query.local_path)),
167-
path=sub_path,
168-
depth=node.depth + 1,
169-
)
170+
# Check if this subdirectory is a git repository
171+
if _is_git_repository(sub_path):
172+
child_directory_node = GitRepository(
173+
name=sub_path.name,
174+
path_str=str(sub_path.relative_to(query.local_path)),
175+
path=sub_path,
176+
depth=node.depth + 1,
177+
)
178+
else:
179+
child_directory_node = FileSystemDirectory(
180+
name=sub_path.name,
181+
path_str=str(sub_path.relative_to(query.local_path)),
182+
path=sub_path,
183+
depth=node.depth + 1,
184+
)
170185

171186
_process_node(node=child_directory_node, query=query, stats=stats)
172187

@@ -256,7 +271,6 @@ def _process_file(path: Path, parent_node: FileSystemDirectory, stats: FileSyste
256271
stats.total_files += 1
257272
stats.total_size += file_size
258273

259-
260274
child = FileSystemFile(
261275
name=path.name,
262276
path_str=str(path.relative_to(local_path)),

src/gitingest/output_formatter.py

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,17 @@
33
from __future__ import annotations
44

55
import ssl
6+
from functools import singledispatchmethod
67
from typing import TYPE_CHECKING
78

89
import requests.exceptions
910
import tiktoken
11+
from jinja2 import BaseLoader, Environment
1012

11-
from gitingest.schemas import FileSystemNode
13+
from gitingest.schemas import FileSystemDirectory, FileSystemFile, FileSystemNode, FileSystemSymlink, Source
14+
from gitingest.schemas.filesystem import SEPARATOR, Context, FileSystemNodeType, GitRepository
1215
from gitingest.utils.compat_func import readlink
13-
from functools import singledispatchmethod
14-
from gitingest.schemas import Source, FileSystemFile, FileSystemDirectory, FileSystemSymlink
15-
from gitingest.schemas.filesystem import SEPARATOR, Context, FileSystemNodeType
1616
from gitingest.utils.logging_config import get_logger
17-
from jinja2 import Environment, BaseLoader
1817

1918
if TYPE_CHECKING:
2019
from gitingest.schemas import IngestionQuery
@@ -30,6 +29,7 @@
3029

3130
# Backward compatibility
3231

32+
3333
def _create_summary_prefix(query: IngestionQuery, *, single_file: bool = False) -> str:
3434
"""Create a prefix string for summarizing a repository or local directory.
3535
@@ -208,8 +208,7 @@ def format(self, node: Source, query):
208208

209209
@format.register
210210
def _(self, node: FileSystemFile, query):
211-
template = \
212-
"""
211+
template = """
213212
{{ SEPARATOR }}
214213
{{ node.name }}
215214
{{ SEPARATOR }}
@@ -221,8 +220,7 @@ def _(self, node: FileSystemFile, query):
221220

222221
@format.register
223222
def _(self, node: FileSystemDirectory, query):
224-
template = \
225-
"""
223+
template = """
226224
{% if node.depth == 0 %}
227225
{{ node.name }}:
228226
{{ node.tree }}
@@ -236,9 +234,23 @@ def _(self, node: FileSystemDirectory, query):
236234
return dir_template.render(node=node, query=query, formatter=self)
237235

238236
@format.register
239-
def _(self, node: FileSystemSymlink, query):
240-
template = \
237+
def _(self, node: GitRepository, query):
    """Format a ``GitRepository`` node and, recursively, all of its children.

    For a node at depth 0 the output starts with a "Git Repository" header
    line carrying the node name, followed by ``node.tree``. At every depth,
    each child is rendered by calling back into ``formatter.format``.
    """
    template = """
{% if node.depth == 0 %}
🔗 Git Repository: {{ node.name }}
{{ node.tree }}

{% endif %}
{% for child in node.children %}
{{ formatter.format(child, query) }}
{% endfor %}
"""
    # The formatter itself is handed to the template so children can be
    # dispatched through the same ``format`` entry point.
    render_args = {"node": node, "query": query, "formatter": self}
    compiled = self.env.from_string(template)
    return compiled.render(**render_args)
250+
251+
@format.register
252+
def _(self, node: FileSystemSymlink, query):
253+
template = """
242254
{{ SEPARATOR }}
243255
{{ node.name }}{% if node.target %} -> {{ node.target }}{% endif %}
244256
{{ SEPARATOR }}
@@ -249,8 +261,7 @@ def _(self, node: FileSystemSymlink, query):
249261
@format.register
250262
def _(self, context: Context, query):
251263
"""Format a Context by formatting all its sources."""
252-
template = \
253-
"""
264+
template = """
254265
# Generated using https://gitingest.com/{{ context.query.user_name }}/{{ context.query.repo_name }}
255266
Sources used:
256267
{% for source in context.sources %}
@@ -282,20 +293,19 @@ def format(self, node: Source, query):
282293

283294
# Try to get dataclass fields first
284295
try:
285-
if hasattr(node, '__dataclass_fields__') and hasattr(node.__dataclass_fields__, 'keys'):
296+
if hasattr(node, "__dataclass_fields__") and hasattr(node.__dataclass_fields__, "keys"):
286297
field_names.extend(node.__dataclass_fields__.keys())
287298
else:
288299
raise AttributeError # Fall through to backup method
289300
except (AttributeError, TypeError):
290301
# Fall back to getting all non-private attributes
291-
field_names = [attr for attr in dir(node)
292-
if not attr.startswith('_')
293-
and not callable(getattr(node, attr, None))]
302+
field_names = [
303+
attr for attr in dir(node) if not attr.startswith("_") and not callable(getattr(node, attr, None))
304+
]
294305

295306
# Format the debug output
296307
fields_str = ", ".join(field_names)
297-
template = \
298-
"""
308+
template = """
299309
{{ SEPARATOR }}
300310
DEBUG: {{ class_name }}
301311
Fields: {{ fields_str }}
@@ -305,7 +315,7 @@ def format(self, node: Source, query):
305315
return debug_template.render(
306316
SEPARATOR=SEPARATOR,
307317
class_name=class_name,
308-
fields_str=fields_str
318+
fields_str=fields_str,
309319
)
310320

311321

@@ -321,20 +331,17 @@ def summary(self, node: Source, query):
321331

322332
@summary.register
323333
def _(self, node: FileSystemDirectory, query):
324-
template = \
325-
"""
334+
template = """
326335
Directory structure:
327336
{{ node.tree }}
328337
"""
329338
summary_template = self.env.from_string(template)
330339
return summary_template.render(node=node, query=query)
331340

332-
333341
@summary.register
334342
def _(self, context: Context, query):
335-
template = \
336-
"""
343+
template = """
337344
{{ context.summary }}
338345
"""
339346
summary_template = self.env.from_string(template)
340-
return summary_template.render(context=context, query=query)
347+
return summary_template.render(context=context, query=query)

src/gitingest/schemas/__init__.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,26 @@
11
"""Module containing the schemas for the Gitingest package."""
22

33
from gitingest.schemas.cloning import CloneConfig
4-
from gitingest.schemas.filesystem import FileSystemNode, FileSystemFile, FileSystemDirectory, FileSystemSymlink, FileSystemStats, Context, Source
4+
from gitingest.schemas.filesystem import (
5+
Context,
6+
FileSystemDirectory,
7+
FileSystemFile,
8+
FileSystemNode,
9+
FileSystemStats,
10+
FileSystemSymlink,
11+
GitRepository,
12+
Source,
13+
)
514
from gitingest.schemas.ingestion import IngestionQuery
615

7-
__all__ = ["CloneConfig", "FileSystemNode", "FileSystemFile", "FileSystemDirectory", "FileSystemSymlink", "FileSystemStats", "IngestionQuery", "Context"]
16+
# Public API of ``gitingest.schemas``. Every name imported above for re-export
# is listed here; ``Source`` was imported but previously missing from the list,
# even though other modules import it from this package.
__all__ = [
    "CloneConfig",
    "Context",
    "FileSystemDirectory",
    "FileSystemFile",
    "FileSystemNode",
    "FileSystemStats",
    "FileSystemSymlink",
    "GitRepository",
    "IngestionQuery",
    "Source",
]

src/gitingest/schemas/filesystem.py

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,19 @@
22

33
from __future__ import annotations
44

5-
import os
5+
from abc import ABC
66
from dataclasses import dataclass, field
77
from enum import Enum, auto
88
from typing import TYPE_CHECKING
9-
from abc import ABC
10-
from functools import singledispatchmethod
11-
12-
from gitingest.utils.compat_func import readlink
13-
from gitingest.utils.file_utils import _decodes, _get_preferred_encodings, _read_chunk
14-
from gitingest.utils.notebook import process_notebook
159

1610
if TYPE_CHECKING:
1711
from pathlib import Path
12+
1813
from gitingest.schemas import IngestionQuery
19-
from gitingest.output_formatter import Formatter
2014

2115
SEPARATOR = "=" * 48 # Tiktoken, the tokenizer openai uses, counts 2 tokens if we have more than 48
2216

17+
2318
class FileSystemNodeType(Enum):
2419
"""Enum representing the type of a file system node (directory or file)."""
2520

@@ -35,10 +30,11 @@ class FileSystemStats:
3530
total_files: int = 0
3631
total_size: int = 0
3732

33+
3834
@dataclass
3935
class Source(ABC):
4036
"""Abstract base class for all sources (files, directories, etc)."""
41-
pass
37+
4238

4339
@dataclass
4440
class FileSystemNode(Source):
@@ -52,13 +48,14 @@ class FileSystemNode(Source):
5248
def tree(self):
5349
return self.name
5450

51+
5552
@dataclass
5653
class FileSystemFile(FileSystemNode):
5754
@property
5855
def content(self):
5956
# read the file
6057
try:
61-
with open(self.path, "r") as f:
58+
with open(self.path) as f:
6259
return f.read()
6360
except Exception as e:
6461
return f"Error reading content of {self.name}: {e}"
@@ -70,28 +67,30 @@ def render_tree(self, prefix="", is_last=True):
7067

7168
@dataclass
7269
class FileSystemDirectory(FileSystemNode):
73-
children: list['FileSystemNode'] = field(default_factory=list)
70+
children: list[FileSystemNode] = field(default_factory=list)
7471
file_count: int = 0
7572
dir_count: int = 0
7673
type: FileSystemNodeType = FileSystemNodeType.DIRECTORY
7774

7875
def sort_children(self) -> None:
7976
"""Sort the children nodes of a directory according to a specific order."""
77+
8078
def _sort_key(child: FileSystemNode) -> tuple[int, str]:
8179
name = child.name.lower()
82-
if hasattr(child, 'type') and getattr(child, 'type', None) == FileSystemNodeType.FILE:
80+
if hasattr(child, "type") and getattr(child, "type", None) == FileSystemNodeType.FILE:
8381
if name == "readme" or name.startswith("readme."):
8482
return (0, name)
8583
return (1 if not name.startswith(".") else 2, name)
8684
return (3 if not name.startswith(".") else 4, name)
85+
8786
self.children.sort(key=_sort_key)
8887

8988
def render_tree(self, prefix="", is_last=True):
9089
lines = []
9190
current_prefix = "└── " if is_last else "├── "
9291
display_name = self.name + "/"
9392
lines.append(f"{prefix}{current_prefix}{display_name}")
94-
if hasattr(self, 'children') and self.children:
93+
if hasattr(self, "children") and self.children:
9594
new_prefix = prefix + (" " if is_last else "│ ")
9695
for i, child in enumerate(self.children):
9796
is_last_child = i == len(self.children) - 1
@@ -102,6 +101,27 @@ def render_tree(self, prefix="", is_last=True):
102101
def tree(self):
103102
return "\n".join(self.render_tree())
104103

104+
105+
@dataclass
class GitRepository(FileSystemDirectory):
    """Directory node for a folder identified as a Git repository.

    Adds a ``git_info`` mapping to the inherited directory fields and
    overrides ``render_tree`` so the node's own line in the tree is
    suffixed with ``(git repository)``.
    """

    # Git metadata such as branch or commit; defaults to an empty dict.
    git_info: dict = field(default_factory=dict)

    def render_tree(self, prefix="", is_last=True):
        """Return the tree lines for this repository and everything below it.

        Parameters
        ----------
        prefix : str
            Text placed before this node's connector; extended for children.
        is_last : bool
            Whether this node is the last entry among its siblings, which
            selects the connector and the continuation prefix for children.

        Returns
        -------
        list[str]
            This node's line followed by the lines of each child, in order.

        """
        connector = "└── " if is_last else "├── "
        # The suffix marks the entry as a git repository in the rendered tree.
        rendered = [f"{prefix}{connector}{self.name}/ (git repository)"]

        kids = getattr(self, "children", None)
        if not kids:
            return rendered

        child_prefix = prefix + (" " if is_last else "│ ")
        final_index = len(kids) - 1
        for position, kid in enumerate(kids):
            rendered.extend(kid.render_tree(prefix=child_prefix, is_last=position == final_index))
        return rendered
123+
124+
105125
@dataclass
106126
class FileSystemSymlink(FileSystemNode):
107127
target: str = ""
@@ -122,6 +142,7 @@ class Context(Source):
122142
The list of source objects to format.
123143
query : IngestionQuery
124144
The query context.
145+
125146
"""
126147

127148
def __init__(self, sources: list[Source], query: IngestionQuery):

0 commit comments

Comments
 (0)