Skip to content

Commit bfb4381

Browse files
committed
wip
1 parent 9c569b0 commit bfb4381

File tree

6 files changed

+446
-183
lines changed

6 files changed

+446
-183
lines changed

src/gitingest/ingestion.py

Lines changed: 11 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
from typing import TYPE_CHECKING
77

88
from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES
9-
from gitingest.output_formatter import DefaultFormatter, StupidFormatter
9+
from gitingest.output_formatter import DefaultFormatter, DebugFormatter, SummaryFormatter
1010
from gitingest.schemas import FileSystemNode, FileSystemStats, Context
11-
from gitingest.schemas.filesystem import FileSystemDirectory, FileSystemFile, FileSystemSymlink, FileSystemTextFile
11+
from gitingest.schemas.filesystem import FileSystemDirectory, FileSystemFile, FileSystemSymlink
1212
from gitingest.utils.ingestion_utils import _should_exclude, _should_include
1313
from gitingest.utils.logging_config import get_logger
1414

@@ -70,14 +70,6 @@ def ingest_query(query: IngestionQuery) -> Context:
7070

7171
relative_path = path.relative_to(query.local_path)
7272

73-
# file_node = FileSystemNode(
74-
# name=path.name,
75-
# type=FileSystemNodeType.FILE,
76-
# size=path.stat().st_size,
77-
# file_count=1,
78-
# path_str=str(relative_path),
79-
# path=path,
80-
# )
8173
file_node = FileSystemFile(
8274
name=path.name,
8375
path_str=str(relative_path),
@@ -96,7 +88,7 @@ def ingest_query(query: IngestionQuery) -> Context:
9688
"file_size": file_node.size,
9789
},
9890
)
99-
return Context([file_node], StupidFormatter(), query)
91+
return Context([file_node], query)
10092

10193
# root_node = FileSystemNode(
10294
# name=path.name,
@@ -125,7 +117,7 @@ def ingest_query(query: IngestionQuery) -> Context:
125117
},
126118
)
127119

128-
return Context([root_node], StupidFormatter(), query)
120+
return Context([root_node], query)
129121

130122

131123
def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystemStats) -> None:
@@ -264,22 +256,13 @@ def _process_file(path: Path, parent_node: FileSystemDirectory, stats: FileSyste
264256
stats.total_files += 1
265257
stats.total_size += file_size
266258

267-
# if file is a .txt file, create a FileSystemTextFile
268-
if path.suffix == ".txt":
269-
child = FileSystemTextFile(
270-
name=path.name,
271-
path_str=str(path.relative_to(local_path)),
272-
path=path,
273-
depth=parent_node.depth + 1,
274-
)
275-
else:
276259

277-
child = FileSystemFile(
278-
name=path.name,
279-
path_str=str(path.relative_to(local_path)),
280-
path=path,
281-
depth=parent_node.depth + 1,
282-
)
260+
child = FileSystemFile(
261+
name=path.name,
262+
path_str=str(path.relative_to(local_path)),
263+
path=path,
264+
depth=parent_node.depth + 1,
265+
)
283266

284267
parent_node.children.append(child)
285268
parent_node.size += file_size
@@ -290,7 +273,7 @@ def limit_exceeded(stats: FileSystemStats, depth: int) -> bool:
290273
"""Check if any of the traversal limits have been exceeded.
291274
292275
This function checks if the current traversal has exceeded any of the configured limits:
293-
maximum directory depth, maximum number of files, or maximum total size in bytes.
276+
maximum directory depth, maximum number of files, or maximum total size in bytes.
294277
295278
Parameters
296279
----------

src/gitingest/output_formatter.py

Lines changed: 126 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -11,36 +11,11 @@
1111
from gitingest.schemas import FileSystemNode
1212
from gitingest.utils.compat_func import readlink
1313
from functools import singledispatchmethod
14-
from gitingest.schemas import Source, FileSystemFile, FileSystemDirectory, FileSystemSymlink, FileSystemTextFile
15-
from gitingest.schemas.filesystem import SEPARATOR, FileSystemNodeType, CONTEXT_HEADER, CONTEXT_FOOTER
14+
from gitingest.schemas import Source, FileSystemFile, FileSystemDirectory, FileSystemSymlink
15+
from gitingest.schemas.filesystem import SEPARATOR, Context, FileSystemNodeType
1616
from gitingest.utils.logging_config import get_logger
1717
from jinja2 import Environment, BaseLoader
1818

19-
20-
class OverridableDispatcher:
21-
"""Custom dispatcher that allows later registrations to override earlier ones, even for parent types."""
22-
23-
def __init__(self, default_func):
24-
self.default_func = default_func
25-
self.registry = [] # List of (type, func) in registration order
26-
27-
def register(self, type_):
28-
def decorator(func):
29-
# Remove any existing registration for this exact type
30-
self.registry = [(t, f) for t, f in self.registry if t != type_]
31-
# Add new registration at the end (highest priority)
32-
self.registry.append((type_, func))
33-
return func
34-
return decorator
35-
36-
def __call__(self, instance, *args, **kwargs):
37-
# Check registrations in reverse order (most recent first)
38-
for type_, func in reversed(self.registry):
39-
if isinstance(instance, type_):
40-
return func(instance, *args, **kwargs)
41-
# Fall back to default
42-
return self.default_func(instance, *args, **kwargs)
43-
4419
if TYPE_CHECKING:
4520
from gitingest.schemas import IngestionQuery
4621

@@ -202,116 +177,164 @@ def _format_token_count(text: str) -> str | None:
202177

203178
return str(total_tokens)
204179

180+
181+
def generate_digest(context: Context) -> str:
182+
"""Generate a digest string from a Context object.
183+
184+
This is a convenience function that uses the DefaultFormatter to format a Context.
185+
186+
Parameters
187+
----------
188+
context : Context
189+
The Context object containing sources and query information.
190+
191+
Returns
192+
-------
193+
str
194+
The formatted digest string.
195+
"""
196+
formatter = DefaultFormatter()
197+
return formatter.format(context, context.query)
198+
199+
205200
class DefaultFormatter:
206201
def __init__(self):
207202
self.separator = SEPARATOR
208203
self.env = Environment(loader=BaseLoader())
209204

210-
# Set up custom dispatchers
211-
def _default_format(node: Source, query):
212-
return f"{getattr(node, 'content', '')}"
213-
214-
def _default_summary(node: Source, query):
215-
return f"{getattr(node, 'name', '')}"
205+
@singledispatchmethod
206+
def format(self, node: Source, query):
207+
return f"{getattr(node, 'content', '')}"
216208

217-
self.format = OverridableDispatcher(_default_format)
218-
self.summary = OverridableDispatcher(_default_summary)
219-
220-
# Register the default implementations
221-
self._register_defaults()
222-
223-
def _register_defaults(self):
224-
@self.format.register(FileSystemFile)
225-
def _(node: FileSystemFile, query):
226-
template = \
209+
@format.register
210+
def _(self, node: FileSystemFile, query):
211+
template = \
227212
"""
228213
{{ SEPARATOR }}
229214
{{ node.name }}
230215
{{ SEPARATOR }}
231216
232217
{{ node.content }}
233218
"""
234-
file_template = self.env.from_string(template)
235-
return file_template.render(SEPARATOR=SEPARATOR, node=node, query=query, formatter=self)
219+
file_template = self.env.from_string(template)
220+
return file_template.render(SEPARATOR=SEPARATOR, node=node, query=query, formatter=self)
236221

237-
@self.format.register(FileSystemDirectory)
238-
def _(node: FileSystemDirectory, query):
239-
template = \
222+
@format.register
223+
def _(self, node: FileSystemDirectory, query):
224+
template = \
240225
"""
226+
{% if node.depth == 0 %}
227+
{{ node.name }}:
228+
{{ node.tree }}
229+
230+
{% endif %}
241231
{% for child in node.children %}
242232
{{ formatter.format(child, query) }}
243233
{% endfor %}
244234
"""
245-
dir_template = self.env.from_string(template)
246-
return dir_template.render(node=node, query=query, formatter=self)
235+
dir_template = self.env.from_string(template)
236+
return dir_template.render(node=node, query=query, formatter=self)
247237

248-
@self.summary.register(FileSystemDirectory)
249-
def _(node: FileSystemDirectory, query):
250-
template = \
251-
"""
252-
Directory structure:
253-
{{ node.tree }}
254-
"""
255-
summary_template = self.env.from_string(template)
256-
return summary_template.render(node=node, query=query, formatter=self)
257-
258-
@self.format.register(FileSystemSymlink)
259-
def _(node: FileSystemSymlink, query):
260-
template = \
238+
@format.register
239+
def _(self, node: FileSystemSymlink, query):
240+
template = \
261241
"""
262242
{{ SEPARATOR }}
263243
{{ node.name }}{% if node.target %} -> {{ node.target }}{% endif %}
264244
{{ SEPARATOR }}
265245
"""
266-
symlink_template = self.env.from_string(template)
267-
return symlink_template.render(SEPARATOR=SEPARATOR, node=node, query=query, formatter=self)
246+
symlink_template = self.env.from_string(template)
247+
return symlink_template.render(SEPARATOR=SEPARATOR, node=node, query=query, formatter=self)
248+
249+
@format.register
250+
def _(self, context: Context, query):
251+
"""Format a Context by formatting all its sources."""
252+
template = \
253+
"""
254+
# Generated using https://gitingest.com/{{ context.query.user_name }}/{{ context.query.repo_name }}
255+
Sources used:
256+
{% for source in context.sources %}
257+
- {{ source.name }}: {{ source.__class__.__name__ }}
258+
{% endfor %}
259+
260+
{% for source in context.sources %}
261+
{{ formatter.format(source, context.query) }}
262+
{% endfor %}
263+
# End of generated content
264+
"""
265+
context_template = self.env.from_string(template)
266+
return context_template.render(context=context, formatter=self)
268267

269-
class StupidFormatter(DefaultFormatter):
268+
269+
class DebugFormatter:
270270
def __init__(self):
271-
super().__init__()
271+
self.separator = SEPARATOR
272+
self.env = Environment(loader=BaseLoader())
272273

273-
@self.summary.register(FileSystemTextFile)
274-
def _(node: FileSystemTextFile, query):
275-
template = \
274+
@singledispatchmethod
275+
def format(self, node: Source, query):
276+
"""Format any Source type with debug information."""
277+
# Get the actual class name
278+
class_name = node.__class__.__name__
279+
280+
# Get all field names (both from dataclass fields and regular attributes)
281+
field_names = []
282+
283+
# Try to get dataclass fields first
284+
try:
285+
if hasattr(node, '__dataclass_fields__') and hasattr(node.__dataclass_fields__, 'keys'):
286+
field_names.extend(node.__dataclass_fields__.keys())
287+
else:
288+
raise AttributeError # Fall through to backup method
289+
except (AttributeError, TypeError):
290+
# Fall back to getting all non-private attributes
291+
field_names = [attr for attr in dir(node)
292+
if not attr.startswith('_')
293+
and not callable(getattr(node, attr, None))]
294+
295+
# Format the debug output
296+
fields_str = ", ".join(field_names)
297+
template = \
276298
"""
277299
{{ SEPARATOR }}
278-
{{ node.name }}
300+
DEBUG: {{ class_name }}
301+
Fields: {{ fields_str }}
279302
{{ SEPARATOR }}
280-
FileSystemTextFile
281303
"""
304+
debug_template = self.env.from_string(template)
305+
return debug_template.render(
306+
SEPARATOR=SEPARATOR,
307+
class_name=class_name,
308+
fields_str=fields_str
309+
)
282310

283-
@self.format.register(FileSystemFile)
284-
def _(node: FileSystemFile, query):
285-
template = \
311+
312+
class SummaryFormatter:
313+
"""Dedicated formatter for generating summaries of filesystem nodes."""
314+
315+
def __init__(self):
316+
self.env = Environment(loader=BaseLoader())
317+
318+
@singledispatchmethod
319+
def summary(self, node: Source, query):
320+
return f"{getattr(node, 'name', '')}"
321+
322+
@summary.register
323+
def _(self, node: FileSystemDirectory, query):
324+
template = \
286325
"""
287-
{{ SEPARATOR }}
288-
{{ node.name }}
289-
{{ SEPARATOR }}
290-
FileSystemFile
326+
Directory structure:
327+
{{ node.tree }}
291328
"""
292-
file_template = self.env.from_string(template)
293-
return file_template.render(SEPARATOR=SEPARATOR, node=node, query=query, formatter=self)
329+
summary_template = self.env.from_string(template)
330+
return summary_template.render(node=node, query=query)
294331

295332

296-
def generate_digest(context) -> str:
297-
"""Generate a digest from a Context object.
298-
299-
Parameters
300-
----------
301-
context : Context
302-
The context object containing nodes, formatter, and query.
303-
304-
Returns
305-
-------
306-
str
307-
The formatted digest string with header, content, and footer.
308-
"""
309-
if context.query.user_name and context.query.repo_name:
310-
context_header = CONTEXT_HEADER.format(f"/{context.query.user_name}/{context.query.repo_name}")
311-
else:
312-
context_header = CONTEXT_HEADER.format("")
313-
context_footer = CONTEXT_FOOTER
314-
formatted = []
315-
for node in context.nodes:
316-
formatted.append(context.formatter.format(node, context.query))
317-
return context_header + "\n".join(formatted) + context_footer
333+
@summary.register
334+
def _(self, context: Context, query):
335+
template = \
336+
"""
337+
{{ context.summary }}
338+
"""
339+
summary_template = self.env.from_string(template)
340+
return summary_template.render(context=context, query=query)

src/gitingest/schemas/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Module containing the schemas for the Gitingest package."""
22

33
from gitingest.schemas.cloning import CloneConfig
4-
from gitingest.schemas.filesystem import FileSystemNode, FileSystemFile, FileSystemDirectory, FileSystemSymlink, FileSystemTextFile, FileSystemStats, Context, Source
4+
from gitingest.schemas.filesystem import FileSystemNode, FileSystemFile, FileSystemDirectory, FileSystemSymlink, FileSystemStats, Context, Source
55
from gitingest.schemas.ingestion import IngestionQuery
66

7-
__all__ = ["CloneConfig", "FileSystemNode", "FileSystemFile", "FileSystemDirectory", "FileSystemSymlink", "FileSystemTextFile", "FileSystemStats", "IngestionQuery", "Context"]
7+
__all__ = ["CloneConfig", "FileSystemNode", "FileSystemFile", "FileSystemDirectory", "FileSystemSymlink", "FileSystemStats", "IngestionQuery", "Context"]

0 commit comments

Comments
 (0)