From 42aa7603aa752850c8ad89cca61e280dab520faf Mon Sep 17 00:00:00 2001 From: Greg Funni Date: Thu, 20 Nov 2025 21:43:36 +0000 Subject: [PATCH 01/26] win32: pthread_cond_init should return a value This value is not checked, but it must return to match POSIX Signed-off-by: Greg Funni Signed-off-by: Junio C Hamano --- compat/win32/pthread.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compat/win32/pthread.h b/compat/win32/pthread.h index e2b5c4f64c9b91..000604cdf69ffc 100644 --- a/compat/win32/pthread.h +++ b/compat/win32/pthread.h @@ -34,7 +34,7 @@ typedef int pthread_mutexattr_t; #define pthread_cond_t CONDITION_VARIABLE -#define pthread_cond_init(a,b) InitializeConditionVariable((a)) +#define pthread_cond_init(a,b) return_0((InitializeConditionVariable((a)), 0)) #define pthread_cond_destroy(a) do {} while (0) #define pthread_cond_wait(a,b) return_0(SleepConditionVariableCS((a), (b), INFINITE)) #define pthread_cond_signal WakeConditionVariable From 6bdda3a3b00fff9a1d64d1bb4732f0c446d7012c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:26 +0100 Subject: [PATCH 02/26] streaming: rename `git_istream` into `odb_read_stream` In the following patches we are about to make the `git_istream` more generic so that it becomes fully controlled by the specific object source that wants to create it. As part of these refactorings we'll fully move the structure into the object database subsystem. Prepare for this change by renaming the structure from `git_istream` to `odb_read_stream`. This mirrors the `odb_write_stream` structure that we already have. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- archive-tar.c | 2 +- archive-zip.c | 2 +- builtin/index-pack.c | 2 +- builtin/pack-objects.c | 4 +-- object-file.c | 2 +- streaming.c | 62 +++++++++++++++++++++--------------------- streaming.h | 12 ++++---- 7 files changed, 43 insertions(+), 43 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 73b63ddc41bad6..dc1eda09e01e2b 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -129,7 +129,7 @@ static void write_trailer(void) */ static int stream_blocked(struct repository *r, const struct object_id *oid) { - struct git_istream *st; + struct odb_read_stream *st; enum object_type type; unsigned long sz; char buf[BLOCKSIZE]; diff --git a/archive-zip.c b/archive-zip.c index bea5bdd43dc43e..40a9c93ff95233 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -309,7 +309,7 @@ static int write_zip_entry(struct archiver_args *args, enum zip_method method; unsigned char *out; void *deflated = NULL; - struct git_istream *stream = NULL; + struct odb_read_stream *stream = NULL; unsigned long flags = 0; int is_binary = -1; const char *path_without_prefix = path + args->baselen; diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 2b78ba7fe4d14a..5f90f12f92d9c4 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -762,7 +762,7 @@ static void find_ref_delta_children(const struct object_id *oid, struct compare_data { struct object_entry *entry; - struct git_istream *st; + struct odb_read_stream *st; unsigned char *buf; unsigned long buf_size; }; diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 69e80b1443a9b7..c693d948e193ed 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -404,7 +404,7 @@ static unsigned long do_compress(void **pptr, unsigned long size) return stream.total_out; } -static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f, +static unsigned long write_large_blob_data(struct odb_read_stream *st, struct hashfile 
*f, const struct object_id *oid) { git_zstream stream; @@ -513,7 +513,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent unsigned hdrlen; enum object_type type; void *buf; - struct git_istream *st = NULL; + struct odb_read_stream *st = NULL; const unsigned hashsz = the_hash_algo->rawsz; if (!usable_delta) { diff --git a/object-file.c b/object-file.c index 811c569ed36aa4..b62b21a45289fc 100644 --- a/object-file.c +++ b/object-file.c @@ -134,7 +134,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) struct object_id real_oid; unsigned long size; enum object_type obj_type; - struct git_istream *st; + struct odb_read_stream *st; struct git_hash_ctx c; char hdr[MAX_HEADER_LEN]; int hdrlen; diff --git a/streaming.c b/streaming.c index 00ad649ae397f3..1fb4b7c1c002e8 100644 --- a/streaming.c +++ b/streaming.c @@ -14,17 +14,17 @@ #include "replace-object.h" #include "packfile.h" -typedef int (*open_istream_fn)(struct git_istream *, +typedef int (*open_istream_fn)(struct odb_read_stream *, struct repository *, const struct object_id *, enum object_type *); -typedef int (*close_istream_fn)(struct git_istream *); -typedef ssize_t (*read_istream_fn)(struct git_istream *, char *, size_t); +typedef int (*close_istream_fn)(struct odb_read_stream *); +typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t); #define FILTER_BUFFER (1024*16) struct filtered_istream { - struct git_istream *upstream; + struct odb_read_stream *upstream; struct stream_filter *filter; char ibuf[FILTER_BUFFER]; char obuf[FILTER_BUFFER]; @@ -33,7 +33,7 @@ struct filtered_istream { int input_finished; }; -struct git_istream { +struct odb_read_stream { open_istream_fn open; close_istream_fn close; read_istream_fn read; @@ -71,7 +71,7 @@ struct git_istream { * *****************************************************************/ -static void close_deflated_stream(struct git_istream *st) +static void close_deflated_stream(struct 
odb_read_stream *st) { if (st->z_state == z_used) git_inflate_end(&st->z); @@ -84,13 +84,13 @@ static void close_deflated_stream(struct git_istream *st) * *****************************************************************/ -static int close_istream_filtered(struct git_istream *st) +static int close_istream_filtered(struct odb_read_stream *st) { free_stream_filter(st->u.filtered.filter); return close_istream(st->u.filtered.upstream); } -static ssize_t read_istream_filtered(struct git_istream *st, char *buf, +static ssize_t read_istream_filtered(struct odb_read_stream *st, char *buf, size_t sz) { struct filtered_istream *fs = &(st->u.filtered); @@ -150,10 +150,10 @@ static ssize_t read_istream_filtered(struct git_istream *st, char *buf, return filled; } -static struct git_istream *attach_stream_filter(struct git_istream *st, - struct stream_filter *filter) +static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, + struct stream_filter *filter) { - struct git_istream *ifs = xmalloc(sizeof(*ifs)); + struct odb_read_stream *ifs = xmalloc(sizeof(*ifs)); struct filtered_istream *fs = &(ifs->u.filtered); ifs->close = close_istream_filtered; @@ -173,7 +173,7 @@ static struct git_istream *attach_stream_filter(struct git_istream *st, * *****************************************************************/ -static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz) +static ssize_t read_istream_loose(struct odb_read_stream *st, char *buf, size_t sz) { size_t total_read = 0; @@ -218,14 +218,14 @@ static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz) return total_read; } -static int close_istream_loose(struct git_istream *st) +static int close_istream_loose(struct odb_read_stream *st) { close_deflated_stream(st); munmap(st->u.loose.mapped, st->u.loose.mapsize); return 0; } -static int open_istream_loose(struct git_istream *st, struct repository *r, +static int open_istream_loose(struct odb_read_stream *st, struct 
repository *r, const struct object_id *oid, enum object_type *type) { @@ -277,7 +277,7 @@ static int open_istream_loose(struct git_istream *st, struct repository *r, * *****************************************************************/ -static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf, +static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf, size_t sz) { size_t total_read = 0; @@ -336,13 +336,13 @@ static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf, return total_read; } -static int close_istream_pack_non_delta(struct git_istream *st) +static int close_istream_pack_non_delta(struct odb_read_stream *st) { close_deflated_stream(st); return 0; } -static int open_istream_pack_non_delta(struct git_istream *st, +static int open_istream_pack_non_delta(struct odb_read_stream *st, struct repository *r UNUSED, const struct object_id *oid UNUSED, enum object_type *type UNUSED) @@ -380,13 +380,13 @@ static int open_istream_pack_non_delta(struct git_istream *st, * *****************************************************************/ -static int close_istream_incore(struct git_istream *st) +static int close_istream_incore(struct odb_read_stream *st) { free(st->u.incore.buf); return 0; } -static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz) +static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t sz) { size_t read_size = sz; size_t remainder = st->size - st->u.incore.read_ptr; @@ -400,7 +400,7 @@ static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz) return read_size; } -static int open_istream_incore(struct git_istream *st, struct repository *r, +static int open_istream_incore(struct odb_read_stream *st, struct repository *r, const struct object_id *oid, enum object_type *type) { struct object_info oi = OBJECT_INFO_INIT; @@ -420,7 +420,7 @@ static int open_istream_incore(struct git_istream *st, struct repository *r, * static 
helpers variables and functions for users of streaming interface *****************************************************************************/ -static int istream_source(struct git_istream *st, +static int istream_source(struct odb_read_stream *st, struct repository *r, const struct object_id *oid, enum object_type *type) @@ -458,25 +458,25 @@ static int istream_source(struct git_istream *st, * Users of streaming interface ****************************************************************/ -int close_istream(struct git_istream *st) +int close_istream(struct odb_read_stream *st) { int r = st->close(st); free(st); return r; } -ssize_t read_istream(struct git_istream *st, void *buf, size_t sz) +ssize_t read_istream(struct odb_read_stream *st, void *buf, size_t sz) { return st->read(st, buf, sz); } -struct git_istream *open_istream(struct repository *r, - const struct object_id *oid, - enum object_type *type, - unsigned long *size, - struct stream_filter *filter) +struct odb_read_stream *open_istream(struct repository *r, + const struct object_id *oid, + enum object_type *type, + unsigned long *size, + struct stream_filter *filter) { - struct git_istream *st = xmalloc(sizeof(*st)); + struct odb_read_stream *st = xmalloc(sizeof(*st)); const struct object_id *real = lookup_replace_object(r, oid); int ret = istream_source(st, r, real, type); @@ -493,7 +493,7 @@ struct git_istream *open_istream(struct repository *r, } if (filter) { /* Add "&& !is_null_stream_filter(filter)" for performance */ - struct git_istream *nst = attach_stream_filter(st, filter); + struct odb_read_stream *nst = attach_stream_filter(st, filter); if (!nst) { close_istream(st); return NULL; @@ -508,7 +508,7 @@ struct git_istream *open_istream(struct repository *r, int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter, int can_seek) { - struct git_istream *st; + struct odb_read_stream *st; enum object_type type; unsigned long sz; ssize_t kept = 0; diff --git 
a/streaming.h b/streaming.h index bd27f59e5764ae..f5ff5d7ac9a573 100644 --- a/streaming.h +++ b/streaming.h @@ -7,14 +7,14 @@ #include "object.h" /* opaque */ -struct git_istream; +struct odb_read_stream; struct stream_filter; -struct git_istream *open_istream(struct repository *, const struct object_id *, - enum object_type *, unsigned long *, - struct stream_filter *); -int close_istream(struct git_istream *); -ssize_t read_istream(struct git_istream *, void *, size_t); +struct odb_read_stream *open_istream(struct repository *, const struct object_id *, + enum object_type *, unsigned long *, + struct stream_filter *); +int close_istream(struct odb_read_stream *); +ssize_t read_istream(struct odb_read_stream *, void *, size_t); int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int can_seek); From 70c8b5f5453b9f128a72fad4398acfb9e7d869c4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:27 +0100 Subject: [PATCH 03/26] streaming: drop the `open()` callback function When creating a read stream we first populate the structure with the open callback function and then subsequently call the function. This layout is somewhat weird though: - The structure needs to be allocated and partially populated with the open function before we can properly initialize it. - We only ever call the `open()` callback function right after having populated the `struct odb_read_stream::open` member, and it's never called thereafter again. So it is somewhat pointless to store the callback in the first place. Especially the first point creates a problem for us. In subsequent commits we'll want to fully move construction of the read source into the respective object sources. E.g., the loose object source will be the one that is responsible for creating the structure. 
But this creates a problem: if we first need to create the structure so that we can call the source-specific callback, we cannot fully handle creation of the structure in the source itself. We could of course work around that and have the loose object source create the structure and populate only its `open()` callback. But this doesn't really buy us anything due to the second bullet point above. Instead, drop the callback entirely and refactor `istream_source()` so that we open the streams immediately. This unblocks a subsequent step, where we'll also start to allocate the structure in the source-specific logic. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/streaming.c b/streaming.c index 1fb4b7c1c002e8..1bb3f393b87519 100644 --- a/streaming.c +++ b/streaming.c @@ -14,10 +14,6 @@ #include "replace-object.h" #include "packfile.h" -typedef int (*open_istream_fn)(struct odb_read_stream *, - struct repository *, - const struct object_id *, - enum object_type *); typedef int (*close_istream_fn)(struct odb_read_stream *); typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t); @@ -34,7 +30,6 @@ struct filtered_istream { }; struct odb_read_stream { - open_istream_fn open; close_istream_fn close; read_istream_fn read; @@ -437,21 +432,25 @@ static int istream_source(struct odb_read_stream *st, switch (oi.whence) { case OI_LOOSE: - st->open = open_istream_loose; + if (open_istream_loose(st, r, oid, type) < 0) + break; return 0; case OI_PACKED: - if (!oi.u.packed.is_delta && - repo_settings_get_big_file_threshold(the_repository) < size) { - st->u.in_pack.pack = oi.u.packed.pack; - st->u.in_pack.pos = oi.u.packed.offset; - st->open = open_istream_pack_non_delta; - return 0; - } - /* fallthru */ - default: - st->open = open_istream_incore; + if (oi.u.packed.is_delta || + 
repo_settings_get_big_file_threshold(the_repository) >= size) + break; + + st->u.in_pack.pack = oi.u.packed.pack; + st->u.in_pack.pos = oi.u.packed.offset; + if (open_istream_pack_non_delta(st, r, oid, type) < 0) + break; + return 0; + default: + break; } + + return open_istream_incore(st, r, oid, type); } /**************************************************************** @@ -485,12 +484,6 @@ struct odb_read_stream *open_istream(struct repository *r, return NULL; } - if (st->open(st, r, real, type)) { - if (open_istream_incore(st, r, real, type)) { - free(st); - return NULL; - } - } if (filter) { /* Add "&& !is_null_stream_filter(filter)" for performance */ struct odb_read_stream *nst = attach_stream_filter(st, filter); From 3f64deabdf0a2a9664acec61698affc449e07496 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:28 +0100 Subject: [PATCH 04/26] streaming: propagate final object type via the stream When opening the read stream for a specific object the caller is also expected to pass in a pointer to the object type. This type is passed down via multiple levels and will eventually be populated with the type of the looked-up object. The way we propagate down the pointer though is somewhat non-obvious. While `istream_source()` still expects the pointer and looks it up via `odb_read_object_info_extended()`, we also pass it down even further into the format-specific callbacks that perform another lookup. This is quite confusing overall. Refactor the code so that the responsibility to populate the object type rests solely with the format-specific callbacks. This will allow us to drop the call to `odb_read_object_info_extended()` in `istream_source()` entirely in a subsequent patch. Furthermore, instead of propagating the type via an in-pointer, we now propagate the type via a new field in the object stream. It already has a `size` field, so it's only natural to have a second field that contains the object type. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/streaming.c b/streaming.c index 1bb3f393b87519..665624ddc0494e 100644 --- a/streaming.c +++ b/streaming.c @@ -33,6 +33,7 @@ struct odb_read_stream { close_istream_fn close; read_istream_fn read; + enum object_type type; unsigned long size; /* inflated size of full object */ git_zstream z; enum { z_unused, z_used, z_done, z_error } z_state; @@ -159,6 +160,7 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, fs->o_end = fs->o_ptr = 0; fs->input_finished = 0; ifs->size = -1; /* unknown */ + ifs->type = st->type; return ifs; } @@ -221,14 +223,13 @@ static int close_istream_loose(struct odb_read_stream *st) } static int open_istream_loose(struct odb_read_stream *st, struct repository *r, - const struct object_id *oid, - enum object_type *type) + const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; struct odb_source *source; oi.sizep = &st->size; - oi.typep = type; + oi.typep = &st->type; odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { @@ -249,7 +250,7 @@ static int open_istream_loose(struct odb_read_stream *st, struct repository *r, case ULHR_TOO_LONG: goto error; } - if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || *type < 0) + if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || st->type < 0) goto error; st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; @@ -339,8 +340,7 @@ static int close_istream_pack_non_delta(struct odb_read_stream *st) static int open_istream_pack_non_delta(struct odb_read_stream *st, struct repository *r UNUSED, - const struct object_id *oid UNUSED, - enum object_type *type UNUSED) + const struct object_id *oid UNUSED) { struct pack_window *window; enum object_type in_pack_type; @@ -361,6 +361,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream 
*st, case OBJ_TAG: break; } + st->type = in_pack_type; st->z_state = z_unused; st->close = close_istream_pack_non_delta; st->read = read_istream_pack_non_delta; @@ -396,7 +397,7 @@ static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t } static int open_istream_incore(struct odb_read_stream *st, struct repository *r, - const struct object_id *oid, enum object_type *type) + const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; @@ -404,7 +405,7 @@ static int open_istream_incore(struct odb_read_stream *st, struct repository *r, st->close = close_istream_incore; st->read = read_istream_incore; - oi.typep = type; + oi.typep = &st->type; oi.sizep = &st->size; oi.contentp = (void **)&st->u.incore.buf; return odb_read_object_info_extended(r->objects, oid, &oi, @@ -417,14 +418,12 @@ static int open_istream_incore(struct odb_read_stream *st, struct repository *r, static int istream_source(struct odb_read_stream *st, struct repository *r, - const struct object_id *oid, - enum object_type *type) + const struct object_id *oid) { unsigned long size; int status; struct object_info oi = OBJECT_INFO_INIT; - oi.typep = type; oi.sizep = &size; status = odb_read_object_info_extended(r->objects, oid, &oi, 0); if (status < 0) @@ -432,7 +431,7 @@ static int istream_source(struct odb_read_stream *st, switch (oi.whence) { case OI_LOOSE: - if (open_istream_loose(st, r, oid, type) < 0) + if (open_istream_loose(st, r, oid) < 0) break; return 0; case OI_PACKED: @@ -442,7 +441,7 @@ static int istream_source(struct odb_read_stream *st, st->u.in_pack.pack = oi.u.packed.pack; st->u.in_pack.pos = oi.u.packed.offset; - if (open_istream_pack_non_delta(st, r, oid, type) < 0) + if (open_istream_pack_non_delta(st, r, oid) < 0) break; return 0; @@ -450,7 +449,7 @@ static int istream_source(struct odb_read_stream *st, break; } - return open_istream_incore(st, r, oid, type); + return open_istream_incore(st, r, oid); } 
/**************************************************************** @@ -477,7 +476,7 @@ struct odb_read_stream *open_istream(struct repository *r, { struct odb_read_stream *st = xmalloc(sizeof(*st)); const struct object_id *real = lookup_replace_object(r, oid); - int ret = istream_source(st, r, real, type); + int ret = istream_source(st, r, real); if (ret) { free(st); @@ -495,6 +494,7 @@ struct odb_read_stream *open_istream(struct repository *r, } *size = st->size; + *type = st->type; return st; } From 3c7722dd4d376e0fce4c48f723fe8b69af785998 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:29 +0100 Subject: [PATCH 05/26] streaming: explicitly pass packfile info when streaming a packed object When streaming a packed object we first populate the stream with information about the pack that contains the object before calling `open_istream_pack_non_delta()`. This is done because we have already looked up both the pack and the object's offset, so it would be a waste of time to look up this information again. But the way this is done makes for a somewhat awkward calling interface, as the caller now needs to be aware of how exactly the function itself behaves. Refactor the code so that we instead explicitly pass the packfile info into `open_istream_pack_non_delta()`. This makes the calling convention explicit, but more importantly this allows us to refactor the function so that it becomes its responsibility to allocate the stream itself in a subsequent patch. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/streaming.c b/streaming.c index 665624ddc0494e..bf277daadd48c2 100644 --- a/streaming.c +++ b/streaming.c @@ -340,16 +340,18 @@ static int close_istream_pack_non_delta(struct odb_read_stream *st) static int open_istream_pack_non_delta(struct odb_read_stream *st, struct repository *r UNUSED, - const struct object_id *oid UNUSED) + const struct object_id *oid UNUSED, + struct packed_git *pack, + off_t offset) { struct pack_window *window; enum object_type in_pack_type; window = NULL; - in_pack_type = unpack_object_header(st->u.in_pack.pack, + in_pack_type = unpack_object_header(pack, &window, - &st->u.in_pack.pos, + &offset, &st->size); unuse_pack(&window); switch (in_pack_type) { @@ -365,6 +367,8 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st, st->z_state = z_unused; st->close = close_istream_pack_non_delta; st->read = read_istream_pack_non_delta; + st->u.in_pack.pack = pack; + st->u.in_pack.pos = offset; return 0; } @@ -436,14 +440,10 @@ static int istream_source(struct odb_read_stream *st, return 0; case OI_PACKED: if (oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(the_repository) >= size) + repo_settings_get_big_file_threshold(the_repository) >= size || + open_istream_pack_non_delta(st, r, oid, oi.u.packed.pack, + oi.u.packed.offset) < 0) break; - - st->u.in_pack.pack = oi.u.packed.pack; - st->u.in_pack.pos = oi.u.packed.offset; - if (open_istream_pack_non_delta(st, r, oid) < 0) - break; - return 0; default: break; From 595296e124f5e8a67c4669fcaeb1b28e71c2d751 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:30 +0100 Subject: [PATCH 06/26] streaming: allocate stream inside the backend-specific logic When creating a new stream we first allocate it and then call into backend-specific logic to populate the stream. 
This design requires that the stream itself contains a `union` with backend-specific members that then ultimately get populated by the backend-specific logic. This works, but it's awkward in the context of pluggable object databases. Each backend will need its own member in that union, and because the structure itself is completely opaque (it's only defined in "streaming.c"), all backend-specific logic must live in "streaming.c" as well. Ideally though, the infrastructure would be reversed: we have a generic `struct odb_read_stream` and some helper functions in "streaming.c", whereas the backend-specific logic sits in the backend's subsystem itself. This can be realized by using a design that is similar to how we handle reference databases: rather than having a union of members, we have backend-specific structures, each with a `struct odb_read_stream base` as its first member. The backends would thus hand out the pointer to the base, but internally they know to cast back to the backend-specific type. This means though that we need to allocate different structures depending on the backend. To prepare for this, move allocation of the structure into the backend-specific functions that open a new stream. Subsequent commits will then create those new backend-specific structs. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 103 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 65 insertions(+), 38 deletions(-) diff --git a/streaming.c b/streaming.c index bf277daadd48c2..a2c2d887387c57 100644 --- a/streaming.c +++ b/streaming.c @@ -222,27 +222,34 @@ static int close_istream_loose(struct odb_read_stream *st) return 0; } -static int open_istream_loose(struct odb_read_stream *st, struct repository *r, +static int open_istream_loose(struct odb_read_stream **out, + struct repository *r, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; + struct odb_read_stream *st; struct odb_source *source; - - oi.sizep = &st->size; - oi.typep = &st->type; + unsigned long mapsize; + void *mapped; odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { - st->u.loose.mapped = odb_source_loose_map_object(source, oid, - &st->u.loose.mapsize); - if (st->u.loose.mapped) + mapped = odb_source_loose_map_object(source, oid, &mapsize); + if (mapped) break; } - if (!st->u.loose.mapped) + if (!mapped) return -1; - switch (unpack_loose_header(&st->z, st->u.loose.mapped, - st->u.loose.mapsize, st->u.loose.hdr, + /* + * Note: we must allocate this structure early even though we may still + * fail. This is because we need to initialize the zlib stream, and it + * is not possible to copy the stream around after the fact because it + * has self-referencing pointers. 
+ */ + CALLOC_ARRAY(st, 1); + + switch (unpack_loose_header(&st->z, mapped, mapsize, st->u.loose.hdr, sizeof(st->u.loose.hdr))) { case ULHR_OK: break; @@ -250,19 +257,28 @@ static int open_istream_loose(struct odb_read_stream *st, struct repository *r, case ULHR_TOO_LONG: goto error; } + + oi.sizep = &st->size; + oi.typep = &st->type; + if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || st->type < 0) goto error; + st->u.loose.mapped = mapped; + st->u.loose.mapsize = mapsize; st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; st->u.loose.hdr_avail = st->z.total_out; st->z_state = z_used; st->close = close_istream_loose; st->read = read_istream_loose; + *out = st; + return 0; error: git_inflate_end(&st->z); munmap(st->u.loose.mapped, st->u.loose.mapsize); + free(st); return -1; } @@ -338,12 +354,16 @@ static int close_istream_pack_non_delta(struct odb_read_stream *st) return 0; } -static int open_istream_pack_non_delta(struct odb_read_stream *st, +static int open_istream_pack_non_delta(struct odb_read_stream **out, struct repository *r UNUSED, const struct object_id *oid UNUSED, struct packed_git *pack, off_t offset) { + struct odb_read_stream stream = { + .close = close_istream_pack_non_delta, + .read = read_istream_pack_non_delta, + }; struct pack_window *window; enum object_type in_pack_type; @@ -352,7 +372,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st, in_pack_type = unpack_object_header(pack, &window, &offset, - &st->size); + &stream.size); unuse_pack(&window); switch (in_pack_type) { default: @@ -363,12 +383,13 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st, case OBJ_TAG: break; } - st->type = in_pack_type; - st->z_state = z_unused; - st->close = close_istream_pack_non_delta; - st->read = read_istream_pack_non_delta; - st->u.in_pack.pack = pack; - st->u.in_pack.pos = offset; + stream.type = in_pack_type; + stream.z_state = z_unused; + stream.u.in_pack.pack = pack; + stream.u.in_pack.pos = offset; + + 
CALLOC_ARRAY(*out, 1); + **out = stream; return 0; } @@ -400,27 +421,35 @@ static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t return read_size; } -static int open_istream_incore(struct odb_read_stream *st, struct repository *r, +static int open_istream_incore(struct odb_read_stream **out, + struct repository *r, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; - - st->u.incore.read_ptr = 0; - st->close = close_istream_incore; - st->read = read_istream_incore; - - oi.typep = &st->type; - oi.sizep = &st->size; - oi.contentp = (void **)&st->u.incore.buf; - return odb_read_object_info_extended(r->objects, oid, &oi, - OBJECT_INFO_DIE_IF_CORRUPT); + struct odb_read_stream stream = { + .close = close_istream_incore, + .read = read_istream_incore, + }; + int ret; + + oi.typep = &stream.type; + oi.sizep = &stream.size; + oi.contentp = (void **)&stream.u.incore.buf; + ret = odb_read_object_info_extended(r->objects, oid, &oi, + OBJECT_INFO_DIE_IF_CORRUPT); + if (ret) + return ret; + + CALLOC_ARRAY(*out, 1); + **out = stream; + return 0; } /***************************************************************************** * static helpers variables and functions for users of streaming interface *****************************************************************************/ -static int istream_source(struct odb_read_stream *st, +static int istream_source(struct odb_read_stream **out, struct repository *r, const struct object_id *oid) { @@ -435,13 +464,13 @@ static int istream_source(struct odb_read_stream *st, switch (oi.whence) { case OI_LOOSE: - if (open_istream_loose(st, r, oid) < 0) + if (open_istream_loose(out, r, oid) < 0) break; return 0; case OI_PACKED: if (oi.u.packed.is_delta || repo_settings_get_big_file_threshold(the_repository) >= size || - open_istream_pack_non_delta(st, r, oid, oi.u.packed.pack, + open_istream_pack_non_delta(out, r, oid, oi.u.packed.pack, oi.u.packed.offset) < 0) break; return 0; @@ -449,7 +478,7 @@ 
static int istream_source(struct odb_read_stream *st, break; } - return open_istream_incore(st, r, oid); + return open_istream_incore(out, r, oid); } /**************************************************************** @@ -474,14 +503,12 @@ struct odb_read_stream *open_istream(struct repository *r, unsigned long *size, struct stream_filter *filter) { - struct odb_read_stream *st = xmalloc(sizeof(*st)); + struct odb_read_stream *st; const struct object_id *real = lookup_replace_object(r, oid); - int ret = istream_source(st, r, real); + int ret = istream_source(&st, r, real); - if (ret) { - free(st); + if (ret) return NULL; - } if (filter) { /* Add "&& !is_null_stream_filter(filter)" for performance */ From e030d0aeb5ebf79cdc4910e79d59e33998de78cd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:31 +0100 Subject: [PATCH 07/26] streaming: create structure for in-core object streams As explained in a preceding commit, we want to get rid of the union of stream-type specific data in `struct odb_read_stream`. Create a new structure for in-core object streams to move towards this design. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/streaming.c b/streaming.c index a2c2d887387c57..35307d72295988 100644 --- a/streaming.c +++ b/streaming.c @@ -39,11 +39,6 @@ struct odb_read_stream { enum { z_unused, z_used, z_done, z_error } z_state; union { - struct { - char *buf; /* from odb_read_object_info_extended() */ - unsigned long read_ptr; - } incore; - struct { void *mapped; unsigned long mapsize; @@ -401,22 +396,30 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, * *****************************************************************/ -static int close_istream_incore(struct odb_read_stream *st) +struct odb_incore_read_stream { + struct odb_read_stream base; + char *buf; /* from odb_read_object_info_extended() */ + unsigned long read_ptr; +}; + +static int close_istream_incore(struct odb_read_stream *_st) { - free(st->u.incore.buf); + struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st; + free(st->buf); return 0; } -static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t sz) +static ssize_t read_istream_incore(struct odb_read_stream *_st, char *buf, size_t sz) { + struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st; size_t read_size = sz; - size_t remainder = st->size - st->u.incore.read_ptr; + size_t remainder = st->base.size - st->read_ptr; if (remainder <= read_size) read_size = remainder; if (read_size) { - memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, read_size); - st->u.incore.read_ptr += read_size; + memcpy(buf, st->buf + st->read_ptr, read_size); + st->read_ptr += read_size; } return read_size; } @@ -426,22 +429,25 @@ static int open_istream_incore(struct odb_read_stream **out, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; - struct odb_read_stream stream = { - .close = 
close_istream_incore, - .read = read_istream_incore, + struct odb_incore_read_stream stream = { + .base.close = close_istream_incore, + .base.read = read_istream_incore, }; + struct odb_incore_read_stream *st; int ret; - oi.typep = &stream.type; - oi.sizep = &stream.size; - oi.contentp = (void **)&stream.u.incore.buf; + oi.typep = &stream.base.type; + oi.sizep = &stream.base.size; + oi.contentp = (void **)&stream.buf; ret = odb_read_object_info_extended(r->objects, oid, &oi, OBJECT_INFO_DIE_IF_CORRUPT); if (ret) return ret; - CALLOC_ARRAY(*out, 1); - **out = stream; + CALLOC_ARRAY(st, 1); + *st = stream; + *out = &st->base; + return 0; } From b7774c0f0de43379c40984b4ede265a512c1a4f0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:32 +0100 Subject: [PATCH 08/26] streaming: create structure for loose object streams As explained in a preceding commit, we want to get rid of the union of stream-type specific data in `struct odb_read_stream`. Create a new structure for loose object streams to move towards this design. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 85 +++++++++++++++++++++++++++-------------------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/streaming.c b/streaming.c index 35307d72295988..ac7b3026f5a604 100644 --- a/streaming.c +++ b/streaming.c @@ -39,14 +39,6 @@ struct odb_read_stream { enum { z_unused, z_used, z_done, z_error } z_state; union { - struct { - void *mapped; - unsigned long mapsize; - char hdr[32]; - int hdr_avail; - int hdr_used; - } loose; - struct { struct packed_git *pack; off_t pos; @@ -165,11 +157,21 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, * *****************************************************************/ -static ssize_t read_istream_loose(struct odb_read_stream *st, char *buf, size_t sz) +struct odb_loose_read_stream { + struct odb_read_stream base; + void *mapped; + unsigned long mapsize; + char hdr[32]; + int hdr_avail; + int hdr_used; +}; + +static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) { + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; size_t total_read = 0; - switch (st->z_state) { + switch (st->base.z_state) { case z_done: return 0; case z_error: @@ -178,42 +180,43 @@ static ssize_t read_istream_loose(struct odb_read_stream *st, char *buf, size_t break; } - if (st->u.loose.hdr_used < st->u.loose.hdr_avail) { - size_t to_copy = st->u.loose.hdr_avail - st->u.loose.hdr_used; + if (st->hdr_used < st->hdr_avail) { + size_t to_copy = st->hdr_avail - st->hdr_used; if (sz < to_copy) to_copy = sz; - memcpy(buf, st->u.loose.hdr + st->u.loose.hdr_used, to_copy); - st->u.loose.hdr_used += to_copy; + memcpy(buf, st->hdr + st->hdr_used, to_copy); + st->hdr_used += to_copy; total_read += to_copy; } while (total_read < sz) { int status; - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - status = git_inflate(&st->z, Z_FINISH); + 
st->base.z.next_out = (unsigned char *)buf + total_read; + st->base.z.avail_out = sz - total_read; + status = git_inflate(&st->base.z, Z_FINISH); - total_read = st->z.next_out - (unsigned char *)buf; + total_read = st->base.z.next_out - (unsigned char *)buf; if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = z_done; + git_inflate_end(&st->base.z); + st->base.z_state = z_done; break; } if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->z); - st->z_state = z_error; + git_inflate_end(&st->base.z); + st->base.z_state = z_error; return -1; } } return total_read; } -static int close_istream_loose(struct odb_read_stream *st) +static int close_istream_loose(struct odb_read_stream *_st) { - close_deflated_stream(st); - munmap(st->u.loose.mapped, st->u.loose.mapsize); + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + close_deflated_stream(&st->base); + munmap(st->mapped, st->mapsize); return 0; } @@ -222,7 +225,7 @@ static int open_istream_loose(struct odb_read_stream **out, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; - struct odb_read_stream *st; + struct odb_loose_read_stream *st; struct odb_source *source; unsigned long mapsize; void *mapped; @@ -244,8 +247,8 @@ static int open_istream_loose(struct odb_read_stream **out, */ CALLOC_ARRAY(st, 1); - switch (unpack_loose_header(&st->z, mapped, mapsize, st->u.loose.hdr, - sizeof(st->u.loose.hdr))) { + switch (unpack_loose_header(&st->base.z, mapped, mapsize, st->hdr, + sizeof(st->hdr))) { case ULHR_OK: break; case ULHR_BAD: @@ -253,26 +256,26 @@ static int open_istream_loose(struct odb_read_stream **out, goto error; } - oi.sizep = &st->size; - oi.typep = &st->type; + oi.sizep = &st->base.size; + oi.typep = &st->base.type; - if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || st->type < 0) + if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) goto error; - st->u.loose.mapped = mapped; - 
st->u.loose.mapsize = mapsize; - st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1; - st->u.loose.hdr_avail = st->z.total_out; - st->z_state = z_used; - st->close = close_istream_loose; - st->read = read_istream_loose; + st->mapped = mapped; + st->mapsize = mapsize; + st->hdr_used = strlen(st->hdr) + 1; + st->hdr_avail = st->base.z.total_out; + st->base.z_state = z_used; + st->base.close = close_istream_loose; + st->base.read = read_istream_loose; - *out = st; + *out = &st->base; return 0; error: - git_inflate_end(&st->z); - munmap(st->u.loose.mapped, st->u.loose.mapsize); + git_inflate_end(&st->base.z); + munmap(st->mapped, st->mapsize); free(st); return -1; } From 5f0d8d2e8d3f992f58af247b6d21509c3c7595ca Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:33 +0100 Subject: [PATCH 09/26] streaming: create structure for packed object streams As explained in a preceding commit, we want to get rid of the union of stream-type specific data in `struct odb_read_stream`. Create a new structure for packed object streams to move towards this design. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 75 ++++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/streaming.c b/streaming.c index ac7b3026f5a604..788f04e83ef6c8 100644 --- a/streaming.c +++ b/streaming.c @@ -39,11 +39,6 @@ struct odb_read_stream { enum { z_unused, z_used, z_done, z_error } z_state; union { - struct { - struct packed_git *pack; - off_t pos; - } in_pack; - struct filtered_istream filtered; } u; }; @@ -287,16 +282,23 @@ static int open_istream_loose(struct odb_read_stream **out, * *****************************************************************/ -static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf, +struct odb_packed_read_stream { + struct odb_read_stream base; + struct packed_git *pack; + off_t pos; +}; + +static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf, size_t sz) { + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; size_t total_read = 0; - switch (st->z_state) { + switch (st->base.z_state) { case z_unused: - memset(&st->z, 0, sizeof(st->z)); - git_inflate_init(&st->z); - st->z_state = z_used; + memset(&st->base.z, 0, sizeof(st->base.z)); + git_inflate_init(&st->base.z); + st->base.z_state = z_used; break; case z_done: return 0; @@ -311,21 +313,21 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf struct pack_window *window = NULL; unsigned char *mapped; - mapped = use_pack(st->u.in_pack.pack, &window, - st->u.in_pack.pos, &st->z.avail_in); + mapped = use_pack(st->pack, &window, + st->pos, &st->base.z.avail_in); - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - st->z.next_in = mapped; - status = git_inflate(&st->z, Z_FINISH); + st->base.z.next_out = (unsigned char *)buf + total_read; + st->base.z.avail_out = sz - total_read; + st->base.z.next_in = mapped; + status = 
git_inflate(&st->base.z, Z_FINISH); - st->u.in_pack.pos += st->z.next_in - mapped; - total_read = st->z.next_out - (unsigned char *)buf; + st->pos += st->base.z.next_in - mapped; + total_read = st->base.z.next_out - (unsigned char *)buf; unuse_pack(&window); if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = z_done; + git_inflate_end(&st->base.z); + st->base.z_state = z_done; break; } @@ -338,17 +340,18 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf * or truncated), then use_pack() catches that and will die(). */ if (status != Z_OK && status != Z_BUF_ERROR) { - git_inflate_end(&st->z); - st->z_state = z_error; + git_inflate_end(&st->base.z); + st->base.z_state = z_error; return -1; } } return total_read; } -static int close_istream_pack_non_delta(struct odb_read_stream *st) +static int close_istream_pack_non_delta(struct odb_read_stream *_st) { - close_deflated_stream(st); + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + close_deflated_stream(&st->base); return 0; } @@ -358,19 +361,17 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, struct packed_git *pack, off_t offset) { - struct odb_read_stream stream = { - .close = close_istream_pack_non_delta, - .read = read_istream_pack_non_delta, - }; + struct odb_packed_read_stream *stream; struct pack_window *window; enum object_type in_pack_type; + size_t size; window = NULL; in_pack_type = unpack_object_header(pack, &window, &offset, - &stream.size); + &size); unuse_pack(&window); switch (in_pack_type) { default: @@ -381,13 +382,17 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, case OBJ_TAG: break; } - stream.type = in_pack_type; - stream.z_state = z_unused; - stream.u.in_pack.pack = pack; - stream.u.in_pack.pos = offset; - CALLOC_ARRAY(*out, 1); - **out = stream; + CALLOC_ARRAY(stream, 1); + stream->base.close = close_istream_pack_non_delta; + stream->base.read = 
read_istream_pack_non_delta; + stream->base.type = in_pack_type; + stream->base.size = size; + stream->base.z_state = z_unused; + stream->pack = pack; + stream->pos = offset; + + *out = &stream->base; return 0; } From 1154b2d2e511113e9b7d567788b72acb05713915 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:34 +0100 Subject: [PATCH 10/26] streaming: create structure for filtered object streams As explained in a preceding commit, we want to get rid of the union of stream-type specific data in `struct odb_read_stream`. Create a new structure for filtered object streams to move towards this design. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 54 +++++++++++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/streaming.c b/streaming.c index 788f04e83ef6c8..199cca5abb0eaa 100644 --- a/streaming.c +++ b/streaming.c @@ -19,16 +19,6 @@ typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t); #define FILTER_BUFFER (1024*16) -struct filtered_istream { - struct odb_read_stream *upstream; - struct stream_filter *filter; - char ibuf[FILTER_BUFFER]; - char obuf[FILTER_BUFFER]; - int i_end, i_ptr; - int o_end, o_ptr; - int input_finished; -}; - struct odb_read_stream { close_istream_fn close; read_istream_fn read; @@ -37,10 +27,6 @@ struct odb_read_stream { unsigned long size; /* inflated size of full object */ git_zstream z; enum { z_unused, z_used, z_done, z_error } z_state; - - union { - struct filtered_istream filtered; - } u; }; /***************************************************************** @@ -62,16 +48,28 @@ static void close_deflated_stream(struct odb_read_stream *st) * *****************************************************************/ -static int close_istream_filtered(struct odb_read_stream *st) +struct odb_filtered_read_stream { + struct odb_read_stream base; + struct odb_read_stream *upstream; + struct stream_filter *filter; + 
char ibuf[FILTER_BUFFER]; + char obuf[FILTER_BUFFER]; + int i_end, i_ptr; + int o_end, o_ptr; + int input_finished; +}; + +static int close_istream_filtered(struct odb_read_stream *_fs) { - free_stream_filter(st->u.filtered.filter); - return close_istream(st->u.filtered.upstream); + struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; + free_stream_filter(fs->filter); + return close_istream(fs->upstream); } -static ssize_t read_istream_filtered(struct odb_read_stream *st, char *buf, +static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf, size_t sz) { - struct filtered_istream *fs = &(st->u.filtered); + struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; size_t filled = 0; while (sz) { @@ -131,19 +129,17 @@ static ssize_t read_istream_filtered(struct odb_read_stream *st, char *buf, static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, struct stream_filter *filter) { - struct odb_read_stream *ifs = xmalloc(sizeof(*ifs)); - struct filtered_istream *fs = &(ifs->u.filtered); + struct odb_filtered_read_stream *fs; - ifs->close = close_istream_filtered; - ifs->read = read_istream_filtered; + CALLOC_ARRAY(fs, 1); + fs->base.close = close_istream_filtered; + fs->base.read = read_istream_filtered; fs->upstream = st; fs->filter = filter; - fs->i_end = fs->i_ptr = 0; - fs->o_end = fs->o_ptr = 0; - fs->input_finished = 0; - ifs->size = -1; /* unknown */ - ifs->type = st->type; - return ifs; + fs->base.size = -1; /* unknown */ + fs->base.type = st->type; + + return &fs->base; } /***************************************************************** From eb5abbb4e6a8c06f5c6275bbb541bf7d736171c5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:35 +0100 Subject: [PATCH 11/26] streaming: move zlib stream into backends While all backend-specific data is now contained in a backend-specific structure, we still share the zlib stream across the loose and packed 
objects. Refactor the code and move it into the specific structures so that we fully detangle the different backends from one another. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 104 ++++++++++++++++++++++++++-------------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/streaming.c b/streaming.c index 199cca5abb0eaa..46fddaf2cad0ba 100644 --- a/streaming.c +++ b/streaming.c @@ -25,23 +25,8 @@ struct odb_read_stream { enum object_type type; unsigned long size; /* inflated size of full object */ - git_zstream z; - enum { z_unused, z_used, z_done, z_error } z_state; }; -/***************************************************************** - * - * Common helpers - * - *****************************************************************/ - -static void close_deflated_stream(struct odb_read_stream *st) -{ - if (st->z_state == z_used) - git_inflate_end(&st->z); -} - - /***************************************************************** * * Filtered stream @@ -150,6 +135,12 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, struct odb_loose_read_stream { struct odb_read_stream base; + git_zstream z; + enum { + ODB_LOOSE_READ_STREAM_INUSE, + ODB_LOOSE_READ_STREAM_DONE, + ODB_LOOSE_READ_STREAM_ERROR, + } z_state; void *mapped; unsigned long mapsize; char hdr[32]; @@ -162,10 +153,10 @@ static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; size_t total_read = 0; - switch (st->base.z_state) { - case z_done: + switch (st->z_state) { + case ODB_LOOSE_READ_STREAM_DONE: return 0; - case z_error: + case ODB_LOOSE_READ_STREAM_ERROR: return -1; default: break; @@ -183,20 +174,20 @@ static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t while (total_read < sz) { int status; - st->base.z.next_out = (unsigned char *)buf + total_read; - st->base.z.avail_out = sz - total_read; - 
status = git_inflate(&st->base.z, Z_FINISH); + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + status = git_inflate(&st->z, Z_FINISH); - total_read = st->base.z.next_out - (unsigned char *)buf; + total_read = st->z.next_out - (unsigned char *)buf; if (status == Z_STREAM_END) { - git_inflate_end(&st->base.z); - st->base.z_state = z_done; + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_DONE; break; } if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->base.z); - st->base.z_state = z_error; + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_ERROR; return -1; } } @@ -206,7 +197,8 @@ static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t static int close_istream_loose(struct odb_read_stream *_st) { struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; - close_deflated_stream(&st->base); + if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) + git_inflate_end(&st->z); munmap(st->mapped, st->mapsize); return 0; } @@ -238,7 +230,7 @@ static int open_istream_loose(struct odb_read_stream **out, */ CALLOC_ARRAY(st, 1); - switch (unpack_loose_header(&st->base.z, mapped, mapsize, st->hdr, + switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, sizeof(st->hdr))) { case ULHR_OK: break; @@ -256,8 +248,8 @@ static int open_istream_loose(struct odb_read_stream **out, st->mapped = mapped; st->mapsize = mapsize; st->hdr_used = strlen(st->hdr) + 1; - st->hdr_avail = st->base.z.total_out; - st->base.z_state = z_used; + st->hdr_avail = st->z.total_out; + st->z_state = ODB_LOOSE_READ_STREAM_INUSE; st->base.close = close_istream_loose; st->base.read = read_istream_loose; @@ -265,7 +257,7 @@ static int open_istream_loose(struct odb_read_stream **out, return 0; error: - git_inflate_end(&st->base.z); + git_inflate_end(&st->z); munmap(st->mapped, st->mapsize); free(st); return -1; @@ -281,6 +273,13 @@ static int 
open_istream_loose(struct odb_read_stream **out, struct odb_packed_read_stream { struct odb_read_stream base; struct packed_git *pack; + git_zstream z; + enum { + ODB_PACKED_READ_STREAM_UNINITIALIZED, + ODB_PACKED_READ_STREAM_INUSE, + ODB_PACKED_READ_STREAM_DONE, + ODB_PACKED_READ_STREAM_ERROR, + } z_state; off_t pos; }; @@ -290,17 +289,17 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; size_t total_read = 0; - switch (st->base.z_state) { - case z_unused: - memset(&st->base.z, 0, sizeof(st->base.z)); - git_inflate_init(&st->base.z); - st->base.z_state = z_used; + switch (st->z_state) { + case ODB_PACKED_READ_STREAM_UNINITIALIZED: + memset(&st->z, 0, sizeof(st->z)); + git_inflate_init(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_INUSE; break; - case z_done: + case ODB_PACKED_READ_STREAM_DONE: return 0; - case z_error: + case ODB_PACKED_READ_STREAM_ERROR: return -1; - case z_used: + case ODB_PACKED_READ_STREAM_INUSE: break; } @@ -310,20 +309,20 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu unsigned char *mapped; mapped = use_pack(st->pack, &window, - st->pos, &st->base.z.avail_in); + st->pos, &st->z.avail_in); - st->base.z.next_out = (unsigned char *)buf + total_read; - st->base.z.avail_out = sz - total_read; - st->base.z.next_in = mapped; - status = git_inflate(&st->base.z, Z_FINISH); + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + st->z.next_in = mapped; + status = git_inflate(&st->z, Z_FINISH); - st->pos += st->base.z.next_in - mapped; - total_read = st->base.z.next_out - (unsigned char *)buf; + st->pos += st->z.next_in - mapped; + total_read = st->z.next_out - (unsigned char *)buf; unuse_pack(&window); if (status == Z_STREAM_END) { - git_inflate_end(&st->base.z); - st->base.z_state = z_done; + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_DONE; break; } 
@@ -336,8 +335,8 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu * or truncated), then use_pack() catches that and will die(). */ if (status != Z_OK && status != Z_BUF_ERROR) { - git_inflate_end(&st->base.z); - st->base.z_state = z_error; + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_ERROR; return -1; } } @@ -347,7 +346,8 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu static int close_istream_pack_non_delta(struct odb_read_stream *_st) { struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; - close_deflated_stream(&st->base); + if (st->z_state == ODB_PACKED_READ_STREAM_INUSE) + git_inflate_end(&st->z); return 0; } @@ -384,7 +384,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, stream->base.read = read_istream_pack_non_delta; stream->base.type = in_pack_type; stream->base.size = size; - stream->base.z_state = z_unused; + stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; stream->pack = pack; stream->pos = offset; From 385e18810f10ec0ce0a266d25da4e1878c8ce15a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:36 +0100 Subject: [PATCH 12/26] packfile: introduce function to read object info from a store Extract the logic to read object info for a packed object from `do_oid_object_info_extended()` into a standalone function that operates on the packfile store. This function will be used in a subsequent commit. Note that this change allows us to make `find_pack_entry()` an internal implementation detail. As a consequence though we have to move around `packfile_store_freshen_object()` so that it is defined after that function.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 29 +++------------------- packfile.c | 71 +++++++++++++++++++++++++++++++++++++++++------------- packfile.h | 12 ++++++++- 3 files changed, 69 insertions(+), 43 deletions(-) diff --git a/odb.c b/odb.c index 3ec21ef24e16bb..f4cbee4b042d83 100644 --- a/odb.c +++ b/odb.c @@ -666,8 +666,6 @@ static int do_oid_object_info_extended(struct object_database *odb, { static struct object_info blank_oi = OBJECT_INFO_INIT; const struct cached_object *co; - struct pack_entry e; - int rtype; const struct object_id *real = oid; int already_retried = 0; @@ -702,8 +700,8 @@ static int do_oid_object_info_extended(struct object_database *odb, while (1) { struct odb_source *source; - if (find_pack_entry(odb->repo, real, &e)) - break; + if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) + return 0; /* Most likely it's a loose object. */ for (source = odb->sources; source; source = source->next) @@ -713,8 +711,8 @@ static int do_oid_object_info_extended(struct object_database *odb, /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { odb_reprepare(odb->repo->objects); - if (find_pack_entry(odb->repo, real, &e)) - break; + if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags)) + return 0; } /* @@ -747,25 +745,6 @@ static int do_oid_object_info_extended(struct object_database *odb, } return -1; } - - if (oi == &blank_oi) - /* - * We know that the caller doesn't actually need the - * information below, so return early. 
- */ - return 0; - rtype = packed_object_info(odb->repo, e.p, e.offset, oi); - if (rtype < 0) { - mark_bad_packed_object(e.p, real); - return do_oid_object_info_extended(odb, real, oi, 0); - } else if (oi->whence == OI_PACKED) { - oi->u.packed.offset = e.offset; - oi->u.packed.pack = e.p; - oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || - rtype == OBJ_OFS_DELTA); - } - - return 0; } static int oid_object_info_convert(struct repository *r, diff --git a/packfile.c b/packfile.c index 40f733dd234900..b4bc40d895c8da 100644 --- a/packfile.c +++ b/packfile.c @@ -819,22 +819,6 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, return p; } -int packfile_store_freshen_object(struct packfile_store *store, - const struct object_id *oid) -{ - struct pack_entry e; - if (!find_pack_entry(store->odb->repo, oid, &e)) - return 0; - if (e.p->is_cruft) - return 0; - if (e.p->freshened) - return 1; - if (utime(e.p->pack_name, NULL)) - return 0; - e.p->freshened = 1; - return 1; -} - void (*report_garbage)(unsigned seen_bits, const char *path); static void report_helper(const struct string_list *list, @@ -2064,7 +2048,9 @@ static int fill_pack_entry(const struct object_id *oid, return 1; } -int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) +static int find_pack_entry(struct repository *r, + const struct object_id *oid, + struct pack_entry *e) { struct list_head *pos; @@ -2087,6 +2073,57 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa return 0; } +int packfile_store_freshen_object(struct packfile_store *store, + const struct object_id *oid) +{ + struct pack_entry e; + if (!find_pack_entry(store->odb->repo, oid, &e)) + return 0; + if (e.p->is_cruft) + return 0; + if (e.p->freshened) + return 1; + if (utime(e.p->pack_name, NULL)) + return 0; + e.p->freshened = 1; + return 1; +} + +int packfile_store_read_object_info(struct packfile_store *store, + const struct object_id *oid, + 
struct object_info *oi, + unsigned flags UNUSED) +{ + static struct object_info blank_oi = OBJECT_INFO_INIT; + struct pack_entry e; + int rtype; + + if (!find_pack_entry(store->odb->repo, oid, &e)) + return 1; + + /* + * We know that the caller doesn't actually need the + * information below, so return early. + */ + if (oi == &blank_oi) + return 0; + + rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi); + if (rtype < 0) { + mark_bad_packed_object(e.p, oid); + return -1; + } + + if (oi->whence == OI_PACKED) { + oi->u.packed.offset = e.offset; + oi->u.packed.pack = e.p; + oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || + rtype == OBJ_OFS_DELTA); + } + + return 0; +} + static void maybe_invalidate_kept_pack_cache(struct repository *r, unsigned flags) { diff --git a/packfile.h b/packfile.h index 58fcc88e20224b..0a98bddd811921 100644 --- a/packfile.h +++ b/packfile.h @@ -144,6 +144,17 @@ void packfile_store_add_pack(struct packfile_store *store, #define repo_for_each_pack(repo, p) \ for (p = packfile_store_get_packs(repo->objects->packfiles); p; p = p->next) +/* + * Try to read the object identified by its ID from the object store and + * populate the object info with its data. Returns 1 in case the object was + * not found, 0 if it was and read successfully, and a negative error code in + * case the object was corrupted. + */ +int packfile_store_read_object_info(struct packfile_store *store, + const struct object_id *oid, + struct object_info *oi, + unsigned flags); + /* * Get all packs managed by the given store, including packfiles that are * referenced by multi-pack indices. @@ -357,7 +368,6 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob * Iff a pack file in the given repository contains the object named by sha1, * return true and store its location to e. 
*/ -int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e); int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e); int has_object_pack(struct repository *r, const struct object_id *oid); From 4c89d31494bff4bde6079a0e0821f1437e37d07b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:37 +0100 Subject: [PATCH 13/26] streaming: rely on object sources to create object stream When creating an object stream we first look up the object info and, if it's present, we call into the respective backend that contains the object to create a new stream for it. This has the consequence that, for loose object sources, we basically iterate through the object sources twice: we first discover that the file exists as a loose object in the first place by iterating through all sources. And, once we have discovered it, we again walk through all sources to try and map the object. The same issue will eventually also surface once the packfile store becomes per-object-source. Furthermore, it feels rather pointless to first look up the object only to then try and read it. Refactor the logic to be centered around sources instead. Instead of first reading the object, we immediately ask the source to create the object stream for us. If the object exists we get a stream, otherwise we'll try the next source. Like this we only have to iterate through sources once. But even more importantly, this change also helps us to make the whole logic pluggable. The object read stream subsystem does not need to be aware of the different source backends anymore, but eventually it'll only have to call the source's callback function. Note that at the current point in time we aren't fully there yet: - The packfile store still sits on the object database level and is thus agnostic of the sources. - We still have to call into both the packfile store and the loose object source.
But both of these issues will soon be addressed. This refactoring results in a slight change to semantics: previously, it was `odb_read_object_info_extended()` that picked the source for us, and it would have favored packed (non-deltified) objects over loose objects. And while we still favor packed over loose objects for a single source with the new logic, we'll now favor a loose object from an earlier source over a packed object from a later source. Ultimately this shouldn't matter though: the stream doesn't indicate to the caller which source it is from and whether it was created from a packed or loose object, so such details are opaque to the caller. And other than that we should be able to assume that two objects with the same object ID should refer to the same content, so the streamed data would be the same, too. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 65 ++++++++++++++++++++--------------------------------- 1 file changed, 24 insertions(+), 41 deletions(-) diff --git a/streaming.c b/streaming.c index 46fddaf2cad0ba..f0f7d31956f59b 100644 --- a/streaming.c +++ b/streaming.c @@ -204,21 +204,15 @@ static int close_istream_loose(struct odb_read_stream *_st) } static int open_istream_loose(struct odb_read_stream **out, - struct repository *r, + struct odb_source *source, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; struct odb_loose_read_stream *st; - struct odb_source *source; unsigned long mapsize; void *mapped; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - mapped = odb_source_loose_map_object(source, oid, &mapsize); - if (mapped) - break; - } + mapped = odb_source_loose_map_object(source, oid, &mapsize); if (!mapped) return -1; @@ -352,21 +346,25 @@ static int close_istream_pack_non_delta(struct odb_read_stream *_st) } static int open_istream_pack_non_delta(struct odb_read_stream **out, - struct repository *r UNUSED, - const 
struct object_id *oid UNUSED, - struct packed_git *pack, - off_t offset) + struct object_database *odb, + const struct object_id *oid) { struct odb_packed_read_stream *stream; - struct pack_window *window; + struct pack_window *window = NULL; + struct object_info oi = OBJECT_INFO_INIT; enum object_type in_pack_type; - size_t size; + unsigned long size; - window = NULL; + oi.sizep = &size; + + if (packfile_store_read_object_info(odb->packfiles, oid, &oi, 0) || + oi.u.packed.is_delta || + repo_settings_get_big_file_threshold(the_repository) >= size) + return -1; - in_pack_type = unpack_object_header(pack, + in_pack_type = unpack_object_header(oi.u.packed.pack, &window, - &offset, + &oi.u.packed.offset, &size); unuse_pack(&window); switch (in_pack_type) { @@ -385,8 +383,8 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, stream->base.type = in_pack_type; stream->base.size = size; stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; - stream->pack = pack; - stream->pos = offset; + stream->pack = oi.u.packed.pack; + stream->pos = oi.u.packed.offset; *out = &stream->base; @@ -463,30 +461,15 @@ static int istream_source(struct odb_read_stream **out, struct repository *r, const struct object_id *oid) { - unsigned long size; - int status; - struct object_info oi = OBJECT_INFO_INIT; - - oi.sizep = &size; - status = odb_read_object_info_extended(r->objects, oid, &oi, 0); - if (status < 0) - return status; + struct odb_source *source; - switch (oi.whence) { - case OI_LOOSE: - if (open_istream_loose(out, r, oid) < 0) - break; - return 0; - case OI_PACKED: - if (oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(the_repository) >= size || - open_istream_pack_non_delta(out, r, oid, oi.u.packed.pack, - oi.u.packed.offset) < 0) - break; + if (!open_istream_pack_non_delta(out, r->objects, oid)) return 0; - default: - break; - } + + odb_prepare_alternates(r->objects); + for (source = r->objects->sources; source; source = source->next) + if 
(!open_istream_loose(out, source, oid)) + return 0; return open_istream_incore(out, r, oid); } From c26da3446e98ad4aa98ec9154c70c6fd35cb9ad6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:38 +0100 Subject: [PATCH 14/26] streaming: get rid of `the_repository` Subsequent commits will move the backend-specific logic of object streaming into their respective subsystems. These subsystems have gotten rid of `the_repository` already, but we still use it in two locations in the streaming subsystem. Prepare for the move by fixing those two cases. Converting the logic in `open_istream_pack_non_delta()` is trivial as we already got the object database as input. But for `stream_blob_to_fd()` we have to add a new parameter to make it accessible. So, as we already have to adjust all callers anyway, rename the function to `odb_stream_blob_to_fd()` to indicate it's part of the object subsystem. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 2 +- builtin/fsck.c | 3 ++- builtin/log.c | 4 ++-- entry.c | 2 +- parallel-checkout.c | 3 ++- streaming.c | 13 +++++++------ streaming.h | 18 +++++++++++++++++- 7 files changed, 32 insertions(+), 13 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 983ecec837b03b..120d626d66e140 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -95,7 +95,7 @@ static int filter_object(const char *path, unsigned mode, static int stream_blob(const struct object_id *oid) { - if (stream_blob_to_fd(1, oid, NULL, 0)) + if (odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0)) die("unable to stream %s to stdout", oid_to_hex(oid)); return 0; } diff --git a/builtin/fsck.c b/builtin/fsck.c index b1a650c6731d32..1a348d43c26020 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -340,7 +340,8 @@ static void check_unreachable_object(struct object *obj) } f = xfopen(filename, "w"); if (obj->type == OBJ_BLOB) { - if (stream_blob_to_fd(fileno(f), &obj->oid, NULL, 
1)) + if (odb_stream_blob_to_fd(the_repository->objects, fileno(f), + &obj->oid, NULL, 1)) die_errno(_("could not write '%s'"), filename); } else fprintf(f, "%s\n", describe_object(&obj->oid)); diff --git a/builtin/log.c b/builtin/log.c index c8319b8af38c8c..e7b83a6e00a708 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -584,7 +584,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c fflush(rev->diffopt.file); if (!rev->diffopt.flags.textconv_set_via_cmdline || !rev->diffopt.flags.allow_textconv) - return stream_blob_to_fd(1, oid, NULL, 0); + return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0); if (get_oid_with_context(the_repository, obj_name, GET_OID_RECORD_PATH, @@ -594,7 +594,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c !textconv_object(the_repository, obj_context.path, obj_context.mode, &oidc, 1, &buf, &size)) { object_context_release(&obj_context); - return stream_blob_to_fd(1, oid, NULL, 0); + return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0); } if (!buf) diff --git a/entry.c b/entry.c index cae02eb50398d7..38dfe670f79920 100644 --- a/entry.c +++ b/entry.c @@ -139,7 +139,7 @@ static int streaming_write_entry(const struct cache_entry *ce, char *path, if (fd < 0) return -1; - result |= stream_blob_to_fd(fd, &ce->oid, filter, 1); + result |= odb_stream_blob_to_fd(the_repository->objects, fd, &ce->oid, filter, 1); *fstat_done = fstat_checkout_output(fd, state, statbuf); result |= close(fd); diff --git a/parallel-checkout.c b/parallel-checkout.c index fba6aa65a6e852..1cb6701b926dcf 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -281,7 +281,8 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd, filter = get_stream_filter_ca(&pc_item->ca, &pc_item->ce->oid); if (filter) { - if (stream_blob_to_fd(fd, &pc_item->ce->oid, filter, 1)) { + if (odb_stream_blob_to_fd(the_repository->objects, fd, + &pc_item->ce->oid, 
filter, 1)) { /* On error, reset fd to try writing without streaming */ if (reset_fd(fd, path)) return -1; diff --git a/streaming.c b/streaming.c index f0f7d31956f59b..807a6e03a85b49 100644 --- a/streaming.c +++ b/streaming.c @@ -2,8 +2,6 @@ * Copyright (c) 2011, Google Inc. */ -#define USE_THE_REPOSITORY_VARIABLE - #include "git-compat-util.h" #include "convert.h" #include "environment.h" @@ -359,7 +357,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out, if (packfile_store_read_object_info(odb->packfiles, oid, &oi, 0) || oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(the_repository) >= size) + repo_settings_get_big_file_threshold(odb->repo) >= size) return -1; in_pack_type = unpack_object_header(oi.u.packed.pack, @@ -518,8 +516,11 @@ struct odb_read_stream *open_istream(struct repository *r, return st; } -int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter, - int can_seek) +int odb_stream_blob_to_fd(struct object_database *odb, + int fd, + const struct object_id *oid, + struct stream_filter *filter, + int can_seek) { struct odb_read_stream *st; enum object_type type; @@ -527,7 +528,7 @@ int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter ssize_t kept = 0; int result = -1; - st = open_istream(the_repository, oid, &type, &sz, filter); + st = open_istream(odb->repo, oid, &type, &sz, filter); if (!st) { if (filter) free_stream_filter(filter); diff --git a/streaming.h b/streaming.h index f5ff5d7ac9a573..148f6b30697ab7 100644 --- a/streaming.h +++ b/streaming.h @@ -6,6 +6,7 @@ #include "object.h" +struct object_database; /* opaque */ struct odb_read_stream; struct stream_filter; @@ -16,6 +17,21 @@ struct odb_read_stream *open_istream(struct repository *, const struct object_id int close_istream(struct odb_read_stream *); ssize_t read_istream(struct odb_read_stream *, void *, size_t); -int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int 
can_seek); +/* + * Look up the object by its ID and write the full contents to the file + * descriptor. The object must be a blob, or the function will fail. When + * provided, the filter is used to transform the blob contents. + * + * `can_seek` should be set to 1 in case the given file descriptor can be + * seek(3p)'d on. This is used to support files with holes in case a + * significant portion of the blob contains NUL bytes. + * + * Returns a negative error code on failure, 0 on success. + */ +int odb_stream_blob_to_fd(struct object_database *odb, + int fd, + const struct object_id *oid, + struct stream_filter *filter, + int can_seek); #endif /* STREAMING_H */ From ffc9a3448500caa50766876ef2169e0f26ad3b3c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:39 +0100 Subject: [PATCH 15/26] streaming: make the `odb_read_stream` definition public Subsequent commits will move the backend-specific logic of setting up an object read stream into the specific subsystems. As the backends are now the ones that are responsible for allocating the stream they'll need to have the stream definition available to them. Make the stream definition public to prepare for this. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- streaming.c | 11 ----------- streaming.h | 15 ++++++++++++++- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/streaming.c b/streaming.c index 807a6e03a85b49..0635b7c12e2233 100644 --- a/streaming.c +++ b/streaming.c @@ -12,19 +12,8 @@ #include "replace-object.h" #include "packfile.h" -typedef int (*close_istream_fn)(struct odb_read_stream *); -typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t); - #define FILTER_BUFFER (1024*16) -struct odb_read_stream { - close_istream_fn close; - read_istream_fn read; - - enum object_type type; - unsigned long size; /* inflated size of full object */ -}; - /***************************************************************** * * Filtered stream diff --git a/streaming.h b/streaming.h index 148f6b30697ab7..acfdef1598db52 100644 --- a/streaming.h +++ b/streaming.h @@ -7,10 +7,23 @@ #include "object.h" struct object_database; -/* opaque */ struct odb_read_stream; struct stream_filter; +typedef int (*odb_read_stream_close_fn)(struct odb_read_stream *); +typedef ssize_t (*odb_read_stream_read_fn)(struct odb_read_stream *, char *, size_t); + +/* + * A stream that can be used to read an object from the object database without + * loading all of it into memory. + */ +struct odb_read_stream { + odb_read_stream_close_fn close; + odb_read_stream_read_fn read; + enum object_type type; + unsigned long size; /* inflated size of full object */ +}; + struct odb_read_stream *open_istream(struct repository *, const struct object_id *, enum object_type *, unsigned long *, struct stream_filter *); From bc30a2f5dff6dd39966819ca3771ab5e9e072123 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:40 +0100 Subject: [PATCH 16/26] streaming: move logic to read loose objects streams into backend Move the logic to read loose object streams into the respective subsystem. 
This allows us to make a couple of function declarations private. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++--- object-file.h | 42 ++----------- streaming.c | 133 +--------------------------------------- 3 files changed, 164 insertions(+), 178 deletions(-) diff --git a/object-file.c b/object-file.c index b62b21a45289fc..8c67847feaceb6 100644 --- a/object-file.c +++ b/object-file.c @@ -234,9 +234,9 @@ static void *map_fd(int fd, const char *path, unsigned long *size) return map; } -void *odb_source_loose_map_object(struct odb_source *source, - const struct object_id *oid, - unsigned long *size) +static void *odb_source_loose_map_object(struct odb_source *source, + const struct object_id *oid, + unsigned long *size) { const char *p; int fd = open_loose_object(source->loose, oid, &p); @@ -246,11 +246,29 @@ void *odb_source_loose_map_object(struct odb_source *source, return map_fd(fd, p, size); } -enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, - unsigned char *map, - unsigned long mapsize, - void *buffer, - unsigned long bufsiz) +enum unpack_loose_header_result { + ULHR_OK, + ULHR_BAD, + ULHR_TOO_LONG, +}; + +/** + * unpack_loose_header() initializes the data stream needed to unpack + * a loose object header. + * + * Returns: + * + * - ULHR_OK on success + * - ULHR_BAD on error + * - ULHR_TOO_LONG if the header was too long + * + * It will only parse up to MAX_HEADER_LEN bytes. + */ +static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, + unsigned char *map, + unsigned long mapsize, + void *buffer, + unsigned long bufsiz) { int status; @@ -329,11 +347,18 @@ static void *unpack_loose_rest(git_zstream *stream, } /* + * parse_loose_header() parses the starting "<type> <size>\0" of an + * object. If it doesn't follow that format -1 is returned. To check + * the validity of the <type> populate the "typep" in the "struct + * object_info". 
It will be OBJ_BAD if the object type is unknown. The + * parsed <size> can be retrieved via "oi->sizep", and from there + * passed to unpack_loose_rest(). + * * We used to just use "sscanf()", but that's actually way * too permissive for what we want to check. So do an anal * object header parse by hand. */ -int parse_loose_header(const char *hdr, struct object_info *oi) +static int parse_loose_header(const char *hdr, struct object_info *oi) { const char *type_buf = hdr; size_t size; @@ -1976,3 +2001,127 @@ void odb_source_loose_free(struct odb_source_loose *loose) loose_object_map_clear(&loose->map); free(loose); } + +struct odb_loose_read_stream { + struct odb_read_stream base; + git_zstream z; + enum { + ODB_LOOSE_READ_STREAM_INUSE, + ODB_LOOSE_READ_STREAM_DONE, + ODB_LOOSE_READ_STREAM_ERROR, + } z_state; + void *mapped; + unsigned long mapsize; + char hdr[32]; + int hdr_avail; + int hdr_used; +}; + +static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) +{ + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + size_t total_read = 0; + + switch (st->z_state) { + case ODB_LOOSE_READ_STREAM_DONE: + return 0; + case ODB_LOOSE_READ_STREAM_ERROR: + return -1; + default: + break; + } + + if (st->hdr_used < st->hdr_avail) { + size_t to_copy = st->hdr_avail - st->hdr_used; + if (sz < to_copy) + to_copy = sz; + memcpy(buf, st->hdr + st->hdr_used, to_copy); + st->hdr_used += to_copy; + total_read += to_copy; + } + + while (total_read < sz) { + int status; + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + status = git_inflate(&st->z, Z_FINISH); + + total_read = st->z.next_out - (unsigned char *)buf; + + if (status == Z_STREAM_END) { + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_DONE; + break; + } + if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_ERROR; + return -1; + } + } + return 
total_read; +} + +static int close_istream_loose(struct odb_read_stream *_st) +{ + struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; + if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) + git_inflate_end(&st->z); + munmap(st->mapped, st->mapsize); + return 0; +} + +int odb_source_loose_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + struct object_info oi = OBJECT_INFO_INIT; + struct odb_loose_read_stream *st; + unsigned long mapsize; + void *mapped; + + mapped = odb_source_loose_map_object(source, oid, &mapsize); + if (!mapped) + return -1; + + /* + * Note: we must allocate this structure early even though we may still + * fail. This is because we need to initialize the zlib stream, and it + * is not possible to copy the stream around after the fact because it + * has self-referencing pointers. + */ + CALLOC_ARRAY(st, 1); + + switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, + sizeof(st->hdr))) { + case ULHR_OK: + break; + case ULHR_BAD: + case ULHR_TOO_LONG: + goto error; + } + + oi.sizep = &st->base.size; + oi.typep = &st->base.type; + + if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) + goto error; + + st->mapped = mapped; + st->mapsize = mapsize; + st->hdr_used = strlen(st->hdr) + 1; + st->hdr_avail = st->z.total_out; + st->z_state = ODB_LOOSE_READ_STREAM_INUSE; + st->base.close = close_istream_loose; + st->base.read = read_istream_loose; + + *out = &st->base; + + return 0; +error: + git_inflate_end(&st->z); + munmap(st->mapped, st->mapsize); + free(st); + return -1; +} diff --git a/object-file.h b/object-file.h index eeffa67bbda631..1229d5f675b44a 100644 --- a/object-file.h +++ b/object-file.h @@ -16,6 +16,8 @@ enum { int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags); int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct 
stat *st, unsigned flags); +struct object_info; +struct odb_read_stream; struct odb_source; struct odb_source_loose { @@ -47,9 +49,9 @@ int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, struct object_info *oi, int flags); -void *odb_source_loose_map_object(struct odb_source *source, - const struct object_id *oid, - unsigned long *size); +int odb_source_loose_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid); /* * Return true iff an object database source has a loose object @@ -143,40 +145,6 @@ int for_each_loose_object(struct object_database *odb, int format_object_header(char *str, size_t size, enum object_type type, size_t objsize); -/** - * unpack_loose_header() initializes the data stream needed to unpack - * a loose object header. - * - * Returns: - * - * - ULHR_OK on success - * - ULHR_BAD on error - * - ULHR_TOO_LONG if the header was too long - * - * It will only parse up to MAX_HEADER_LEN bytes. - */ -enum unpack_loose_header_result { - ULHR_OK, - ULHR_BAD, - ULHR_TOO_LONG, -}; -enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, - unsigned char *map, - unsigned long mapsize, - void *buffer, - unsigned long bufsiz); - -/** - * parse_loose_header() parses the starting "<type> <size>\0" of an - * object. If it doesn't follow that format -1 is returned. To check - * the validity of the <type> populate the "typep" in the "struct - * object_info". It will be OBJ_BAD if the object type is unknown. The - * parsed <size> can be retrieved via "oi->sizep", and from there - * passed to unpack_loose_rest(). 
- */ -struct object_info; -int parse_loose_header(const char *hdr, struct object_info *oi); - int force_object_loose(struct odb_source *source, const struct object_id *oid, time_t mtime); diff --git a/streaming.c b/streaming.c index 0635b7c12e2233..d5acc1c39650e4 100644 --- a/streaming.c +++ b/streaming.c @@ -114,137 +114,6 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, return &fs->base; } -/***************************************************************** - * - * Loose object stream - * - *****************************************************************/ - -struct odb_loose_read_stream { - struct odb_read_stream base; - git_zstream z; - enum { - ODB_LOOSE_READ_STREAM_INUSE, - ODB_LOOSE_READ_STREAM_DONE, - ODB_LOOSE_READ_STREAM_ERROR, - } z_state; - void *mapped; - unsigned long mapsize; - char hdr[32]; - int hdr_avail; - int hdr_used; -}; - -static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) -{ - struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; - size_t total_read = 0; - - switch (st->z_state) { - case ODB_LOOSE_READ_STREAM_DONE: - return 0; - case ODB_LOOSE_READ_STREAM_ERROR: - return -1; - default: - break; - } - - if (st->hdr_used < st->hdr_avail) { - size_t to_copy = st->hdr_avail - st->hdr_used; - if (sz < to_copy) - to_copy = sz; - memcpy(buf, st->hdr + st->hdr_used, to_copy); - st->hdr_used += to_copy; - total_read += to_copy; - } - - while (total_read < sz) { - int status; - - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - status = git_inflate(&st->z, Z_FINISH); - - total_read = st->z.next_out - (unsigned char *)buf; - - if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = ODB_LOOSE_READ_STREAM_DONE; - break; - } - if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->z); - st->z_state = ODB_LOOSE_READ_STREAM_ERROR; - return -1; - } - } - return total_read; -} - 
-static int close_istream_loose(struct odb_read_stream *_st) -{ - struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st; - if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) - git_inflate_end(&st->z); - munmap(st->mapped, st->mapsize); - return 0; -} - -static int open_istream_loose(struct odb_read_stream **out, - struct odb_source *source, - const struct object_id *oid) -{ - struct object_info oi = OBJECT_INFO_INIT; - struct odb_loose_read_stream *st; - unsigned long mapsize; - void *mapped; - - mapped = odb_source_loose_map_object(source, oid, &mapsize); - if (!mapped) - return -1; - - /* - * Note: we must allocate this structure early even though we may still - * fail. This is because we need to initialize the zlib stream, and it - * is not possible to copy the stream around after the fact because it - * has self-referencing pointers. - */ - CALLOC_ARRAY(st, 1); - - switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, - sizeof(st->hdr))) { - case ULHR_OK: - break; - case ULHR_BAD: - case ULHR_TOO_LONG: - goto error; - } - - oi.sizep = &st->base.size; - oi.typep = &st->base.type; - - if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) - goto error; - - st->mapped = mapped; - st->mapsize = mapsize; - st->hdr_used = strlen(st->hdr) + 1; - st->hdr_avail = st->z.total_out; - st->z_state = ODB_LOOSE_READ_STREAM_INUSE; - st->base.close = close_istream_loose; - st->base.read = read_istream_loose; - - *out = &st->base; - - return 0; -error: - git_inflate_end(&st->z); - munmap(st->mapped, st->mapsize); - free(st); - return -1; -} - - /***************************************************************** * * Non-delta packed object stream @@ -455,7 +324,7 @@ static int istream_source(struct odb_read_stream **out, odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) - if (!open_istream_loose(out, source, oid)) + if (!odb_source_loose_read_object_stream(out, source, oid)) return 0; return 
open_istream_incore(out, r, oid); From 8c1b84bc977bf1e4515efe0386de87257ec28689 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:41 +0100 Subject: [PATCH 17/26] streaming: move logic to read packed objects streams into backend Move the logic to read packed object streams into the respective subsystem. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++ packfile.h | 5 ++ streaming.c | 136 +--------------------------------------------------- 3 files changed, 134 insertions(+), 135 deletions(-) diff --git a/packfile.c b/packfile.c index b4bc40d895c8da..ad56ce0b905c0d 100644 --- a/packfile.c +++ b/packfile.c @@ -20,6 +20,7 @@ #include "tree.h" #include "object-file.h" #include "odb.h" +#include "streaming.h" #include "midx.h" #include "commit-graph.h" #include "pack-revindex.h" @@ -2406,3 +2407,130 @@ void packfile_store_close(struct packfile_store *store) close_pack(p); } } + +struct odb_packed_read_stream { + struct odb_read_stream base; + struct packed_git *pack; + git_zstream z; + enum { + ODB_PACKED_READ_STREAM_UNINITIALIZED, + ODB_PACKED_READ_STREAM_INUSE, + ODB_PACKED_READ_STREAM_DONE, + ODB_PACKED_READ_STREAM_ERROR, + } z_state; + off_t pos; +}; + +static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf, + size_t sz) +{ + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + size_t total_read = 0; + + switch (st->z_state) { + case ODB_PACKED_READ_STREAM_UNINITIALIZED: + memset(&st->z, 0, sizeof(st->z)); + git_inflate_init(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_INUSE; + break; + case ODB_PACKED_READ_STREAM_DONE: + return 0; + case ODB_PACKED_READ_STREAM_ERROR: + return -1; + case ODB_PACKED_READ_STREAM_INUSE: + break; + } + + while (total_read < sz) { + int status; + struct pack_window *window = NULL; + unsigned char *mapped; + + mapped = use_pack(st->pack, &window, + st->pos, &st->z.avail_in); 
+ + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + st->z.next_in = mapped; + status = git_inflate(&st->z, Z_FINISH); + + st->pos += st->z.next_in - mapped; + total_read = st->z.next_out - (unsigned char *)buf; + unuse_pack(&window); + + if (status == Z_STREAM_END) { + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_DONE; + break; + } + + /* + * Unlike the loose object case, we do not have to worry here + * about running out of input bytes and spinning infinitely. If + * we get Z_BUF_ERROR due to too few input bytes, then we'll + * replenish them in the next use_pack() call when we loop. If + * we truly hit the end of the pack (i.e., because it's corrupt + * or truncated), then use_pack() catches that and will die(). + */ + if (status != Z_OK && status != Z_BUF_ERROR) { + git_inflate_end(&st->z); + st->z_state = ODB_PACKED_READ_STREAM_ERROR; + return -1; + } + } + return total_read; +} + +static int close_istream_pack_non_delta(struct odb_read_stream *_st) +{ + struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; + if (st->z_state == ODB_PACKED_READ_STREAM_INUSE) + git_inflate_end(&st->z); + return 0; +} + +int packfile_store_read_object_stream(struct odb_read_stream **out, + struct packfile_store *store, + const struct object_id *oid) +{ + struct odb_packed_read_stream *stream; + struct pack_window *window = NULL; + struct object_info oi = OBJECT_INFO_INIT; + enum object_type in_pack_type; + unsigned long size; + + oi.sizep = &size; + + if (packfile_store_read_object_info(store, oid, &oi, 0) || + oi.u.packed.is_delta || + repo_settings_get_big_file_threshold(store->odb->repo) >= size) + return -1; + + in_pack_type = unpack_object_header(oi.u.packed.pack, + &window, + &oi.u.packed.offset, + &size); + unuse_pack(&window); + switch (in_pack_type) { + default: + return -1; /* we do not do deltas for now */ + case OBJ_COMMIT: + case OBJ_TREE: + case OBJ_BLOB: + case OBJ_TAG: + break; + } + 
+ CALLOC_ARRAY(stream, 1); + stream->base.close = close_istream_pack_non_delta; + stream->base.read = read_istream_pack_non_delta; + stream->base.type = in_pack_type; + stream->base.size = size; + stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; + stream->pack = oi.u.packed.pack; + stream->pos = oi.u.packed.offset; + + *out = &stream->base; + + return 0; +} diff --git a/packfile.h b/packfile.h index 0a98bddd811921..3fcc5ae6e08c4b 100644 --- a/packfile.h +++ b/packfile.h @@ -8,6 +8,7 @@ /* in odb.h */ struct object_info; +struct odb_read_stream; struct packed_git { struct hashmap_entry packmap_ent; @@ -144,6 +145,10 @@ void packfile_store_add_pack(struct packfile_store *store, #define repo_for_each_pack(repo, p) \ for (p = packfile_store_get_packs(repo->objects->packfiles); p; p = p->next) +int packfile_store_read_object_stream(struct odb_read_stream **out, + struct packfile_store *store, + const struct object_id *oid); + /* * Try to read the object identified by its ID from the object store and * populate the object info with its data. 
Returns 1 in case the object was diff --git a/streaming.c b/streaming.c index d5acc1c39650e4..3140728a70bde7 100644 --- a/streaming.c +++ b/streaming.c @@ -114,140 +114,6 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st, return &fs->base; } -/***************************************************************** - * - * Non-delta packed object stream - * - *****************************************************************/ - -struct odb_packed_read_stream { - struct odb_read_stream base; - struct packed_git *pack; - git_zstream z; - enum { - ODB_PACKED_READ_STREAM_UNINITIALIZED, - ODB_PACKED_READ_STREAM_INUSE, - ODB_PACKED_READ_STREAM_DONE, - ODB_PACKED_READ_STREAM_ERROR, - } z_state; - off_t pos; -}; - -static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf, - size_t sz) -{ - struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; - size_t total_read = 0; - - switch (st->z_state) { - case ODB_PACKED_READ_STREAM_UNINITIALIZED: - memset(&st->z, 0, sizeof(st->z)); - git_inflate_init(&st->z); - st->z_state = ODB_PACKED_READ_STREAM_INUSE; - break; - case ODB_PACKED_READ_STREAM_DONE: - return 0; - case ODB_PACKED_READ_STREAM_ERROR: - return -1; - case ODB_PACKED_READ_STREAM_INUSE: - break; - } - - while (total_read < sz) { - int status; - struct pack_window *window = NULL; - unsigned char *mapped; - - mapped = use_pack(st->pack, &window, - st->pos, &st->z.avail_in); - - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - st->z.next_in = mapped; - status = git_inflate(&st->z, Z_FINISH); - - st->pos += st->z.next_in - mapped; - total_read = st->z.next_out - (unsigned char *)buf; - unuse_pack(&window); - - if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = ODB_PACKED_READ_STREAM_DONE; - break; - } - - /* - * Unlike the loose object case, we do not have to worry here - * about running out of input bytes and spinning infinitely. 
If - * we get Z_BUF_ERROR due to too few input bytes, then we'll - * replenish them in the next use_pack() call when we loop. If - * we truly hit the end of the pack (i.e., because it's corrupt - * or truncated), then use_pack() catches that and will die(). - */ - if (status != Z_OK && status != Z_BUF_ERROR) { - git_inflate_end(&st->z); - st->z_state = ODB_PACKED_READ_STREAM_ERROR; - return -1; - } - } - return total_read; -} - -static int close_istream_pack_non_delta(struct odb_read_stream *_st) -{ - struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st; - if (st->z_state == ODB_PACKED_READ_STREAM_INUSE) - git_inflate_end(&st->z); - return 0; -} - -static int open_istream_pack_non_delta(struct odb_read_stream **out, - struct object_database *odb, - const struct object_id *oid) -{ - struct odb_packed_read_stream *stream; - struct pack_window *window = NULL; - struct object_info oi = OBJECT_INFO_INIT; - enum object_type in_pack_type; - unsigned long size; - - oi.sizep = &size; - - if (packfile_store_read_object_info(odb->packfiles, oid, &oi, 0) || - oi.u.packed.is_delta || - repo_settings_get_big_file_threshold(odb->repo) >= size) - return -1; - - in_pack_type = unpack_object_header(oi.u.packed.pack, - &window, - &oi.u.packed.offset, - &size); - unuse_pack(&window); - switch (in_pack_type) { - default: - return -1; /* we do not do deltas for now */ - case OBJ_COMMIT: - case OBJ_TREE: - case OBJ_BLOB: - case OBJ_TAG: - break; - } - - CALLOC_ARRAY(stream, 1); - stream->base.close = close_istream_pack_non_delta; - stream->base.read = read_istream_pack_non_delta; - stream->base.type = in_pack_type; - stream->base.size = size; - stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED; - stream->pack = oi.u.packed.pack; - stream->pos = oi.u.packed.offset; - - *out = &stream->base; - - return 0; -} - - /***************************************************************** * * In-core stream @@ -319,7 +185,7 @@ static int istream_source(struct odb_read_stream 
**out, { struct odb_source *source; - if (!open_istream_pack_non_delta(out, r->objects, oid)) + if (!packfile_store_read_object_stream(out, r->objects->packfiles, oid)) return 0; odb_prepare_alternates(r->objects); From 378ec56beba161abbef6e2c87d9bc2ac43c355f3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:42 +0100 Subject: [PATCH 18/26] streaming: refactor interface to be object-database-centric Refactor the streaming interface to be centered around object databases instead of centered around the repository. Rename the functions accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- archive-tar.c | 6 +++--- archive-zip.c | 12 ++++++------ builtin/index-pack.c | 8 ++++---- builtin/pack-objects.c | 14 +++++++------- object-file.c | 8 ++++---- streaming.c | 44 +++++++++++++++++++++--------------------- streaming.h | 30 +++++++++++++++++++++++----- 7 files changed, 71 insertions(+), 51 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index dc1eda09e01e2b..4d87b28504615a 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -135,16 +135,16 @@ static int stream_blocked(struct repository *r, const struct object_id *oid) char buf[BLOCKSIZE]; ssize_t readlen; - st = open_istream(r, oid, &type, &sz, NULL); + st = odb_read_stream_open(r->objects, oid, &type, &sz, NULL); if (!st) return error(_("cannot stream blob %s"), oid_to_hex(oid)); for (;;) { - readlen = read_istream(st, buf, sizeof(buf)); + readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen <= 0) break; do_write_blocked(buf, readlen); } - close_istream(st); + odb_read_stream_close(st); if (!readlen) finish_record(); return readlen; diff --git a/archive-zip.c b/archive-zip.c index 40a9c93ff95233..c44684aebcf18d 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -348,8 +348,8 @@ static int write_zip_entry(struct archiver_args *args, if (!buffer) { enum object_type type; - stream = open_istream(args->repo, oid, &type, &size, - NULL); + stream = 
odb_read_stream_open(args->repo->objects, oid, + &type, &size, NULL); if (!stream) return error(_("cannot stream blob %s"), oid_to_hex(oid)); @@ -429,7 +429,7 @@ static int write_zip_entry(struct archiver_args *args, ssize_t readlen; for (;;) { - readlen = read_istream(stream, buf, sizeof(buf)); + readlen = odb_read_stream_read(stream, buf, sizeof(buf)); if (readlen <= 0) break; crc = crc32(crc, buf, readlen); @@ -439,7 +439,7 @@ static int write_zip_entry(struct archiver_args *args, buf, readlen); write_or_die(1, buf, readlen); } - close_istream(stream); + odb_read_stream_close(stream); if (readlen) return readlen; @@ -462,7 +462,7 @@ static int write_zip_entry(struct archiver_args *args, zstream.avail_out = sizeof(compressed); for (;;) { - readlen = read_istream(stream, buf, sizeof(buf)); + readlen = odb_read_stream_read(stream, buf, sizeof(buf)); if (readlen <= 0) break; crc = crc32(crc, buf, readlen); @@ -486,7 +486,7 @@ static int write_zip_entry(struct archiver_args *args, } } - close_istream(stream); + odb_read_stream_close(stream); if (readlen) return readlen; diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 5f90f12f92d9c4..fb76ef0f4c17c3 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -779,7 +779,7 @@ static int compare_objects(const unsigned char *buf, unsigned long size, } while (size) { - ssize_t len = read_istream(data->st, data->buf, size); + ssize_t len = odb_read_stream_read(data->st, data->buf, size); if (len == 0) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(&data->entry->idx.oid)); @@ -807,15 +807,15 @@ static int check_collison(struct object_entry *entry) memset(&data, 0, sizeof(data)); data.entry = entry; - data.st = open_istream(the_repository, &entry->idx.oid, &type, &size, - NULL); + data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, + &type, &size, NULL); if (!data.st) return -1; if (size != entry->size || type != entry->type) die(_("SHA1 COLLISION FOUND WITH %s !"), 
oid_to_hex(&entry->idx.oid)); unpack_data(entry, compare_objects, &data); - close_istream(data.st); + odb_read_stream_close(data.st); free(data.buf); return 0; } diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index c693d948e193ed..1353c2384c336e 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -417,7 +417,7 @@ static unsigned long write_large_blob_data(struct odb_read_stream *st, struct ha for (;;) { ssize_t readlen; int zret = Z_OK; - readlen = read_istream(st, ibuf, sizeof(ibuf)); + readlen = odb_read_stream_read(st, ibuf, sizeof(ibuf)); if (readlen == -1) die(_("unable to read %s"), oid_to_hex(oid)); @@ -520,8 +520,8 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent if (oe_type(entry) == OBJ_BLOB && oe_size_greater_than(&to_pack, entry, repo_settings_get_big_file_threshold(the_repository)) && - (st = open_istream(the_repository, &entry->idx.oid, &type, - &size, NULL)) != NULL) + (st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, + &type, &size, NULL)) != NULL) buf = NULL; else { buf = odb_read_object(the_repository->objects, @@ -577,7 +577,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent dheader[--pos] = 128 | (--ofs & 127); if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -591,7 +591,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent */ if (limit && hdrlen + hashsz + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -601,7 +601,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent } else { if (limit && hdrlen + datalen + hashsz >= limit) { if (st) - close_istream(st); + odb_read_stream_close(st); free(buf); return 0; } @@ -609,7 +609,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, 
struct object_ent } if (st) { datalen = write_large_blob_data(st, f, &entry->idx.oid); - close_istream(st); + odb_read_stream_close(st); } else { hashwrite(f, buf, datalen); free(buf); diff --git a/object-file.c b/object-file.c index 8c67847feaceb6..9ba40a848c034a 100644 --- a/object-file.c +++ b/object-file.c @@ -139,7 +139,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) char hdr[MAX_HEADER_LEN]; int hdrlen; - st = open_istream(r, oid, &obj_type, &size, NULL); + st = odb_read_stream_open(r->objects, oid, &obj_type, &size, NULL); if (!st) return -1; @@ -151,10 +151,10 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) git_hash_update(&c, hdr, hdrlen); for (;;) { char buf[1024 * 16]; - ssize_t readlen = read_istream(st, buf, sizeof(buf)); + ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen < 0) { - close_istream(st); + odb_read_stream_close(st); return -1; } if (!readlen) @@ -162,7 +162,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) git_hash_update(&c, buf, readlen); } git_hash_final_oid(&real_oid, &c); - close_istream(st); + odb_read_stream_close(st); return !oideq(oid, &real_oid) ? 
-1 : 0; } diff --git a/streaming.c b/streaming.c index 3140728a70bde7..06993a751c6194 100644 --- a/streaming.c +++ b/streaming.c @@ -35,7 +35,7 @@ static int close_istream_filtered(struct odb_read_stream *_fs) { struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs; free_stream_filter(fs->filter); - return close_istream(fs->upstream); + return odb_read_stream_close(fs->upstream); } static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf, @@ -87,7 +87,7 @@ static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf, /* refill the input from the upstream */ if (!fs->input_finished) { - fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER); + fs->i_end = odb_read_stream_read(fs->upstream, fs->ibuf, FILTER_BUFFER); if (fs->i_end < 0) return -1; if (fs->i_end) @@ -149,7 +149,7 @@ static ssize_t read_istream_incore(struct odb_read_stream *_st, char *buf, size_ } static int open_istream_incore(struct odb_read_stream **out, - struct repository *r, + struct object_database *odb, const struct object_id *oid) { struct object_info oi = OBJECT_INFO_INIT; @@ -163,7 +163,7 @@ static int open_istream_incore(struct odb_read_stream **out, oi.typep = &stream.base.type; oi.sizep = &stream.base.size; oi.contentp = (void **)&stream.buf; - ret = odb_read_object_info_extended(r->objects, oid, &oi, + ret = odb_read_object_info_extended(odb, oid, &oi, OBJECT_INFO_DIE_IF_CORRUPT); if (ret) return ret; @@ -180,47 +180,47 @@ static int open_istream_incore(struct odb_read_stream **out, *****************************************************************************/ static int istream_source(struct odb_read_stream **out, - struct repository *r, + struct object_database *odb, const struct object_id *oid) { struct odb_source *source; - if (!packfile_store_read_object_stream(out, r->objects->packfiles, oid)) + if (!packfile_store_read_object_stream(out, odb->packfiles, oid)) return 0; - odb_prepare_alternates(r->objects); - for 
(source = r->objects->sources; source; source = source->next) + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) if (!odb_source_loose_read_object_stream(out, source, oid)) return 0; - return open_istream_incore(out, r, oid); + return open_istream_incore(out, odb, oid); } /**************************************************************** * Users of streaming interface ****************************************************************/ -int close_istream(struct odb_read_stream *st) +int odb_read_stream_close(struct odb_read_stream *st) { int r = st->close(st); free(st); return r; } -ssize_t read_istream(struct odb_read_stream *st, void *buf, size_t sz) +ssize_t odb_read_stream_read(struct odb_read_stream *st, void *buf, size_t sz) { return st->read(st, buf, sz); } -struct odb_read_stream *open_istream(struct repository *r, - const struct object_id *oid, - enum object_type *type, - unsigned long *size, - struct stream_filter *filter) +struct odb_read_stream *odb_read_stream_open(struct object_database *odb, + const struct object_id *oid, + enum object_type *type, + unsigned long *size, + struct stream_filter *filter) { struct odb_read_stream *st; - const struct object_id *real = lookup_replace_object(r, oid); - int ret = istream_source(&st, r, real); + const struct object_id *real = lookup_replace_object(odb->repo, oid); + int ret = istream_source(&st, odb, real); if (ret) return NULL; @@ -229,7 +229,7 @@ struct odb_read_stream *open_istream(struct repository *r, /* Add "&& !is_null_stream_filter(filter)" for performance */ struct odb_read_stream *nst = attach_stream_filter(st, filter); if (!nst) { - close_istream(st); + odb_read_stream_close(st); return NULL; } st = nst; @@ -252,7 +252,7 @@ int odb_stream_blob_to_fd(struct object_database *odb, ssize_t kept = 0; int result = -1; - st = open_istream(odb->repo, oid, &type, &sz, filter); + st = odb_read_stream_open(odb, oid, &type, &sz, filter); if (!st) { if (filter) 
free_stream_filter(filter); @@ -263,7 +263,7 @@ int odb_stream_blob_to_fd(struct object_database *odb, for (;;) { char buf[1024 * 16]; ssize_t wrote, holeto; - ssize_t readlen = read_istream(st, buf, sizeof(buf)); + ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf)); if (readlen < 0) goto close_and_exit; @@ -294,6 +294,6 @@ int odb_stream_blob_to_fd(struct object_database *odb, result = 0; close_and_exit: - close_istream(st); + odb_read_stream_close(st); return result; } diff --git a/streaming.h b/streaming.h index acfdef1598db52..7cb55213b780ff 100644 --- a/streaming.h +++ b/streaming.h @@ -24,11 +24,31 @@ struct odb_read_stream { unsigned long size; /* inflated size of full object */ }; -struct odb_read_stream *open_istream(struct repository *, const struct object_id *, - enum object_type *, unsigned long *, - struct stream_filter *); -int close_istream(struct odb_read_stream *); -ssize_t read_istream(struct odb_read_stream *, void *, size_t); +/* + * Create a new object stream for the given object database. Populates the type + * and size pointers with the object's info. An optional filter can be used to + * transform the object's content. + * + * Returns the stream on success, a `NULL` pointer otherwise. + */ +struct odb_read_stream *odb_read_stream_open(struct object_database *odb, + const struct object_id *oid, + enum object_type *type, + unsigned long *size, + struct stream_filter *filter); + +/* + * Close the given read stream and release all resources associated with it. + * Returns 0 on success, a negative error code otherwise. + */ +int odb_read_stream_close(struct odb_read_stream *stream); + +/* + * Read data from the stream into the buffer. Returns 0 on EOF and the number + * of bytes read on success. Returns a negative error code in case reading from + * the stream fails. 
+ */ +ssize_t odb_read_stream_read(struct odb_read_stream *stream, void *buf, size_t len); /* * Look up the object by its ID and write the full contents to the file From 1599b68d5e960a12f5ac624f81c70ece317db5a6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:43 +0100 Subject: [PATCH 19/26] streaming: move into object database subsystem The "streaming" terminology is somewhat generic, so it may not be immediately obvious that "streaming.{c,h}" is specific to the object database. Rectify this by moving it into the "odb/" directory so that it can be immediately attributed to the object subsystem. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 2 +- archive-tar.c | 2 +- archive-zip.c | 2 +- builtin/cat-file.c | 2 +- builtin/fsck.c | 2 +- builtin/index-pack.c | 2 +- builtin/log.c | 2 +- builtin/pack-objects.c | 2 +- entry.c | 2 +- meson.build | 2 +- object-file.c | 2 +- streaming.c => odb/streaming.c | 2 +- streaming.h => odb/streaming.h | 0 packfile.c | 2 +- parallel-checkout.c | 2 +- 15 files changed, 14 insertions(+), 14 deletions(-) rename streaming.c => odb/streaming.c (99%) rename streaming.h => odb/streaming.h (100%) diff --git a/Makefile b/Makefile index 7e0f77e2988e3b..6d8dcc4622b059 100644 --- a/Makefile +++ b/Makefile @@ -1201,6 +1201,7 @@ LIB_OBJS += object-file.o LIB_OBJS += object-name.o LIB_OBJS += object.o LIB_OBJS += odb.o +LIB_OBJS += odb/streaming.o LIB_OBJS += oid-array.o LIB_OBJS += oidmap.o LIB_OBJS += oidset.o @@ -1294,7 +1295,6 @@ LIB_OBJS += split-index.o LIB_OBJS += stable-qsort.o LIB_OBJS += statinfo.o LIB_OBJS += strbuf.o -LIB_OBJS += streaming.o LIB_OBJS += string-list.o LIB_OBJS += strmap.o LIB_OBJS += strvec.o diff --git a/archive-tar.c b/archive-tar.c index 4d87b28504615a..494b9f0667a523 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -12,8 +12,8 @@ #include "tar.h" #include "archive.h" #include "odb.h" +#include "odb/streaming.h" #include "strbuf.h" -#include "streaming.h" 
#include "run-command.h" #include "write-or-die.h" diff --git a/archive-zip.c b/archive-zip.c index c44684aebcf18d..a0bdc2fe3b2e5e 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -10,9 +10,9 @@ #include "gettext.h" #include "git-zlib.h" #include "hex.h" -#include "streaming.h" #include "utf8.h" #include "odb.h" +#include "odb/streaming.h" #include "strbuf.h" #include "userdiff.h" #include "write-or-die.h" diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 120d626d66e140..505ddaa12f5309 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -18,13 +18,13 @@ #include "list-objects-filter-options.h" #include "parse-options.h" #include "userdiff.h" -#include "streaming.h" #include "oid-array.h" #include "packfile.h" #include "pack-bitmap.h" #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "promisor-remote.h" #include "mailmap.h" diff --git a/builtin/fsck.c b/builtin/fsck.c index 1a348d43c26020..c7d2eea287fe7d 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -13,11 +13,11 @@ #include "fsck.h" #include "parse-options.h" #include "progress.h" -#include "streaming.h" #include "packfile.h" #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "path.h" #include "read-cache-ll.h" #include "replace-object.h" diff --git a/builtin/index-pack.c b/builtin/index-pack.c index fb76ef0f4c17c3..581023495fdc9c 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -16,12 +16,12 @@ #include "progress.h" #include "fsck.h" #include "strbuf.h" -#include "streaming.h" #include "thread-utils.h" #include "packfile.h" #include "pack-revindex.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "oid-array.h" #include "oidset.h" #include "path.h" diff --git a/builtin/log.c b/builtin/log.c index e7b83a6e00a708..d4cf9c59c81a83 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -16,6 +16,7 @@ #include "refs.h" 
#include "object-name.h" #include "odb.h" +#include "odb/streaming.h" #include "pager.h" #include "color.h" #include "commit.h" @@ -35,7 +36,6 @@ #include "parse-options.h" #include "line-log.h" #include "branch.h" -#include "streaming.h" #include "version.h" #include "mailmap.h" #include "progress.h" diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1353c2384c336e..f109e26786e621 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -22,7 +22,6 @@ #include "pack-objects.h" #include "progress.h" #include "refs.h" -#include "streaming.h" #include "thread-utils.h" #include "pack-bitmap.h" #include "delta-islands.h" @@ -33,6 +32,7 @@ #include "packfile.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "dir.h" #include "midx.h" diff --git a/entry.c b/entry.c index 38dfe670f79920..7817aee362ed9e 100644 --- a/entry.c +++ b/entry.c @@ -2,13 +2,13 @@ #include "git-compat-util.h" #include "odb.h" +#include "odb/streaming.h" #include "dir.h" #include "environment.h" #include "gettext.h" #include "hex.h" #include "name-hash.h" #include "sparse-index.h" -#include "streaming.h" #include "submodule.h" #include "symlinks.h" #include "progress.h" diff --git a/meson.build b/meson.build index 1f95a06edb7829..fc82929b379dc5 100644 --- a/meson.build +++ b/meson.build @@ -397,6 +397,7 @@ libgit_sources = [ 'object-name.c', 'object.c', 'odb.c', + 'odb/streaming.c', 'oid-array.c', 'oidmap.c', 'oidset.c', @@ -490,7 +491,6 @@ libgit_sources = [ 'stable-qsort.c', 'statinfo.c', 'strbuf.c', - 'streaming.c', 'string-list.c', 'strmap.c', 'strvec.c', diff --git a/object-file.c b/object-file.c index 9ba40a848c034a..9601fdb12dc9a8 100644 --- a/object-file.c +++ b/object-file.c @@ -20,13 +20,13 @@ #include "object-file-convert.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "oidtree.h" #include "pack.h" #include "packfile.h" #include "path.h" #include "read-cache-ll.h" 
#include "setup.h" -#include "streaming.h" #include "tempfile.h" #include "tmp-objdir.h" diff --git a/streaming.c b/odb/streaming.c similarity index 99% rename from streaming.c rename to odb/streaming.c index 06993a751c6194..7ef58adaa2a09e 100644 --- a/streaming.c +++ b/odb/streaming.c @@ -5,10 +5,10 @@ #include "git-compat-util.h" #include "convert.h" #include "environment.h" -#include "streaming.h" #include "repository.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "replace-object.h" #include "packfile.h" diff --git a/streaming.h b/odb/streaming.h similarity index 100% rename from streaming.h rename to odb/streaming.h diff --git a/packfile.c b/packfile.c index ad56ce0b905c0d..7a16aaa90d0a2f 100644 --- a/packfile.c +++ b/packfile.c @@ -20,7 +20,7 @@ #include "tree.h" #include "object-file.h" #include "odb.h" -#include "streaming.h" +#include "odb/streaming.h" #include "midx.h" #include "commit-graph.h" #include "pack-revindex.h" diff --git a/parallel-checkout.c b/parallel-checkout.c index 1cb6701b926dcf..0bf4bd6d4abd8c 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -13,7 +13,7 @@ #include "read-cache-ll.h" #include "run-command.h" #include "sigchain.h" -#include "streaming.h" +#include "odb/streaming.h" #include "symlinks.h" #include "thread-utils.h" #include "trace2.h" From 7b940286527ec2175dffbb317f47e080bb37cf3e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Sun, 23 Nov 2025 19:59:44 +0100 Subject: [PATCH 20/26] streaming: drop redundant type and size pointers In the preceding commits we have turned `struct odb_read_stream` into a publicly visible structure. Furthermore, this structure now contains the type and size of the object that we are about to stream. Consequently, the out-pointers that we used before to propagate the type and size of the streamed object are now somewhat redundant with the data contained in the structure itself. Drop these out-pointers and adapt callers accordingly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- archive-tar.c | 4 +--- archive-zip.c | 5 ++--- builtin/index-pack.c | 7 ++----- builtin/pack-objects.c | 6 ++++-- object-file.c | 6 ++---- odb/streaming.c | 10 ++-------- odb/streaming.h | 7 ++----- 7 files changed, 15 insertions(+), 30 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 494b9f0667a523..0fc70d13a8807e 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -130,12 +130,10 @@ static void write_trailer(void) static int stream_blocked(struct repository *r, const struct object_id *oid) { struct odb_read_stream *st; - enum object_type type; - unsigned long sz; char buf[BLOCKSIZE]; ssize_t readlen; - st = odb_read_stream_open(r->objects, oid, &type, &sz, NULL); + st = odb_read_stream_open(r->objects, oid, NULL); if (!st) return error(_("cannot stream blob %s"), oid_to_hex(oid)); for (;;) { diff --git a/archive-zip.c b/archive-zip.c index a0bdc2fe3b2e5e..97ea8d60d6187b 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -347,12 +347,11 @@ static int write_zip_entry(struct archiver_args *args, method = ZIP_METHOD_DEFLATE; if (!buffer) { - enum object_type type; - stream = odb_read_stream_open(args->repo->objects, oid, - &type, &size, NULL); + stream = odb_read_stream_open(args->repo->objects, oid, NULL); if (!stream) return error(_("cannot stream blob %s"), oid_to_hex(oid)); + size = stream->size; flags |= ZIP_STREAM; out = NULL; } else { diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 581023495fdc9c..b01cb77f4a8500 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -798,8 +798,6 @@ static int compare_objects(const unsigned char *buf, unsigned long size, static int check_collison(struct object_entry *entry) { struct compare_data data; - enum object_type type; - unsigned long size; if (entry->size <= repo_settings_get_big_file_threshold(the_repository) || entry->type != OBJ_BLOB) @@ -807,11 +805,10 @@ static int check_collison(struct object_entry *entry) 
memset(&data, 0, sizeof(data)); data.entry = entry; - data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, - &type, &size, NULL); + data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, NULL); if (!data.st) return -1; - if (size != entry->size || type != entry->type) + if (data.st->size != entry->size || data.st->type != entry->type) die(_("SHA1 COLLISION FOUND WITH %s !"), oid_to_hex(&entry->idx.oid)); unpack_data(entry, compare_objects, &data); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index f109e26786e621..0d1d6995bfc35a 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -521,9 +521,11 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent oe_size_greater_than(&to_pack, entry, repo_settings_get_big_file_threshold(the_repository)) && (st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, - &type, &size, NULL)) != NULL) + NULL)) != NULL) { buf = NULL; - else { + type = st->type; + size = st->size; + } else { buf = odb_read_object(the_repository->objects, &entry->idx.oid, &type, &size); diff --git a/object-file.c b/object-file.c index 9601fdb12dc9a8..12177a7dd707a8 100644 --- a/object-file.c +++ b/object-file.c @@ -132,19 +132,17 @@ int check_object_signature(struct repository *r, const struct object_id *oid, int stream_object_signature(struct repository *r, const struct object_id *oid) { struct object_id real_oid; - unsigned long size; - enum object_type obj_type; struct odb_read_stream *st; struct git_hash_ctx c; char hdr[MAX_HEADER_LEN]; int hdrlen; - st = odb_read_stream_open(r->objects, oid, &obj_type, &size, NULL); + st = odb_read_stream_open(r->objects, oid, NULL); if (!st) return -1; /* Generate the header */ - hdrlen = format_object_header(hdr, sizeof(hdr), obj_type, size); + hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size); /* Sha1.. 
*/ r->hash_algo->init_fn(&c); diff --git a/odb/streaming.c b/odb/streaming.c index 7ef58adaa2a09e..745cd486fbb33d 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -214,8 +214,6 @@ ssize_t odb_read_stream_read(struct odb_read_stream *st, void *buf, size_t sz) struct odb_read_stream *odb_read_stream_open(struct object_database *odb, const struct object_id *oid, - enum object_type *type, - unsigned long *size, struct stream_filter *filter) { struct odb_read_stream *st; @@ -235,8 +233,6 @@ struct odb_read_stream *odb_read_stream_open(struct object_database *odb, st = nst; } - *size = st->size; - *type = st->type; return st; } @@ -247,18 +243,16 @@ int odb_stream_blob_to_fd(struct object_database *odb, int can_seek) { struct odb_read_stream *st; - enum object_type type; - unsigned long sz; ssize_t kept = 0; int result = -1; - st = odb_read_stream_open(odb, oid, &type, &sz, filter); + st = odb_read_stream_open(odb, oid, filter); if (!st) { if (filter) free_stream_filter(filter); return result; } - if (type != OBJ_BLOB) + if (st->type != OBJ_BLOB) goto close_and_exit; for (;;) { char buf[1024 * 16]; diff --git a/odb/streaming.h b/odb/streaming.h index 7cb55213b780ff..c7861f7e13c606 100644 --- a/odb/streaming.h +++ b/odb/streaming.h @@ -25,16 +25,13 @@ struct odb_read_stream { }; /* - * Create a new object stream for the given object database. Populates the type - * and size pointers with the object's info. An optional filter can be used to - * transform the object's content. + * Create a new object stream for the given object database. An optional filter + * can be used to transform the object's content. * * Returns the stream on success, a `NULL` pointer otherwise. 
*/ struct odb_read_stream *odb_read_stream_open(struct object_database *odb, const struct object_id *oid, - enum object_type *type, - unsigned long *size, struct stream_filter *filter); /* From e1ecf0dd6897eae1594b7e9345605b8f88485b95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 6 Dec 2025 14:27:39 +0100 Subject: [PATCH 21/26] wrapper: add git_mkdtemp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend git_mkstemps_mode() to optionally call mkdir(2) instead of open(2), then use that ability to create a mkdtemp(3) replacement, git_mkdtemp(). We'll start using it in the next commit. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- wrapper.c | 21 +++++++++++++++++++-- wrapper.h | 2 ++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/wrapper.c b/wrapper.c index 3d507d42045203..89f6effe84371b 100644 --- a/wrapper.c +++ b/wrapper.c @@ -446,7 +446,11 @@ int xmkstemp(char *filename_template) #undef TMP_MAX #define TMP_MAX 16384 -int git_mkstemps_mode(char *pattern, int suffix_len, int mode) +/* + * Returns -1 on error, 0 if it created a directory, or an open file + * descriptor to the created regular file. + */ +static int git_mkdstemps_mode(char *pattern, int suffix_len, int mode, bool dir) { static const char letters[] = "abcdefghijklmnopqrstuvwxyz" @@ -488,7 +492,10 @@ int git_mkstemps_mode(char *pattern, int suffix_len, int mode) v /= num_letters; } - fd = open(pattern, O_CREAT | O_EXCL | O_RDWR, mode); + if (dir) + fd = mkdir(pattern, mode); + else + fd = open(pattern, O_CREAT | O_EXCL | O_RDWR, mode); if (fd >= 0) return fd; /* @@ -503,6 +510,16 @@ int git_mkstemps_mode(char *pattern, int suffix_len, int mode) return -1; } +char *git_mkdtemp(char *pattern) +{ + return git_mkdstemps_mode(pattern, 0, 0700, true) ? 
NULL : pattern; +} + +int git_mkstemps_mode(char *pattern, int suffix_len, int mode) +{ + return git_mkdstemps_mode(pattern, suffix_len, mode, false); +} + int git_mkstemp_mode(char *pattern, int mode) { /* mkstemp is just mkstemps with no suffix */ diff --git a/wrapper.h b/wrapper.h index 44a8597ac31426..15ac3bab6e9748 100644 --- a/wrapper.h +++ b/wrapper.h @@ -37,6 +37,8 @@ int xsnprintf(char *dst, size_t max, const char *fmt, ...); int xgethostname(char *buf, size_t len); +char *git_mkdtemp(char *pattern); + /* set default permissions by passing mode arguments to open(2) */ int git_mkstemps_mode(char *pattern, int suffix_len, int mode); int git_mkstemp_mode(char *pattern, int mode); From 5ecd3590a3052820eeb3f1d6764584c537b68938 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 6 Dec 2025 14:27:47 +0100 Subject: [PATCH 22/26] compat: use git_mkdtemp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A file might appear at the path returned by mktemp(3) before we call mkdir(2). Use the more robust git_mkdtemp() instead, which retries a number of times and doesn't need to call lstat(2). 
Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- compat/mkdtemp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/compat/mkdtemp.c b/compat/mkdtemp.c index 11361195925c67..fcdd4e01e14613 100644 --- a/compat/mkdtemp.c +++ b/compat/mkdtemp.c @@ -2,7 +2,5 @@ char *gitmkdtemp(char *template) { - if (!*mktemp(template) || mkdir(template, 0700)) - return NULL; - return template; + return git_mkdtemp(template); } From 47bf14750eee7e43e12d20414d3698f203245a35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 6 Dec 2025 14:28:26 +0100 Subject: [PATCH 23/26] compat: remove mingw_mktemp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the mktemp(3) compatibility function now that its last caller was removed by the previous commit. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- compat/mingw-posix.h | 3 --- compat/mingw.c | 12 ------------ 2 files changed, 15 deletions(-) diff --git a/compat/mingw-posix.h b/compat/mingw-posix.h index 631a20868489be..0939feff27ffec 100644 --- a/compat/mingw-posix.h +++ b/compat/mingw-posix.h @@ -241,9 +241,6 @@ int mingw_chdir(const char *dirname); int mingw_chmod(const char *filename, int mode); #define chmod mingw_chmod -char *mingw_mktemp(char *template); -#define mktemp mingw_mktemp - char *mingw_getcwd(char *pointer, int len); #define getcwd mingw_getcwd diff --git a/compat/mingw.c b/compat/mingw.c index 736a07a028ab4d..abdc9684214dac 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1162,18 +1162,6 @@ unsigned int sleep (unsigned int seconds) return 0; } -char *mingw_mktemp(char *template) -{ - wchar_t wtemplate[MAX_PATH]; - if (xutftowcs_path(wtemplate, template) < 0) - return NULL; - if (!_wmktemp(wtemplate)) - return NULL; - if (xwcstoutf(template, wtemplate, strlen(template) + 1) < 0) - return NULL; - return template; -} - int mkstemp(char *template) { return git_mkstemp_mode(template, 0600); From 
7bef658135944d26acf3e1ec9316ca11f4369cf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 6 Dec 2025 14:29:43 +0100 Subject: [PATCH 24/26] banned.h: ban mktemp(3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Older versions of mktemp(3) generate easily guessable file names. The function checks if the generated name is used, which is unreliable, as a file with that name might then be created by some other process before we can do it ourselves. The function was dropped from POSIX due to its security problems. Forbid its use. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- banned.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/banned.h b/banned.h index 44e76bd90af769..2b934c8c4381b5 100644 --- a/banned.h +++ b/banned.h @@ -41,4 +41,7 @@ #undef asctime_r #define asctime_r(t, buf) BANNED(asctime_r) +#undef mktemp +#define mktemp(x) BANNED(mktemp) + #endif /* BANNED_H */ From 10bba537c4c23e713af05be700748c6a3c25bf68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 6 Dec 2025 14:35:39 +0100 Subject: [PATCH 25/26] compat: remove gitmkdtemp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gitmkdtemp() has become a trivial wrapper around git_mkdtemp(). Remove this now unnecessary layer of indirection. 
Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- Makefile | 1 - compat/mkdtemp.c | 6 ------ compat/posix.h | 3 +-- contrib/buildsystems/CMakeLists.txt | 4 ---- meson.build | 2 +- 5 files changed, 2 insertions(+), 14 deletions(-) delete mode 100644 compat/mkdtemp.c diff --git a/Makefile b/Makefile index 7e0f77e2988e3b..8f74b25fe7f9e9 100644 --- a/Makefile +++ b/Makefile @@ -1917,7 +1917,6 @@ ifdef NO_SETENV endif ifdef NO_MKDTEMP COMPAT_CFLAGS += -DNO_MKDTEMP - COMPAT_OBJS += compat/mkdtemp.o endif ifdef MKDIR_WO_TRAILING_SLASH COMPAT_CFLAGS += -DMKDIR_WO_TRAILING_SLASH diff --git a/compat/mkdtemp.c b/compat/mkdtemp.c deleted file mode 100644 index fcdd4e01e14613..00000000000000 --- a/compat/mkdtemp.c +++ /dev/null @@ -1,6 +0,0 @@ -#include "../git-compat-util.h" - -char *gitmkdtemp(char *template) -{ - return git_mkdtemp(template); -} diff --git a/compat/posix.h b/compat/posix.h index 067a00f33b83f3..245386fa4a9f4e 100644 --- a/compat/posix.h +++ b/compat/posix.h @@ -329,8 +329,7 @@ int gitsetenv(const char *, const char *, int); #endif #ifdef NO_MKDTEMP -#define mkdtemp gitmkdtemp -char *gitmkdtemp(char *); +#define mkdtemp git_mkdtemp #endif #ifdef NO_UNSETENV diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index edb0fc04ad7649..b84d8a7c762f06 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -411,10 +411,6 @@ if(NOT HAVE_SETENV) list(APPEND compat_SOURCES compat/setenv.c) endif() -if(NOT HAVE_MKDTEMP) - list(APPEND compat_SOURCES compat/mkdtemp.c) -endif() - if(NOT HAVE_PREAD) list(APPEND compat_SOURCES compat/pread.c) endif() diff --git a/meson.build b/meson.build index 1f95a06edb7829..4a42e783b1bb77 100644 --- a/meson.build +++ b/meson.build @@ -1401,7 +1401,7 @@ checkfuncs = { 'strlcpy' : ['strlcpy.c'], 'strtoull' : [], 'setenv' : ['setenv.c'], - 'mkdtemp' : ['mkdtemp.c'], + 'mkdtemp' : [], 'initgroups' : [], 'strtoumax' : ['strtoumax.c', 'strtoimax.c'], 'pread' : 
['pread.c'], From e7ef0ca622016d12a85836928a03959de4537c2f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 16 Dec 2025 11:08:23 +0900 Subject: [PATCH 26/26] The ninth batch Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.53.0.adoc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/RelNotes/2.53.0.adoc b/Documentation/RelNotes/2.53.0.adoc index 41ae2a5a7a4696..f28c8202919dc9 100644 --- a/Documentation/RelNotes/2.53.0.adoc +++ b/Documentation/RelNotes/2.53.0.adoc @@ -60,6 +60,13 @@ Performance, Internal Implementation, Development Support etc. "git diff --find-copioes-harder", also making the operation run faster. + * The "git_istream" abstraction has been revamped to make it easier + to interface with pluggable object database design. + + * Rewrite the only use of "mktemp()" that is subject to TOCTOU race + and stop using the insecure "mktemp()" function. + (merge 10bba537c4 rs/ban-mktemp later to maint). + Fixes since v2.52 ----------------- @@ -167,6 +174,9 @@ Fixes since v2.52 pathspec, which has been corrected. (merge 05491b90ce js/last-modified-with-sparse-checkouts later to maint). + * Emulation code clean-up. + (merge 42aa7603aa gf/win32-pthread-cond-init later to maint). + * Other code cleanup, docfix, build fix, etc. (merge 46207a54cc qj/doc-http-bad-want-response later to maint). (merge df90eccd93 kh/doc-commit-extra-references later to maint).