From 42aa7603aa752850c8ad89cca61e280dab520faf Mon Sep 17 00:00:00 2001
From: Greg Funni <gfunni234@gmail.com>
Date: Thu, 20 Nov 2025 21:43:36 +0000
Subject: [PATCH 01/26] win32: pthread_cond_init should return a value

This value is not checked, but the macro must return a value to match
POSIX.

Signed-off-by: Greg Funni <gfunni234@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 compat/win32/pthread.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compat/win32/pthread.h b/compat/win32/pthread.h
index e2b5c4f64c9b91..000604cdf69ffc 100644
--- a/compat/win32/pthread.h
+++ b/compat/win32/pthread.h
@@ -34,7 +34,7 @@ typedef int pthread_mutexattr_t;
 
 #define pthread_cond_t CONDITION_VARIABLE
 
-#define pthread_cond_init(a,b) InitializeConditionVariable((a))
+#define pthread_cond_init(a,b) return_0((InitializeConditionVariable((a)), 0))
 #define pthread_cond_destroy(a) do {} while (0)
 #define pthread_cond_wait(a,b) return_0(SleepConditionVariableCS((a), (b), INFINITE))
 #define pthread_cond_signal WakeConditionVariable

From 6bdda3a3b00fff9a1d64d1bb4732f0c446d7012c Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:26 +0100
Subject: [PATCH 02/26] streaming: rename `git_istream` into `odb_read_stream`

In the following patches we are about to make the `git_istream` more
generic so that it becomes fully controlled by the specific object
source that wants to create it. As part of these refactorings we'll
fully move the structure into the object database subsystem.

Prepare for this change by renaming the structure from `git_istream`
to `odb_read_stream`. This mirrors the `odb_write_stream` structure that
we already have.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 archive-tar.c          |  2 +-
 archive-zip.c          |  2 +-
 builtin/index-pack.c   |  2 +-
 builtin/pack-objects.c |  4 +--
 object-file.c          |  2 +-
 streaming.c            | 62 +++++++++++++++++++++---------------------
 streaming.h            | 12 ++++----
 7 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/archive-tar.c b/archive-tar.c
index 73b63ddc41bad6..dc1eda09e01e2b 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -129,7 +129,7 @@ static void write_trailer(void)
  */
 static int stream_blocked(struct repository *r, const struct object_id *oid)
 {
-	struct git_istream *st;
+	struct odb_read_stream *st;
 	enum object_type type;
 	unsigned long sz;
 	char buf[BLOCKSIZE];
diff --git a/archive-zip.c b/archive-zip.c
index bea5bdd43dc43e..40a9c93ff95233 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -309,7 +309,7 @@ static int write_zip_entry(struct archiver_args *args,
 	enum zip_method method;
 	unsigned char *out;
 	void *deflated = NULL;
-	struct git_istream *stream = NULL;
+	struct odb_read_stream *stream = NULL;
 	unsigned long flags = 0;
 	int is_binary = -1;
 	const char *path_without_prefix = path + args->baselen;
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 2b78ba7fe4d14a..5f90f12f92d9c4 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -762,7 +762,7 @@ static void find_ref_delta_children(const struct object_id *oid,
 
 struct compare_data {
 	struct object_entry *entry;
-	struct git_istream *st;
+	struct odb_read_stream *st;
 	unsigned char *buf;
 	unsigned long buf_size;
 };
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 69e80b1443a9b7..c693d948e193ed 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -404,7 +404,7 @@ static unsigned long do_compress(void **pptr, unsigned long size)
 	return stream.total_out;
 }
 
-static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f,
+static unsigned long write_large_blob_data(struct odb_read_stream *st, struct hashfile *f,
 					   const struct object_id *oid)
 {
 	git_zstream stream;
@@ -513,7 +513,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 	unsigned hdrlen;
 	enum object_type type;
 	void *buf;
-	struct git_istream *st = NULL;
+	struct odb_read_stream *st = NULL;
 	const unsigned hashsz = the_hash_algo->rawsz;
 
 	if (!usable_delta) {
diff --git a/object-file.c b/object-file.c
index 811c569ed36aa4..b62b21a45289fc 100644
--- a/object-file.c
+++ b/object-file.c
@@ -134,7 +134,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
 	struct object_id real_oid;
 	unsigned long size;
 	enum object_type obj_type;
-	struct git_istream *st;
+	struct odb_read_stream *st;
 	struct git_hash_ctx c;
 	char hdr[MAX_HEADER_LEN];
 	int hdrlen;
diff --git a/streaming.c b/streaming.c
index 00ad649ae397f3..1fb4b7c1c002e8 100644
--- a/streaming.c
+++ b/streaming.c
@@ -14,17 +14,17 @@
 #include "replace-object.h"
 #include "packfile.h"
 
-typedef int (*open_istream_fn)(struct git_istream *,
+typedef int (*open_istream_fn)(struct odb_read_stream *,
 			       struct repository *,
 			       const struct object_id *,
 			       enum object_type *);
-typedef int (*close_istream_fn)(struct git_istream *);
-typedef ssize_t (*read_istream_fn)(struct git_istream *, char *, size_t);
+typedef int (*close_istream_fn)(struct odb_read_stream *);
+typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t);
 
 #define FILTER_BUFFER (1024*16)
 
 struct filtered_istream {
-	struct git_istream *upstream;
+	struct odb_read_stream *upstream;
 	struct stream_filter *filter;
 	char ibuf[FILTER_BUFFER];
 	char obuf[FILTER_BUFFER];
@@ -33,7 +33,7 @@ struct filtered_istream {
 	int input_finished;
 };
 
-struct git_istream {
+struct odb_read_stream {
 	open_istream_fn open;
 	close_istream_fn close;
 	read_istream_fn read;
@@ -71,7 +71,7 @@ struct git_istream {
  *
  *****************************************************************/
 
-static void close_deflated_stream(struct git_istream *st)
+static void close_deflated_stream(struct odb_read_stream *st)
 {
 	if (st->z_state == z_used)
 		git_inflate_end(&st->z);
@@ -84,13 +84,13 @@ static void close_deflated_stream(struct git_istream *st)
  *
  *****************************************************************/
 
-static int close_istream_filtered(struct git_istream *st)
+static int close_istream_filtered(struct odb_read_stream *st)
 {
 	free_stream_filter(st->u.filtered.filter);
 	return close_istream(st->u.filtered.upstream);
 }
 
-static ssize_t read_istream_filtered(struct git_istream *st, char *buf,
+static ssize_t read_istream_filtered(struct odb_read_stream *st, char *buf,
 				     size_t sz)
 {
 	struct filtered_istream *fs = &(st->u.filtered);
@@ -150,10 +150,10 @@ static ssize_t read_istream_filtered(struct git_istream *st, char *buf,
 	return filled;
 }
 
-static struct git_istream *attach_stream_filter(struct git_istream *st,
-						struct stream_filter *filter)
+static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st,
+						    struct stream_filter *filter)
 {
-	struct git_istream *ifs = xmalloc(sizeof(*ifs));
+	struct odb_read_stream *ifs = xmalloc(sizeof(*ifs));
 	struct filtered_istream *fs = &(ifs->u.filtered);
 
 	ifs->close = close_istream_filtered;
@@ -173,7 +173,7 @@ static struct git_istream *attach_stream_filter(struct git_istream *st,
  *
  *****************************************************************/
 
-static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz)
+static ssize_t read_istream_loose(struct odb_read_stream *st, char *buf, size_t sz)
 {
 	size_t total_read = 0;
 
@@ -218,14 +218,14 @@ static ssize_t read_istream_loose(struct git_istream *st, char *buf, size_t sz)
 	return total_read;
 }
 
-static int close_istream_loose(struct git_istream *st)
+static int close_istream_loose(struct odb_read_stream *st)
 {
 	close_deflated_stream(st);
 	munmap(st->u.loose.mapped, st->u.loose.mapsize);
 	return 0;
 }
 
-static int open_istream_loose(struct git_istream *st, struct repository *r,
+static int open_istream_loose(struct odb_read_stream *st, struct repository *r,
 			      const struct object_id *oid,
 			      enum object_type *type)
 {
@@ -277,7 +277,7 @@ static int open_istream_loose(struct git_istream *st, struct repository *r,
  *
  *****************************************************************/
 
-static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf,
+static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf,
 					   size_t sz)
 {
 	size_t total_read = 0;
@@ -336,13 +336,13 @@ static ssize_t read_istream_pack_non_delta(struct git_istream *st, char *buf,
 	return total_read;
 }
 
-static int close_istream_pack_non_delta(struct git_istream *st)
+static int close_istream_pack_non_delta(struct odb_read_stream *st)
 {
 	close_deflated_stream(st);
 	return 0;
 }
 
-static int open_istream_pack_non_delta(struct git_istream *st,
+static int open_istream_pack_non_delta(struct odb_read_stream *st,
 				       struct repository *r UNUSED,
 				       const struct object_id *oid UNUSED,
 				       enum object_type *type UNUSED)
@@ -380,13 +380,13 @@ static int open_istream_pack_non_delta(struct git_istream *st,
  *
  *****************************************************************/
 
-static int close_istream_incore(struct git_istream *st)
+static int close_istream_incore(struct odb_read_stream *st)
 {
 	free(st->u.incore.buf);
 	return 0;
 }
 
-static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz)
+static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t sz)
 {
 	size_t read_size = sz;
 	size_t remainder = st->size - st->u.incore.read_ptr;
@@ -400,7 +400,7 @@ static ssize_t read_istream_incore(struct git_istream *st, char *buf, size_t sz)
 	return read_size;
 }
 
-static int open_istream_incore(struct git_istream *st, struct repository *r,
+static int open_istream_incore(struct odb_read_stream *st, struct repository *r,
 			       const struct object_id *oid, enum object_type *type)
 {
 	struct object_info oi = OBJECT_INFO_INIT;
@@ -420,7 +420,7 @@ static int open_istream_incore(struct git_istream *st, struct repository *r,
  * static helpers variables and functions for users of streaming interface
  *****************************************************************************/
 
-static int istream_source(struct git_istream *st,
+static int istream_source(struct odb_read_stream *st,
 			  struct repository *r,
 			  const struct object_id *oid,
 			  enum object_type *type)
@@ -458,25 +458,25 @@ static int istream_source(struct git_istream *st,
  * Users of streaming interface
  ****************************************************************/
 
-int close_istream(struct git_istream *st)
+int close_istream(struct odb_read_stream *st)
 {
 	int r = st->close(st);
 	free(st);
 	return r;
 }
 
-ssize_t read_istream(struct git_istream *st, void *buf, size_t sz)
+ssize_t read_istream(struct odb_read_stream *st, void *buf, size_t sz)
 {
 	return st->read(st, buf, sz);
 }
 
-struct git_istream *open_istream(struct repository *r,
-				 const struct object_id *oid,
-				 enum object_type *type,
-				 unsigned long *size,
-				 struct stream_filter *filter)
+struct odb_read_stream *open_istream(struct repository *r,
+				     const struct object_id *oid,
+				     enum object_type *type,
+				     unsigned long *size,
+				     struct stream_filter *filter)
 {
-	struct git_istream *st = xmalloc(sizeof(*st));
+	struct odb_read_stream *st = xmalloc(sizeof(*st));
 	const struct object_id *real = lookup_replace_object(r, oid);
 	int ret = istream_source(st, r, real, type);
 
@@ -493,7 +493,7 @@ struct git_istream *open_istream(struct repository *r,
 	}
 	if (filter) {
 		/* Add "&& !is_null_stream_filter(filter)" for performance */
-		struct git_istream *nst = attach_stream_filter(st, filter);
+		struct odb_read_stream *nst = attach_stream_filter(st, filter);
 		if (!nst) {
 			close_istream(st);
 			return NULL;
@@ -508,7 +508,7 @@ struct git_istream *open_istream(struct repository *r,
 int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter,
 		      int can_seek)
 {
-	struct git_istream *st;
+	struct odb_read_stream *st;
 	enum object_type type;
 	unsigned long sz;
 	ssize_t kept = 0;
diff --git a/streaming.h b/streaming.h
index bd27f59e5764ae..f5ff5d7ac9a573 100644
--- a/streaming.h
+++ b/streaming.h
@@ -7,14 +7,14 @@
 #include "object.h"
 
 /* opaque */
-struct git_istream;
+struct odb_read_stream;
 struct stream_filter;
 
-struct git_istream *open_istream(struct repository *, const struct object_id *,
-				 enum object_type *, unsigned long *,
-				 struct stream_filter *);
-int close_istream(struct git_istream *);
-ssize_t read_istream(struct git_istream *, void *, size_t);
+struct odb_read_stream *open_istream(struct repository *, const struct object_id *,
+				     enum object_type *, unsigned long *,
+				     struct stream_filter *);
+int close_istream(struct odb_read_stream *);
+ssize_t read_istream(struct odb_read_stream *, void *, size_t);
 
 int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int can_seek);
 

From 70c8b5f5453b9f128a72fad4398acfb9e7d869c4 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:27 +0100
Subject: [PATCH 03/26] streaming: drop the `open()` callback function

When creating a read stream we first populate the structure with the
open callback function and then subsequently call the function. This
layout is somewhat weird though:

  - The structure needs to be allocated and partially populated with the
    open function before we can properly initialize it.

  - We only ever call the `open()` callback function right after having
    populated the `struct odb_read_stream::open` member, and it's never
    called thereafter again. So it is somewhat pointless to store the
    callback in the first place.

The first point in particular creates a problem for us. In subsequent
commits we'll want to fully move construction of the read stream into
the respective object sources. E.g., the loose object source will be the
one that is responsible for creating the structure. But if we first need
to create the structure so that we can call the source-specific
callback, we cannot fully handle creation of the structure in the source
itself.

We could of course work around that and have the loose object source
only create the structure and populate its `open()` callback. But this
doesn't really buy us anything due to the second bullet point above.

Instead, drop the callback entirely and refactor `istream_source()` so
that we open the streams immediately. This unblocks a subsequent step,
where we'll also start to allocate the structure in the source-specific
logic.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 streaming.c | 37 +++++++++++++++----------------------
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/streaming.c b/streaming.c
index 1fb4b7c1c002e8..1bb3f393b87519 100644
--- a/streaming.c
+++ b/streaming.c
@@ -14,10 +14,6 @@
 #include "replace-object.h"
 #include "packfile.h"
 
-typedef int (*open_istream_fn)(struct odb_read_stream *,
-			       struct repository *,
-			       const struct object_id *,
-			       enum object_type *);
 typedef int (*close_istream_fn)(struct odb_read_stream *);
 typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t);
 
@@ -34,7 +30,6 @@ struct filtered_istream {
 };
 
 struct odb_read_stream {
-	open_istream_fn open;
 	close_istream_fn close;
 	read_istream_fn read;
 
@@ -437,21 +432,25 @@ static int istream_source(struct odb_read_stream *st,
 
 	switch (oi.whence) {
 	case OI_LOOSE:
-		st->open = open_istream_loose;
+		if (open_istream_loose(st, r, oid, type) < 0)
+			break;
 		return 0;
 	case OI_PACKED:
-		if (!oi.u.packed.is_delta &&
-		    repo_settings_get_big_file_threshold(the_repository) < size) {
-			st->u.in_pack.pack = oi.u.packed.pack;
-			st->u.in_pack.pos = oi.u.packed.offset;
-			st->open = open_istream_pack_non_delta;
-			return 0;
-		}
-		/* fallthru */
-	default:
-		st->open = open_istream_incore;
+		if (oi.u.packed.is_delta ||
+		    repo_settings_get_big_file_threshold(the_repository) >= size)
+			break;
+
+		st->u.in_pack.pack = oi.u.packed.pack;
+		st->u.in_pack.pos = oi.u.packed.offset;
+		if (open_istream_pack_non_delta(st, r, oid, type) < 0)
+			break;
+
 		return 0;
+	default:
+		break;
 	}
+
+	return open_istream_incore(st, r, oid, type);
 }
 
 /****************************************************************
@@ -485,12 +484,6 @@ struct odb_read_stream *open_istream(struct repository *r,
 		return NULL;
 	}
 
-	if (st->open(st, r, real, type)) {
-		if (open_istream_incore(st, r, real, type)) {
-			free(st);
-			return NULL;
-		}
-	}
 	if (filter) {
 		/* Add "&& !is_null_stream_filter(filter)" for performance */
 		struct odb_read_stream *nst = attach_stream_filter(st, filter);

From 3f64deabdf0a2a9664acec61698affc449e07496 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:28 +0100
Subject: [PATCH 04/26] streaming: propagate final object type via the stream

When opening the read stream for a specific object the caller is also
expected to pass in a pointer to the object type. This type is passed
down via multiple levels and will eventually be populated with the type
of the looked-up object.

The way we propagate the pointer down is somewhat non-obvious though.
While `istream_source()` still expects the pointer and populates it via
`odb_read_object_info_extended()`, we also pass it down even further
into the format-specific callbacks, which perform another lookup. This
is quite confusing overall.

Refactor the code so that the responsibility to populate the object type
rests solely with the format-specific callbacks. This will allow us to
drop the call to `odb_read_object_info_extended()` in `istream_source()`
entirely in a subsequent patch.

Furthermore, instead of propagating the type via an in-pointer, we now
propagate the type via a new field in the object stream. It already has
a `size` field, so it's only natural to have a second field that
contains the object type.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 streaming.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/streaming.c b/streaming.c
index 1bb3f393b87519..665624ddc0494e 100644
--- a/streaming.c
+++ b/streaming.c
@@ -33,6 +33,7 @@ struct odb_read_stream {
 	close_istream_fn close;
 	read_istream_fn read;
 
+	enum object_type type;
 	unsigned long size; /* inflated size of full object */
 	git_zstream z;
 	enum { z_unused, z_used, z_done, z_error } z_state;
@@ -159,6 +160,7 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st,
 	fs->o_end = fs->o_ptr = 0;
 	fs->input_finished = 0;
 	ifs->size = -1; /* unknown */
+	ifs->type = st->type;
 	return ifs;
 }
 
@@ -221,14 +223,13 @@ static int close_istream_loose(struct odb_read_stream *st)
 }
 
 static int open_istream_loose(struct odb_read_stream *st, struct repository *r,
-			      const struct object_id *oid,
-			      enum object_type *type)
+			      const struct object_id *oid)
 {
 	struct object_info oi = OBJECT_INFO_INIT;
 	struct odb_source *source;
 
 	oi.sizep = &st->size;
-	oi.typep = type;
+	oi.typep = &st->type;
 
 	odb_prepare_alternates(r->objects);
 	for (source = r->objects->sources; source; source = source->next) {
@@ -249,7 +250,7 @@ static int open_istream_loose(struct odb_read_stream *st, struct repository *r,
 	case ULHR_TOO_LONG:
 		goto error;
 	}
-	if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || *type < 0)
+	if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || st->type < 0)
 		goto error;
 
 	st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1;
@@ -339,8 +340,7 @@ static int close_istream_pack_non_delta(struct odb_read_stream *st)
 
 static int open_istream_pack_non_delta(struct odb_read_stream *st,
 				       struct repository *r UNUSED,
-				       const struct object_id *oid UNUSED,
-				       enum object_type *type UNUSED)
+				       const struct object_id *oid UNUSED)
 {
 	struct pack_window *window;
 	enum object_type in_pack_type;
@@ -361,6 +361,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st,
 	case OBJ_TAG:
 		break;
 	}
+	st->type = in_pack_type;
 	st->z_state = z_unused;
 	st->close = close_istream_pack_non_delta;
 	st->read = read_istream_pack_non_delta;
@@ -396,7 +397,7 @@ static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t
 }
 
 static int open_istream_incore(struct odb_read_stream *st, struct repository *r,
-			       const struct object_id *oid, enum object_type *type)
+			       const struct object_id *oid)
 {
 	struct object_info oi = OBJECT_INFO_INIT;
 
@@ -404,7 +405,7 @@ static int open_istream_incore(struct odb_read_stream *st, struct repository *r,
 	st->close = close_istream_incore;
 	st->read = read_istream_incore;
 
-	oi.typep = type;
+	oi.typep = &st->type;
 	oi.sizep = &st->size;
 	oi.contentp = (void **)&st->u.incore.buf;
 	return odb_read_object_info_extended(r->objects, oid, &oi,
@@ -417,14 +418,12 @@ static int open_istream_incore(struct odb_read_stream *st, struct repository *r,
 
 static int istream_source(struct odb_read_stream *st,
 			  struct repository *r,
-			  const struct object_id *oid,
-			  enum object_type *type)
+			  const struct object_id *oid)
 {
 	unsigned long size;
 	int status;
 	struct object_info oi = OBJECT_INFO_INIT;
 
-	oi.typep = type;
 	oi.sizep = &size;
 	status = odb_read_object_info_extended(r->objects, oid, &oi, 0);
 	if (status < 0)
@@ -432,7 +431,7 @@ static int istream_source(struct odb_read_stream *st,
 
 	switch (oi.whence) {
 	case OI_LOOSE:
-		if (open_istream_loose(st, r, oid, type) < 0)
+		if (open_istream_loose(st, r, oid) < 0)
 			break;
 		return 0;
 	case OI_PACKED:
@@ -442,7 +441,7 @@ static int istream_source(struct odb_read_stream *st,
 
 		st->u.in_pack.pack = oi.u.packed.pack;
 		st->u.in_pack.pos = oi.u.packed.offset;
-		if (open_istream_pack_non_delta(st, r, oid, type) < 0)
+		if (open_istream_pack_non_delta(st, r, oid) < 0)
 			break;
 
 		return 0;
@@ -450,7 +449,7 @@ static int istream_source(struct odb_read_stream *st,
 		break;
 	}
 
-	return open_istream_incore(st, r, oid, type);
+	return open_istream_incore(st, r, oid);
 }
 
 /****************************************************************
@@ -477,7 +476,7 @@ struct odb_read_stream *open_istream(struct repository *r,
 {
 	struct odb_read_stream *st = xmalloc(sizeof(*st));
 	const struct object_id *real = lookup_replace_object(r, oid);
-	int ret = istream_source(st, r, real, type);
+	int ret = istream_source(st, r, real);
 
 	if (ret) {
 		free(st);
@@ -495,6 +494,7 @@ struct odb_read_stream *open_istream(struct repository *r,
 	}
 
 	*size = st->size;
+	*type = st->type;
 	return st;
 }
 

From 3c7722dd4d376e0fce4c48f723fe8b69af785998 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:29 +0100
Subject: [PATCH 05/26] streaming: explicitly pass packfile info when streaming
 a packed object

When streaming a packed object we first populate the stream with
information about the pack that contains the object before calling
`open_istream_pack_non_delta()`. This is done because we have already
looked up both the pack and the object's offset, so it would be a waste
of time to look up this information again.

But the way this is done makes for a somewhat awkward calling interface,
as the caller now needs to be aware of how exactly the function itself
behaves.

Refactor the code so that we instead explicitly pass the packfile info
into `open_istream_pack_non_delta()`. This makes the calling convention
explicit, but more importantly this allows us to refactor the function
so that it becomes its responsibility to allocate the stream itself in a
subsequent patch.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 streaming.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/streaming.c b/streaming.c
index 665624ddc0494e..bf277daadd48c2 100644
--- a/streaming.c
+++ b/streaming.c
@@ -340,16 +340,18 @@ static int close_istream_pack_non_delta(struct odb_read_stream *st)
 
 static int open_istream_pack_non_delta(struct odb_read_stream *st,
 				       struct repository *r UNUSED,
-				       const struct object_id *oid UNUSED)
+				       const struct object_id *oid UNUSED,
+				       struct packed_git *pack,
+				       off_t offset)
 {
 	struct pack_window *window;
 	enum object_type in_pack_type;
 
 	window = NULL;
 
-	in_pack_type = unpack_object_header(st->u.in_pack.pack,
+	in_pack_type = unpack_object_header(pack,
 					    &window,
-					    &st->u.in_pack.pos,
+					    &offset,
 					    &st->size);
 	unuse_pack(&window);
 	switch (in_pack_type) {
@@ -365,6 +367,8 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st,
 	st->z_state = z_unused;
 	st->close = close_istream_pack_non_delta;
 	st->read = read_istream_pack_non_delta;
+	st->u.in_pack.pack = pack;
+	st->u.in_pack.pos = offset;
 
 	return 0;
 }
@@ -436,14 +440,10 @@ static int istream_source(struct odb_read_stream *st,
 		return 0;
 	case OI_PACKED:
 		if (oi.u.packed.is_delta ||
-		    repo_settings_get_big_file_threshold(the_repository) >= size)
+		    repo_settings_get_big_file_threshold(the_repository) >= size ||
+		    open_istream_pack_non_delta(st, r, oid, oi.u.packed.pack,
+						oi.u.packed.offset) < 0)
 			break;
-
-		st->u.in_pack.pack = oi.u.packed.pack;
-		st->u.in_pack.pos = oi.u.packed.offset;
-		if (open_istream_pack_non_delta(st, r, oid) < 0)
-			break;
-
 		return 0;
 	default:
 		break;

From 595296e124f5e8a67c4669fcaeb1b28e71c2d751 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:30 +0100
Subject: [PATCH 06/26] streaming: allocate stream inside the backend-specific
 logic

When creating a new stream we first allocate it and then call into
backend-specific logic to populate the stream. This design requires that
the stream itself contains a `union` with backend-specific members that
then ultimately get populated by the backend-specific logic.

This works, but it's awkward in the context of pluggable object
databases. Each backend will need its own member in that union, and as
the structure itself is completely opaque (it's only defined in
"streaming.c") it also has the consequence that we must have the logic
that is specific to backends in "streaming.c".

Ideally though, the infrastructure would be reversed: we have a generic
`struct odb_read_stream` and some helper functions in "streaming.c",
whereas the backend-specific logic sits in the backend's subsystem
itself.

This can be realized by using a design that is similar to how we handle
reference databases: rather than having a union of members, we have
backend-specific structures with a `struct odb_read_stream base` as
their first member. The backends would thus hand out the pointer to the
base, but internally they know to cast back to the backend-specific
type.

This means though that we need to allocate different structures
depending on the backend. To prepare for this, move allocation of the
structure into the backend-specific functions that open a new stream.
Subsequent commits will then create those new backend-specific structs.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 streaming.c | 103 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 65 insertions(+), 38 deletions(-)

diff --git a/streaming.c b/streaming.c
index bf277daadd48c2..a2c2d887387c57 100644
--- a/streaming.c
+++ b/streaming.c
@@ -222,27 +222,34 @@ static int close_istream_loose(struct odb_read_stream *st)
 	return 0;
 }
 
-static int open_istream_loose(struct odb_read_stream *st, struct repository *r,
+static int open_istream_loose(struct odb_read_stream **out,
+			      struct repository *r,
 			      const struct object_id *oid)
 {
 	struct object_info oi = OBJECT_INFO_INIT;
+	struct odb_read_stream *st;
 	struct odb_source *source;
-
-	oi.sizep = &st->size;
-	oi.typep = &st->type;
+	unsigned long mapsize;
+	void *mapped;
 
 	odb_prepare_alternates(r->objects);
 	for (source = r->objects->sources; source; source = source->next) {
-		st->u.loose.mapped = odb_source_loose_map_object(source, oid,
-								 &st->u.loose.mapsize);
-		if (st->u.loose.mapped)
+		mapped = odb_source_loose_map_object(source, oid, &mapsize);
+		if (mapped)
 			break;
 	}
-	if (!st->u.loose.mapped)
+	if (!mapped)
 		return -1;
 
-	switch (unpack_loose_header(&st->z, st->u.loose.mapped,
-				    st->u.loose.mapsize, st->u.loose.hdr,
+	/*
+	 * Note: we must allocate this structure early even though we may still
+	 * fail. This is because we need to initialize the zlib stream, and it
+	 * is not possible to copy the stream around after the fact because it
+	 * has self-referencing pointers.
+	 */
+	CALLOC_ARRAY(st, 1);
+
+	switch (unpack_loose_header(&st->z, mapped, mapsize, st->u.loose.hdr,
 				    sizeof(st->u.loose.hdr))) {
 	case ULHR_OK:
 		break;
@@ -250,19 +257,28 @@ static int open_istream_loose(struct odb_read_stream *st, struct repository *r,
 	case ULHR_TOO_LONG:
 		goto error;
 	}
+
+	oi.sizep = &st->size;
+	oi.typep = &st->type;
+
 	if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || st->type < 0)
 		goto error;
 
+	st->u.loose.mapped = mapped;
+	st->u.loose.mapsize = mapsize;
 	st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1;
 	st->u.loose.hdr_avail = st->z.total_out;
 	st->z_state = z_used;
 	st->close = close_istream_loose;
 	st->read = read_istream_loose;
 
+	*out = st;
+
 	return 0;
 error:
 	git_inflate_end(&st->z);
 	munmap(st->u.loose.mapped, st->u.loose.mapsize);
+	free(st);
 	return -1;
 }
 
@@ -338,12 +354,16 @@ static int close_istream_pack_non_delta(struct odb_read_stream *st)
 	return 0;
 }
 
-static int open_istream_pack_non_delta(struct odb_read_stream *st,
+static int open_istream_pack_non_delta(struct odb_read_stream **out,
 				       struct repository *r UNUSED,
 				       const struct object_id *oid UNUSED,
 				       struct packed_git *pack,
 				       off_t offset)
 {
+	struct odb_read_stream stream = {
+		.close = close_istream_pack_non_delta,
+		.read = read_istream_pack_non_delta,
+	};
 	struct pack_window *window;
 	enum object_type in_pack_type;
 
@@ -352,7 +372,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st,
 	in_pack_type = unpack_object_header(pack,
 					    &window,
 					    &offset,
-					    &st->size);
+					    &stream.size);
 	unuse_pack(&window);
 	switch (in_pack_type) {
 	default:
@@ -363,12 +383,13 @@ static int open_istream_pack_non_delta(struct odb_read_stream *st,
 	case OBJ_TAG:
 		break;
 	}
-	st->type = in_pack_type;
-	st->z_state = z_unused;
-	st->close = close_istream_pack_non_delta;
-	st->read = read_istream_pack_non_delta;
-	st->u.in_pack.pack = pack;
-	st->u.in_pack.pos = offset;
+	stream.type = in_pack_type;
+	stream.z_state = z_unused;
+	stream.u.in_pack.pack = pack;
+	stream.u.in_pack.pos = offset;
+
+	CALLOC_ARRAY(*out, 1);
+	**out = stream;
 
 	return 0;
 }
@@ -400,27 +421,35 @@ static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t
 	return read_size;
 }
 
-static int open_istream_incore(struct odb_read_stream *st, struct repository *r,
+static int open_istream_incore(struct odb_read_stream **out,
+			       struct repository *r,
 			       const struct object_id *oid)
 {
 	struct object_info oi = OBJECT_INFO_INIT;
-
-	st->u.incore.read_ptr = 0;
-	st->close = close_istream_incore;
-	st->read = read_istream_incore;
-
-	oi.typep = &st->type;
-	oi.sizep = &st->size;
-	oi.contentp = (void **)&st->u.incore.buf;
-	return odb_read_object_info_extended(r->objects, oid, &oi,
-					     OBJECT_INFO_DIE_IF_CORRUPT);
+	struct odb_read_stream stream = {
+		.close = close_istream_incore,
+		.read = read_istream_incore,
+	};
+	int ret;
+
+	oi.typep = &stream.type;
+	oi.sizep = &stream.size;
+	oi.contentp = (void **)&stream.u.incore.buf;
+	ret = odb_read_object_info_extended(r->objects, oid, &oi,
+					    OBJECT_INFO_DIE_IF_CORRUPT);
+	if (ret)
+		return ret;
+
+	CALLOC_ARRAY(*out, 1);
+	**out = stream;
+	return 0;
 }
 
 /*****************************************************************************
  * static helpers variables and functions for users of streaming interface
  *****************************************************************************/
 
-static int istream_source(struct odb_read_stream *st,
+static int istream_source(struct odb_read_stream **out,
 			  struct repository *r,
 			  const struct object_id *oid)
 {
@@ -435,13 +464,13 @@ static int istream_source(struct odb_read_stream *st,
 
 	switch (oi.whence) {
 	case OI_LOOSE:
-		if (open_istream_loose(st, r, oid) < 0)
+		if (open_istream_loose(out, r, oid) < 0)
 			break;
 		return 0;
 	case OI_PACKED:
 		if (oi.u.packed.is_delta ||
 		    repo_settings_get_big_file_threshold(the_repository) >= size ||
-		    open_istream_pack_non_delta(st, r, oid, oi.u.packed.pack,
+		    open_istream_pack_non_delta(out, r, oid, oi.u.packed.pack,
 						oi.u.packed.offset) < 0)
 			break;
 		return 0;
@@ -449,7 +478,7 @@ static int istream_source(struct odb_read_stream *st,
 		break;
 	}
 
-	return open_istream_incore(st, r, oid);
+	return open_istream_incore(out, r, oid);
 }
 
 /****************************************************************
@@ -474,14 +503,12 @@ struct odb_read_stream *open_istream(struct repository *r,
 				     unsigned long *size,
 				     struct stream_filter *filter)
 {
-	struct odb_read_stream *st = xmalloc(sizeof(*st));
+	struct odb_read_stream *st;
 	const struct object_id *real = lookup_replace_object(r, oid);
-	int ret = istream_source(st, r, real);
+	int ret = istream_source(&st, r, real);
 
-	if (ret) {
-		free(st);
+	if (ret)
 		return NULL;
-	}
 
 	if (filter) {
 		/* Add "&& !is_null_stream_filter(filter)" for performance */

From e030d0aeb5ebf79cdc4910e79d59e33998de78cd Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:31 +0100
Subject: [PATCH 07/26] streaming: create structure for in-core object streams

As explained in a preceding commit, we want to get rid of the union of
stream-type specific data in `struct odb_read_stream`. Create a new
structure for in-core object streams to move towards this design.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 streaming.c | 44 +++++++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/streaming.c b/streaming.c
index a2c2d887387c57..35307d72295988 100644
--- a/streaming.c
+++ b/streaming.c
@@ -39,11 +39,6 @@ struct odb_read_stream {
 	enum { z_unused, z_used, z_done, z_error } z_state;
 
 	union {
-		struct {
-			char *buf; /* from odb_read_object_info_extended() */
-			unsigned long read_ptr;
-		} incore;
-
 		struct {
 			void *mapped;
 			unsigned long mapsize;
@@ -401,22 +396,30 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out,
  *
  *****************************************************************/
 
-static int close_istream_incore(struct odb_read_stream *st)
+struct odb_incore_read_stream {
+	struct odb_read_stream base;
+	char *buf; /* from odb_read_object_info_extended() */
+	unsigned long read_ptr;
+};
+
+static int close_istream_incore(struct odb_read_stream *_st)
 {
-	free(st->u.incore.buf);
+	struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st;
+	free(st->buf);
 	return 0;
 }
 
-static ssize_t read_istream_incore(struct odb_read_stream *st, char *buf, size_t sz)
+static ssize_t read_istream_incore(struct odb_read_stream *_st, char *buf, size_t sz)
 {
+	struct odb_incore_read_stream *st = (struct odb_incore_read_stream *)_st;
 	size_t read_size = sz;
-	size_t remainder = st->size - st->u.incore.read_ptr;
+	size_t remainder = st->base.size - st->read_ptr;
 
 	if (remainder <= read_size)
 		read_size = remainder;
 	if (read_size) {
-		memcpy(buf, st->u.incore.buf + st->u.incore.read_ptr, read_size);
-		st->u.incore.read_ptr += read_size;
+		memcpy(buf, st->buf + st->read_ptr, read_size);
+		st->read_ptr += read_size;
 	}
 	return read_size;
 }
@@ -426,22 +429,25 @@ static int open_istream_incore(struct odb_read_stream **out,
 			       const struct object_id *oid)
 {
 	struct object_info oi = OBJECT_INFO_INIT;
-	struct odb_read_stream stream = {
-		.close = close_istream_incore,
-		.read = read_istream_incore,
+	struct odb_incore_read_stream stream = {
+		.base.close = close_istream_incore,
+		.base.read = read_istream_incore,
 	};
+	struct odb_incore_read_stream *st;
 	int ret;
 
-	oi.typep = &stream.type;
-	oi.sizep = &stream.size;
-	oi.contentp = (void **)&stream.u.incore.buf;
+	oi.typep = &stream.base.type;
+	oi.sizep = &stream.base.size;
+	oi.contentp = (void **)&stream.buf;
 	ret = odb_read_object_info_extended(r->objects, oid, &oi,
 					    OBJECT_INFO_DIE_IF_CORRUPT);
 	if (ret)
 		return ret;
 
-	CALLOC_ARRAY(*out, 1);
-	**out = stream;
+	CALLOC_ARRAY(st, 1);
+	*st = stream;
+	*out = &st->base;
+
 	return 0;
 }
 

From b7774c0f0de43379c40984b4ede265a512c1a4f0 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:32 +0100
Subject: [PATCH 08/26] streaming: create structure for loose object streams

As explained in a preceding commit, we want to get rid of the union of
stream-type specific data in `struct odb_read_stream`. Create a new
structure for loose object streams to move towards this design.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 streaming.c | 85 +++++++++++++++++++++++++++--------------------------
 1 file changed, 44 insertions(+), 41 deletions(-)

diff --git a/streaming.c b/streaming.c
index 35307d72295988..ac7b3026f5a604 100644
--- a/streaming.c
+++ b/streaming.c
@@ -39,14 +39,6 @@ struct odb_read_stream {
 	enum { z_unused, z_used, z_done, z_error } z_state;
 
 	union {
-		struct {
-			void *mapped;
-			unsigned long mapsize;
-			char hdr[32];
-			int hdr_avail;
-			int hdr_used;
-		} loose;
-
 		struct {
 			struct packed_git *pack;
 			off_t pos;
@@ -165,11 +157,21 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st,
  *
  *****************************************************************/
 
-static ssize_t read_istream_loose(struct odb_read_stream *st, char *buf, size_t sz)
+struct odb_loose_read_stream {
+	struct odb_read_stream base;
+	void *mapped;
+	unsigned long mapsize;
+	char hdr[32];
+	int hdr_avail;
+	int hdr_used;
+};
+
+static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz)
 {
+	struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
 	size_t total_read = 0;
 
-	switch (st->z_state) {
+	switch (st->base.z_state) {
 	case z_done:
 		return 0;
 	case z_error:
@@ -178,42 +180,43 @@ static ssize_t read_istream_loose(struct odb_read_stream *st, char *buf, size_t
 		break;
 	}
 
-	if (st->u.loose.hdr_used < st->u.loose.hdr_avail) {
-		size_t to_copy = st->u.loose.hdr_avail - st->u.loose.hdr_used;
+	if (st->hdr_used < st->hdr_avail) {
+		size_t to_copy = st->hdr_avail - st->hdr_used;
 		if (sz < to_copy)
 			to_copy = sz;
-		memcpy(buf, st->u.loose.hdr + st->u.loose.hdr_used, to_copy);
-		st->u.loose.hdr_used += to_copy;
+		memcpy(buf, st->hdr + st->hdr_used, to_copy);
+		st->hdr_used += to_copy;
 		total_read += to_copy;
 	}
 
 	while (total_read < sz) {
 		int status;
 
-		st->z.next_out = (unsigned char *)buf + total_read;
-		st->z.avail_out = sz - total_read;
-		status = git_inflate(&st->z, Z_FINISH);
+		st->base.z.next_out = (unsigned char *)buf + total_read;
+		st->base.z.avail_out = sz - total_read;
+		status = git_inflate(&st->base.z, Z_FINISH);
 
-		total_read = st->z.next_out - (unsigned char *)buf;
+		total_read = st->base.z.next_out - (unsigned char *)buf;
 
 		if (status == Z_STREAM_END) {
-			git_inflate_end(&st->z);
-			st->z_state = z_done;
+			git_inflate_end(&st->base.z);
+			st->base.z_state = z_done;
 			break;
 		}
 		if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
-			git_inflate_end(&st->z);
-			st->z_state = z_error;
+			git_inflate_end(&st->base.z);
+			st->base.z_state = z_error;
 			return -1;
 		}
 	}
 	return total_read;
 }
 
-static int close_istream_loose(struct odb_read_stream *st)
+static int close_istream_loose(struct odb_read_stream *_st)
 {
-	close_deflated_stream(st);
-	munmap(st->u.loose.mapped, st->u.loose.mapsize);
+	struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
+	close_deflated_stream(&st->base);
+	munmap(st->mapped, st->mapsize);
 	return 0;
 }
 
@@ -222,7 +225,7 @@ static int open_istream_loose(struct odb_read_stream **out,
 			      const struct object_id *oid)
 {
 	struct object_info oi = OBJECT_INFO_INIT;
-	struct odb_read_stream *st;
+	struct odb_loose_read_stream *st;
 	struct odb_source *source;
 	unsigned long mapsize;
 	void *mapped;
@@ -244,8 +247,8 @@ static int open_istream_loose(struct odb_read_stream **out,
 	 */
 	CALLOC_ARRAY(st, 1);
 
-	switch (unpack_loose_header(&st->z, mapped, mapsize, st->u.loose.hdr,
-				    sizeof(st->u.loose.hdr))) {
+	switch (unpack_loose_header(&st->base.z, mapped, mapsize, st->hdr,
+				    sizeof(st->hdr))) {
 	case ULHR_OK:
 		break;
 	case ULHR_BAD:
@@ -253,26 +256,26 @@ static int open_istream_loose(struct odb_read_stream **out,
 		goto error;
 	}
 
-	oi.sizep = &st->size;
-	oi.typep = &st->type;
+	oi.sizep = &st->base.size;
+	oi.typep = &st->base.type;
 
-	if (parse_loose_header(st->u.loose.hdr, &oi) < 0 || st->type < 0)
+	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
 		goto error;
 
-	st->u.loose.mapped = mapped;
-	st->u.loose.mapsize = mapsize;
-	st->u.loose.hdr_used = strlen(st->u.loose.hdr) + 1;
-	st->u.loose.hdr_avail = st->z.total_out;
-	st->z_state = z_used;
-	st->close = close_istream_loose;
-	st->read = read_istream_loose;
+	st->mapped = mapped;
+	st->mapsize = mapsize;
+	st->hdr_used = strlen(st->hdr) + 1;
+	st->hdr_avail = st->base.z.total_out;
+	st->base.z_state = z_used;
+	st->base.close = close_istream_loose;
+	st->base.read = read_istream_loose;
 
-	*out = st;
+	*out = &st->base;
 
 	return 0;
 error:
-	git_inflate_end(&st->z);
-	munmap(st->u.loose.mapped, st->u.loose.mapsize);
+	git_inflate_end(&st->base.z);
+	munmap(st->mapped, st->mapsize);
 	free(st);
 	return -1;
 }

From 5f0d8d2e8d3f992f58af247b6d21509c3c7595ca Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:33 +0100
Subject: [PATCH 09/26] streaming: create structure for packed object streams

As explained in a preceding commit, we want to get rid of the union of
stream-type specific data in `struct odb_read_stream`. Create a new
structure for packed object streams to move towards this design.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 streaming.c | 75 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 40 insertions(+), 35 deletions(-)

diff --git a/streaming.c b/streaming.c
index ac7b3026f5a604..788f04e83ef6c8 100644
--- a/streaming.c
+++ b/streaming.c
@@ -39,11 +39,6 @@ struct odb_read_stream {
 	enum { z_unused, z_used, z_done, z_error } z_state;
 
 	union {
-		struct {
-			struct packed_git *pack;
-			off_t pos;
-		} in_pack;
-
 		struct filtered_istream filtered;
 	} u;
 };
@@ -287,16 +282,23 @@ static int open_istream_loose(struct odb_read_stream **out,
  *
  *****************************************************************/
 
-static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf,
+struct odb_packed_read_stream {
+	struct odb_read_stream base;
+	struct packed_git *pack;
+	off_t pos;
+};
+
+static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf,
 					   size_t sz)
 {
+	struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st;
 	size_t total_read = 0;
 
-	switch (st->z_state) {
+	switch (st->base.z_state) {
 	case z_unused:
-		memset(&st->z, 0, sizeof(st->z));
-		git_inflate_init(&st->z);
-		st->z_state = z_used;
+		memset(&st->base.z, 0, sizeof(st->base.z));
+		git_inflate_init(&st->base.z);
+		st->base.z_state = z_used;
 		break;
 	case z_done:
 		return 0;
@@ -311,21 +313,21 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf
 		struct pack_window *window = NULL;
 		unsigned char *mapped;
 
-		mapped = use_pack(st->u.in_pack.pack, &window,
-				  st->u.in_pack.pos, &st->z.avail_in);
+		mapped = use_pack(st->pack, &window,
+				  st->pos, &st->base.z.avail_in);
 
-		st->z.next_out = (unsigned char *)buf + total_read;
-		st->z.avail_out = sz - total_read;
-		st->z.next_in = mapped;
-		status = git_inflate(&st->z, Z_FINISH);
+		st->base.z.next_out = (unsigned char *)buf + total_read;
+		st->base.z.avail_out = sz - total_read;
+		st->base.z.next_in = mapped;
+		status = git_inflate(&st->base.z, Z_FINISH);
 
-		st->u.in_pack.pos += st->z.next_in - mapped;
-		total_read = st->z.next_out - (unsigned char *)buf;
+		st->pos += st->base.z.next_in - mapped;
+		total_read = st->base.z.next_out - (unsigned char *)buf;
 		unuse_pack(&window);
 
 		if (status == Z_STREAM_END) {
-			git_inflate_end(&st->z);
-			st->z_state = z_done;
+			git_inflate_end(&st->base.z);
+			st->base.z_state = z_done;
 			break;
 		}
 
@@ -338,17 +340,18 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *st, char *buf
 		 * or truncated), then use_pack() catches that and will die().
 		 */
 		if (status != Z_OK && status != Z_BUF_ERROR) {
-			git_inflate_end(&st->z);
-			st->z_state = z_error;
+			git_inflate_end(&st->base.z);
+			st->base.z_state = z_error;
 			return -1;
 		}
 	}
 	return total_read;
 }
 
-static int close_istream_pack_non_delta(struct odb_read_stream *st)
+static int close_istream_pack_non_delta(struct odb_read_stream *_st)
 {
-	close_deflated_stream(st);
+	struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st;
+	close_deflated_stream(&st->base);
 	return 0;
 }
 
@@ -358,19 +361,17 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out,
 				       struct packed_git *pack,
 				       off_t offset)
 {
-	struct odb_read_stream stream = {
-		.close = close_istream_pack_non_delta,
-		.read = read_istream_pack_non_delta,
-	};
+	struct odb_packed_read_stream *stream;
 	struct pack_window *window;
 	enum object_type in_pack_type;
+	size_t size;
 
 	window = NULL;
 
 	in_pack_type = unpack_object_header(pack,
 					    &window,
 					    &offset,
-					    &stream.size);
+					    &size);
 	unuse_pack(&window);
 	switch (in_pack_type) {
 	default:
@@ -381,13 +382,17 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out,
 	case OBJ_TAG:
 		break;
 	}
-	stream.type = in_pack_type;
-	stream.z_state = z_unused;
-	stream.u.in_pack.pack = pack;
-	stream.u.in_pack.pos = offset;
 
-	CALLOC_ARRAY(*out, 1);
-	**out = stream;
+	CALLOC_ARRAY(stream, 1);
+	stream->base.close = close_istream_pack_non_delta;
+	stream->base.read = read_istream_pack_non_delta;
+	stream->base.type = in_pack_type;
+	stream->base.size = size;
+	stream->base.z_state = z_unused;
+	stream->pack = pack;
+	stream->pos = offset;
+
+	*out = &stream->base;
 
 	return 0;
 }

From 1154b2d2e511113e9b7d567788b72acb05713915 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:34 +0100
Subject: [PATCH 10/26] streaming: create structure for filtered object streams

As explained in a preceding commit, we want to get rid of the union of
stream-type specific data in `struct odb_read_stream`. Create a new
structure for filtered object streams to move towards this design.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 streaming.c | 54 +++++++++++++++++++++++++----------------------------
 1 file changed, 25 insertions(+), 29 deletions(-)

diff --git a/streaming.c b/streaming.c
index 788f04e83ef6c8..199cca5abb0eaa 100644
--- a/streaming.c
+++ b/streaming.c
@@ -19,16 +19,6 @@ typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t);
 
 #define FILTER_BUFFER (1024*16)
 
-struct filtered_istream {
-	struct odb_read_stream *upstream;
-	struct stream_filter *filter;
-	char ibuf[FILTER_BUFFER];
-	char obuf[FILTER_BUFFER];
-	int i_end, i_ptr;
-	int o_end, o_ptr;
-	int input_finished;
-};
-
 struct odb_read_stream {
 	close_istream_fn close;
 	read_istream_fn read;
@@ -37,10 +27,6 @@ struct odb_read_stream {
 	unsigned long size; /* inflated size of full object */
 	git_zstream z;
 	enum { z_unused, z_used, z_done, z_error } z_state;
-
-	union {
-		struct filtered_istream filtered;
-	} u;
 };
 
 /*****************************************************************
@@ -62,16 +48,28 @@ static void close_deflated_stream(struct odb_read_stream *st)
  *
  *****************************************************************/
 
-static int close_istream_filtered(struct odb_read_stream *st)
+struct odb_filtered_read_stream {
+	struct odb_read_stream base;
+	struct odb_read_stream *upstream;
+	struct stream_filter *filter;
+	char ibuf[FILTER_BUFFER];
+	char obuf[FILTER_BUFFER];
+	int i_end, i_ptr;
+	int o_end, o_ptr;
+	int input_finished;
+};
+
+static int close_istream_filtered(struct odb_read_stream *_fs)
 {
-	free_stream_filter(st->u.filtered.filter);
-	return close_istream(st->u.filtered.upstream);
+	struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs;
+	free_stream_filter(fs->filter);
+	return close_istream(fs->upstream);
 }
 
-static ssize_t read_istream_filtered(struct odb_read_stream *st, char *buf,
+static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf,
 				     size_t sz)
 {
-	struct filtered_istream *fs = &(st->u.filtered);
+	struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs;
 	size_t filled = 0;
 
 	while (sz) {
@@ -131,19 +129,17 @@ static ssize_t read_istream_filtered(struct odb_read_stream *st, char *buf,
 static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st,
 						    struct stream_filter *filter)
 {
-	struct odb_read_stream *ifs = xmalloc(sizeof(*ifs));
-	struct filtered_istream *fs = &(ifs->u.filtered);
+	struct odb_filtered_read_stream *fs;
 
-	ifs->close = close_istream_filtered;
-	ifs->read = read_istream_filtered;
+	CALLOC_ARRAY(fs, 1);
+	fs->base.close = close_istream_filtered;
+	fs->base.read = read_istream_filtered;
 	fs->upstream = st;
 	fs->filter = filter;
-	fs->i_end = fs->i_ptr = 0;
-	fs->o_end = fs->o_ptr = 0;
-	fs->input_finished = 0;
-	ifs->size = -1; /* unknown */
-	ifs->type = st->type;
-	return ifs;
+	fs->base.size = -1; /* unknown */
+	fs->base.type = st->type;
+
+	return &fs->base;
 }
 
 /*****************************************************************

From eb5abbb4e6a8c06f5c6275bbb541bf7d736171c5 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:35 +0100
Subject: [PATCH 11/26] streaming: move zlib stream into backends

While all backend-specific data is now contained in a backend-specific
structure, we still share the zlib stream across the loose and packed
objects.

Refactor the code and move it into the specific structures so that we
fully detangle the different backends from one another.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 streaming.c | 104 ++++++++++++++++++++++++++--------------------------
 1 file changed, 52 insertions(+), 52 deletions(-)

diff --git a/streaming.c b/streaming.c
index 199cca5abb0eaa..46fddaf2cad0ba 100644
--- a/streaming.c
+++ b/streaming.c
@@ -25,23 +25,8 @@ struct odb_read_stream {
 
 	enum object_type type;
 	unsigned long size; /* inflated size of full object */
-	git_zstream z;
-	enum { z_unused, z_used, z_done, z_error } z_state;
 };
 
-/*****************************************************************
- *
- * Common helpers
- *
- *****************************************************************/
-
-static void close_deflated_stream(struct odb_read_stream *st)
-{
-	if (st->z_state == z_used)
-		git_inflate_end(&st->z);
-}
-
-
 /*****************************************************************
  *
  * Filtered stream
@@ -150,6 +135,12 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st,
 
 struct odb_loose_read_stream {
 	struct odb_read_stream base;
+	git_zstream z;
+	enum {
+		ODB_LOOSE_READ_STREAM_INUSE,
+		ODB_LOOSE_READ_STREAM_DONE,
+		ODB_LOOSE_READ_STREAM_ERROR,
+	} z_state;
 	void *mapped;
 	unsigned long mapsize;
 	char hdr[32];
@@ -162,10 +153,10 @@ static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t
 	struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
 	size_t total_read = 0;
 
-	switch (st->base.z_state) {
-	case z_done:
+	switch (st->z_state) {
+	case ODB_LOOSE_READ_STREAM_DONE:
 		return 0;
-	case z_error:
+	case ODB_LOOSE_READ_STREAM_ERROR:
 		return -1;
 	default:
 		break;
@@ -183,20 +174,20 @@ static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t
 	while (total_read < sz) {
 		int status;
 
-		st->base.z.next_out = (unsigned char *)buf + total_read;
-		st->base.z.avail_out = sz - total_read;
-		status = git_inflate(&st->base.z, Z_FINISH);
+		st->z.next_out = (unsigned char *)buf + total_read;
+		st->z.avail_out = sz - total_read;
+		status = git_inflate(&st->z, Z_FINISH);
 
-		total_read = st->base.z.next_out - (unsigned char *)buf;
+		total_read = st->z.next_out - (unsigned char *)buf;
 
 		if (status == Z_STREAM_END) {
-			git_inflate_end(&st->base.z);
-			st->base.z_state = z_done;
+			git_inflate_end(&st->z);
+			st->z_state = ODB_LOOSE_READ_STREAM_DONE;
 			break;
 		}
 		if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
-			git_inflate_end(&st->base.z);
-			st->base.z_state = z_error;
+			git_inflate_end(&st->z);
+			st->z_state = ODB_LOOSE_READ_STREAM_ERROR;
 			return -1;
 		}
 	}
@@ -206,7 +197,8 @@ static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t
 static int close_istream_loose(struct odb_read_stream *_st)
 {
 	struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
-	close_deflated_stream(&st->base);
+	if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE)
+		git_inflate_end(&st->z);
 	munmap(st->mapped, st->mapsize);
 	return 0;
 }
@@ -238,7 +230,7 @@ static int open_istream_loose(struct odb_read_stream **out,
 	 */
 	CALLOC_ARRAY(st, 1);
 
-	switch (unpack_loose_header(&st->base.z, mapped, mapsize, st->hdr,
+	switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr,
 				    sizeof(st->hdr))) {
 	case ULHR_OK:
 		break;
@@ -256,8 +248,8 @@ static int open_istream_loose(struct odb_read_stream **out,
 	st->mapped = mapped;
 	st->mapsize = mapsize;
 	st->hdr_used = strlen(st->hdr) + 1;
-	st->hdr_avail = st->base.z.total_out;
-	st->base.z_state = z_used;
+	st->hdr_avail = st->z.total_out;
+	st->z_state = ODB_LOOSE_READ_STREAM_INUSE;
 	st->base.close = close_istream_loose;
 	st->base.read = read_istream_loose;
 
@@ -265,7 +257,7 @@ static int open_istream_loose(struct odb_read_stream **out,
 
 	return 0;
 error:
-	git_inflate_end(&st->base.z);
+	git_inflate_end(&st->z);
 	munmap(st->mapped, st->mapsize);
 	free(st);
 	return -1;
@@ -281,6 +273,13 @@ static int open_istream_loose(struct odb_read_stream **out,
 struct odb_packed_read_stream {
 	struct odb_read_stream base;
 	struct packed_git *pack;
+	git_zstream z;
+	enum {
+		ODB_PACKED_READ_STREAM_UNINITIALIZED,
+		ODB_PACKED_READ_STREAM_INUSE,
+		ODB_PACKED_READ_STREAM_DONE,
+		ODB_PACKED_READ_STREAM_ERROR,
+	} z_state;
 	off_t pos;
 };
 
@@ -290,17 +289,17 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu
 	struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st;
 	size_t total_read = 0;
 
-	switch (st->base.z_state) {
-	case z_unused:
-		memset(&st->base.z, 0, sizeof(st->base.z));
-		git_inflate_init(&st->base.z);
-		st->base.z_state = z_used;
+	switch (st->z_state) {
+	case ODB_PACKED_READ_STREAM_UNINITIALIZED:
+		memset(&st->z, 0, sizeof(st->z));
+		git_inflate_init(&st->z);
+		st->z_state = ODB_PACKED_READ_STREAM_INUSE;
 		break;
-	case z_done:
+	case ODB_PACKED_READ_STREAM_DONE:
 		return 0;
-	case z_error:
+	case ODB_PACKED_READ_STREAM_ERROR:
 		return -1;
-	case z_used:
+	case ODB_PACKED_READ_STREAM_INUSE:
 		break;
 	}
 
@@ -310,20 +309,20 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu
 		unsigned char *mapped;
 
 		mapped = use_pack(st->pack, &window,
-				  st->pos, &st->base.z.avail_in);
+				  st->pos, &st->z.avail_in);
 
-		st->base.z.next_out = (unsigned char *)buf + total_read;
-		st->base.z.avail_out = sz - total_read;
-		st->base.z.next_in = mapped;
-		status = git_inflate(&st->base.z, Z_FINISH);
+		st->z.next_out = (unsigned char *)buf + total_read;
+		st->z.avail_out = sz - total_read;
+		st->z.next_in = mapped;
+		status = git_inflate(&st->z, Z_FINISH);
 
-		st->pos += st->base.z.next_in - mapped;
-		total_read = st->base.z.next_out - (unsigned char *)buf;
+		st->pos += st->z.next_in - mapped;
+		total_read = st->z.next_out - (unsigned char *)buf;
 		unuse_pack(&window);
 
 		if (status == Z_STREAM_END) {
-			git_inflate_end(&st->base.z);
-			st->base.z_state = z_done;
+			git_inflate_end(&st->z);
+			st->z_state = ODB_PACKED_READ_STREAM_DONE;
 			break;
 		}
 
@@ -336,8 +335,8 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu
 		 * or truncated), then use_pack() catches that and will die().
 		 */
 		if (status != Z_OK && status != Z_BUF_ERROR) {
-			git_inflate_end(&st->base.z);
-			st->base.z_state = z_error;
+			git_inflate_end(&st->z);
+			st->z_state = ODB_PACKED_READ_STREAM_ERROR;
 			return -1;
 		}
 	}
@@ -347,7 +346,8 @@ static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *bu
 static int close_istream_pack_non_delta(struct odb_read_stream *_st)
 {
 	struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st;
-	close_deflated_stream(&st->base);
+	if (st->z_state == ODB_PACKED_READ_STREAM_INUSE)
+		git_inflate_end(&st->z);
 	return 0;
 }
 
@@ -384,7 +384,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out,
 	stream->base.read = read_istream_pack_non_delta;
 	stream->base.type = in_pack_type;
 	stream->base.size = size;
-	stream->base.z_state = z_unused;
+	stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED;
 	stream->pack = pack;
 	stream->pos = offset;
 

From 385e18810f10ec0ce0a266d25da4e1878c8ce15a Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:36 +0100
Subject: [PATCH 12/26] packfile: introduce function to read object info from a
 store

Extract the logic to read object info for a packed object from
`do_oid_object_info_extended()` into a standalone function that operates
on the packfile store. This function will be used in a subsequent
commit.

Note that this change allows us to make `find_pack_entry()` an internal
implementation detail. As a consequence, though, we have to move around
`packfile_store_freshen_object()` so that it is defined after that
function.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 odb.c      | 29 +++-------------------
 packfile.c | 71 +++++++++++++++++++++++++++++++++++++++++-------------
 packfile.h | 12 ++++++++-
 3 files changed, 69 insertions(+), 43 deletions(-)

diff --git a/odb.c b/odb.c
index 3ec21ef24e16bb..f4cbee4b042d83 100644
--- a/odb.c
+++ b/odb.c
@@ -666,8 +666,6 @@ static int do_oid_object_info_extended(struct object_database *odb,
 {
 	static struct object_info blank_oi = OBJECT_INFO_INIT;
 	const struct cached_object *co;
-	struct pack_entry e;
-	int rtype;
 	const struct object_id *real = oid;
 	int already_retried = 0;
 
@@ -702,8 +700,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
 	while (1) {
 		struct odb_source *source;
 
-		if (find_pack_entry(odb->repo, real, &e))
-			break;
+		if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags))
+			return 0;
 
 		/* Most likely it's a loose object. */
 		for (source = odb->sources; source; source = source->next)
@@ -713,8 +711,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
 		/* Not a loose object; someone else may have just packed it. */
 		if (!(flags & OBJECT_INFO_QUICK)) {
 			odb_reprepare(odb->repo->objects);
-			if (find_pack_entry(odb->repo, real, &e))
-				break;
+			if (!packfile_store_read_object_info(odb->packfiles, real, oi, flags))
+				return 0;
 		}
 
 		/*
@@ -747,25 +745,6 @@ static int do_oid_object_info_extended(struct object_database *odb,
 		}
 		return -1;
 	}
-
-	if (oi == &blank_oi)
-		/*
-		 * We know that the caller doesn't actually need the
-		 * information below, so return early.
-		 */
-		return 0;
-	rtype = packed_object_info(odb->repo, e.p, e.offset, oi);
-	if (rtype < 0) {
-		mark_bad_packed_object(e.p, real);
-		return do_oid_object_info_extended(odb, real, oi, 0);
-	} else if (oi->whence == OI_PACKED) {
-		oi->u.packed.offset = e.offset;
-		oi->u.packed.pack = e.p;
-		oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
-					 rtype == OBJ_OFS_DELTA);
-	}
-
-	return 0;
 }
 
 static int oid_object_info_convert(struct repository *r,
diff --git a/packfile.c b/packfile.c
index 40f733dd234900..b4bc40d895c8da 100644
--- a/packfile.c
+++ b/packfile.c
@@ -819,22 +819,6 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store,
 	return p;
 }
 
-int packfile_store_freshen_object(struct packfile_store *store,
-				  const struct object_id *oid)
-{
-	struct pack_entry e;
-	if (!find_pack_entry(store->odb->repo, oid, &e))
-		return 0;
-	if (e.p->is_cruft)
-		return 0;
-	if (e.p->freshened)
-		return 1;
-	if (utime(e.p->pack_name, NULL))
-		return 0;
-	e.p->freshened = 1;
-	return 1;
-}
-
 void (*report_garbage)(unsigned seen_bits, const char *path);
 
 static void report_helper(const struct string_list *list,
@@ -2064,7 +2048,9 @@ static int fill_pack_entry(const struct object_id *oid,
 	return 1;
 }
 
-int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e)
+static int find_pack_entry(struct repository *r,
+			   const struct object_id *oid,
+			   struct pack_entry *e)
 {
 	struct list_head *pos;
 
@@ -2087,6 +2073,57 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa
 	return 0;
 }
 
+int packfile_store_freshen_object(struct packfile_store *store,
+				  const struct object_id *oid)
+{
+	struct pack_entry e;
+	if (!find_pack_entry(store->odb->repo, oid, &e))
+		return 0;
+	if (e.p->is_cruft)
+		return 0;
+	if (e.p->freshened)
+		return 1;
+	if (utime(e.p->pack_name, NULL))
+		return 0;
+	e.p->freshened = 1;
+	return 1;
+}
+
+int packfile_store_read_object_info(struct packfile_store *store,
+				    const struct object_id *oid,
+				    struct object_info *oi,
+				    unsigned flags UNUSED)
+{
+	static struct object_info blank_oi = OBJECT_INFO_INIT;
+	struct pack_entry e;
+	int rtype;
+
+	if (!find_pack_entry(store->odb->repo, oid, &e))
+		return 1;
+
+	/*
+	 * Return early if the caller needs no information. NOTE(review): no
+	 * caller can pass this function-local `blank_oi`; this looks dead.
+	 */
+	if (oi == &blank_oi)
+		return 0;
+
+	rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi);
+	if (rtype < 0) {
+		mark_bad_packed_object(e.p, oid);
+		return -1;
+	}
+
+	if (oi->whence == OI_PACKED) {
+		oi->u.packed.offset = e.offset;
+		oi->u.packed.pack = e.p;
+		oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
+					 rtype == OBJ_OFS_DELTA);
+	}
+
+	return 0;
+}
+
 static void maybe_invalidate_kept_pack_cache(struct repository *r,
 					     unsigned flags)
 {
diff --git a/packfile.h b/packfile.h
index 58fcc88e20224b..0a98bddd811921 100644
--- a/packfile.h
+++ b/packfile.h
@@ -144,6 +144,17 @@ void packfile_store_add_pack(struct packfile_store *store,
 #define repo_for_each_pack(repo, p) \
 	for (p = packfile_store_get_packs(repo->objects->packfiles); p; p = p->next)
 
+/*
+ * Try to read the object identified by its ID from the object store and
+ * populate the object info with its data. Returns 1 in case the object was
+ * not found, 0 if it was and read successfully, and a negative error code in
+ * case the object was corrupted.
+ */
+int packfile_store_read_object_info(struct packfile_store *store,
+				    const struct object_id *oid,
+				    struct object_info *oi,
+				    unsigned flags);
+
 /*
  * Get all packs managed by the given store, including packfiles that are
  * referenced by multi-pack indices.
@@ -357,7 +368,6 @@ const struct packed_git *has_packed_and_bad(struct repository *, const struct ob
  * Iff a pack file in the given repository contains the object named by sha1,
  * return true and store its location to e.
  */
-int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
 int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);
 
 int has_object_pack(struct repository *r, const struct object_id *oid);

From 4c89d31494bff4bde6079a0e0821f1437e37d07b Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:37 +0100
Subject: [PATCH 13/26] streaming: rely on object sources to create object
 stream

When creating an object stream we first look up the object info and, if
it's present, we call into the respective backend that contains the
object to create a new stream for it.

This has the consequence that, for loose object sources, we basically
iterate through the object sources twice: we first discover that the
file exists as a loose object in the first place by iterating through
all sources. And, once we have discovered it, we again walk through all
sources to try and map the object. The same issue will eventually also
surface once the packfile store becomes per-object-source.

Furthermore, it feels rather pointless to first look up the object only
to then try and read it.

Refactor the logic to be centered around sources instead. Instead of
first reading the object, we immediately ask the source to create the
object stream for us. If the object exists we get a stream, otherwise
we'll try the next source.

Like this we only have to iterate through sources once. But even more
importantly, this change also helps us to make the whole logic
pluggable. The object read stream subsystem does not need to be aware of
the different source backends anymore, but eventually it'll only have to
call the source's callback function.

Note that at the current point in time we aren't fully there yet:

  - The packfile store still sits on the object database level and is
    thus agnostic of the sources.

  - We still have to call into both the packfile store and the loose
    object source.

But both of these issues will soon be addressed.

This refactoring results in a slight change to semantics: previously, it
was `odb_read_object_info_extended()` that picked the source for us, and
it would have favored packed (non-deltified) objects over loose objects.
And while we still favor packed over loose objects for a single source
with the new logic, we'll now favor a loose object from an earlier
source over a packed object from a later source.

Ultimately this shouldn't matter though: the stream doesn't indicate to
the caller which source it is from and whether it was created from a
packed or loose object, so such details are opaque to the caller. And
other than that we should be able to assume that two objects with the
same object ID should refer to the same content, so the streamed data
would be the same, too.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 streaming.c | 65 ++++++++++++++++++++---------------------------------
 1 file changed, 24 insertions(+), 41 deletions(-)

diff --git a/streaming.c b/streaming.c
index 46fddaf2cad0ba..f0f7d31956f59b 100644
--- a/streaming.c
+++ b/streaming.c
@@ -204,21 +204,15 @@ static int close_istream_loose(struct odb_read_stream *_st)
 }
 
 static int open_istream_loose(struct odb_read_stream **out,
-			      struct repository *r,
+			      struct odb_source *source,
 			      const struct object_id *oid)
 {
 	struct object_info oi = OBJECT_INFO_INIT;
 	struct odb_loose_read_stream *st;
-	struct odb_source *source;
 	unsigned long mapsize;
 	void *mapped;
 
-	odb_prepare_alternates(r->objects);
-	for (source = r->objects->sources; source; source = source->next) {
-		mapped = odb_source_loose_map_object(source, oid, &mapsize);
-		if (mapped)
-			break;
-	}
+	mapped = odb_source_loose_map_object(source, oid, &mapsize);
 	if (!mapped)
 		return -1;
 
@@ -352,21 +346,25 @@ static int close_istream_pack_non_delta(struct odb_read_stream *_st)
 }
 
 static int open_istream_pack_non_delta(struct odb_read_stream **out,
-				       struct repository *r UNUSED,
-				       const struct object_id *oid UNUSED,
-				       struct packed_git *pack,
-				       off_t offset)
+				       struct object_database *odb,
+				       const struct object_id *oid)
 {
 	struct odb_packed_read_stream *stream;
-	struct pack_window *window;
+	struct pack_window *window = NULL;
+	struct object_info oi = OBJECT_INFO_INIT;
 	enum object_type in_pack_type;
-	size_t size;
+	unsigned long size;
 
-	window = NULL;
+	oi.sizep = &size;
+
+	if (packfile_store_read_object_info(odb->packfiles, oid, &oi, 0) ||
+	    oi.u.packed.is_delta ||
+	    repo_settings_get_big_file_threshold(the_repository) >= size)
+		return -1;
 
-	in_pack_type = unpack_object_header(pack,
+	in_pack_type = unpack_object_header(oi.u.packed.pack,
 					    &window,
-					    &offset,
+					    &oi.u.packed.offset,
 					    &size);
 	unuse_pack(&window);
 	switch (in_pack_type) {
@@ -385,8 +383,8 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out,
 	stream->base.type = in_pack_type;
 	stream->base.size = size;
 	stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED;
-	stream->pack = pack;
-	stream->pos = offset;
+	stream->pack = oi.u.packed.pack;
+	stream->pos = oi.u.packed.offset;
 
 	*out = &stream->base;
 
@@ -463,30 +461,15 @@ static int istream_source(struct odb_read_stream **out,
 			  struct repository *r,
 			  const struct object_id *oid)
 {
-	unsigned long size;
-	int status;
-	struct object_info oi = OBJECT_INFO_INIT;
-
-	oi.sizep = &size;
-	status = odb_read_object_info_extended(r->objects, oid, &oi, 0);
-	if (status < 0)
-		return status;
+	struct odb_source *source;
 
-	switch (oi.whence) {
-	case OI_LOOSE:
-		if (open_istream_loose(out, r, oid) < 0)
-			break;
-		return 0;
-	case OI_PACKED:
-		if (oi.u.packed.is_delta ||
-		    repo_settings_get_big_file_threshold(the_repository) >= size ||
-		    open_istream_pack_non_delta(out, r, oid, oi.u.packed.pack,
-						oi.u.packed.offset) < 0)
-			break;
+	if (!open_istream_pack_non_delta(out, r->objects, oid))
 		return 0;
-	default:
-		break;
-	}
+
+	odb_prepare_alternates(r->objects);
+	for (source = r->objects->sources; source; source = source->next)
+		if (!open_istream_loose(out, source, oid))
+			return 0;
 
 	return open_istream_incore(out, r, oid);
 }

From c26da3446e98ad4aa98ec9154c70c6fd35cb9ad6 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:38 +0100
Subject: [PATCH 14/26] streaming: get rid of `the_repository`

Subsequent commits will move the backend-specific logic of object
streaming into their respective subsystems. These subsystems have gotten
rid of `the_repository` already, but we still use it in two locations in
the streaming subsystem.

Prepare for the move by fixing those two cases. Converting the logic in
`open_istream_pack_non_delta()` is trivial as we already got the object
database as input.

But for `stream_blob_to_fd()` we have to add a new parameter to make it
accessible. So, as we already have to adjust all callers anyway, rename
the function to `odb_stream_blob_to_fd()` to indicate it's part of the
object subsystem.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/cat-file.c  |  2 +-
 builtin/fsck.c      |  3 ++-
 builtin/log.c       |  4 ++--
 entry.c             |  2 +-
 parallel-checkout.c |  3 ++-
 streaming.c         | 13 +++++++------
 streaming.h         | 18 +++++++++++++++++-
 7 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 983ecec837b03b..120d626d66e140 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -95,7 +95,7 @@ static int filter_object(const char *path, unsigned mode,
 
 static int stream_blob(const struct object_id *oid)
 {
-	if (stream_blob_to_fd(1, oid, NULL, 0))
+	if (odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0))
 		die("unable to stream %s to stdout", oid_to_hex(oid));
 	return 0;
 }
diff --git a/builtin/fsck.c b/builtin/fsck.c
index b1a650c6731d32..1a348d43c26020 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -340,7 +340,8 @@ static void check_unreachable_object(struct object *obj)
 			}
 			f = xfopen(filename, "w");
 			if (obj->type == OBJ_BLOB) {
-				if (stream_blob_to_fd(fileno(f), &obj->oid, NULL, 1))
+				if (odb_stream_blob_to_fd(the_repository->objects, fileno(f),
+							  &obj->oid, NULL, 1))
 					die_errno(_("could not write '%s'"), filename);
 			} else
 				fprintf(f, "%s\n", describe_object(&obj->oid));
diff --git a/builtin/log.c b/builtin/log.c
index c8319b8af38c8c..e7b83a6e00a708 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -584,7 +584,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c
 	fflush(rev->diffopt.file);
 	if (!rev->diffopt.flags.textconv_set_via_cmdline ||
 	    !rev->diffopt.flags.allow_textconv)
-		return stream_blob_to_fd(1, oid, NULL, 0);
+		return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0);
 
 	if (get_oid_with_context(the_repository, obj_name,
 				 GET_OID_RECORD_PATH,
@@ -594,7 +594,7 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c
 	    !textconv_object(the_repository, obj_context.path,
 			     obj_context.mode, &oidc, 1, &buf, &size)) {
 		object_context_release(&obj_context);
-		return stream_blob_to_fd(1, oid, NULL, 0);
+		return odb_stream_blob_to_fd(the_repository->objects, 1, oid, NULL, 0);
 	}
 
 	if (!buf)
diff --git a/entry.c b/entry.c
index cae02eb50398d7..38dfe670f79920 100644
--- a/entry.c
+++ b/entry.c
@@ -139,7 +139,7 @@ static int streaming_write_entry(const struct cache_entry *ce, char *path,
 	if (fd < 0)
 		return -1;
 
-	result |= stream_blob_to_fd(fd, &ce->oid, filter, 1);
+	result |= odb_stream_blob_to_fd(the_repository->objects, fd, &ce->oid, filter, 1);
 	*fstat_done = fstat_checkout_output(fd, state, statbuf);
 	result |= close(fd);
 
diff --git a/parallel-checkout.c b/parallel-checkout.c
index fba6aa65a6e852..1cb6701b926dcf 100644
--- a/parallel-checkout.c
+++ b/parallel-checkout.c
@@ -281,7 +281,8 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd,
 
 	filter = get_stream_filter_ca(&pc_item->ca, &pc_item->ce->oid);
 	if (filter) {
-		if (stream_blob_to_fd(fd, &pc_item->ce->oid, filter, 1)) {
+		if (odb_stream_blob_to_fd(the_repository->objects, fd,
+					  &pc_item->ce->oid, filter, 1)) {
 			/* On error, reset fd to try writing without streaming */
 			if (reset_fd(fd, path))
 				return -1;
diff --git a/streaming.c b/streaming.c
index f0f7d31956f59b..807a6e03a85b49 100644
--- a/streaming.c
+++ b/streaming.c
@@ -2,8 +2,6 @@
  * Copyright (c) 2011, Google Inc.
  */
 
-#define USE_THE_REPOSITORY_VARIABLE
-
 #include "git-compat-util.h"
 #include "convert.h"
 #include "environment.h"
@@ -359,7 +357,7 @@ static int open_istream_pack_non_delta(struct odb_read_stream **out,
 
 	if (packfile_store_read_object_info(odb->packfiles, oid, &oi, 0) ||
 	    oi.u.packed.is_delta ||
-	    repo_settings_get_big_file_threshold(the_repository) >= size)
+	    repo_settings_get_big_file_threshold(odb->repo) >= size)
 		return -1;
 
 	in_pack_type = unpack_object_header(oi.u.packed.pack,
@@ -518,8 +516,11 @@ struct odb_read_stream *open_istream(struct repository *r,
 	return st;
 }
 
-int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter *filter,
-		      int can_seek)
+int odb_stream_blob_to_fd(struct object_database *odb,
+			  int fd,
+			  const struct object_id *oid,
+			  struct stream_filter *filter,
+			  int can_seek)
 {
 	struct odb_read_stream *st;
 	enum object_type type;
@@ -527,7 +528,7 @@ int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter
 	ssize_t kept = 0;
 	int result = -1;
 
-	st = open_istream(the_repository, oid, &type, &sz, filter);
+	st = open_istream(odb->repo, oid, &type, &sz, filter);
 	if (!st) {
 		if (filter)
 			free_stream_filter(filter);
diff --git a/streaming.h b/streaming.h
index f5ff5d7ac9a573..148f6b30697ab7 100644
--- a/streaming.h
+++ b/streaming.h
@@ -6,6 +6,7 @@
 
 #include "object.h"
 
+struct object_database;
 /* opaque */
 struct odb_read_stream;
 struct stream_filter;
@@ -16,6 +17,21 @@ struct odb_read_stream *open_istream(struct repository *, const struct object_id
 int close_istream(struct odb_read_stream *);
 ssize_t read_istream(struct odb_read_stream *, void *, size_t);
 
-int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int can_seek);
+/*
+ * Look up the object by its ID and write the full contents to the file
+ * descriptor. The object must be a blob, or the function will fail. When
+ * provided, the filter is used to transform the blob contents.
+ *
+ * `can_seek` should be set to 1 in case the given file descriptor can be
+ * seek(3p)'d on. This is used to support files with holes in case a
+ * significant portion of the blob contains NUL bytes.
+ *
+ * Returns a negative error code on failure, 0 on success.
+ */
+int odb_stream_blob_to_fd(struct object_database *odb,
+			  int fd,
+			  const struct object_id *oid,
+			  struct stream_filter *filter,
+			  int can_seek);
 
 #endif /* STREAMING_H */

From ffc9a3448500caa50766876ef2169e0f26ad3b3c Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:39 +0100
Subject: [PATCH 15/26] streaming: make the `odb_read_stream` definition public

Subsequent commits will move the backend-specific logic of setting up an
object read stream into the specific subsystems. As the backends are now
the ones that are responsible for allocating the stream they'll need to
have the stream definition available to them.

Make the stream definition public to prepare for this.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 streaming.c | 11 -----------
 streaming.h | 15 ++++++++++++++-
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/streaming.c b/streaming.c
index 807a6e03a85b49..0635b7c12e2233 100644
--- a/streaming.c
+++ b/streaming.c
@@ -12,19 +12,8 @@
 #include "replace-object.h"
 #include "packfile.h"
 
-typedef int (*close_istream_fn)(struct odb_read_stream *);
-typedef ssize_t (*read_istream_fn)(struct odb_read_stream *, char *, size_t);
-
 #define FILTER_BUFFER (1024*16)
 
-struct odb_read_stream {
-	close_istream_fn close;
-	read_istream_fn read;
-
-	enum object_type type;
-	unsigned long size; /* inflated size of full object */
-};
-
 /*****************************************************************
  *
  * Filtered stream
diff --git a/streaming.h b/streaming.h
index 148f6b30697ab7..acfdef1598db52 100644
--- a/streaming.h
+++ b/streaming.h
@@ -7,10 +7,23 @@
 #include "object.h"
 
 struct object_database;
-/* opaque */
 struct odb_read_stream;
 struct stream_filter;
 
+typedef int (*odb_read_stream_close_fn)(struct odb_read_stream *);
+typedef ssize_t (*odb_read_stream_read_fn)(struct odb_read_stream *, char *, size_t);
+
+/*
+ * A stream that can be used to read an object from the object database without
+ * loading all of it into memory.
+ */
+struct odb_read_stream {
+	odb_read_stream_close_fn close;
+	odb_read_stream_read_fn read;
+	enum object_type type;
+	unsigned long size; /* inflated size of full object */
+};
+
 struct odb_read_stream *open_istream(struct repository *, const struct object_id *,
 				     enum object_type *, unsigned long *,
 				     struct stream_filter *);

From bc30a2f5dff6dd39966819ca3771ab5e9e072123 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:40 +0100
Subject: [PATCH 16/26] streaming: move logic to read loose object streams
 into backend

Move the logic to read loose object streams into the respective
subsystem. This allows us to make a couple of function declarations
private.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 object-file.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++---
 object-file.h |  42 ++-----------
 streaming.c   | 133 +---------------------------------------
 3 files changed, 164 insertions(+), 178 deletions(-)

diff --git a/object-file.c b/object-file.c
index b62b21a45289fc..8c67847feaceb6 100644
--- a/object-file.c
+++ b/object-file.c
@@ -234,9 +234,9 @@ static void *map_fd(int fd, const char *path, unsigned long *size)
 	return map;
 }
 
-void *odb_source_loose_map_object(struct odb_source *source,
-				  const struct object_id *oid,
-				  unsigned long *size)
+static void *odb_source_loose_map_object(struct odb_source *source,
+					 const struct object_id *oid,
+					 unsigned long *size)
 {
 	const char *p;
 	int fd = open_loose_object(source->loose, oid, &p);
@@ -246,11 +246,29 @@ void *odb_source_loose_map_object(struct odb_source *source,
 	return map_fd(fd, p, size);
 }
 
-enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
-						    unsigned char *map,
-						    unsigned long mapsize,
-						    void *buffer,
-						    unsigned long bufsiz)
+enum unpack_loose_header_result {
+	ULHR_OK,
+	ULHR_BAD,
+	ULHR_TOO_LONG,
+};
+
+/**
+ * unpack_loose_header() initializes the data stream needed to unpack
+ * a loose object header.
+ *
+ * Returns:
+ *
+ * - ULHR_OK on success
+ * - ULHR_BAD on error
+ * - ULHR_TOO_LONG if the header was too long
+ *
+ * It will only parse up to MAX_HEADER_LEN bytes.
+ */
+static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
+							   unsigned char *map,
+							   unsigned long mapsize,
+							   void *buffer,
+							   unsigned long bufsiz)
 {
 	int status;
 
@@ -329,11 +347,18 @@ static void *unpack_loose_rest(git_zstream *stream,
 }
 
 /*
+ * parse_loose_header() parses the starting "<type> <len>\0" of an
+ * object. If it doesn't follow that format -1 is returned. To check
+ * the validity of the <type> populate the "typep" in the "struct
+ * object_info". It will be OBJ_BAD if the object type is unknown. The
+ * parsed <len> can be retrieved via "oi->sizep", and from there
+ * passed to unpack_loose_rest().
+ *
  * We used to just use "sscanf()", but that's actually way
  * too permissive for what we want to check. So do an anal
  * object header parse by hand.
  */
-int parse_loose_header(const char *hdr, struct object_info *oi)
+static int parse_loose_header(const char *hdr, struct object_info *oi)
 {
 	const char *type_buf = hdr;
 	size_t size;
@@ -1976,3 +2001,127 @@ void odb_source_loose_free(struct odb_source_loose *loose)
 	loose_object_map_clear(&loose->map);
 	free(loose);
 }
+
+struct odb_loose_read_stream {
+	struct odb_read_stream base;
+	git_zstream z;
+	enum {
+		ODB_LOOSE_READ_STREAM_INUSE,
+		ODB_LOOSE_READ_STREAM_DONE,
+		ODB_LOOSE_READ_STREAM_ERROR,
+	} z_state;
+	void *mapped;
+	unsigned long mapsize;
+	char hdr[32];
+	int hdr_avail;
+	int hdr_used;
+};
+
+static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz)
+{
+	struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
+	size_t total_read = 0;
+
+	switch (st->z_state) {
+	case ODB_LOOSE_READ_STREAM_DONE:
+		return 0;
+	case ODB_LOOSE_READ_STREAM_ERROR:
+		return -1;
+	default:
+		break;
+	}
+
+	if (st->hdr_used < st->hdr_avail) {
+		size_t to_copy = st->hdr_avail - st->hdr_used;
+		if (sz < to_copy)
+			to_copy = sz;
+		memcpy(buf, st->hdr + st->hdr_used, to_copy);
+		st->hdr_used += to_copy;
+		total_read += to_copy;
+	}
+
+	while (total_read < sz) {
+		int status;
+
+		st->z.next_out = (unsigned char *)buf + total_read;
+		st->z.avail_out = sz - total_read;
+		status = git_inflate(&st->z, Z_FINISH);
+
+		total_read = st->z.next_out - (unsigned char *)buf;
+
+		if (status == Z_STREAM_END) {
+			git_inflate_end(&st->z);
+			st->z_state = ODB_LOOSE_READ_STREAM_DONE;
+			break;
+		}
+		if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
+			git_inflate_end(&st->z);
+			st->z_state = ODB_LOOSE_READ_STREAM_ERROR;
+			return -1;
+		}
+	}
+	return total_read;
+}
+
+static int close_istream_loose(struct odb_read_stream *_st)
+{
+	struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
+	if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE)
+		git_inflate_end(&st->z);
+	munmap(st->mapped, st->mapsize);
+	return 0;
+}
+
+int odb_source_loose_read_object_stream(struct odb_read_stream **out,
+					struct odb_source *source,
+					const struct object_id *oid)
+{
+	struct object_info oi = OBJECT_INFO_INIT;
+	struct odb_loose_read_stream *st;
+	unsigned long mapsize;
+	void *mapped;
+
+	mapped = odb_source_loose_map_object(source, oid, &mapsize);
+	if (!mapped)
+		return -1;
+
+	/*
+	 * Note: we must allocate this structure early even though we may still
+	 * fail. This is because we need to initialize the zlib stream, and it
+	 * is not possible to copy the stream around after the fact because it
+	 * has self-referencing pointers.
+	 */
+	CALLOC_ARRAY(st, 1);
+
+	switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr,
+				    sizeof(st->hdr))) {
+	case ULHR_OK:
+		break;
+	case ULHR_BAD:
+	case ULHR_TOO_LONG:
+		goto error;
+	}
+
+	oi.sizep = &st->base.size;
+	oi.typep = &st->base.type;
+
+	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
+		goto error;
+
+	st->mapped = mapped;
+	st->mapsize = mapsize;
+	st->hdr_used = strlen(st->hdr) + 1;
+	st->hdr_avail = st->z.total_out;
+	st->z_state = ODB_LOOSE_READ_STREAM_INUSE;
+	st->base.close = close_istream_loose;
+	st->base.read = read_istream_loose;
+
+	*out = &st->base;
+
+	return 0;
+error:
+	git_inflate_end(&st->z);
+	munmap(mapped, mapsize); /* st->mapped is not assigned yet here */
+	free(st);
+	return -1;
+}
diff --git a/object-file.h b/object-file.h
index eeffa67bbda631..1229d5f675b44a 100644
--- a/object-file.h
+++ b/object-file.h
@@ -16,6 +16,8 @@ enum {
 int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags);
 int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags);
 
+struct object_info;
+struct odb_read_stream;
 struct odb_source;
 
 struct odb_source_loose {
@@ -47,9 +49,9 @@ int odb_source_loose_read_object_info(struct odb_source *source,
 				      const struct object_id *oid,
 				      struct object_info *oi, int flags);
 
-void *odb_source_loose_map_object(struct odb_source *source,
-				  const struct object_id *oid,
-				  unsigned long *size);
+int odb_source_loose_read_object_stream(struct odb_read_stream **out,
+					struct odb_source *source,
+					const struct object_id *oid);
 
 /*
  * Return true iff an object database source has a loose object
@@ -143,40 +145,6 @@ int for_each_loose_object(struct object_database *odb,
 int format_object_header(char *str, size_t size, enum object_type type,
 			 size_t objsize);
 
-/**
- * unpack_loose_header() initializes the data stream needed to unpack
- * a loose object header.
- *
- * Returns:
- *
- * - ULHR_OK on success
- * - ULHR_BAD on error
- * - ULHR_TOO_LONG if the header was too long
- *
- * It will only parse up to MAX_HEADER_LEN bytes.
- */
-enum unpack_loose_header_result {
-	ULHR_OK,
-	ULHR_BAD,
-	ULHR_TOO_LONG,
-};
-enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
-						    unsigned char *map,
-						    unsigned long mapsize,
-						    void *buffer,
-						    unsigned long bufsiz);
-
-/**
- * parse_loose_header() parses the starting "<type> <len>\0" of an
- * object. If it doesn't follow that format -1 is returned. To check
- * the validity of the <type> populate the "typep" in the "struct
- * object_info". It will be OBJ_BAD if the object type is unknown. The
- * parsed <len> can be retrieved via "oi->sizep", and from there
- * passed to unpack_loose_rest().
- */
-struct object_info;
-int parse_loose_header(const char *hdr, struct object_info *oi);
-
 int force_object_loose(struct odb_source *source,
 		       const struct object_id *oid, time_t mtime);
 
diff --git a/streaming.c b/streaming.c
index 0635b7c12e2233..d5acc1c39650e4 100644
--- a/streaming.c
+++ b/streaming.c
@@ -114,137 +114,6 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st,
 	return &fs->base;
 }
 
-/*****************************************************************
- *
- * Loose object stream
- *
- *****************************************************************/
-
-struct odb_loose_read_stream {
-	struct odb_read_stream base;
-	git_zstream z;
-	enum {
-		ODB_LOOSE_READ_STREAM_INUSE,
-		ODB_LOOSE_READ_STREAM_DONE,
-		ODB_LOOSE_READ_STREAM_ERROR,
-	} z_state;
-	void *mapped;
-	unsigned long mapsize;
-	char hdr[32];
-	int hdr_avail;
-	int hdr_used;
-};
-
-static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz)
-{
-	struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
-	size_t total_read = 0;
-
-	switch (st->z_state) {
-	case ODB_LOOSE_READ_STREAM_DONE:
-		return 0;
-	case ODB_LOOSE_READ_STREAM_ERROR:
-		return -1;
-	default:
-		break;
-	}
-
-	if (st->hdr_used < st->hdr_avail) {
-		size_t to_copy = st->hdr_avail - st->hdr_used;
-		if (sz < to_copy)
-			to_copy = sz;
-		memcpy(buf, st->hdr + st->hdr_used, to_copy);
-		st->hdr_used += to_copy;
-		total_read += to_copy;
-	}
-
-	while (total_read < sz) {
-		int status;
-
-		st->z.next_out = (unsigned char *)buf + total_read;
-		st->z.avail_out = sz - total_read;
-		status = git_inflate(&st->z, Z_FINISH);
-
-		total_read = st->z.next_out - (unsigned char *)buf;
-
-		if (status == Z_STREAM_END) {
-			git_inflate_end(&st->z);
-			st->z_state = ODB_LOOSE_READ_STREAM_DONE;
-			break;
-		}
-		if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) {
-			git_inflate_end(&st->z);
-			st->z_state = ODB_LOOSE_READ_STREAM_ERROR;
-			return -1;
-		}
-	}
-	return total_read;
-}
-
-static int close_istream_loose(struct odb_read_stream *_st)
-{
-	struct odb_loose_read_stream *st = (struct odb_loose_read_stream *)_st;
-	if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE)
-		git_inflate_end(&st->z);
-	munmap(st->mapped, st->mapsize);
-	return 0;
-}
-
-static int open_istream_loose(struct odb_read_stream **out,
-			      struct odb_source *source,
-			      const struct object_id *oid)
-{
-	struct object_info oi = OBJECT_INFO_INIT;
-	struct odb_loose_read_stream *st;
-	unsigned long mapsize;
-	void *mapped;
-
-	mapped = odb_source_loose_map_object(source, oid, &mapsize);
-	if (!mapped)
-		return -1;
-
-	/*
-	 * Note: we must allocate this structure early even though we may still
-	 * fail. This is because we need to initialize the zlib stream, and it
-	 * is not possible to copy the stream around after the fact because it
-	 * has self-referencing pointers.
-	 */
-	CALLOC_ARRAY(st, 1);
-
-	switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr,
-				    sizeof(st->hdr))) {
-	case ULHR_OK:
-		break;
-	case ULHR_BAD:
-	case ULHR_TOO_LONG:
-		goto error;
-	}
-
-	oi.sizep = &st->base.size;
-	oi.typep = &st->base.type;
-
-	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
-		goto error;
-
-	st->mapped = mapped;
-	st->mapsize = mapsize;
-	st->hdr_used = strlen(st->hdr) + 1;
-	st->hdr_avail = st->z.total_out;
-	st->z_state = ODB_LOOSE_READ_STREAM_INUSE;
-	st->base.close = close_istream_loose;
-	st->base.read = read_istream_loose;
-
-	*out = &st->base;
-
-	return 0;
-error:
-	git_inflate_end(&st->z);
-	munmap(st->mapped, st->mapsize);
-	free(st);
-	return -1;
-}
-
-
 /*****************************************************************
  *
  * Non-delta packed object stream
@@ -455,7 +324,7 @@ static int istream_source(struct odb_read_stream **out,
 
 	odb_prepare_alternates(r->objects);
 	for (source = r->objects->sources; source; source = source->next)
-		if (!open_istream_loose(out, source, oid))
+		if (!odb_source_loose_read_object_stream(out, source, oid))
 			return 0;
 
 	return open_istream_incore(out, r, oid);

From 8c1b84bc977bf1e4515efe0386de87257ec28689 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:41 +0100
Subject: [PATCH 17/26] streaming: move logic to read packed object streams
 into backend

Move the logic to read packed object streams into the respective
subsystem.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 packfile.c  | 128 +++++++++++++++++++++++++++++++++++++++++++++++++
 packfile.h  |   5 ++
 streaming.c | 136 +---------------------------------------------------
 3 files changed, 134 insertions(+), 135 deletions(-)

diff --git a/packfile.c b/packfile.c
index b4bc40d895c8da..ad56ce0b905c0d 100644
--- a/packfile.c
+++ b/packfile.c
@@ -20,6 +20,7 @@
 #include "tree.h"
 #include "object-file.h"
 #include "odb.h"
+#include "streaming.h"
 #include "midx.h"
 #include "commit-graph.h"
 #include "pack-revindex.h"
@@ -2406,3 +2407,130 @@ void packfile_store_close(struct packfile_store *store)
 		close_pack(p);
 	}
 }
+
+struct odb_packed_read_stream {
+	struct odb_read_stream base;
+	struct packed_git *pack;
+	git_zstream z;
+	enum {
+		ODB_PACKED_READ_STREAM_UNINITIALIZED,
+		ODB_PACKED_READ_STREAM_INUSE,
+		ODB_PACKED_READ_STREAM_DONE,
+		ODB_PACKED_READ_STREAM_ERROR,
+	} z_state;
+	off_t pos;
+};
+
+static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf,
+					   size_t sz)
+{
+	struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st;
+	size_t total_read = 0;
+
+	switch (st->z_state) {
+	case ODB_PACKED_READ_STREAM_UNINITIALIZED:
+		memset(&st->z, 0, sizeof(st->z));
+		git_inflate_init(&st->z);
+		st->z_state = ODB_PACKED_READ_STREAM_INUSE;
+		break;
+	case ODB_PACKED_READ_STREAM_DONE:
+		return 0;
+	case ODB_PACKED_READ_STREAM_ERROR:
+		return -1;
+	case ODB_PACKED_READ_STREAM_INUSE:
+		break;
+	}
+
+	while (total_read < sz) {
+		int status;
+		struct pack_window *window = NULL;
+		unsigned char *mapped;
+
+		mapped = use_pack(st->pack, &window,
+				  st->pos, &st->z.avail_in);
+
+		st->z.next_out = (unsigned char *)buf + total_read;
+		st->z.avail_out = sz - total_read;
+		st->z.next_in = mapped;
+		status = git_inflate(&st->z, Z_FINISH);
+
+		st->pos += st->z.next_in - mapped;
+		total_read = st->z.next_out - (unsigned char *)buf;
+		unuse_pack(&window);
+
+		if (status == Z_STREAM_END) {
+			git_inflate_end(&st->z);
+			st->z_state = ODB_PACKED_READ_STREAM_DONE;
+			break;
+		}
+
+		/*
+		 * Unlike the loose object case, we do not have to worry here
+		 * about running out of input bytes and spinning infinitely. If
+		 * we get Z_BUF_ERROR due to too few input bytes, then we'll
+		 * replenish them in the next use_pack() call when we loop. If
+		 * we truly hit the end of the pack (i.e., because it's corrupt
+		 * or truncated), then use_pack() catches that and will die().
+		 */
+		if (status != Z_OK && status != Z_BUF_ERROR) {
+			git_inflate_end(&st->z);
+			st->z_state = ODB_PACKED_READ_STREAM_ERROR;
+			return -1;
+		}
+	}
+	return total_read;
+}
+
+static int close_istream_pack_non_delta(struct odb_read_stream *_st)
+{
+	struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st;
+	if (st->z_state == ODB_PACKED_READ_STREAM_INUSE)
+		git_inflate_end(&st->z);
+	return 0;
+}
+
+int packfile_store_read_object_stream(struct odb_read_stream **out,
+				      struct packfile_store *store,
+				      const struct object_id *oid)
+{
+	struct odb_packed_read_stream *stream;
+	struct pack_window *window = NULL;
+	struct object_info oi = OBJECT_INFO_INIT;
+	enum object_type in_pack_type;
+	unsigned long size;
+
+	oi.sizep = &size;
+
+	if (packfile_store_read_object_info(store, oid, &oi, 0) ||
+	    oi.u.packed.is_delta ||
+	    repo_settings_get_big_file_threshold(store->odb->repo) >= size)
+		return -1;
+
+	in_pack_type = unpack_object_header(oi.u.packed.pack,
+					    &window,
+					    &oi.u.packed.offset,
+					    &size);
+	unuse_pack(&window);
+	switch (in_pack_type) {
+	default:
+		return -1; /* we do not do deltas for now */
+	case OBJ_COMMIT:
+	case OBJ_TREE:
+	case OBJ_BLOB:
+	case OBJ_TAG:
+		break;
+	}
+
+	CALLOC_ARRAY(stream, 1);
+	stream->base.close = close_istream_pack_non_delta;
+	stream->base.read = read_istream_pack_non_delta;
+	stream->base.type = in_pack_type;
+	stream->base.size = size;
+	stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED;
+	stream->pack = oi.u.packed.pack;
+	stream->pos = oi.u.packed.offset;
+
+	*out = &stream->base;
+
+	return 0;
+}
diff --git a/packfile.h b/packfile.h
index 0a98bddd811921..3fcc5ae6e08c4b 100644
--- a/packfile.h
+++ b/packfile.h
@@ -8,6 +8,7 @@
 
 /* in odb.h */
 struct object_info;
+struct odb_read_stream;
 
 struct packed_git {
 	struct hashmap_entry packmap_ent;
@@ -144,6 +145,10 @@ void packfile_store_add_pack(struct packfile_store *store,
 #define repo_for_each_pack(repo, p) \
 	for (p = packfile_store_get_packs(repo->objects->packfiles); p; p = p->next)
 
+int packfile_store_read_object_stream(struct odb_read_stream **out,
+				      struct packfile_store *store,
+				      const struct object_id *oid);
+
 /*
  * Try to read the object identified by its ID from the object store and
  * populate the object info with its data. Returns 1 in case the object was
diff --git a/streaming.c b/streaming.c
index d5acc1c39650e4..3140728a70bde7 100644
--- a/streaming.c
+++ b/streaming.c
@@ -114,140 +114,6 @@ static struct odb_read_stream *attach_stream_filter(struct odb_read_stream *st,
 	return &fs->base;
 }
 
-/*****************************************************************
- *
- * Non-delta packed object stream
- *
- *****************************************************************/
-
-struct odb_packed_read_stream {
-	struct odb_read_stream base;
-	struct packed_git *pack;
-	git_zstream z;
-	enum {
-		ODB_PACKED_READ_STREAM_UNINITIALIZED,
-		ODB_PACKED_READ_STREAM_INUSE,
-		ODB_PACKED_READ_STREAM_DONE,
-		ODB_PACKED_READ_STREAM_ERROR,
-	} z_state;
-	off_t pos;
-};
-
-static ssize_t read_istream_pack_non_delta(struct odb_read_stream *_st, char *buf,
-					   size_t sz)
-{
-	struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st;
-	size_t total_read = 0;
-
-	switch (st->z_state) {
-	case ODB_PACKED_READ_STREAM_UNINITIALIZED:
-		memset(&st->z, 0, sizeof(st->z));
-		git_inflate_init(&st->z);
-		st->z_state = ODB_PACKED_READ_STREAM_INUSE;
-		break;
-	case ODB_PACKED_READ_STREAM_DONE:
-		return 0;
-	case ODB_PACKED_READ_STREAM_ERROR:
-		return -1;
-	case ODB_PACKED_READ_STREAM_INUSE:
-		break;
-	}
-
-	while (total_read < sz) {
-		int status;
-		struct pack_window *window = NULL;
-		unsigned char *mapped;
-
-		mapped = use_pack(st->pack, &window,
-				  st->pos, &st->z.avail_in);
-
-		st->z.next_out = (unsigned char *)buf + total_read;
-		st->z.avail_out = sz - total_read;
-		st->z.next_in = mapped;
-		status = git_inflate(&st->z, Z_FINISH);
-
-		st->pos += st->z.next_in - mapped;
-		total_read = st->z.next_out - (unsigned char *)buf;
-		unuse_pack(&window);
-
-		if (status == Z_STREAM_END) {
-			git_inflate_end(&st->z);
-			st->z_state = ODB_PACKED_READ_STREAM_DONE;
-			break;
-		}
-
-		/*
-		 * Unlike the loose object case, we do not have to worry here
-		 * about running out of input bytes and spinning infinitely. If
-		 * we get Z_BUF_ERROR due to too few input bytes, then we'll
-		 * replenish them in the next use_pack() call when we loop. If
-		 * we truly hit the end of the pack (i.e., because it's corrupt
-		 * or truncated), then use_pack() catches that and will die().
-		 */
-		if (status != Z_OK && status != Z_BUF_ERROR) {
-			git_inflate_end(&st->z);
-			st->z_state = ODB_PACKED_READ_STREAM_ERROR;
-			return -1;
-		}
-	}
-	return total_read;
-}
-
-static int close_istream_pack_non_delta(struct odb_read_stream *_st)
-{
-	struct odb_packed_read_stream *st = (struct odb_packed_read_stream *)_st;
-	if (st->z_state == ODB_PACKED_READ_STREAM_INUSE)
-		git_inflate_end(&st->z);
-	return 0;
-}
-
-static int open_istream_pack_non_delta(struct odb_read_stream **out,
-				       struct object_database *odb,
-				       const struct object_id *oid)
-{
-	struct odb_packed_read_stream *stream;
-	struct pack_window *window = NULL;
-	struct object_info oi = OBJECT_INFO_INIT;
-	enum object_type in_pack_type;
-	unsigned long size;
-
-	oi.sizep = &size;
-
-	if (packfile_store_read_object_info(odb->packfiles, oid, &oi, 0) ||
-	    oi.u.packed.is_delta ||
-	    repo_settings_get_big_file_threshold(odb->repo) >= size)
-		return -1;
-
-	in_pack_type = unpack_object_header(oi.u.packed.pack,
-					    &window,
-					    &oi.u.packed.offset,
-					    &size);
-	unuse_pack(&window);
-	switch (in_pack_type) {
-	default:
-		return -1; /* we do not do deltas for now */
-	case OBJ_COMMIT:
-	case OBJ_TREE:
-	case OBJ_BLOB:
-	case OBJ_TAG:
-		break;
-	}
-
-	CALLOC_ARRAY(stream, 1);
-	stream->base.close = close_istream_pack_non_delta;
-	stream->base.read = read_istream_pack_non_delta;
-	stream->base.type = in_pack_type;
-	stream->base.size = size;
-	stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED;
-	stream->pack = oi.u.packed.pack;
-	stream->pos = oi.u.packed.offset;
-
-	*out = &stream->base;
-
-	return 0;
-}
-
-
 /*****************************************************************
  *
  * In-core stream
@@ -319,7 +185,7 @@ static int istream_source(struct odb_read_stream **out,
 {
 	struct odb_source *source;
 
-	if (!open_istream_pack_non_delta(out, r->objects, oid))
+	if (!packfile_store_read_object_stream(out, r->objects->packfiles, oid))
 		return 0;
 
 	odb_prepare_alternates(r->objects);

From 378ec56beba161abbef6e2c87d9bc2ac43c355f3 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:42 +0100
Subject: [PATCH 18/26] streaming: refactor interface to be
 object-database-centric

Refactor the streaming interface to be centered around object databases
instead of centered around the repository. Rename the functions
accordingly.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 archive-tar.c          |  6 +++---
 archive-zip.c          | 12 ++++++------
 builtin/index-pack.c   |  8 ++++----
 builtin/pack-objects.c | 14 +++++++-------
 object-file.c          |  8 ++++----
 streaming.c            | 44 +++++++++++++++++++++---------------------
 streaming.h            | 30 +++++++++++++++++++++++-----
 7 files changed, 71 insertions(+), 51 deletions(-)

diff --git a/archive-tar.c b/archive-tar.c
index dc1eda09e01e2b..4d87b28504615a 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -135,16 +135,16 @@ static int stream_blocked(struct repository *r, const struct object_id *oid)
 	char buf[BLOCKSIZE];
 	ssize_t readlen;
 
-	st = open_istream(r, oid, &type, &sz, NULL);
+	st = odb_read_stream_open(r->objects, oid, &type, &sz, NULL);
 	if (!st)
 		return error(_("cannot stream blob %s"), oid_to_hex(oid));
 	for (;;) {
-		readlen = read_istream(st, buf, sizeof(buf));
+		readlen = odb_read_stream_read(st, buf, sizeof(buf));
 		if (readlen <= 0)
 			break;
 		do_write_blocked(buf, readlen);
 	}
-	close_istream(st);
+	odb_read_stream_close(st);
 	if (!readlen)
 		finish_record();
 	return readlen;
diff --git a/archive-zip.c b/archive-zip.c
index 40a9c93ff95233..c44684aebcf18d 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -348,8 +348,8 @@ static int write_zip_entry(struct archiver_args *args,
 
 		if (!buffer) {
 			enum object_type type;
-			stream = open_istream(args->repo, oid, &type, &size,
-					      NULL);
+			stream = odb_read_stream_open(args->repo->objects, oid,
+						      &type, &size, NULL);
 			if (!stream)
 				return error(_("cannot stream blob %s"),
 					     oid_to_hex(oid));
@@ -429,7 +429,7 @@ static int write_zip_entry(struct archiver_args *args,
 		ssize_t readlen;
 
 		for (;;) {
-			readlen = read_istream(stream, buf, sizeof(buf));
+			readlen = odb_read_stream_read(stream, buf, sizeof(buf));
 			if (readlen <= 0)
 				break;
 			crc = crc32(crc, buf, readlen);
@@ -439,7 +439,7 @@ static int write_zip_entry(struct archiver_args *args,
 							    buf, readlen);
 			write_or_die(1, buf, readlen);
 		}
-		close_istream(stream);
+		odb_read_stream_close(stream);
 		if (readlen)
 			return readlen;
 
@@ -462,7 +462,7 @@ static int write_zip_entry(struct archiver_args *args,
 		zstream.avail_out = sizeof(compressed);
 
 		for (;;) {
-			readlen = read_istream(stream, buf, sizeof(buf));
+			readlen = odb_read_stream_read(stream, buf, sizeof(buf));
 			if (readlen <= 0)
 				break;
 			crc = crc32(crc, buf, readlen);
@@ -486,7 +486,7 @@ static int write_zip_entry(struct archiver_args *args,
 			}
 
 		}
-		close_istream(stream);
+		odb_read_stream_close(stream);
 		if (readlen)
 			return readlen;
 
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 5f90f12f92d9c4..fb76ef0f4c17c3 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -779,7 +779,7 @@ static int compare_objects(const unsigned char *buf, unsigned long size,
 	}
 
 	while (size) {
-		ssize_t len = read_istream(data->st, data->buf, size);
+		ssize_t len = odb_read_stream_read(data->st, data->buf, size);
 		if (len == 0)
 			die(_("SHA1 COLLISION FOUND WITH %s !"),
 			    oid_to_hex(&data->entry->idx.oid));
@@ -807,15 +807,15 @@ static int check_collison(struct object_entry *entry)
 
 	memset(&data, 0, sizeof(data));
 	data.entry = entry;
-	data.st = open_istream(the_repository, &entry->idx.oid, &type, &size,
-			       NULL);
+	data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid,
+				       &type, &size, NULL);
 	if (!data.st)
 		return -1;
 	if (size != entry->size || type != entry->type)
 		die(_("SHA1 COLLISION FOUND WITH %s !"),
 		    oid_to_hex(&entry->idx.oid));
 	unpack_data(entry, compare_objects, &data);
-	close_istream(data.st);
+	odb_read_stream_close(data.st);
 	free(data.buf);
 	return 0;
 }
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index c693d948e193ed..1353c2384c336e 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -417,7 +417,7 @@ static unsigned long write_large_blob_data(struct odb_read_stream *st, struct ha
 	for (;;) {
 		ssize_t readlen;
 		int zret = Z_OK;
-		readlen = read_istream(st, ibuf, sizeof(ibuf));
+		readlen = odb_read_stream_read(st, ibuf, sizeof(ibuf));
 		if (readlen == -1)
 			die(_("unable to read %s"), oid_to_hex(oid));
 
@@ -520,8 +520,8 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 		if (oe_type(entry) == OBJ_BLOB &&
 		    oe_size_greater_than(&to_pack, entry,
 					 repo_settings_get_big_file_threshold(the_repository)) &&
-		    (st = open_istream(the_repository, &entry->idx.oid, &type,
-				       &size, NULL)) != NULL)
+		    (st = odb_read_stream_open(the_repository->objects, &entry->idx.oid,
+					       &type, &size, NULL)) != NULL)
 			buf = NULL;
 		else {
 			buf = odb_read_object(the_repository->objects,
@@ -577,7 +577,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 			dheader[--pos] = 128 | (--ofs & 127);
 		if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
 			if (st)
-				close_istream(st);
+				odb_read_stream_close(st);
 			free(buf);
 			return 0;
 		}
@@ -591,7 +591,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 		 */
 		if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
 			if (st)
-				close_istream(st);
+				odb_read_stream_close(st);
 			free(buf);
 			return 0;
 		}
@@ -601,7 +601,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 	} else {
 		if (limit && hdrlen + datalen + hashsz >= limit) {
 			if (st)
-				close_istream(st);
+				odb_read_stream_close(st);
 			free(buf);
 			return 0;
 		}
@@ -609,7 +609,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 	}
 	if (st) {
 		datalen = write_large_blob_data(st, f, &entry->idx.oid);
-		close_istream(st);
+		odb_read_stream_close(st);
 	} else {
 		hashwrite(f, buf, datalen);
 		free(buf);
diff --git a/object-file.c b/object-file.c
index 8c67847feaceb6..9ba40a848c034a 100644
--- a/object-file.c
+++ b/object-file.c
@@ -139,7 +139,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
 	char hdr[MAX_HEADER_LEN];
 	int hdrlen;
 
-	st = open_istream(r, oid, &obj_type, &size, NULL);
+	st = odb_read_stream_open(r->objects, oid, &obj_type, &size, NULL);
 	if (!st)
 		return -1;
 
@@ -151,10 +151,10 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
 	git_hash_update(&c, hdr, hdrlen);
 	for (;;) {
 		char buf[1024 * 16];
-		ssize_t readlen = read_istream(st, buf, sizeof(buf));
+		ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf));
 
 		if (readlen < 0) {
-			close_istream(st);
+			odb_read_stream_close(st);
 			return -1;
 		}
 		if (!readlen)
@@ -162,7 +162,7 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
 		git_hash_update(&c, buf, readlen);
 	}
 	git_hash_final_oid(&real_oid, &c);
-	close_istream(st);
+	odb_read_stream_close(st);
 	return !oideq(oid, &real_oid) ? -1 : 0;
 }
 
diff --git a/streaming.c b/streaming.c
index 3140728a70bde7..06993a751c6194 100644
--- a/streaming.c
+++ b/streaming.c
@@ -35,7 +35,7 @@ static int close_istream_filtered(struct odb_read_stream *_fs)
 {
 	struct odb_filtered_read_stream *fs = (struct odb_filtered_read_stream *)_fs;
 	free_stream_filter(fs->filter);
-	return close_istream(fs->upstream);
+	return odb_read_stream_close(fs->upstream);
 }
 
 static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf,
@@ -87,7 +87,7 @@ static ssize_t read_istream_filtered(struct odb_read_stream *_fs, char *buf,
 
 		/* refill the input from the upstream */
 		if (!fs->input_finished) {
-			fs->i_end = read_istream(fs->upstream, fs->ibuf, FILTER_BUFFER);
+			fs->i_end = odb_read_stream_read(fs->upstream, fs->ibuf, FILTER_BUFFER);
 			if (fs->i_end < 0)
 				return -1;
 			if (fs->i_end)
@@ -149,7 +149,7 @@ static ssize_t read_istream_incore(struct odb_read_stream *_st, char *buf, size_
 }
 
 static int open_istream_incore(struct odb_read_stream **out,
-			       struct repository *r,
+			       struct object_database *odb,
 			       const struct object_id *oid)
 {
 	struct object_info oi = OBJECT_INFO_INIT;
@@ -163,7 +163,7 @@ static int open_istream_incore(struct odb_read_stream **out,
 	oi.typep = &stream.base.type;
 	oi.sizep = &stream.base.size;
 	oi.contentp = (void **)&stream.buf;
-	ret = odb_read_object_info_extended(r->objects, oid, &oi,
+	ret = odb_read_object_info_extended(odb, oid, &oi,
 					    OBJECT_INFO_DIE_IF_CORRUPT);
 	if (ret)
 		return ret;
@@ -180,47 +180,47 @@ static int open_istream_incore(struct odb_read_stream **out,
  *****************************************************************************/
 
 static int istream_source(struct odb_read_stream **out,
-			  struct repository *r,
+			  struct object_database *odb,
 			  const struct object_id *oid)
 {
 	struct odb_source *source;
 
-	if (!packfile_store_read_object_stream(out, r->objects->packfiles, oid))
+	if (!packfile_store_read_object_stream(out, odb->packfiles, oid))
 		return 0;
 
-	odb_prepare_alternates(r->objects);
-	for (source = r->objects->sources; source; source = source->next)
+	odb_prepare_alternates(odb);
+	for (source = odb->sources; source; source = source->next)
 		if (!odb_source_loose_read_object_stream(out, source, oid))
 			return 0;
 
-	return open_istream_incore(out, r, oid);
+	return open_istream_incore(out, odb, oid);
 }
 
 /****************************************************************
  * Users of streaming interface
  ****************************************************************/
 
-int close_istream(struct odb_read_stream *st)
+int odb_read_stream_close(struct odb_read_stream *st)
 {
 	int r = st->close(st);
 	free(st);
 	return r;
 }
 
-ssize_t read_istream(struct odb_read_stream *st, void *buf, size_t sz)
+ssize_t odb_read_stream_read(struct odb_read_stream *st, void *buf, size_t sz)
 {
 	return st->read(st, buf, sz);
 }
 
-struct odb_read_stream *open_istream(struct repository *r,
-				     const struct object_id *oid,
-				     enum object_type *type,
-				     unsigned long *size,
-				     struct stream_filter *filter)
+struct odb_read_stream *odb_read_stream_open(struct object_database *odb,
+					     const struct object_id *oid,
+					     enum object_type *type,
+					     unsigned long *size,
+					     struct stream_filter *filter)
 {
 	struct odb_read_stream *st;
-	const struct object_id *real = lookup_replace_object(r, oid);
-	int ret = istream_source(&st, r, real);
+	const struct object_id *real = lookup_replace_object(odb->repo, oid);
+	int ret = istream_source(&st, odb, real);
 
 	if (ret)
 		return NULL;
@@ -229,7 +229,7 @@ struct odb_read_stream *open_istream(struct repository *r,
 		/* Add "&& !is_null_stream_filter(filter)" for performance */
 		struct odb_read_stream *nst = attach_stream_filter(st, filter);
 		if (!nst) {
-			close_istream(st);
+			odb_read_stream_close(st);
 			return NULL;
 		}
 		st = nst;
@@ -252,7 +252,7 @@ int odb_stream_blob_to_fd(struct object_database *odb,
 	ssize_t kept = 0;
 	int result = -1;
 
-	st = open_istream(odb->repo, oid, &type, &sz, filter);
+	st = odb_read_stream_open(odb, oid, &type, &sz, filter);
 	if (!st) {
 		if (filter)
 			free_stream_filter(filter);
@@ -263,7 +263,7 @@ int odb_stream_blob_to_fd(struct object_database *odb,
 	for (;;) {
 		char buf[1024 * 16];
 		ssize_t wrote, holeto;
-		ssize_t readlen = read_istream(st, buf, sizeof(buf));
+		ssize_t readlen = odb_read_stream_read(st, buf, sizeof(buf));
 
 		if (readlen < 0)
 			goto close_and_exit;
@@ -294,6 +294,6 @@ int odb_stream_blob_to_fd(struct object_database *odb,
 	result = 0;
 
  close_and_exit:
-	close_istream(st);
+	odb_read_stream_close(st);
 	return result;
 }
diff --git a/streaming.h b/streaming.h
index acfdef1598db52..7cb55213b780ff 100644
--- a/streaming.h
+++ b/streaming.h
@@ -24,11 +24,31 @@ struct odb_read_stream {
 	unsigned long size; /* inflated size of full object */
 };
 
-struct odb_read_stream *open_istream(struct repository *, const struct object_id *,
-				     enum object_type *, unsigned long *,
-				     struct stream_filter *);
-int close_istream(struct odb_read_stream *);
-ssize_t read_istream(struct odb_read_stream *, void *, size_t);
+/*
+ * Create a new object stream for the given object database. Populates the type
+ * and size pointers with the object's info. An optional filter can be used to
+ * transform the object's content.
+ *
+ * Returns the stream on success, a `NULL` pointer otherwise.
+ */
+struct odb_read_stream *odb_read_stream_open(struct object_database *odb,
+					     const struct object_id *oid,
+					     enum object_type *type,
+					     unsigned long *size,
+					     struct stream_filter *filter);
+
+/*
+ * Close the given read stream and release all resources associated with it.
+ * Returns 0 on success, a negative error code otherwise.
+ */
+int odb_read_stream_close(struct odb_read_stream *stream);
+
+/*
+ * Read data from the stream into the buffer. Returns 0 on EOF and the number
+ * of bytes read on success. Returns a negative error code in case reading from
+ * the stream fails.
+ */
+ssize_t odb_read_stream_read(struct odb_read_stream *stream, void *buf, size_t len);
 
 /*
  * Look up the object by its ID and write the full contents to the file

From 1599b68d5e960a12f5ac624f81c70ece317db5a6 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:43 +0100
Subject: [PATCH 19/26] streaming: move into object database subsystem

The "streaming" terminology is somewhat generic, so it may not be
immediately obvious that "streaming.{c,h}" is specific to the object
database. Rectify this by moving it into the "odb/" directory so that it
can be immediately attributed to the object subsystem.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Makefile                       | 2 +-
 archive-tar.c                  | 2 +-
 archive-zip.c                  | 2 +-
 builtin/cat-file.c             | 2 +-
 builtin/fsck.c                 | 2 +-
 builtin/index-pack.c           | 2 +-
 builtin/log.c                  | 2 +-
 builtin/pack-objects.c         | 2 +-
 entry.c                        | 2 +-
 meson.build                    | 2 +-
 object-file.c                  | 2 +-
 streaming.c => odb/streaming.c | 2 +-
 streaming.h => odb/streaming.h | 0
 packfile.c                     | 2 +-
 parallel-checkout.c            | 2 +-
 15 files changed, 14 insertions(+), 14 deletions(-)
 rename streaming.c => odb/streaming.c (99%)
 rename streaming.h => odb/streaming.h (100%)

diff --git a/Makefile b/Makefile
index 7e0f77e2988e3b..6d8dcc4622b059 100644
--- a/Makefile
+++ b/Makefile
@@ -1201,6 +1201,7 @@ LIB_OBJS += object-file.o
 LIB_OBJS += object-name.o
 LIB_OBJS += object.o
 LIB_OBJS += odb.o
+LIB_OBJS += odb/streaming.o
 LIB_OBJS += oid-array.o
 LIB_OBJS += oidmap.o
 LIB_OBJS += oidset.o
@@ -1294,7 +1295,6 @@ LIB_OBJS += split-index.o
 LIB_OBJS += stable-qsort.o
 LIB_OBJS += statinfo.o
 LIB_OBJS += strbuf.o
-LIB_OBJS += streaming.o
 LIB_OBJS += string-list.o
 LIB_OBJS += strmap.o
 LIB_OBJS += strvec.o
diff --git a/archive-tar.c b/archive-tar.c
index 4d87b28504615a..494b9f0667a523 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -12,8 +12,8 @@
 #include "tar.h"
 #include "archive.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "strbuf.h"
-#include "streaming.h"
 #include "run-command.h"
 #include "write-or-die.h"
 
diff --git a/archive-zip.c b/archive-zip.c
index c44684aebcf18d..a0bdc2fe3b2e5e 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -10,9 +10,9 @@
 #include "gettext.h"
 #include "git-zlib.h"
 #include "hex.h"
-#include "streaming.h"
 #include "utf8.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "strbuf.h"
 #include "userdiff.h"
 #include "write-or-die.h"
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 120d626d66e140..505ddaa12f5309 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -18,13 +18,13 @@
 #include "list-objects-filter-options.h"
 #include "parse-options.h"
 #include "userdiff.h"
-#include "streaming.h"
 #include "oid-array.h"
 #include "packfile.h"
 #include "pack-bitmap.h"
 #include "object-file.h"
 #include "object-name.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "replace-object.h"
 #include "promisor-remote.h"
 #include "mailmap.h"
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 1a348d43c26020..c7d2eea287fe7d 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -13,11 +13,11 @@
 #include "fsck.h"
 #include "parse-options.h"
 #include "progress.h"
-#include "streaming.h"
 #include "packfile.h"
 #include "object-file.h"
 #include "object-name.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "path.h"
 #include "read-cache-ll.h"
 #include "replace-object.h"
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index fb76ef0f4c17c3..581023495fdc9c 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -16,12 +16,12 @@
 #include "progress.h"
 #include "fsck.h"
 #include "strbuf.h"
-#include "streaming.h"
 #include "thread-utils.h"
 #include "packfile.h"
 #include "pack-revindex.h"
 #include "object-file.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "oid-array.h"
 #include "oidset.h"
 #include "path.h"
diff --git a/builtin/log.c b/builtin/log.c
index e7b83a6e00a708..d4cf9c59c81a83 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -16,6 +16,7 @@
 #include "refs.h"
 #include "object-name.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "pager.h"
 #include "color.h"
 #include "commit.h"
@@ -35,7 +36,6 @@
 #include "parse-options.h"
 #include "line-log.h"
 #include "branch.h"
-#include "streaming.h"
 #include "version.h"
 #include "mailmap.h"
 #include "progress.h"
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 1353c2384c336e..f109e26786e621 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -22,7 +22,6 @@
 #include "pack-objects.h"
 #include "progress.h"
 #include "refs.h"
-#include "streaming.h"
 #include "thread-utils.h"
 #include "pack-bitmap.h"
 #include "delta-islands.h"
@@ -33,6 +32,7 @@
 #include "packfile.h"
 #include "object-file.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "replace-object.h"
 #include "dir.h"
 #include "midx.h"
diff --git a/entry.c b/entry.c
index 38dfe670f79920..7817aee362ed9e 100644
--- a/entry.c
+++ b/entry.c
@@ -2,13 +2,13 @@
 
 #include "git-compat-util.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "dir.h"
 #include "environment.h"
 #include "gettext.h"
 #include "hex.h"
 #include "name-hash.h"
 #include "sparse-index.h"
-#include "streaming.h"
 #include "submodule.h"
 #include "symlinks.h"
 #include "progress.h"
diff --git a/meson.build b/meson.build
index 1f95a06edb7829..fc82929b379dc5 100644
--- a/meson.build
+++ b/meson.build
@@ -397,6 +397,7 @@ libgit_sources = [
   'object-name.c',
   'object.c',
   'odb.c',
+  'odb/streaming.c',
   'oid-array.c',
   'oidmap.c',
   'oidset.c',
@@ -490,7 +491,6 @@ libgit_sources = [
   'stable-qsort.c',
   'statinfo.c',
   'strbuf.c',
-  'streaming.c',
   'string-list.c',
   'strmap.c',
   'strvec.c',
diff --git a/object-file.c b/object-file.c
index 9ba40a848c034a..9601fdb12dc9a8 100644
--- a/object-file.c
+++ b/object-file.c
@@ -20,13 +20,13 @@
 #include "object-file-convert.h"
 #include "object-file.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "oidtree.h"
 #include "pack.h"
 #include "packfile.h"
 #include "path.h"
 #include "read-cache-ll.h"
 #include "setup.h"
-#include "streaming.h"
 #include "tempfile.h"
 #include "tmp-objdir.h"
 
diff --git a/streaming.c b/odb/streaming.c
similarity index 99%
rename from streaming.c
rename to odb/streaming.c
index 06993a751c6194..7ef58adaa2a09e 100644
--- a/streaming.c
+++ b/odb/streaming.c
@@ -5,10 +5,10 @@
 #include "git-compat-util.h"
 #include "convert.h"
 #include "environment.h"
-#include "streaming.h"
 #include "repository.h"
 #include "object-file.h"
 #include "odb.h"
+#include "odb/streaming.h"
 #include "replace-object.h"
 #include "packfile.h"
 
diff --git a/streaming.h b/odb/streaming.h
similarity index 100%
rename from streaming.h
rename to odb/streaming.h
diff --git a/packfile.c b/packfile.c
index ad56ce0b905c0d..7a16aaa90d0a2f 100644
--- a/packfile.c
+++ b/packfile.c
@@ -20,7 +20,7 @@
 #include "tree.h"
 #include "object-file.h"
 #include "odb.h"
-#include "streaming.h"
+#include "odb/streaming.h"
 #include "midx.h"
 #include "commit-graph.h"
 #include "pack-revindex.h"
diff --git a/parallel-checkout.c b/parallel-checkout.c
index 1cb6701b926dcf..0bf4bd6d4abd8c 100644
--- a/parallel-checkout.c
+++ b/parallel-checkout.c
@@ -13,7 +13,7 @@
 #include "read-cache-ll.h"
 #include "run-command.h"
 #include "sigchain.h"
-#include "streaming.h"
+#include "odb/streaming.h"
 #include "symlinks.h"
 #include "thread-utils.h"
 #include "trace2.h"

From 7b940286527ec2175dffbb317f47e080bb37cf3e Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 23 Nov 2025 19:59:44 +0100
Subject: [PATCH 20/26] streaming: drop redundant type and size pointers

In the preceding commits we have turned `struct odb_read_stream` into a
publicly visible structure. Furthermore, this structure now contains the
type and size of the object that we are about to stream. Consequently,
the out-pointers that we used before to propagate the type and size of
the streamed object are now somewhat redundant with the data contained
in the structure itself.

Drop these out-pointers and adapt callers accordingly.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 archive-tar.c          |  4 +---
 archive-zip.c          |  5 ++---
 builtin/index-pack.c   |  7 ++-----
 builtin/pack-objects.c |  6 ++++--
 object-file.c          |  6 ++----
 odb/streaming.c        | 10 ++--------
 odb/streaming.h        |  7 ++-----
 7 files changed, 15 insertions(+), 30 deletions(-)

diff --git a/archive-tar.c b/archive-tar.c
index 494b9f0667a523..0fc70d13a8807e 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -130,12 +130,10 @@ static void write_trailer(void)
 static int stream_blocked(struct repository *r, const struct object_id *oid)
 {
 	struct odb_read_stream *st;
-	enum object_type type;
-	unsigned long sz;
 	char buf[BLOCKSIZE];
 	ssize_t readlen;
 
-	st = odb_read_stream_open(r->objects, oid, &type, &sz, NULL);
+	st = odb_read_stream_open(r->objects, oid, NULL);
 	if (!st)
 		return error(_("cannot stream blob %s"), oid_to_hex(oid));
 	for (;;) {
diff --git a/archive-zip.c b/archive-zip.c
index a0bdc2fe3b2e5e..97ea8d60d6187b 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -347,12 +347,11 @@ static int write_zip_entry(struct archiver_args *args,
 			method = ZIP_METHOD_DEFLATE;
 
 		if (!buffer) {
-			enum object_type type;
-			stream = odb_read_stream_open(args->repo->objects, oid,
-						      &type, &size, NULL);
+			stream = odb_read_stream_open(args->repo->objects, oid, NULL);
 			if (!stream)
 				return error(_("cannot stream blob %s"),
 					     oid_to_hex(oid));
+			size = stream->size;
 			flags |= ZIP_STREAM;
 			out = NULL;
 		} else {
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 581023495fdc9c..b01cb77f4a8500 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -798,8 +798,6 @@ static int compare_objects(const unsigned char *buf, unsigned long size,
 static int check_collison(struct object_entry *entry)
 {
 	struct compare_data data;
-	enum object_type type;
-	unsigned long size;
 
 	if (entry->size <= repo_settings_get_big_file_threshold(the_repository) ||
 	    entry->type != OBJ_BLOB)
@@ -807,11 +805,10 @@ static int check_collison(struct object_entry *entry)
 
 	memset(&data, 0, sizeof(data));
 	data.entry = entry;
-	data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid,
-				       &type, &size, NULL);
+	data.st = odb_read_stream_open(the_repository->objects, &entry->idx.oid, NULL);
 	if (!data.st)
 		return -1;
-	if (size != entry->size || type != entry->type)
+	if (data.st->size != entry->size || data.st->type != entry->type)
 		die(_("SHA1 COLLISION FOUND WITH %s !"),
 		    oid_to_hex(&entry->idx.oid));
 	unpack_data(entry, compare_objects, &data);
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index f109e26786e621..0d1d6995bfc35a 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -521,9 +521,11 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 		    oe_size_greater_than(&to_pack, entry,
 					 repo_settings_get_big_file_threshold(the_repository)) &&
 		    (st = odb_read_stream_open(the_repository->objects, &entry->idx.oid,
-					       &type, &size, NULL)) != NULL)
+					       NULL)) != NULL) {
 			buf = NULL;
-		else {
+			type = st->type;
+			size = st->size;
+		} else {
 			buf = odb_read_object(the_repository->objects,
 					      &entry->idx.oid, &type,
 					      &size);
diff --git a/object-file.c b/object-file.c
index 9601fdb12dc9a8..12177a7dd707a8 100644
--- a/object-file.c
+++ b/object-file.c
@@ -132,19 +132,17 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
 int stream_object_signature(struct repository *r, const struct object_id *oid)
 {
 	struct object_id real_oid;
-	unsigned long size;
-	enum object_type obj_type;
 	struct odb_read_stream *st;
 	struct git_hash_ctx c;
 	char hdr[MAX_HEADER_LEN];
 	int hdrlen;
 
-	st = odb_read_stream_open(r->objects, oid, &obj_type, &size, NULL);
+	st = odb_read_stream_open(r->objects, oid, NULL);
 	if (!st)
 		return -1;
 
 	/* Generate the header */
-	hdrlen = format_object_header(hdr, sizeof(hdr), obj_type, size);
+	hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size);
 
 	/* Sha1.. */
 	r->hash_algo->init_fn(&c);
diff --git a/odb/streaming.c b/odb/streaming.c
index 7ef58adaa2a09e..745cd486fbb33d 100644
--- a/odb/streaming.c
+++ b/odb/streaming.c
@@ -214,8 +214,6 @@ ssize_t odb_read_stream_read(struct odb_read_stream *st, void *buf, size_t sz)
 
 struct odb_read_stream *odb_read_stream_open(struct object_database *odb,
 					     const struct object_id *oid,
-					     enum object_type *type,
-					     unsigned long *size,
 					     struct stream_filter *filter)
 {
 	struct odb_read_stream *st;
@@ -235,8 +233,6 @@ struct odb_read_stream *odb_read_stream_open(struct object_database *odb,
 		st = nst;
 	}
 
-	*size = st->size;
-	*type = st->type;
 	return st;
 }
 
@@ -247,18 +243,16 @@ int odb_stream_blob_to_fd(struct object_database *odb,
 			  int can_seek)
 {
 	struct odb_read_stream *st;
-	enum object_type type;
-	unsigned long sz;
 	ssize_t kept = 0;
 	int result = -1;
 
-	st = odb_read_stream_open(odb, oid, &type, &sz, filter);
+	st = odb_read_stream_open(odb, oid, filter);
 	if (!st) {
 		if (filter)
 			free_stream_filter(filter);
 		return result;
 	}
-	if (type != OBJ_BLOB)
+	if (st->type != OBJ_BLOB)
 		goto close_and_exit;
 	for (;;) {
 		char buf[1024 * 16];
diff --git a/odb/streaming.h b/odb/streaming.h
index 7cb55213b780ff..c7861f7e13c606 100644
--- a/odb/streaming.h
+++ b/odb/streaming.h
@@ -25,16 +25,13 @@ struct odb_read_stream {
 };
 
 /*
- * Create a new object stream for the given object database. Populates the type
- * and size pointers with the object's info. An optional filter can be used to
- * transform the object's content.
+ * Create a new object stream for the given object database. An optional filter
+ * can be used to transform the object's content.
  *
  * Returns the stream on success, a `NULL` pointer otherwise.
  */
 struct odb_read_stream *odb_read_stream_open(struct object_database *odb,
 					     const struct object_id *oid,
-					     enum object_type *type,
-					     unsigned long *size,
 					     struct stream_filter *filter);
 
 /*

From e1ecf0dd6897eae1594b7e9345605b8f88485b95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= <l.s.r@web.de>
Date: Sat, 6 Dec 2025 14:27:39 +0100
Subject: [PATCH 21/26] wrapper: add git_mkdtemp()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extend git_mkstemps_mode() to optionally call mkdir(2) instead of
open(2), then use that ability to create a mkdtemp(3) replacement,
git_mkdtemp().  We'll start using it in the next commit.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 wrapper.c | 21 +++++++++++++++++++--
 wrapper.h |  2 ++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/wrapper.c b/wrapper.c
index 3d507d42045203..89f6effe84371b 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -446,7 +446,11 @@ int xmkstemp(char *filename_template)
 #undef TMP_MAX
 #define TMP_MAX 16384
 
-int git_mkstemps_mode(char *pattern, int suffix_len, int mode)
+/*
+ * Returns -1 on error, 0 if it created a directory, or an open file
+ * descriptor to the created regular file.
+ */
+static int git_mkdstemps_mode(char *pattern, int suffix_len, int mode, bool dir)
 {
 	static const char letters[] =
 		"abcdefghijklmnopqrstuvwxyz"
@@ -488,7 +492,10 @@ int git_mkstemps_mode(char *pattern, int suffix_len, int mode)
 			v /= num_letters;
 		}
 
-		fd = open(pattern, O_CREAT | O_EXCL | O_RDWR, mode);
+		if (dir)
+			fd = mkdir(pattern, mode);
+		else
+			fd = open(pattern, O_CREAT | O_EXCL | O_RDWR, mode);
 		if (fd >= 0)
 			return fd;
 		/*
@@ -503,6 +510,16 @@ int git_mkstemps_mode(char *pattern, int suffix_len, int mode)
 	return -1;
 }
 
+char *git_mkdtemp(char *pattern)
+{
+	return git_mkdstemps_mode(pattern, 0, 0700, true) ? NULL : pattern;
+}
+
+int git_mkstemps_mode(char *pattern, int suffix_len, int mode)
+{
+	return git_mkdstemps_mode(pattern, suffix_len, mode, false);
+}
+
 int git_mkstemp_mode(char *pattern, int mode)
 {
 	/* mkstemp is just mkstemps with no suffix */
diff --git a/wrapper.h b/wrapper.h
index 44a8597ac31426..15ac3bab6e9748 100644
--- a/wrapper.h
+++ b/wrapper.h
@@ -37,6 +37,8 @@ int xsnprintf(char *dst, size_t max, const char *fmt, ...);
 
 int xgethostname(char *buf, size_t len);
 
+char *git_mkdtemp(char *pattern);
+
 /* set default permissions by passing mode arguments to open(2) */
 int git_mkstemps_mode(char *pattern, int suffix_len, int mode);
 int git_mkstemp_mode(char *pattern, int mode);

From 5ecd3590a3052820eeb3f1d6764584c537b68938 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= <l.s.r@web.de>
Date: Sat, 6 Dec 2025 14:27:47 +0100
Subject: [PATCH 22/26] compat: use git_mkdtemp()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A file might appear at the path returned by mktemp(3) before we call
mkdir(2).  Use the more robust git_mkdtemp() instead, which retries a
number of times and doesn't need to call lstat(2).

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 compat/mkdtemp.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/compat/mkdtemp.c b/compat/mkdtemp.c
index 11361195925c67..fcdd4e01e14613 100644
--- a/compat/mkdtemp.c
+++ b/compat/mkdtemp.c
@@ -2,7 +2,5 @@
 
 char *gitmkdtemp(char *template)
 {
-	if (!*mktemp(template) || mkdir(template, 0700))
-		return NULL;
-	return template;
+	return git_mkdtemp(template);
 }

From 47bf14750eee7e43e12d20414d3698f203245a35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= <l.s.r@web.de>
Date: Sat, 6 Dec 2025 14:28:26 +0100
Subject: [PATCH 23/26] compat: remove mingw_mktemp()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the mktemp(3) compatibility function now that its last caller was
removed by the previous commit.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 compat/mingw-posix.h |  3 ---
 compat/mingw.c       | 12 ------------
 2 files changed, 15 deletions(-)

diff --git a/compat/mingw-posix.h b/compat/mingw-posix.h
index 631a20868489be..0939feff27ffec 100644
--- a/compat/mingw-posix.h
+++ b/compat/mingw-posix.h
@@ -241,9 +241,6 @@ int mingw_chdir(const char *dirname);
 int mingw_chmod(const char *filename, int mode);
 #define chmod mingw_chmod
 
-char *mingw_mktemp(char *template);
-#define mktemp mingw_mktemp
-
 char *mingw_getcwd(char *pointer, int len);
 #define getcwd mingw_getcwd
 
diff --git a/compat/mingw.c b/compat/mingw.c
index 736a07a028ab4d..abdc9684214dac 100644
--- a/compat/mingw.c
+++ b/compat/mingw.c
@@ -1162,18 +1162,6 @@ unsigned int sleep (unsigned int seconds)
 	return 0;
 }
 
-char *mingw_mktemp(char *template)
-{
-	wchar_t wtemplate[MAX_PATH];
-	if (xutftowcs_path(wtemplate, template) < 0)
-		return NULL;
-	if (!_wmktemp(wtemplate))
-		return NULL;
-	if (xwcstoutf(template, wtemplate, strlen(template) + 1) < 0)
-		return NULL;
-	return template;
-}
-
 int mkstemp(char *template)
 {
 	return git_mkstemp_mode(template, 0600);

From 7bef658135944d26acf3e1ec9316ca11f4369cf8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= <l.s.r@web.de>
Date: Sat, 6 Dec 2025 14:29:43 +0100
Subject: [PATCH 24/26] banned.h: ban mktemp(3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Older versions of mktemp(3) generate easily guessable file names.  The
function checks if the generated name is used, which is unreliable, as
a file with that name might then be created by some other process before
we can do it ourselves.  The function was dropped from POSIX due to its
security problems.  Forbid its use.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 banned.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/banned.h b/banned.h
index 44e76bd90af769..2b934c8c4381b5 100644
--- a/banned.h
+++ b/banned.h
@@ -41,4 +41,7 @@
 #undef asctime_r
 #define asctime_r(t, buf) BANNED(asctime_r)
 
+#undef mktemp
+#define mktemp(x) BANNED(mktemp)
+
 #endif /* BANNED_H */

From 10bba537c4c23e713af05be700748c6a3c25bf68 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= <l.s.r@web.de>
Date: Sat, 6 Dec 2025 14:35:39 +0100
Subject: [PATCH 25/26] compat: remove gitmkdtemp()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gitmkdtemp() has become a trivial wrapper around git_mkdtemp().  Remove
this now unnecessary layer of indirection.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Makefile                            | 1 -
 compat/mkdtemp.c                    | 6 ------
 compat/posix.h                      | 3 +--
 contrib/buildsystems/CMakeLists.txt | 4 ----
 meson.build                         | 2 +-
 5 files changed, 2 insertions(+), 14 deletions(-)
 delete mode 100644 compat/mkdtemp.c

diff --git a/Makefile b/Makefile
index 7e0f77e2988e3b..8f74b25fe7f9e9 100644
--- a/Makefile
+++ b/Makefile
@@ -1917,7 +1917,6 @@ ifdef NO_SETENV
 endif
 ifdef NO_MKDTEMP
 	COMPAT_CFLAGS += -DNO_MKDTEMP
-	COMPAT_OBJS += compat/mkdtemp.o
 endif
 ifdef MKDIR_WO_TRAILING_SLASH
 	COMPAT_CFLAGS += -DMKDIR_WO_TRAILING_SLASH
diff --git a/compat/mkdtemp.c b/compat/mkdtemp.c
deleted file mode 100644
index fcdd4e01e14613..00000000000000
--- a/compat/mkdtemp.c
+++ /dev/null
@@ -1,6 +0,0 @@
-#include "../git-compat-util.h"
-
-char *gitmkdtemp(char *template)
-{
-	return git_mkdtemp(template);
-}
diff --git a/compat/posix.h b/compat/posix.h
index 067a00f33b83f3..245386fa4a9f4e 100644
--- a/compat/posix.h
+++ b/compat/posix.h
@@ -329,8 +329,7 @@ int gitsetenv(const char *, const char *, int);
 #endif
 
 #ifdef NO_MKDTEMP
-#define mkdtemp gitmkdtemp
-char *gitmkdtemp(char *);
+#define mkdtemp git_mkdtemp
 #endif
 
 #ifdef NO_UNSETENV
diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
index edb0fc04ad7649..b84d8a7c762f06 100644
--- a/contrib/buildsystems/CMakeLists.txt
+++ b/contrib/buildsystems/CMakeLists.txt
@@ -411,10 +411,6 @@ if(NOT HAVE_SETENV)
 	list(APPEND compat_SOURCES compat/setenv.c)
 endif()
 
-if(NOT HAVE_MKDTEMP)
-	list(APPEND compat_SOURCES compat/mkdtemp.c)
-endif()
-
 if(NOT HAVE_PREAD)
 	list(APPEND compat_SOURCES compat/pread.c)
 endif()
diff --git a/meson.build b/meson.build
index 1f95a06edb7829..4a42e783b1bb77 100644
--- a/meson.build
+++ b/meson.build
@@ -1401,7 +1401,7 @@ checkfuncs = {
   'strlcpy' : ['strlcpy.c'],
   'strtoull' : [],
   'setenv' : ['setenv.c'],
-  'mkdtemp' : ['mkdtemp.c'],
+  'mkdtemp' : [],
   'initgroups' : [],
   'strtoumax' : ['strtoumax.c', 'strtoimax.c'],
   'pread' : ['pread.c'],

From e7ef0ca622016d12a85836928a03959de4537c2f Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Tue, 16 Dec 2025 11:08:23 +0900
Subject: [PATCH 26/26] The ninth batch

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/RelNotes/2.53.0.adoc | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/RelNotes/2.53.0.adoc b/Documentation/RelNotes/2.53.0.adoc
index 41ae2a5a7a4696..f28c8202919dc9 100644
--- a/Documentation/RelNotes/2.53.0.adoc
+++ b/Documentation/RelNotes/2.53.0.adoc
@@ -60,6 +60,13 @@ Performance, Internal Implementation, Development Support etc.
    "git diff --find-copioes-harder", also making the operation run
    faster.
 
+ * The "git_istream" abstraction has been revamped to make it easier
+   to interface with pluggable object database design.
+
+ * Rewrite the only use of "mktemp()" that is subject to TOCTOU race
+   and stop using the insecure "mktemp()" function.
+   (merge 10bba537c4 rs/ban-mktemp later to maint).
+
 
 Fixes since v2.52
 -----------------
@@ -167,6 +174,9 @@ Fixes since v2.52
    pathspec, which has been corrected.
    (merge 05491b90ce js/last-modified-with-sparse-checkouts later to maint).
 
+ * Emulation code clean-up.
+   (merge 42aa7603aa gf/win32-pthread-cond-init later to maint).
+
  * Other code cleanup, docfix, build fix, etc.
    (merge 46207a54cc qj/doc-http-bad-want-response later to maint).
    (merge df90eccd93 kh/doc-commit-extra-references later to maint).