From f7700e452c79ff5582626bb3377d0429a921d869 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 24 Oct 2025 07:21:36 +0200 Subject: [PATCH 1/3] fix: refspec for shallow clones uses a single-branch (#2227) When doing shallow clones (depth != NoChange), it now uses a single-branch refspec instead of fetching all branches. This matches Git's behavior and significantly reduces the repository size for shallow clones. For shallow clones: - If ref_name is specified: uses that branch - Otherwise: attempts to detect from Protocol V1 handshake or falls back to init.defaultBranch config or "main" This addresses issue #2227 where `gix clone --depth 1` was creating repositories ~130MB vs Git's ~70MB due to fetching all branches. Co-authored-by: Byron <63622+Byron@users.noreply.github.com> --- gix/src/clone/fetch/mod.rs | 82 +++++++++++++++++++++++++++++++++++--- gix/tests/gix/clone.rs | 35 ++++++++++++++++ 2 files changed, 111 insertions(+), 6 deletions(-) diff --git a/gix/src/clone/fetch/mod.rs b/gix/src/clone/fetch/mod.rs index 34641a89fb1..c56f413061b 100644 --- a/gix/src/clone/fetch/mod.rs +++ b/gix/src/clone/fetch/mod.rs @@ -47,6 +47,8 @@ pub enum Error { }, #[error(transparent)] CommitterOrFallback(#[from] crate::config::time::Error), + #[error(transparent)] + RefMap(#[from] crate::remote::ref_map::Error), } /// Modification @@ -101,14 +103,81 @@ impl PrepareFetch { }; let mut remote = repo.remote_at(self.url.clone())?; + + // For shallow clones without custom configuration, we'll use a single-branch refspec + // to match git's behavior (matching git's single-branch behavior for shallow clones). + let use_single_branch_for_shallow = self.shallow != remote::fetch::Shallow::NoChange + && self.configure_remote.is_none() + && remote.fetch_specs.is_empty(); + + let target_ref = if use_single_branch_for_shallow { + // Determine target branch from user-specified ref_name or default branch + if let Some(ref_name) = &self.ref_name { + // User specified a branch, use that + Some(format!("refs/heads/{}", ref_name.as_ref().as_bstr())) + } else { + // For shallow clones without a specified ref, we need to determine the default branch. + // We'll connect to get HEAD information. For Protocol V2, we need to explicitly list refs. + let mut connection = remote.connect(remote::Direction::Fetch).await?; + + // Perform handshake and try to get HEAD from it (works for Protocol V1) + let _ = connection.ref_map_by_ref(&mut progress, Default::default()).await?; + + let target = if let Some(handshake) = &connection.handshake { + // Protocol V1: refs are in handshake + handshake.refs.as_ref().and_then(|refs| { + refs.iter().find_map(|r| match r { + gix_protocol::handshake::Ref::Symbolic { + full_ref_name, target, .. + } if full_ref_name == "HEAD" => Some(target.to_string()), + _ => None, + }) + }) + } else { + None + }; + + // For Protocol V2 or if we couldn't determine HEAD, use the configured default branch + let fallback_branch = target + .or_else(|| { + repo.config + .resolved + .string(crate::config::tree::Init::DEFAULT_BRANCH) + .and_then(|name| name.to_str().ok().map(|s| format!("refs/heads/{}", s))) + }) + .unwrap_or_else(|| "refs/heads/main".to_string()); + + // Drop the connection explicitly to release the borrow on remote + drop(connection); + + Some(fallback_branch) + } + } else { + None + }; + + // Set up refspec based on whether we're doing a single-branch shallow clone if remote.fetch_specs.is_empty() { - remote = remote - .with_refspecs( - Some(format!("+refs/heads/*:refs/remotes/{remote_name}/*").as_str()), - remote::Direction::Fetch, - ) - .expect("valid static spec"); + if let Some(target_ref) = &target_ref { + // Single-branch refspec for shallow clones + let short_name = target_ref.strip_prefix("refs/heads/").unwrap_or(target_ref.as_str()); + remote = remote + .with_refspecs( + Some(format!("+{target_ref}:refs/remotes/{remote_name}/{short_name}").as_str()), + remote::Direction::Fetch, + ) + .expect("valid refspec"); + } else { + // Wildcard refspec for non-shallow clones or when target couldn't be determined + remote = remote + .with_refspecs( + Some(format!("+refs/heads/*:refs/remotes/{remote_name}/*").as_str()), + remote::Direction::Fetch, + ) + .expect("valid static spec"); + } } + let mut clone_fetch_tags = None; if let Some(f) = self.configure_remote.as_mut() { remote = f(remote).map_err(Error::RemoteConfiguration)?; @@ -133,6 +202,7 @@ impl PrepareFetch { .expect("valid") .to_owned(); let pending_pack: remote::fetch::Prepare<'_, '_, _> = { + // For shallow clones, we already connected once, so we need to connect again let mut connection = remote.connect(remote::Direction::Fetch).await?; if let Some(f) = self.configure_connection.as_mut() { f(&mut connection).map_err(Error::RemoteConnection)?; diff --git a/gix/tests/gix/clone.rs b/gix/tests/gix/clone.rs index 573a8e3ddfe..3624afb7e55 100644 --- a/gix/tests/gix/clone.rs +++ b/gix/tests/gix/clone.rs @@ -83,6 +83,41 @@ mod blocking_io { Ok(()) } + #[test] + fn shallow_clone_uses_single_branch_refspec() -> crate::Result { + let tmp = gix_testtools::tempfile::TempDir::new()?; + let (repo, _out) = gix::prepare_clone_bare(remote::repo("base").path(), tmp.path())? + .with_shallow(Shallow::DepthAtRemote(1.try_into()?)) + .fetch_only(gix::progress::Discard, &std::sync::atomic::AtomicBool::default())?; + + assert!(repo.is_shallow(), "repository should be shallow"); + + // Verify that only a single-branch refspec was configured + let remote = repo.find_remote("origin")?; + let refspecs: Vec<_> = remote + .refspecs(Direction::Fetch) + .iter() + .map(|spec| spec.to_ref().to_bstring()) + .collect(); + + assert_eq!(refspecs.len(), 1, "shallow clone should have only one fetch refspec"); + + // The refspec should be for a single branch (main), not a wildcard + let refspec_str = refspecs[0].to_str().expect("valid utf8"); + assert!( + !refspec_str.contains("*"), + "shallow clone refspec should not use wildcard: {}", + refspec_str + ); + assert!( + refspec_str.contains("refs/heads/main"), + "shallow clone refspec should reference the main branch: {}", + refspec_str + ); + + Ok(()) + } + #[test] fn from_shallow_prohibited_with_option() -> crate::Result { let tmp = gix_testtools::tempfile::TempDir::new()?; From d2cd2effb00bc15c80fc36870c943d4d2e236c41 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 2 Nov 2025 14:11:42 +0100 Subject: [PATCH 2/3] feat: `Category::to_full_name()` can now be passed a full name without prefix duplication. This is for convenience, as it can then be passed a valid partial name. --- gix-ref/src/fullname.rs | 13 +++++++++---- gix-ref/tests/refs/fullname.rs | 19 +++++++++++++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/gix-ref/src/fullname.rs b/gix-ref/src/fullname.rs index 423baf5a26b..40cf7bc8661 100644 --- a/gix-ref/src/fullname.rs +++ b/gix-ref/src/fullname.rs @@ -1,8 +1,7 @@ +use crate::{bstr::ByteVec, name::is_pseudo_ref, Category, FullName, FullNameRef, Namespace, PartialNameRef}; use gix_object::bstr::{BStr, BString, ByteSlice}; use std::{borrow::Borrow, path::Path}; -use crate::{bstr::ByteVec, name::is_pseudo_ref, Category, FullName, FullNameRef, Namespace, PartialNameRef}; - impl TryFrom<&str> for FullName { type Error = gix_validate::reference::name::Error; @@ -165,6 +164,8 @@ impl FullNameRef { impl Category<'_> { /// As the inverse of [`FullNameRef::category_and_short_name()`], use the prefix of this category alongside /// the `short_name` to create a valid fully qualified [reference name](FullName). + /// + /// If `short_name` already contains the prefix that it would receive (and is thus a full name), no duplication will occur. pub fn to_full_name<'a>(&self, short_name: impl Into<&'a BStr>) -> Result { let mut out: BString = self.prefix().into(); let short_name = short_name.into(); @@ -185,8 +186,12 @@ impl Category<'_> { | Category::PseudoRef | Category::MainPseudoRef => short_name, }; - out.extend_from_slice(partial_name); - FullName::try_from(out) + if out.is_empty() || !partial_name.starts_with(&out) { + out.extend_from_slice(partial_name); + FullName::try_from(out) + } else { + FullName::try_from(partial_name.as_bstr()) + } } } diff --git a/gix-ref/tests/refs/fullname.rs b/gix-ref/tests/refs/fullname.rs index 3208b19dde6..ebc7f8fe01b 100644 --- a/gix-ref/tests/refs/fullname.rs +++ b/gix-ref/tests/refs/fullname.rs @@ -117,6 +117,25 @@ fn shorten_and_category() { ); } +#[test] +fn to_full_name() -> gix_testtools::Result { + assert_eq!( + Category::LocalBranch.to_full_name("refs/heads/full")?.as_bstr(), + "refs/heads/full", + "prefixes aren't duplicated" + ); + + assert_eq!( + Category::LocalBranch + .to_full_name("refs/remotes/origin/other")? + .as_bstr(), + "refs/heads/refs/remotes/origin/other", + "full names with a different category will be prefixed, to support 'main-worktree' special cases" + ); + + Ok(()) +} + #[test] fn prefix_with_namespace_and_stripping() { let ns = gix_ref::namespace::expand("foo").unwrap(); From c331afc01388a37b66eaeb39c302742b3187de27 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 24 Oct 2025 07:24:12 +0200 Subject: [PATCH 3/3] refactor --- gitoxide-core/src/repository/fetch.rs | 2 +- gix/src/clone/fetch/mod.rs | 21 +++++++++------- gix/src/config/tree/sections/init.rs | 1 + gix/tests/gix/clone.rs | 36 ++++++++++++++++----------- 4 files changed, 36 insertions(+), 24 deletions(-) diff --git a/gitoxide-core/src/repository/fetch.rs b/gitoxide-core/src/repository/fetch.rs index 21198349bb3..2f143a9ec8e 100644 --- a/gitoxide-core/src/repository/fetch.rs +++ b/gitoxide-core/src/repository/fetch.rs @@ -320,7 +320,7 @@ pub(crate) mod function { err, "server sent {} tips, {} were filtered due to {} refspec(s).", map.remote_refs.len(), - map.remote_refs.len() - map.mappings.len(), + map.remote_refs.len().saturating_sub(map.mappings.len()), refspecs.len() )?; } diff --git a/gix/src/clone/fetch/mod.rs b/gix/src/clone/fetch/mod.rs index c56f413061b..46453d960e9 100644 --- a/gix/src/clone/fetch/mod.rs +++ b/gix/src/clone/fetch/mod.rs @@ -2,6 +2,7 @@ use crate::{ bstr::{BString, ByteSlice}, clone::PrepareFetch, }; +use gix_ref::Category; /// The error returned by [`PrepareFetch::fetch_only()`]. #[derive(Debug, thiserror::Error)] @@ -49,6 +50,8 @@ pub enum Error { CommitterOrFallback(#[from] crate::config::time::Error), #[error(transparent)] RefMap(#[from] crate::remote::ref_map::Error), + #[error(transparent)] + ReferenceName(#[from] gix_validate::reference::name::Error), } /// Modification @@ -107,14 +110,13 @@ impl PrepareFetch { // For shallow clones without custom configuration, we'll use a single-branch refspec // to match git's behavior (matching git's single-branch behavior for shallow clones). let use_single_branch_for_shallow = self.shallow != remote::fetch::Shallow::NoChange - && self.configure_remote.is_none() - && remote.fetch_specs.is_empty(); + && remote.fetch_specs.is_empty() + && self.fetch_options.extra_refspecs.is_empty(); let target_ref = if use_single_branch_for_shallow { // Determine target branch from user-specified ref_name or default branch if let Some(ref_name) = &self.ref_name { - // User specified a branch, use that - Some(format!("refs/heads/{}", ref_name.as_ref().as_bstr())) + Some(Category::LocalBranch.to_full_name(ref_name.as_ref().as_bstr())?) } else { // For shallow clones without a specified ref, we need to determine the default branch. // We'll connect to get HEAD information. For Protocol V2, we need to explicitly list refs. @@ -129,7 +131,7 @@ impl PrepareFetch { refs.iter().find_map(|r| match r { gix_protocol::handshake::Ref::Symbolic { full_ref_name, target, .. - } if full_ref_name == "HEAD" => Some(target.to_string()), + } if full_ref_name == "HEAD" => gix_ref::FullName::try_from(target).ok(), _ => None, }) }) @@ -143,9 +145,9 @@ impl PrepareFetch { repo.config .resolved .string(crate::config::tree::Init::DEFAULT_BRANCH) - .and_then(|name| name.to_str().ok().map(|s| format!("refs/heads/{}", s))) + .and_then(|name| Category::LocalBranch.to_full_name(name.as_bstr()).ok()) }) - .unwrap_or_else(|| "refs/heads/main".to_string()); + .unwrap_or_else(|| gix_ref::FullName::try_from("refs/heads/main").expect("known to be valid")); // Drop the connection explicitly to release the borrow on remote drop(connection); @@ -156,11 +158,12 @@ impl PrepareFetch { None }; - // Set up refspec based on whether we're doing a single-branch shallow clone + // Set up refspec based on whether we're doing a single-branch shallow clone, + // which requires a single ref to match Git unless it's overridden. if remote.fetch_specs.is_empty() { if let Some(target_ref) = &target_ref { // Single-branch refspec for shallow clones - let short_name = target_ref.strip_prefix("refs/heads/").unwrap_or(target_ref.as_str()); + let short_name = target_ref.shorten(); remote = remote .with_refspecs( Some(format!("+{target_ref}:refs/remotes/{remote_name}/{short_name}").as_str()), diff --git a/gix/src/config/tree/sections/init.rs b/gix/src/config/tree/sections/init.rs index de42d3b6257..453a906a168 100644 --- a/gix/src/config/tree/sections/init.rs +++ b/gix/src/config/tree/sections/init.rs @@ -5,6 +5,7 @@ use crate::{ impl Init { /// The `init.defaultBranch` key. + // TODO: review its usage for cases where this key is not set - sometimes it's 'master', sometimes it's 'main'. pub const DEFAULT_BRANCH: keys::Any = keys::Any::new("defaultBranch", &config::Tree::INIT) .with_deviation("If not set, we use `main` instead of `master`"); } diff --git a/gix/tests/gix/clone.rs b/gix/tests/gix/clone.rs index 3624afb7e55..c843645b571 100644 --- a/gix/tests/gix/clone.rs +++ b/gix/tests/gix/clone.rs @@ -4,6 +4,10 @@ use crate::{remote, util::restricted}; mod blocking_io { use std::{borrow::Cow, path::Path, sync::atomic::AtomicBool}; + use crate::{ + remote, + util::{hex_to_id, restricted}, + }; use gix::{ bstr::BString, config::tree::{Clone, Core, Init, Key}, @@ -14,11 +18,7 @@ mod blocking_io { }; use gix_object::bstr::ByteSlice; use gix_ref::TargetRef; - - use crate::{ - remote, - util::{hex_to_id, restricted}, - }; + use gix_refspec::parse::Operation; #[test] fn fetch_shallow_no_checkout_then_unshallow() -> crate::Result { @@ -104,15 +104,14 @@ mod blocking_io { // The refspec should be for a single branch (main), not a wildcard let refspec_str = refspecs[0].to_str().expect("valid utf8"); - assert!( - !refspec_str.contains("*"), - "shallow clone refspec should not use wildcard: {}", - refspec_str - ); - assert!( - refspec_str.contains("refs/heads/main"), - "shallow clone refspec should reference the main branch: {}", - refspec_str + assert_eq!( + refspec_str, + if cfg!(windows) { + "+refs/heads/master:refs/remotes/origin/master" + } else { + "+refs/heads/main:refs/remotes/origin/main" + }, + "shallow clone refspec should not use wildcard and should be the main branch: {refspec_str}" ); Ok(()) @@ -238,7 +237,16 @@ mod blocking_io { fn from_non_shallow_by_deepen_exclude_then_deepen_to_unshallow() -> crate::Result { let tmp = gix_testtools::tempfile::TempDir::new()?; let excluded_leaf_refs = ["g", "h", "j"]; + let (repo, _change) = gix::prepare_clone_bare(remote::repo("base").path(), tmp.path())? + .with_fetch_options(gix::remote::ref_map::Options { + extra_refspecs: vec![gix::refspec::parse( + "refs/heads/*:refs/remotes/origin/*".into(), + Operation::Fetch, + )? + .into()], + ..Default::default() + }) .with_shallow(Shallow::Exclude { remote_refs: excluded_leaf_refs .into_iter()