From 0924bd30cbec5b47b604f159a1efee72032740a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?= Date: Sat, 23 Aug 2025 19:18:38 +0200 Subject: [PATCH 1/2] feat!: add `DiffLineType` and `HunkHeader` This commit modifies the public API of `ConsumeHunk::consume_hunk` to use the new types `DiffLineType` and `HunkHeader`. It also shifts responsibility for adding newlines to the API's consumer. --- gix-diff/src/blob/unified_diff.rs | 211 ++++++++++++++--------- gix-diff/tests/diff/blob/unified_diff.rs | 76 ++++++-- 2 files changed, 199 insertions(+), 88 deletions(-) diff --git a/gix-diff/src/blob/unified_diff.rs b/gix-diff/src/blob/unified_diff.rs index 437ed132f0f..a2ab2cef1c8 100644 --- a/gix-diff/src/blob/unified_diff.rs +++ b/gix-diff/src/blob/unified_diff.rs @@ -26,6 +26,28 @@ impl ContextSize { } } +/// Represents the type of a line in a unified diff. +#[doc(alias = "git2")] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DiffLineType { + /// A line that exists in both the old and the new version is called a context line. + Context, + /// A line that was added in the new version. + Add, + /// A line that was removed from the old version. + Remove, +} + +impl DiffLineType { + const fn to_prefix(self) -> char { + match self { + DiffLineType::Context => ' ', + DiffLineType::Add => '+', + DiffLineType::Remove => '-', + } + } +} + /// Specify where to put a newline. #[derive(Debug, Copy, Clone)] pub enum NewlineSeparator<'a> { @@ -39,31 +61,45 @@ pub enum NewlineSeparator<'a> { AfterHeaderAndWhenNeeded(&'a str), } +/// Holds information about a unified diff hunk, specifically with respect to line numbers. +pub struct HunkHeader { + /// The 1-based start position in the 'before' lines. + pub before_hunk_start: u32, + /// The size of the 'before' hunk in lines. + pub before_hunk_len: u32, + /// The 1-based start position in the 'after' lines. + pub after_hunk_start: u32, + /// The size of the 'after' hunk in lines. 
+ pub after_hunk_len: u32, +} + +impl std::fmt::Display for HunkHeader { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "@@ -{},{} +{},{} @@", + self.before_hunk_start, self.before_hunk_len, self.after_hunk_start, self.after_hunk_len + ) + } +} + /// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff). pub trait ConsumeHunk { /// The item this instance produces after consuming all hunks. type Out; - /// Consume a single `hunk` in unified diff format, that would be prefixed with `header`. - /// Note that all newlines are added. + /// Consume a single hunk. Note that it is the implementation's responsibility to add newlines + /// where requested by `newline`. /// /// Note that the [`UnifiedDiff`](super::UnifiedDiff) sink will wrap its output in an [`std::io::Result`]. /// After this method returned its first error, it will not be called anymore. - /// - /// The following is hunk-related information and the same that is used in the `header`. - /// * `before_hunk_start` is the 1-based first line of this hunk in the old file. - /// * `before_hunk_len` the amount of lines of this hunk in the old file. - /// * `after_hunk_start` is the 1-based first line of this hunk in the new file. - /// * `after_hunk_len` the amount of lines of this hunk in the new file. fn consume_hunk( &mut self, - before_hunk_start: u32, - before_hunk_len: u32, - after_hunk_start: u32, - after_hunk_len: u32, - header: &str, - hunk: &[u8], + header: HunkHeader, + lines: &[(DiffLineType, &[u8])], + newline: NewlineSeparator<'_>, ) -> std::io::Result<()>; + /// Called after the last hunk is consumed to produce an output. 
fn finish(self) -> Self::Out; } @@ -75,14 +111,10 @@ pub(super) mod _impl { use imara_diff::{intern, Sink}; use intern::{InternedInput, Interner, Token}; - use super::{ConsumeHunk, ContextSize, NewlineSeparator}; - - const CONTEXT: char = ' '; - const ADDITION: char = '+'; - const REMOVAL: char = '-'; + use super::{ConsumeHunk, ContextSize, DiffLineType, HunkHeader, NewlineSeparator}; - /// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu-diff` if the `-u` option is used, - /// and passes it in full to a consumer. + /// A [`Sink`] that creates a unified diff. It can be used to create a textual diff in the + /// format typically output by `git` or `gnu-diff` if the `-u` option is used. pub struct UnifiedDiff<'a, T, D> where T: Hash + Eq + AsRef<[u8]>, @@ -108,8 +140,8 @@ pub(super) mod _impl { ctx_size: u32, newline: NewlineSeparator<'a>, - buffer: Vec, - header_buf: String, + buffer: Vec<(DiffLineType, &'a [u8])>, + delegate: D, err: Option, @@ -120,12 +152,13 @@ pub(super) mod _impl { T: Hash + Eq + AsRef<[u8]>, D: ConsumeHunk, { - /// Create a new instance to create unified diff using the lines in `input`, + /// Create a new instance to create a unified diff using the lines in `input`, /// which also must be used when running the diff algorithm. /// `context_size` is the amount of lines around each hunk which will be passed - ///to `consume_hunk`. + /// to `consume_hunk`. /// - /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`. + /// `consume_hunk` is called for each hunk with all the information required to create a + /// unified diff. 
pub fn new( input: &'a InternedInput, consume_hunk: D, @@ -147,28 +180,16 @@ pub(super) mod _impl { newline: newline_separator, buffer: Vec::with_capacity(8), - header_buf: String::new(), delegate: consume_hunk, err: None, } } - fn print_tokens(&mut self, tokens: &[Token], prefix: char) { + fn print_tokens(&mut self, tokens: &[Token], line_type: DiffLineType) { for &token in tokens { - self.buffer.push_char(prefix); - let line = &self.interner[token]; - self.buffer.push_str(line); - match self.newline { - NewlineSeparator::AfterHeaderAndLine(nl) => { - self.buffer.push_str(nl); - } - NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => { - if !line.as_ref().ends_with_str(nl) { - self.buffer.push_str(nl); - } - } - } + let content = self.interner[token].as_ref(); + self.buffer.push((line_type, content)); } } @@ -183,38 +204,26 @@ pub(super) mod _impl { let hunk_start = self.before_hunk_start + 1; let hunk_end = self.after_hunk_start + 1; - self.header_buf.clear(); - std::fmt::Write::write_fmt( - &mut self.header_buf, - format_args!( - "@@ -{},{} +{},{} @@{nl}", - hunk_start, - self.before_hunk_len, - hunk_end, - self.after_hunk_len, - nl = match self.newline { - NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => { - nl - } - } - ), - ) - .map_err(|err| std::io::Error::new(ErrorKind::Other, err))?; - self.delegate.consume_hunk( - hunk_start, - self.before_hunk_len, - hunk_end, - self.after_hunk_len, - &self.header_buf, - &self.buffer, - )?; + + let header = HunkHeader { + before_hunk_start: hunk_start, + before_hunk_len: self.before_hunk_len, + after_hunk_start: hunk_end, + after_hunk_len: self.after_hunk_len, + }; + + self.delegate.consume_hunk(header, &self.buffer, self.newline)?; self.reset_hunks(); Ok(()) } fn print_context_and_update_pos(&mut self, print: Range, move_to: u32) { - self.print_tokens(&self.before[print.start as usize..print.end as usize], CONTEXT); + self.print_tokens( + &self.before[print.start as 
usize..print.end as usize], + DiffLineType::Context, + ); + let len = print.end - print.start; self.ctx_pos = Some(move_to); self.before_hunk_len += len; @@ -270,8 +279,11 @@ pub(super) mod _impl { self.before_hunk_len += before.end - before.start; self.after_hunk_len += after.end - after.start; - self.print_tokens(&self.before[before.start as usize..before.end as usize], REMOVAL); - self.print_tokens(&self.after[after.start as usize..after.end as usize], ADDITION); + self.print_tokens( + &self.before[before.start as usize..before.end as usize], + DiffLineType::Remove, + ); + self.print_tokens(&self.after[after.start as usize..after.end as usize], DiffLineType::Add); } fn finish(mut self) -> Self::Out { @@ -289,12 +301,32 @@ pub(super) mod _impl { impl ConsumeHunk for String { type Out = Self; - fn consume_hunk(&mut self, _: u32, _: u32, _: u32, _: u32, header: &str, hunk: &[u8]) -> std::io::Result<()> { - self.push_str(header); - self.push_str( - hunk.to_str() - .map_err(|err| std::io::Error::new(ErrorKind::Other, err))?, - ); + fn consume_hunk( + &mut self, + header: HunkHeader, + lines: &[(DiffLineType, &[u8])], + newline: NewlineSeparator<'_>, + ) -> std::io::Result<()> { + self.push_str(&header.to_string()); + self.push_str(match newline { + NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => nl, + }); + + for &(line_type, content) in lines { + self.push(line_type.to_prefix()); + self.push_str(std::str::from_utf8(content).map_err(|e| std::io::Error::new(ErrorKind::Other, e))?); + + match newline { + NewlineSeparator::AfterHeaderAndLine(nl) => { + self.push_str(nl); + } + NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => { + if !content.ends_with_str(nl) { + self.push_str(nl); + } + } + } + } Ok(()) } @@ -307,9 +339,32 @@ pub(super) mod _impl { impl ConsumeHunk for Vec { type Out = Self; - fn consume_hunk(&mut self, _: u32, _: u32, _: u32, _: u32, header: &str, hunk: &[u8]) -> std::io::Result<()> { - 
self.push_str(header); - self.push_str(hunk); + fn consume_hunk( + &mut self, + header: HunkHeader, + lines: &[(DiffLineType, &[u8])], + newline: NewlineSeparator<'_>, + ) -> std::io::Result<()> { + self.push_str(header.to_string()); + self.push_str(match newline { + NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => nl, + }); + + for &(line_type, content) in lines { + self.push(line_type.to_prefix() as u8); + self.extend_from_slice(content); + + match newline { + NewlineSeparator::AfterHeaderAndLine(nl) => { + self.push_str(nl); + } + NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => { + if !content.ends_with_str(nl) { + self.push_str(nl); + } + } + } + } Ok(()) } diff --git a/gix-diff/tests/diff/blob/unified_diff.rs b/gix-diff/tests/diff/blob/unified_diff.rs index 4bb2df7d421..99dde8d355f 100644 --- a/gix-diff/tests/diff/blob/unified_diff.rs +++ b/gix-diff/tests/diff/blob/unified_diff.rs @@ -1,5 +1,5 @@ use gix_diff::blob::{ - unified_diff::{ConsumeHunk, ContextSize, NewlineSeparator}, + unified_diff::{ConsumeHunk, ContextSize, DiffLineType, HunkHeader, NewlineSeparator}, Algorithm, UnifiedDiff, }; @@ -399,6 +399,37 @@ fn removed_modified_added_with_newlines_in_tokens() -> crate::Result { ] ); + let actual = gix_diff::blob::diff( + Algorithm::Myers, + &interner, + UnifiedDiff::new( + &interner, + DiffLineTypeRecorder::default(), + NewlineSeparator::AfterHeaderAndWhenNeeded("\r\n"), + ContextSize::symmetrical(1), + ), + )?; + + assert_eq!( + actual, + &[ + vec![DiffLineType::Remove, DiffLineType::Context], + vec![ + DiffLineType::Context, + DiffLineType::Remove, + DiffLineType::Add, + DiffLineType::Context + ], + vec![ + DiffLineType::Context, + DiffLineType::Remove, + DiffLineType::Add, + DiffLineType::Add, + DiffLineType::Add + ] + ] + ); + Ok(()) } @@ -488,17 +519,19 @@ impl ConsumeHunk for Recorder { fn consume_hunk( &mut self, - before_hunk_start: u32, - before_hunk_len: u32, - after_hunk_start: u32, - after_hunk_len: 
u32, - header: &str, - _hunk: &[u8], + header: HunkHeader, + _hunk: &[(DiffLineType, &[u8])], + newline: NewlineSeparator<'_>, ) -> std::io::Result<()> { + let mut formatted_header = header.to_string(); + formatted_header.push_str(match newline { + NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => nl, + }); + self.hunks.push(( - (before_hunk_start, before_hunk_len), - (after_hunk_start, after_hunk_len), - header.to_string(), + (header.before_hunk_start, header.before_hunk_len), + (header.after_hunk_start, header.after_hunk_len), + formatted_header, )); Ok(()) } @@ -507,3 +540,26 @@ impl ConsumeHunk for Recorder { self.hunks } } + +#[derive(Default)] +struct DiffLineTypeRecorder { + hunks: Vec>, +} + +impl ConsumeHunk for DiffLineTypeRecorder { + type Out = Vec>; + + fn consume_hunk( + &mut self, + _header: HunkHeader, + hunk: &[(DiffLineType, &[u8])], + _newline: NewlineSeparator<'_>, + ) -> std::io::Result<()> { + self.hunks.push(hunk.iter().map(|(line_type, _)| *line_type).collect()); + Ok(()) + } + + fn finish(self) -> Self::Out { + self.hunks + } +} From 91a611fbdb4005546eca814e4b62267798919994 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 29 Aug 2025 09:19:07 +0200 Subject: [PATCH 2/2] refactor - Make ports of old implementations easier - remove NewlineSeparator variants which don't seem used quite as much - it's always conditionally adding newlines. 
--- gitoxide-core/src/repository/diff.rs | 10 +- gix-diff/Cargo.toml | 2 +- gix-diff/src/blob/mod.rs | 2 +- gix-diff/src/blob/unified_diff.rs | 375 ----------------------- gix-diff/src/blob/unified_diff/impls.rs | 280 +++++++++++++++++ gix-diff/src/blob/unified_diff/mod.rs | 91 ++++++ gix-diff/tests/diff/blob/unified_diff.rs | 131 +++----- 7 files changed, 426 insertions(+), 465 deletions(-) delete mode 100644 gix-diff/src/blob/unified_diff.rs create mode 100644 gix-diff/src/blob/unified_diff/impls.rs create mode 100644 gix-diff/src/blob/unified_diff/mod.rs diff --git a/gitoxide-core/src/repository/diff.rs b/gitoxide-core/src/repository/diff.rs index a0396599b81..0fe2a460f5b 100644 --- a/gitoxide-core/src/repository/diff.rs +++ b/gitoxide-core/src/repository/diff.rs @@ -1,11 +1,8 @@ use anyhow::Context; +use gix::diff::blob::unified_diff::ConsumeBinaryHunk; use gix::{ bstr::{BString, ByteSlice}, - diff::blob::{ - intern::TokenSource, - unified_diff::{ContextSize, NewlineSeparator}, - UnifiedDiff, - }, + diff::blob::{intern::TokenSource, unified_diff::ContextSize, UnifiedDiff}, objs::tree::EntryMode, odb::store::RefreshMode, prelude::ObjectIdExt, @@ -206,8 +203,7 @@ pub fn file( let unified_diff = UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(BString::default(), "\n"), ContextSize::symmetrical(3), ); diff --git a/gix-diff/Cargo.toml b/gix-diff/Cargo.toml index 87e08debaf0..2b176b697d0 100644 --- a/gix-diff/Cargo.toml +++ b/gix-diff/Cargo.toml @@ -9,7 +9,7 @@ description = "Calculate differences between various git objects" authors = ["Sebastian Thiel "] edition = "2021" include = ["src/**/*", "LICENSE-*"] -rust-version = "1.70" +rust-version = "1.74" autotests = false [features] diff --git a/gix-diff/src/blob/mod.rs b/gix-diff/src/blob/mod.rs index 541e978d752..5cf1f0c806e 100644 --- a/gix-diff/src/blob/mod.rs +++ b/gix-diff/src/blob/mod.rs @@ -12,7 +12,7 @@ pub mod pipeline; pub mod platform; 
pub mod unified_diff; -pub use unified_diff::_impl::UnifiedDiff; +pub use unified_diff::impls::UnifiedDiff; /// Information about the diff performed to detect similarity. #[derive(Debug, Default, Clone, Copy, PartialEq, PartialOrd)] diff --git a/gix-diff/src/blob/unified_diff.rs b/gix-diff/src/blob/unified_diff.rs deleted file mode 100644 index a2ab2cef1c8..00000000000 --- a/gix-diff/src/blob/unified_diff.rs +++ /dev/null @@ -1,375 +0,0 @@ -//! Facilities to produce the unified diff format. -//! -//! Originally based on . - -/// Defines the size of the context printed before and after each change. -/// -/// Similar to the `-U` option in git diff or gnu-diff. If the context overlaps -/// with previous or next change, the context gets reduced accordingly. -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)] -pub struct ContextSize { - /// Defines the size of the context printed before and after each change. - symmetrical: u32, -} - -impl Default for ContextSize { - fn default() -> Self { - ContextSize::symmetrical(3) - } -} - -/// Instantiation -impl ContextSize { - /// Create a symmetrical context with `n` lines before and after a changed hunk. - pub fn symmetrical(n: u32) -> Self { - ContextSize { symmetrical: n } - } -} - -/// Represents the type of a line in a unified diff. -#[doc(alias = "git2")] -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum DiffLineType { - /// A line that exists in both the old and the new version is called a context line. - Context, - /// A line that was added in the new version. - Add, - /// A line that was removed from the old version. - Remove, -} - -impl DiffLineType { - const fn to_prefix(self) -> char { - match self { - DiffLineType::Context => ' ', - DiffLineType::Add => '+', - DiffLineType::Remove => '-', - } - } -} - -/// Specify where to put a newline. 
-#[derive(Debug, Copy, Clone)] -pub enum NewlineSeparator<'a> { - /// Place the given newline separator, like `\n`, after each patch header as well as after each line. - /// This is the right choice if tokens don't include newlines. - AfterHeaderAndLine(&'a str), - /// Place the given newline separator, like `\n`, only after each patch header or if a line doesn't contain a newline. - /// This is the right choice if tokens do include newlines. - /// Note that diff-tokens *with* newlines may diff strangely at the end of files when lines have been appended, - /// as it will make the last line look like it changed just because the whitespace at the end 'changed'. - AfterHeaderAndWhenNeeded(&'a str), -} - -/// Holds information about a unified diff hunk, specifically with respect to line numbers. -pub struct HunkHeader { - /// The 1-based start position in the 'before' lines. - pub before_hunk_start: u32, - /// The size of the 'before' hunk in lines. - pub before_hunk_len: u32, - /// The 1-based start position in the 'after' lines. - pub after_hunk_start: u32, - /// The size of the 'after' hunk in lines. - pub after_hunk_len: u32, -} - -impl std::fmt::Display for HunkHeader { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "@@ -{},{} +{},{} @@", - self.before_hunk_start, self.before_hunk_len, self.after_hunk_start, self.after_hunk_len - ) - } -} - -/// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff). -pub trait ConsumeHunk { - /// The item this instance produces after consuming all hunks. - type Out; - - /// Consume a single hunk. Note that it is the implementation's responsibility to add newlines - /// where requested by `newline`. - /// - /// Note that the [`UnifiedDiff`](super::UnifiedDiff) sink will wrap its output in an [`std::io::Result`]. - /// After this method returned its first error, it will not be called anymore. 
- fn consume_hunk( - &mut self, - header: HunkHeader, - lines: &[(DiffLineType, &[u8])], - newline: NewlineSeparator<'_>, - ) -> std::io::Result<()>; - - /// Called after the last hunk is consumed to produce an output. - fn finish(self) -> Self::Out; -} - -pub(super) mod _impl { - use std::{hash::Hash, io::ErrorKind, ops::Range}; - - use bstr::{ByteSlice, ByteVec}; - use imara_diff::{intern, Sink}; - use intern::{InternedInput, Interner, Token}; - - use super::{ConsumeHunk, ContextSize, DiffLineType, HunkHeader, NewlineSeparator}; - - /// A [`Sink`] that creates a unified diff. It can be used to create a textual diff in the - /// format typically output by `git` or `gnu-diff` if the `-u` option is used. - pub struct UnifiedDiff<'a, T, D> - where - T: Hash + Eq + AsRef<[u8]>, - D: ConsumeHunk, - { - before: &'a [Token], - after: &'a [Token], - interner: &'a Interner, - - /// The 0-based start position in the 'before' tokens for the accumulated hunk for display in the header. - before_hunk_start: u32, - /// The size of the accumulated 'before' hunk in lines for display in the header. - before_hunk_len: u32, - /// The 0-based start position in the 'after' tokens for the accumulated hunk for display in the header. - after_hunk_start: u32, - /// The size of the accumulated 'after' hunk in lines. - after_hunk_len: u32, - // An index into `before` and the context line to print next, - // or `None` if this value was never computed to be the correct starting point for an accumulated hunk. - ctx_pos: Option, - - /// Symmetrical context before and after the changed hunk. - ctx_size: u32, - newline: NewlineSeparator<'a>, - - buffer: Vec<(DiffLineType, &'a [u8])>, - - delegate: D, - - err: Option, - } - - impl<'a, T, D> UnifiedDiff<'a, T, D> - where - T: Hash + Eq + AsRef<[u8]>, - D: ConsumeHunk, - { - /// Create a new instance to create a unified diff using the lines in `input`, - /// which also must be used when running the diff algorithm. 
- /// `context_size` is the amount of lines around each hunk which will be passed - /// to `consume_hunk`. - /// - /// `consume_hunk` is called for each hunk with all the information required to create a - /// unified diff. - pub fn new( - input: &'a InternedInput, - consume_hunk: D, - newline_separator: NewlineSeparator<'a>, - context_size: ContextSize, - ) -> Self { - Self { - interner: &input.interner, - before: &input.before, - after: &input.after, - - before_hunk_start: 0, - before_hunk_len: 0, - after_hunk_len: 0, - after_hunk_start: 0, - ctx_pos: None, - - ctx_size: context_size.symmetrical, - newline: newline_separator, - - buffer: Vec::with_capacity(8), - delegate: consume_hunk, - - err: None, - } - } - - fn print_tokens(&mut self, tokens: &[Token], line_type: DiffLineType) { - for &token in tokens { - let content = self.interner[token].as_ref(); - self.buffer.push((line_type, content)); - } - } - - fn flush_accumulated_hunk(&mut self) -> std::io::Result<()> { - if self.nothing_to_flush() { - return Ok(()); - } - - let ctx_pos = self.ctx_pos.expect("has been set if we started a hunk"); - let end = (ctx_pos + self.ctx_size).min(self.before.len() as u32); - self.print_context_and_update_pos(ctx_pos..end, end); - - let hunk_start = self.before_hunk_start + 1; - let hunk_end = self.after_hunk_start + 1; - - let header = HunkHeader { - before_hunk_start: hunk_start, - before_hunk_len: self.before_hunk_len, - after_hunk_start: hunk_end, - after_hunk_len: self.after_hunk_len, - }; - - self.delegate.consume_hunk(header, &self.buffer, self.newline)?; - - self.reset_hunks(); - Ok(()) - } - - fn print_context_and_update_pos(&mut self, print: Range, move_to: u32) { - self.print_tokens( - &self.before[print.start as usize..print.end as usize], - DiffLineType::Context, - ); - - let len = print.end - print.start; - self.ctx_pos = Some(move_to); - self.before_hunk_len += len; - self.after_hunk_len += len; - } - - fn reset_hunks(&mut self) { - self.buffer.clear(); - 
self.before_hunk_len = 0; - self.after_hunk_len = 0; - } - - fn nothing_to_flush(&self) -> bool { - self.before_hunk_len == 0 && self.after_hunk_len == 0 - } - } - - impl Sink for UnifiedDiff<'_, T, D> - where - T: Hash + Eq + AsRef<[u8]>, - D: ConsumeHunk, - { - type Out = std::io::Result; - - fn process_change(&mut self, before: Range, after: Range) { - if self.err.is_some() { - return; - } - let start_next_hunk = self - .ctx_pos - .is_some_and(|ctx_pos| before.start - ctx_pos > 2 * self.ctx_size); - if start_next_hunk { - if let Err(err) = self.flush_accumulated_hunk() { - self.err = Some(err); - return; - } - let ctx_pos = before.start - self.ctx_size; - self.ctx_pos = Some(ctx_pos); - self.before_hunk_start = ctx_pos; - self.after_hunk_start = after.start - self.ctx_size; - } - let ctx_pos = match self.ctx_pos { - None => { - // TODO: can this be made so the code above does the job? - let ctx_pos = before.start.saturating_sub(self.ctx_size); - self.before_hunk_start = ctx_pos; - self.after_hunk_start = after.start.saturating_sub(self.ctx_size); - ctx_pos - } - Some(pos) => pos, - }; - self.print_context_and_update_pos(ctx_pos..before.start, before.end); - self.before_hunk_len += before.end - before.start; - self.after_hunk_len += after.end - after.start; - - self.print_tokens( - &self.before[before.start as usize..before.end as usize], - DiffLineType::Remove, - ); - self.print_tokens(&self.after[after.start as usize..after.end as usize], DiffLineType::Add); - } - - fn finish(mut self) -> Self::Out { - if let Err(err) = self.flush_accumulated_hunk() { - self.err = Some(err); - } - if let Some(err) = self.err { - return Err(err); - } - Ok(self.delegate.finish()) - } - } - - /// An implementation that fails if the input isn't UTF-8. 
- impl ConsumeHunk for String { - type Out = Self; - - fn consume_hunk( - &mut self, - header: HunkHeader, - lines: &[(DiffLineType, &[u8])], - newline: NewlineSeparator<'_>, - ) -> std::io::Result<()> { - self.push_str(&header.to_string()); - self.push_str(match newline { - NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => nl, - }); - - for &(line_type, content) in lines { - self.push(line_type.to_prefix()); - self.push_str(std::str::from_utf8(content).map_err(|e| std::io::Error::new(ErrorKind::Other, e))?); - - match newline { - NewlineSeparator::AfterHeaderAndLine(nl) => { - self.push_str(nl); - } - NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => { - if !content.ends_with_str(nl) { - self.push_str(nl); - } - } - } - } - Ok(()) - } - - fn finish(self) -> Self::Out { - self - } - } - - /// An implementation that writes hunks into a byte buffer. - impl ConsumeHunk for Vec { - type Out = Self; - - fn consume_hunk( - &mut self, - header: HunkHeader, - lines: &[(DiffLineType, &[u8])], - newline: NewlineSeparator<'_>, - ) -> std::io::Result<()> { - self.push_str(header.to_string()); - self.push_str(match newline { - NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => nl, - }); - - for &(line_type, content) in lines { - self.push(line_type.to_prefix() as u8); - self.extend_from_slice(content); - - match newline { - NewlineSeparator::AfterHeaderAndLine(nl) => { - self.push_str(nl); - } - NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => { - if !content.ends_with_str(nl) { - self.push_str(nl); - } - } - } - } - Ok(()) - } - - fn finish(self) -> Self::Out { - self - } - } -} diff --git a/gix-diff/src/blob/unified_diff/impls.rs b/gix-diff/src/blob/unified_diff/impls.rs new file mode 100644 index 00000000000..22b41a57e42 --- /dev/null +++ b/gix-diff/src/blob/unified_diff/impls.rs @@ -0,0 +1,280 @@ +use bstr::{BString, ByteSlice, ByteVec}; +use imara_diff::{intern, Sink}; +use 
intern::{InternedInput, Interner, Token}; +use std::fmt::Write; +use std::{hash::Hash, ops::Range}; + +use super::{ConsumeBinaryHunk, ConsumeBinaryHunkDelegate, ConsumeHunk, ContextSize, DiffLineKind, HunkHeader}; + +/// A [`Sink`] that creates a unified diff. It can be used to create a textual diff in the +/// format typically output by `git` or `gnu-diff` if the `-u` option is used. +pub struct UnifiedDiff<'a, T, D> +where + T: Hash + Eq + AsRef<[u8]>, + D: ConsumeHunk, +{ + before: &'a [Token], + after: &'a [Token], + interner: &'a Interner, + + /// The 0-based start position in the 'before' tokens for the accumulated hunk for display in the header. + before_hunk_start: u32, + /// The size of the accumulated 'before' hunk in lines for display in the header. + before_hunk_len: u32, + /// The 0-based start position in the 'after' tokens for the accumulated hunk for display in the header. + after_hunk_start: u32, + /// The size of the accumulated 'after' hunk in lines. + after_hunk_len: u32, + // An index into `before` and the context line to print next, + // or `None` if this value was never computed to be the correct starting point for an accumulated hunk. + ctx_pos: Option, + + /// Symmetrical context before and after the changed hunk. + ctx_size: u32, + + buffer: Vec<(DiffLineKind, &'a [u8])>, + + delegate: D, + + err: Option, +} + +impl<'a, T, D> UnifiedDiff<'a, T, D> +where + T: Hash + Eq + AsRef<[u8]>, + D: ConsumeHunk, +{ + /// Create a new instance to create a unified diff using the lines in `input`, + /// which also must be used when running the diff algorithm. + /// `context_size` is the amount of lines around each hunk which will be passed + /// to `consume_hunk`. + /// + /// `consume_hunk` is called for each hunk with all the information required to create a + /// unified diff. 
+ pub fn new(input: &'a InternedInput, consume_hunk: D, context_size: ContextSize) -> Self { + Self { + interner: &input.interner, + before: &input.before, + after: &input.after, + + before_hunk_start: 0, + before_hunk_len: 0, + after_hunk_len: 0, + after_hunk_start: 0, + ctx_pos: None, + + ctx_size: context_size.symmetrical, + + buffer: Vec::with_capacity(8), + delegate: consume_hunk, + + err: None, + } + } + + fn print_tokens(&mut self, tokens: &[Token], line_type: DiffLineKind) { + for &token in tokens { + let content = self.interner[token].as_ref(); + self.buffer.push((line_type, content)); + } + } + + fn flush_accumulated_hunk(&mut self) -> std::io::Result<()> { + if self.nothing_to_flush() { + return Ok(()); + } + + let ctx_pos = self.ctx_pos.expect("has been set if we started a hunk"); + let end = (ctx_pos + self.ctx_size).min(self.before.len() as u32); + self.print_context_and_update_pos(ctx_pos..end, end); + + let hunk_start = self.before_hunk_start + 1; + let hunk_end = self.after_hunk_start + 1; + + let header = HunkHeader { + before_hunk_start: hunk_start, + before_hunk_len: self.before_hunk_len, + after_hunk_start: hunk_end, + after_hunk_len: self.after_hunk_len, + }; + + self.delegate.consume_hunk(header, &self.buffer)?; + + self.reset_hunks(); + Ok(()) + } + + fn print_context_and_update_pos(&mut self, print: Range, move_to: u32) { + self.print_tokens( + &self.before[print.start as usize..print.end as usize], + DiffLineKind::Context, + ); + + let len = print.end - print.start; + self.ctx_pos = Some(move_to); + self.before_hunk_len += len; + self.after_hunk_len += len; + } + + fn reset_hunks(&mut self) { + self.buffer.clear(); + self.before_hunk_len = 0; + self.after_hunk_len = 0; + } + + fn nothing_to_flush(&self) -> bool { + self.before_hunk_len == 0 && self.after_hunk_len == 0 + } +} + +impl Sink for UnifiedDiff<'_, T, D> +where + T: Hash + Eq + AsRef<[u8]>, + D: ConsumeHunk, +{ + type Out = std::io::Result; + + fn process_change(&mut self, before: 
Range, after: Range) { + if self.err.is_some() { + return; + } + let start_next_hunk = self + .ctx_pos + .is_some_and(|ctx_pos| before.start - ctx_pos > 2 * self.ctx_size); + if start_next_hunk { + if let Err(err) = self.flush_accumulated_hunk() { + self.err = Some(err); + return; + } + let ctx_pos = before.start - self.ctx_size; + self.ctx_pos = Some(ctx_pos); + self.before_hunk_start = ctx_pos; + self.after_hunk_start = after.start - self.ctx_size; + } + let ctx_pos = match self.ctx_pos { + None => { + // TODO: can this be made so the code above does the job? + let ctx_pos = before.start.saturating_sub(self.ctx_size); + self.before_hunk_start = ctx_pos; + self.after_hunk_start = after.start.saturating_sub(self.ctx_size); + ctx_pos + } + Some(pos) => pos, + }; + self.print_context_and_update_pos(ctx_pos..before.start, before.end); + self.before_hunk_len += before.end - before.start; + self.after_hunk_len += after.end - after.start; + + self.print_tokens( + &self.before[before.start as usize..before.end as usize], + DiffLineKind::Remove, + ); + self.print_tokens(&self.after[after.start as usize..after.end as usize], DiffLineKind::Add); + } + + fn finish(mut self) -> Self::Out { + if let Err(err) = self.flush_accumulated_hunk() { + self.err = Some(err); + } + if let Some(err) = self.err { + return Err(err); + } + Ok(self.delegate.finish()) + } +} + +/// An implementation that fails if the input isn't UTF-8. 
+impl ConsumeHunk for ConsumeBinaryHunk<'_, D> +where + D: ConsumeBinaryHunkDelegate, +{ + type Out = D; + + fn consume_hunk(&mut self, header: HunkHeader, lines: &[(DiffLineKind, &[u8])]) -> std::io::Result<()> { + self.header_buf.clear(); + self.header_buf + .write_fmt(format_args!("{header}{nl}", nl = self.newline)) + .map_err(std::io::Error::other)?; + + let buf = &mut self.hunk_buf; + buf.clear(); + for &(line_type, content) in lines { + buf.push(line_type.to_prefix() as u8); + buf.push_str(std::str::from_utf8(content).map_err(std::io::Error::other)?); + + if !content.ends_with_str(self.newline) { + buf.push_str(self.newline); + } + } + + self.delegate.consume_binary_hunk(header, &self.header_buf, buf)?; + Ok(()) + } + + fn finish(self) -> Self::Out { + self.delegate + } +} + +/// An implementation that fails if the input isn't UTF-8. +impl ConsumeBinaryHunkDelegate for String { + fn consume_binary_hunk(&mut self, _header: HunkHeader, header_str: &str, hunk: &[u8]) -> std::io::Result<()> { + self.push_str(header_str); + self.push_str(hunk.to_str().map_err(std::io::Error::other)?); + Ok(()) + } +} + +/// An implementation that writes hunks into a byte buffer. +impl ConsumeBinaryHunkDelegate for Vec { + fn consume_binary_hunk(&mut self, _header: HunkHeader, header_str: &str, hunk: &[u8]) -> std::io::Result<()> { + self.push_str(header_str); + self.extend_from_slice(hunk); + Ok(()) + } +} + +/// An implementation that writes hunks into a human-readable byte buffer. 
+impl ConsumeBinaryHunkDelegate for BString { + fn consume_binary_hunk(&mut self, _header: HunkHeader, header_str: &str, hunk: &[u8]) -> std::io::Result<()> { + self.push_str(header_str); + self.extend_from_slice(hunk); + Ok(()) + } +} + +impl<'a, D> ConsumeBinaryHunk<'a, D> +where + D: ConsumeBinaryHunkDelegate, +{ + /// Create a new instance that writes stringified hunks to `delegate`, which uses `newline` to separate header and hunk, + /// as well as hunk lines that don't naturally end in a newline. + pub fn new(delegate: D, newline: &'a str) -> ConsumeBinaryHunk<'a, D> { + ConsumeBinaryHunk { + newline, + delegate, + header_buf: String::new(), + hunk_buf: Vec::with_capacity(128), + } + } +} + +impl DiffLineKind { + const fn to_prefix(self) -> char { + match self { + DiffLineKind::Context => ' ', + DiffLineKind::Add => '+', + DiffLineKind::Remove => '-', + } + } +} + +impl std::fmt::Display for HunkHeader { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "@@ -{},{} +{},{} @@", + self.before_hunk_start, self.before_hunk_len, self.after_hunk_start, self.after_hunk_len + ) + } +} diff --git a/gix-diff/src/blob/unified_diff/mod.rs b/gix-diff/src/blob/unified_diff/mod.rs new file mode 100644 index 00000000000..06eb5aa3101 --- /dev/null +++ b/gix-diff/src/blob/unified_diff/mod.rs @@ -0,0 +1,91 @@ +//! Facilities to produce the unified diff format. +//! +//! Originally based on . + +/// Defines the size of the context printed before and after each change. +/// +/// Similar to the `-U` option in git diff or gnu-diff. If the context overlaps +/// with previous or next change, the context gets reduced accordingly. +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)] +pub struct ContextSize { + /// Defines the size of the context printed before and after each change. 
+ symmetrical: u32, +} + +impl Default for ContextSize { + fn default() -> Self { + ContextSize::symmetrical(3) + } +} + +/// Instantiation +impl ContextSize { + /// Create a symmetrical context with `n` lines before and after a changed hunk. + pub fn symmetrical(n: u32) -> Self { + ContextSize { symmetrical: n } + } +} + +/// Represents the type of a line in a unified diff. +#[doc(alias = "git2")] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] +pub enum DiffLineKind { + /// A line that exists in both the old and the new version, added based on [`ContextSize`]. + Context, + /// A line that was added in the new version. + Add, + /// A line that was removed from the old version. + Remove, +} + +/// Holds information about a unified diff hunk, specifically with respect to line numbers. +#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] +pub struct HunkHeader { + /// The 1-based start position in the 'before' lines. + pub before_hunk_start: u32, + /// The size of the 'before' hunk in lines. + pub before_hunk_len: u32, + /// The 1-based start position in the 'after' lines. + pub after_hunk_start: u32, + /// The size of the 'after' hunk in lines. + pub after_hunk_len: u32, +} + +/// An adapter with [`ConsumeHunk`] implementation to call a delegate which receives each stringified hunk. +pub struct ConsumeBinaryHunk<'a, D> { + /// The newline to use to separate lines if these don't yet contain a newline. + /// It should also be used to separate the stringified header from the hunk itself. + pub newline: &'a str, + /// The delegate to receive stringified hunks. + pub delegate: D, + + header_buf: String, + hunk_buf: Vec, +} + +/// A trait for use in conjunction with [`ConsumeBinaryHunk`]. 
+pub trait ConsumeBinaryHunkDelegate { + /// Consume a single `hunk` in unified diff format, along with its `header_str` that already has a trailing newline added based + /// on the parent [`ConsumeBinaryHunk`] configuration, also in unified diff format. + /// The `header` is the data used to produce `header_str`. + fn consume_binary_hunk(&mut self, header: HunkHeader, header_str: &str, hunk: &[u8]) -> std::io::Result<()>; +} + +/// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff). +pub trait ConsumeHunk { + /// The item this instance produces after consuming all hunks. + type Out; + + /// Consume a single hunk which is represented by its `lines`, each of which comes with a `DiffLineKind` value + /// to tell whether it was added, removed or is context. + /// The `header` specifies hunk offsets, which position the `lines` in the old and new file respectively. + /// + /// Note that the [`UnifiedDiff`](super::UnifiedDiff) sink will wrap its output in an [`std::io::Result`]. + /// After this method returned its first error, it will not be called anymore. + fn consume_hunk(&mut self, header: HunkHeader, lines: &[(DiffLineKind, &[u8])]) -> std::io::Result<()>; + + /// Called after the last hunk is consumed to produce an output. 
+ fn finish(self) -> Self::Out; +} + +pub(super) mod impls; diff --git a/gix-diff/tests/diff/blob/unified_diff.rs b/gix-diff/tests/diff/blob/unified_diff.rs index 99dde8d355f..782794d70c9 100644 --- a/gix-diff/tests/diff/blob/unified_diff.rs +++ b/gix-diff/tests/diff/blob/unified_diff.rs @@ -1,5 +1,6 @@ +use gix_diff::blob::unified_diff::ConsumeBinaryHunk; use gix_diff::blob::{ - unified_diff::{ConsumeHunk, ContextSize, DiffLineType, HunkHeader, NewlineSeparator}, + unified_diff::{ConsumeHunk, ContextSize, DiffLineKind, HunkHeader}, Algorithm, UnifiedDiff, }; @@ -14,8 +15,7 @@ fn removed_modified_added() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -43,8 +43,7 @@ fn removed_modified_added() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(1), ), )?; @@ -69,8 +68,7 @@ fn removed_modified_added() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(0), ), )?; @@ -89,12 +87,7 @@ fn removed_modified_added() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new( - &interner, - Recorder::default(), - NewlineSeparator::AfterHeaderAndLine("\n"), - ContextSize::symmetrical(1), - ), + UnifiedDiff::new(&interner, Recorder::new("\n"), ContextSize::symmetrical(1)), )?; assert_eq!( actual, @@ -119,8 +112,7 @@ fn context_overlap_by_one_line_move_up() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -150,8 +142,7 @@ fn 
context_overlap_by_one_line_move_down() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -184,8 +175,7 @@ fn added_on_top_keeps_context_correctly_sized() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -213,8 +203,7 @@ fn added_on_top_keeps_context_correctly_sized() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -241,8 +230,7 @@ fn added_on_top_keeps_context_correctly_sized() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -271,8 +259,7 @@ fn added_on_top_keeps_context_correctly_sized() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -302,8 +289,7 @@ fn removed_modified_added_with_newlines_in_tokens() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -333,8 +319,7 @@ fn removed_modified_added_with_newlines_in_tokens() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(1), ), )?; @@ -361,8 +346,7 @@ fn removed_modified_added_with_newlines_in_tokens() 
-> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(0), ), )?; @@ -383,12 +367,7 @@ fn removed_modified_added_with_newlines_in_tokens() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new( - &interner, - Recorder::default(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\r\n"), - ContextSize::symmetrical(1), - ), + UnifiedDiff::new(&interner, Recorder::new("\r\n"), ContextSize::symmetrical(1)), )?; assert_eq!( actual, @@ -402,30 +381,25 @@ fn removed_modified_added_with_newlines_in_tokens() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new( - &interner, - DiffLineTypeRecorder::default(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\r\n"), - ContextSize::symmetrical(1), - ), + UnifiedDiff::new(&interner, DiffLineKindRecorder::default(), ContextSize::symmetrical(1)), )?; assert_eq!( actual, &[ - vec![DiffLineType::Remove, DiffLineType::Context], + vec![DiffLineKind::Remove, DiffLineKind::Context], vec![ - DiffLineType::Context, - DiffLineType::Remove, - DiffLineType::Add, - DiffLineType::Context + DiffLineKind::Context, + DiffLineKind::Remove, + DiffLineKind::Add, + DiffLineKind::Context ], vec![ - DiffLineType::Context, - DiffLineType::Remove, - DiffLineType::Add, - DiffLineType::Add, - DiffLineType::Add + DiffLineKind::Context, + DiffLineKind::Remove, + DiffLineKind::Add, + DiffLineKind::Add, + DiffLineKind::Add ] ] ); @@ -445,8 +419,7 @@ fn all_added_or_removed() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(context_lines), ), )?; @@ -470,8 +443,7 @@ fn all_added_or_removed() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - 
NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(context_lines), ), )?; @@ -498,8 +470,7 @@ fn empty() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -508,25 +479,27 @@ fn empty() -> crate::Result { Ok(()) } -#[derive(Default)] struct Recorder { #[allow(clippy::type_complexity)] hunks: Vec<((u32, u32), (u32, u32), String)>, + newline: &'static str, +} + +impl Recorder { + pub fn new(newline: &'static str) -> Self { + Recorder { + hunks: Vec::new(), + newline, + } + } } impl ConsumeHunk for Recorder { type Out = Vec<((u32, u32), (u32, u32), String)>; - fn consume_hunk( - &mut self, - header: HunkHeader, - _hunk: &[(DiffLineType, &[u8])], - newline: NewlineSeparator<'_>, - ) -> std::io::Result<()> { + fn consume_hunk(&mut self, header: HunkHeader, _hunk: &[(DiffLineKind, &[u8])]) -> std::io::Result<()> { let mut formatted_header = header.to_string(); - formatted_header.push_str(match newline { - NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => nl, - }); + formatted_header.push_str(self.newline); self.hunks.push(( (header.before_hunk_start, header.before_hunk_len), @@ -542,24 +515,20 @@ impl ConsumeHunk for Recorder { } #[derive(Default)] -struct DiffLineTypeRecorder { - hunks: Vec>, +struct DiffLineKindRecorder { + line_kinds: Vec>, } -impl ConsumeHunk for DiffLineTypeRecorder { - type Out = Vec>; +impl ConsumeHunk for DiffLineKindRecorder { + type Out = Vec>; - fn consume_hunk( - &mut self, - _header: HunkHeader, - hunk: &[(DiffLineType, &[u8])], - _newline: NewlineSeparator<'_>, - ) -> std::io::Result<()> { - self.hunks.push(hunk.iter().map(|(line_type, _)| *line_type).collect()); + fn consume_hunk(&mut self, _header: HunkHeader, hunk: &[(DiffLineKind, &[u8])]) -> std::io::Result<()> { + 
self.line_kinds + .push(hunk.iter().map(|(line_type, _)| *line_type).collect()); Ok(()) } fn finish(self) -> Self::Out { - self.hunks + self.line_kinds } }