diff --git a/gitoxide-core/src/repository/diff.rs b/gitoxide-core/src/repository/diff.rs index a0396599b81..0fe2a460f5b 100644 --- a/gitoxide-core/src/repository/diff.rs +++ b/gitoxide-core/src/repository/diff.rs @@ -1,11 +1,8 @@ use anyhow::Context; +use gix::diff::blob::unified_diff::ConsumeBinaryHunk; use gix::{ bstr::{BString, ByteSlice}, - diff::blob::{ - intern::TokenSource, - unified_diff::{ContextSize, NewlineSeparator}, - UnifiedDiff, - }, + diff::blob::{intern::TokenSource, unified_diff::ContextSize, UnifiedDiff}, objs::tree::EntryMode, odb::store::RefreshMode, prelude::ObjectIdExt, @@ -206,8 +203,7 @@ pub fn file( let unified_diff = UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(BString::default(), "\n"), ContextSize::symmetrical(3), ); diff --git a/gix-diff/Cargo.toml b/gix-diff/Cargo.toml index 87e08debaf0..2b176b697d0 100644 --- a/gix-diff/Cargo.toml +++ b/gix-diff/Cargo.toml @@ -9,7 +9,7 @@ description = "Calculate differences between various git objects" authors = ["Sebastian Thiel "] edition = "2021" include = ["src/**/*", "LICENSE-*"] -rust-version = "1.70" +rust-version = "1.74" autotests = false [features] diff --git a/gix-diff/src/blob/mod.rs b/gix-diff/src/blob/mod.rs index 541e978d752..5cf1f0c806e 100644 --- a/gix-diff/src/blob/mod.rs +++ b/gix-diff/src/blob/mod.rs @@ -12,7 +12,7 @@ pub mod pipeline; pub mod platform; pub mod unified_diff; -pub use unified_diff::_impl::UnifiedDiff; +pub use unified_diff::impls::UnifiedDiff; /// Information about the diff performed to detect similarity. #[derive(Debug, Default, Clone, Copy, PartialEq, PartialOrd)] diff --git a/gix-diff/src/blob/unified_diff.rs b/gix-diff/src/blob/unified_diff.rs deleted file mode 100644 index 437ed132f0f..00000000000 --- a/gix-diff/src/blob/unified_diff.rs +++ /dev/null @@ -1,320 +0,0 @@ -//! Facilities to produce the unified diff format. -//! -//! Originally based on . 
- -/// Defines the size of the context printed before and after each change. -/// -/// Similar to the `-U` option in git diff or gnu-diff. If the context overlaps -/// with previous or next change, the context gets reduced accordingly. -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)] -pub struct ContextSize { - /// Defines the size of the context printed before and after each change. - symmetrical: u32, -} - -impl Default for ContextSize { - fn default() -> Self { - ContextSize::symmetrical(3) - } -} - -/// Instantiation -impl ContextSize { - /// Create a symmetrical context with `n` lines before and after a changed hunk. - pub fn symmetrical(n: u32) -> Self { - ContextSize { symmetrical: n } - } -} - -/// Specify where to put a newline. -#[derive(Debug, Copy, Clone)] -pub enum NewlineSeparator<'a> { - /// Place the given newline separator, like `\n`, after each patch header as well as after each line. - /// This is the right choice if tokens don't include newlines. - AfterHeaderAndLine(&'a str), - /// Place the given newline separator, like `\n`, only after each patch header or if a line doesn't contain a newline. - /// This is the right choice if tokens do include newlines. - /// Note that diff-tokens *with* newlines may diff strangely at the end of files when lines have been appended, - /// as it will make the last line look like it changed just because the whitespace at the end 'changed'. - AfterHeaderAndWhenNeeded(&'a str), -} - -/// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff). -pub trait ConsumeHunk { - /// The item this instance produces after consuming all hunks. - type Out; - - /// Consume a single `hunk` in unified diff format, that would be prefixed with `header`. - /// Note that all newlines are added. - /// - /// Note that the [`UnifiedDiff`](super::UnifiedDiff) sink will wrap its output in an [`std::io::Result`]. - /// After this method returned its first error, it will not be called anymore. 
- /// - /// The following is hunk-related information and the same that is used in the `header`. - /// * `before_hunk_start` is the 1-based first line of this hunk in the old file. - /// * `before_hunk_len` the amount of lines of this hunk in the old file. - /// * `after_hunk_start` is the 1-based first line of this hunk in the new file. - /// * `after_hunk_len` the amount of lines of this hunk in the new file. - fn consume_hunk( - &mut self, - before_hunk_start: u32, - before_hunk_len: u32, - after_hunk_start: u32, - after_hunk_len: u32, - header: &str, - hunk: &[u8], - ) -> std::io::Result<()>; - /// Called after the last hunk is consumed to produce an output. - fn finish(self) -> Self::Out; -} - -pub(super) mod _impl { - use std::{hash::Hash, io::ErrorKind, ops::Range}; - - use bstr::{ByteSlice, ByteVec}; - use imara_diff::{intern, Sink}; - use intern::{InternedInput, Interner, Token}; - - use super::{ConsumeHunk, ContextSize, NewlineSeparator}; - - const CONTEXT: char = ' '; - const ADDITION: char = '+'; - const REMOVAL: char = '-'; - - /// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu-diff` if the `-u` option is used, - /// and passes it in full to a consumer. - pub struct UnifiedDiff<'a, T, D> - where - T: Hash + Eq + AsRef<[u8]>, - D: ConsumeHunk, - { - before: &'a [Token], - after: &'a [Token], - interner: &'a Interner, - - /// The 0-based start position in the 'before' tokens for the accumulated hunk for display in the header. - before_hunk_start: u32, - /// The size of the accumulated 'before' hunk in lines for display in the header. - before_hunk_len: u32, - /// The 0-based start position in the 'after' tokens for the accumulated hunk for display in the header. - after_hunk_start: u32, - /// The size of the accumulated 'after' hunk in lines. 
- after_hunk_len: u32, - // An index into `before` and the context line to print next, - // or `None` if this value was never computed to be the correct starting point for an accumulated hunk. - ctx_pos: Option, - - /// Symmetrical context before and after the changed hunk. - ctx_size: u32, - newline: NewlineSeparator<'a>, - - buffer: Vec, - header_buf: String, - delegate: D, - - err: Option, - } - - impl<'a, T, D> UnifiedDiff<'a, T, D> - where - T: Hash + Eq + AsRef<[u8]>, - D: ConsumeHunk, - { - /// Create a new instance to create unified diff using the lines in `input`, - /// which also must be used when running the diff algorithm. - /// `context_size` is the amount of lines around each hunk which will be passed - ///to `consume_hunk`. - /// - /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`. - pub fn new( - input: &'a InternedInput, - consume_hunk: D, - newline_separator: NewlineSeparator<'a>, - context_size: ContextSize, - ) -> Self { - Self { - interner: &input.interner, - before: &input.before, - after: &input.after, - - before_hunk_start: 0, - before_hunk_len: 0, - after_hunk_len: 0, - after_hunk_start: 0, - ctx_pos: None, - - ctx_size: context_size.symmetrical, - newline: newline_separator, - - buffer: Vec::with_capacity(8), - header_buf: String::new(), - delegate: consume_hunk, - - err: None, - } - } - - fn print_tokens(&mut self, tokens: &[Token], prefix: char) { - for &token in tokens { - self.buffer.push_char(prefix); - let line = &self.interner[token]; - self.buffer.push_str(line); - match self.newline { - NewlineSeparator::AfterHeaderAndLine(nl) => { - self.buffer.push_str(nl); - } - NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => { - if !line.as_ref().ends_with_str(nl) { - self.buffer.push_str(nl); - } - } - } - } - } - - fn flush_accumulated_hunk(&mut self) -> std::io::Result<()> { - if self.nothing_to_flush() { - return Ok(()); - } - - let ctx_pos = 
self.ctx_pos.expect("has been set if we started a hunk"); - let end = (ctx_pos + self.ctx_size).min(self.before.len() as u32); - self.print_context_and_update_pos(ctx_pos..end, end); - - let hunk_start = self.before_hunk_start + 1; - let hunk_end = self.after_hunk_start + 1; - self.header_buf.clear(); - std::fmt::Write::write_fmt( - &mut self.header_buf, - format_args!( - "@@ -{},{} +{},{} @@{nl}", - hunk_start, - self.before_hunk_len, - hunk_end, - self.after_hunk_len, - nl = match self.newline { - NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => { - nl - } - } - ), - ) - .map_err(|err| std::io::Error::new(ErrorKind::Other, err))?; - self.delegate.consume_hunk( - hunk_start, - self.before_hunk_len, - hunk_end, - self.after_hunk_len, - &self.header_buf, - &self.buffer, - )?; - - self.reset_hunks(); - Ok(()) - } - - fn print_context_and_update_pos(&mut self, print: Range, move_to: u32) { - self.print_tokens(&self.before[print.start as usize..print.end as usize], CONTEXT); - let len = print.end - print.start; - self.ctx_pos = Some(move_to); - self.before_hunk_len += len; - self.after_hunk_len += len; - } - - fn reset_hunks(&mut self) { - self.buffer.clear(); - self.before_hunk_len = 0; - self.after_hunk_len = 0; - } - - fn nothing_to_flush(&self) -> bool { - self.before_hunk_len == 0 && self.after_hunk_len == 0 - } - } - - impl Sink for UnifiedDiff<'_, T, D> - where - T: Hash + Eq + AsRef<[u8]>, - D: ConsumeHunk, - { - type Out = std::io::Result; - - fn process_change(&mut self, before: Range, after: Range) { - if self.err.is_some() { - return; - } - let start_next_hunk = self - .ctx_pos - .is_some_and(|ctx_pos| before.start - ctx_pos > 2 * self.ctx_size); - if start_next_hunk { - if let Err(err) = self.flush_accumulated_hunk() { - self.err = Some(err); - return; - } - let ctx_pos = before.start - self.ctx_size; - self.ctx_pos = Some(ctx_pos); - self.before_hunk_start = ctx_pos; - self.after_hunk_start = after.start - 
self.ctx_size; - } - let ctx_pos = match self.ctx_pos { - None => { - // TODO: can this be made so the code above does the job? - let ctx_pos = before.start.saturating_sub(self.ctx_size); - self.before_hunk_start = ctx_pos; - self.after_hunk_start = after.start.saturating_sub(self.ctx_size); - ctx_pos - } - Some(pos) => pos, - }; - self.print_context_and_update_pos(ctx_pos..before.start, before.end); - self.before_hunk_len += before.end - before.start; - self.after_hunk_len += after.end - after.start; - - self.print_tokens(&self.before[before.start as usize..before.end as usize], REMOVAL); - self.print_tokens(&self.after[after.start as usize..after.end as usize], ADDITION); - } - - fn finish(mut self) -> Self::Out { - if let Err(err) = self.flush_accumulated_hunk() { - self.err = Some(err); - } - if let Some(err) = self.err { - return Err(err); - } - Ok(self.delegate.finish()) - } - } - - /// An implementation that fails if the input isn't UTF-8. - impl ConsumeHunk for String { - type Out = Self; - - fn consume_hunk(&mut self, _: u32, _: u32, _: u32, _: u32, header: &str, hunk: &[u8]) -> std::io::Result<()> { - self.push_str(header); - self.push_str( - hunk.to_str() - .map_err(|err| std::io::Error::new(ErrorKind::Other, err))?, - ); - Ok(()) - } - - fn finish(self) -> Self::Out { - self - } - } - - /// An implementation that writes hunks into a byte buffer. 
- impl ConsumeHunk for Vec { - type Out = Self; - - fn consume_hunk(&mut self, _: u32, _: u32, _: u32, _: u32, header: &str, hunk: &[u8]) -> std::io::Result<()> { - self.push_str(header); - self.push_str(hunk); - Ok(()) - } - - fn finish(self) -> Self::Out { - self - } - } -} diff --git a/gix-diff/src/blob/unified_diff/impls.rs b/gix-diff/src/blob/unified_diff/impls.rs new file mode 100644 index 00000000000..22b41a57e42 --- /dev/null +++ b/gix-diff/src/blob/unified_diff/impls.rs @@ -0,0 +1,280 @@ +use bstr::{BString, ByteSlice, ByteVec}; +use imara_diff::{intern, Sink}; +use intern::{InternedInput, Interner, Token}; +use std::fmt::Write; +use std::{hash::Hash, ops::Range}; + +use super::{ConsumeBinaryHunk, ConsumeBinaryHunkDelegate, ConsumeHunk, ContextSize, DiffLineKind, HunkHeader}; + +/// A [`Sink`] that creates a unified diff. It can be used to create a textual diff in the +/// format typically output by `git` or `gnu-diff` if the `-u` option is used. +pub struct UnifiedDiff<'a, T, D> +where + T: Hash + Eq + AsRef<[u8]>, + D: ConsumeHunk, +{ + before: &'a [Token], + after: &'a [Token], + interner: &'a Interner<T>, + + /// The 0-based start position in the 'before' tokens for the accumulated hunk for display in the header. + before_hunk_start: u32, + /// The size of the accumulated 'before' hunk in lines for display in the header. + before_hunk_len: u32, + /// The 0-based start position in the 'after' tokens for the accumulated hunk for display in the header. + after_hunk_start: u32, + /// The size of the accumulated 'after' hunk in lines. + after_hunk_len: u32, + // An index into `before` and the context line to print next, + // or `None` if this value was never computed to be the correct starting point for an accumulated hunk. + ctx_pos: Option<u32>, + + /// Symmetrical context before and after the changed hunk. 
+ ctx_size: u32, + + buffer: Vec<(DiffLineKind, &'a [u8])>, + + delegate: D, + + err: Option<std::io::Error>, +} + +impl<'a, T, D> UnifiedDiff<'a, T, D> +where + T: Hash + Eq + AsRef<[u8]>, + D: ConsumeHunk, +{ + /// Create a new instance to create a unified diff using the lines in `input`, + /// which also must be used when running the diff algorithm. + /// `context_size` is the amount of lines around each hunk which will be passed + /// to `consume_hunk`. + /// + /// `consume_hunk` is called for each hunk with all the information required to create a + /// unified diff. + pub fn new(input: &'a InternedInput<T>, consume_hunk: D, context_size: ContextSize) -> Self { + Self { + interner: &input.interner, + before: &input.before, + after: &input.after, + + before_hunk_start: 0, + before_hunk_len: 0, + after_hunk_len: 0, + after_hunk_start: 0, + ctx_pos: None, + + ctx_size: context_size.symmetrical, + + buffer: Vec::with_capacity(8), + delegate: consume_hunk, + + err: None, + } + } + + fn print_tokens(&mut self, tokens: &[Token], line_type: DiffLineKind) { + for &token in tokens { + let content = self.interner[token].as_ref(); + self.buffer.push((line_type, content)); + } + } + + fn flush_accumulated_hunk(&mut self) -> std::io::Result<()> { + if self.nothing_to_flush() { + return Ok(()); + } + + let ctx_pos = self.ctx_pos.expect("has been set if we started a hunk"); + let end = ctx_pos.saturating_add(self.ctx_size).min(self.before.len() as u32); + self.print_context_and_update_pos(ctx_pos..end, end); + + let hunk_start = self.before_hunk_start + 1; + let hunk_end = self.after_hunk_start + 1; + + let header = HunkHeader { + before_hunk_start: hunk_start, + before_hunk_len: self.before_hunk_len, + after_hunk_start: hunk_end, + after_hunk_len: self.after_hunk_len, + }; + + self.delegate.consume_hunk(header, &self.buffer)?; + + self.reset_hunks(); + Ok(()) + } + + fn print_context_and_update_pos(&mut self, print: Range<u32>, move_to: u32) { + self.print_tokens( + &self.before[print.start as usize..print.end 
as usize], + DiffLineKind::Context, + ); + + let len = print.end - print.start; + self.ctx_pos = Some(move_to); + self.before_hunk_len += len; + self.after_hunk_len += len; + } + + fn reset_hunks(&mut self) { + self.buffer.clear(); + self.before_hunk_len = 0; + self.after_hunk_len = 0; + } + + fn nothing_to_flush(&self) -> bool { + self.before_hunk_len == 0 && self.after_hunk_len == 0 + } +} + +impl<T, D> Sink for UnifiedDiff<'_, T, D> +where + T: Hash + Eq + AsRef<[u8]>, + D: ConsumeHunk, +{ + type Out = std::io::Result<D::Out>; + + fn process_change(&mut self, before: Range<u32>, after: Range<u32>) { + if self.err.is_some() { + return; + } + let start_next_hunk = self + .ctx_pos + .is_some_and(|ctx_pos| before.start - ctx_pos > self.ctx_size.saturating_mul(2)); + if start_next_hunk { + if let Err(err) = self.flush_accumulated_hunk() { + self.err = Some(err); + return; + } + let ctx_pos = before.start - self.ctx_size; + self.ctx_pos = Some(ctx_pos); + self.before_hunk_start = ctx_pos; + self.after_hunk_start = after.start - self.ctx_size; + } + let ctx_pos = match self.ctx_pos { + None => { + // TODO: can this be made so the code above does the job? + let ctx_pos = before.start.saturating_sub(self.ctx_size); + self.before_hunk_start = ctx_pos; + self.after_hunk_start = after.start.saturating_sub(self.ctx_size); + ctx_pos + } + Some(pos) => pos, + }; + self.print_context_and_update_pos(ctx_pos..before.start, before.end); + self.before_hunk_len += before.end - before.start; + self.after_hunk_len += after.end - after.start; + + self.print_tokens( + &self.before[before.start as usize..before.end as usize], + DiffLineKind::Remove, + ); + self.print_tokens(&self.after[after.start as usize..after.end as usize], DiffLineKind::Add); + } + + fn finish(mut self) -> Self::Out { + if let Err(err) = self.flush_accumulated_hunk() { + self.err = Some(err); + } + if let Some(err) = self.err { + return Err(err); + } + Ok(self.delegate.finish()) + } +} + +/// An implementation that stringifies hunks as raw bytes without requiring UTF-8, leaving any validation to the delegate. 
+impl<D> ConsumeHunk for ConsumeBinaryHunk<'_, D> +where + D: ConsumeBinaryHunkDelegate, +{ + type Out = D; + + fn consume_hunk(&mut self, header: HunkHeader, lines: &[(DiffLineKind, &[u8])]) -> std::io::Result<()> { + self.header_buf.clear(); + self.header_buf + .write_fmt(format_args!("{header}{nl}", nl = self.newline)) + .map_err(std::io::Error::other)?; + + let buf = &mut self.hunk_buf; + buf.clear(); + for &(line_type, content) in lines { + buf.push(line_type.to_prefix() as u8); + buf.extend_from_slice(content); + + if !content.ends_with_str(self.newline) { + buf.push_str(self.newline); + } + } + + self.delegate.consume_binary_hunk(header, &self.header_buf, buf)?; + Ok(()) + } + + fn finish(self) -> Self::Out { + self.delegate + } +} + +/// An implementation that fails if the input isn't UTF-8. +impl ConsumeBinaryHunkDelegate for String { + fn consume_binary_hunk(&mut self, _header: HunkHeader, header_str: &str, hunk: &[u8]) -> std::io::Result<()> { + self.push_str(header_str); + self.push_str(hunk.to_str().map_err(std::io::Error::other)?); + Ok(()) + } +} + +/// An implementation that writes hunks into a byte buffer. +impl ConsumeBinaryHunkDelegate for Vec<u8> { + fn consume_binary_hunk(&mut self, _header: HunkHeader, header_str: &str, hunk: &[u8]) -> std::io::Result<()> { + self.push_str(header_str); + self.extend_from_slice(hunk); + Ok(()) + } +} + +/// An implementation that writes hunks into a human-readable byte buffer. 
+impl ConsumeBinaryHunkDelegate for BString { + fn consume_binary_hunk(&mut self, _header: HunkHeader, header_str: &str, hunk: &[u8]) -> std::io::Result<()> { + self.push_str(header_str); + self.extend_from_slice(hunk); + Ok(()) + } +} + +impl<'a, D> ConsumeBinaryHunk<'a, D> +where + D: ConsumeBinaryHunkDelegate, +{ + /// Create a new instance that writes stringified hunks to `delegate`, which uses `newline` to separate header and hunk, + /// as well as hunk lines that don't naturally end in a newline. + pub fn new(delegate: D, newline: &'a str) -> ConsumeBinaryHunk<'a, D> { + ConsumeBinaryHunk { + newline, + delegate, + header_buf: String::new(), + hunk_buf: Vec::with_capacity(128), + } + } +} + +impl DiffLineKind { + const fn to_prefix(self) -> char { + match self { + DiffLineKind::Context => ' ', + DiffLineKind::Add => '+', + DiffLineKind::Remove => '-', + } + } +} + +impl std::fmt::Display for HunkHeader { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "@@ -{},{} +{},{} @@", + self.before_hunk_start, self.before_hunk_len, self.after_hunk_start, self.after_hunk_len + ) + } +} diff --git a/gix-diff/src/blob/unified_diff/mod.rs b/gix-diff/src/blob/unified_diff/mod.rs new file mode 100644 index 00000000000..06eb5aa3101 --- /dev/null +++ b/gix-diff/src/blob/unified_diff/mod.rs @@ -0,0 +1,91 @@ +//! Facilities to produce the unified diff format. +//! +//! Originally based on . + +/// Defines the size of the context printed before and after each change. +/// +/// Similar to the `-U` option in git diff or gnu-diff. If the context overlaps +/// with previous or next change, the context gets reduced accordingly. +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)] +pub struct ContextSize { + /// Defines the size of the context printed before and after each change. 
+ symmetrical: u32, +} + +impl Default for ContextSize { + fn default() -> Self { + ContextSize::symmetrical(3) + } +} + +/// Instantiation +impl ContextSize { + /// Create a symmetrical context with `n` lines before and after a changed hunk. + pub fn symmetrical(n: u32) -> Self { + ContextSize { symmetrical: n } + } +} + +/// Represents the type of a line in a unified diff. +#[doc(alias = "git2")] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] +pub enum DiffLineKind { + /// A line that exists in both the old and the new version, added based on [`ContextSize`]. + Context, + /// A line that was added in the new version. + Add, + /// A line that was removed from the old version. + Remove, +} + +/// Holds information about a unified diff hunk, specifically with respect to line numbers. +#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] +pub struct HunkHeader { + /// The 1-based start position in the 'before' lines. + pub before_hunk_start: u32, + /// The size of the 'before' hunk in lines. + pub before_hunk_len: u32, + /// The 1-based start position in the 'after' lines. + pub after_hunk_start: u32, + /// The size of the 'after' hunk in lines. + pub after_hunk_len: u32, +} + +/// An adapter implementing [`ConsumeHunk`] that calls a delegate which receives each stringified hunk. +pub struct ConsumeBinaryHunk<'a, D> { + /// The newline used to terminate hunk lines that don't already end with it. + /// It is also meant to separate the stringified header from the hunk itself. + pub newline: &'a str, + /// The delegate to receive stringified hunks. + pub delegate: D, + + header_buf: String, + hunk_buf: Vec<u8>, +} + +/// A trait for use in conjunction with [`ConsumeBinaryHunk`]. 
+pub trait ConsumeBinaryHunkDelegate { + /// Consume a single `hunk` in unified diff format, along with its `header_str`, which already has a trailing newline added based + /// on the parent [`ConsumeBinaryHunk`] configuration and is also in unified diff format. + /// The `header` is the data that was used to produce `header_str`. + fn consume_binary_hunk(&mut self, header: HunkHeader, header_str: &str, hunk: &[u8]) -> std::io::Result<()>; +} + +/// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff). +pub trait ConsumeHunk { + /// The item this instance produces after consuming all hunks. + type Out; + + /// Consume a single hunk which is represented by its `lines`, each of which is paired with a `DiffLineKind` value + /// that tells if it was added, removed or is context. + /// The `header` specifies hunk offsets, which position the `lines` in the old and new file respectively. + /// + /// Note that the [`UnifiedDiff`](super::UnifiedDiff) sink will wrap its output in an [`std::io::Result`]. + /// After this method has returned its first error, it will not be called again. + fn consume_hunk(&mut self, header: HunkHeader, lines: &[(DiffLineKind, &[u8])]) -> std::io::Result<()>; + + /// Called after the last hunk is consumed to produce an output. 
+ fn finish(self) -> Self::Out; +} + +pub(super) mod impls; diff --git a/gix-diff/tests/diff/blob/unified_diff.rs b/gix-diff/tests/diff/blob/unified_diff.rs index 4bb2df7d421..782794d70c9 100644 --- a/gix-diff/tests/diff/blob/unified_diff.rs +++ b/gix-diff/tests/diff/blob/unified_diff.rs @@ -1,5 +1,6 @@ +use gix_diff::blob::unified_diff::ConsumeBinaryHunk; use gix_diff::blob::{ - unified_diff::{ConsumeHunk, ContextSize, NewlineSeparator}, + unified_diff::{ConsumeHunk, ContextSize, DiffLineKind, HunkHeader}, Algorithm, UnifiedDiff, }; @@ -14,8 +15,7 @@ fn removed_modified_added() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -43,8 +43,7 @@ fn removed_modified_added() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(1), ), )?; @@ -69,8 +68,7 @@ fn removed_modified_added() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(0), ), )?; @@ -89,12 +87,7 @@ fn removed_modified_added() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new( - &interner, - Recorder::default(), - NewlineSeparator::AfterHeaderAndLine("\n"), - ContextSize::symmetrical(1), - ), + UnifiedDiff::new(&interner, Recorder::new("\n"), ContextSize::symmetrical(1)), )?; assert_eq!( actual, @@ -119,8 +112,7 @@ fn context_overlap_by_one_line_move_up() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -150,8 +142,7 @@ fn context_overlap_by_one_line_move_down() -> 
crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -184,8 +175,7 @@ fn added_on_top_keeps_context_correctly_sized() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -213,8 +203,7 @@ fn added_on_top_keeps_context_correctly_sized() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -241,8 +230,7 @@ fn added_on_top_keeps_context_correctly_sized() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -271,8 +259,7 @@ fn added_on_top_keeps_context_correctly_sized() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -302,8 +289,7 @@ fn removed_modified_added_with_newlines_in_tokens() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -333,8 +319,7 @@ fn removed_modified_added_with_newlines_in_tokens() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(1), ), )?; @@ -361,8 +346,7 @@ fn removed_modified_added_with_newlines_in_tokens() -> crate::Result { &interner, 
UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(0), ), )?; @@ -383,12 +367,7 @@ fn removed_modified_added_with_newlines_in_tokens() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new( - &interner, - Recorder::default(), - NewlineSeparator::AfterHeaderAndWhenNeeded("\r\n"), - ContextSize::symmetrical(1), - ), + UnifiedDiff::new(&interner, Recorder::new("\r\n"), ContextSize::symmetrical(1)), )?; assert_eq!( actual, @@ -399,6 +378,32 @@ fn removed_modified_added_with_newlines_in_tokens() -> crate::Result { ] ); + let actual = gix_diff::blob::diff( + Algorithm::Myers, + &interner, + UnifiedDiff::new(&interner, DiffLineKindRecorder::default(), ContextSize::symmetrical(1)), + )?; + + assert_eq!( + actual, + &[ + vec![DiffLineKind::Remove, DiffLineKind::Context], + vec![ + DiffLineKind::Context, + DiffLineKind::Remove, + DiffLineKind::Add, + DiffLineKind::Context + ], + vec![ + DiffLineKind::Context, + DiffLineKind::Remove, + DiffLineKind::Add, + DiffLineKind::Add, + DiffLineKind::Add + ] + ] + ); + Ok(()) } @@ -414,8 +419,7 @@ fn all_added_or_removed() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(context_lines), ), )?; @@ -439,8 +443,7 @@ fn all_added_or_removed() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(context_lines), ), )?; @@ -467,8 +470,7 @@ fn empty() -> crate::Result { &interner, UnifiedDiff::new( &interner, - String::new(), - NewlineSeparator::AfterHeaderAndLine("\n"), + ConsumeBinaryHunk::new(String::new(), "\n"), ContextSize::symmetrical(3), ), )?; @@ -477,28 +479,32 @@ fn empty() -> 
crate::Result { Ok(()) } -#[derive(Default)] struct Recorder { #[allow(clippy::type_complexity)] hunks: Vec<((u32, u32), (u32, u32), String)>, + newline: &'static str, +} + +impl Recorder { + pub fn new(newline: &'static str) -> Self { + Recorder { + hunks: Vec::new(), + newline, + } + } } impl ConsumeHunk for Recorder { type Out = Vec<((u32, u32), (u32, u32), String)>; - fn consume_hunk( - &mut self, - before_hunk_start: u32, - before_hunk_len: u32, - after_hunk_start: u32, - after_hunk_len: u32, - header: &str, - _hunk: &[u8], - ) -> std::io::Result<()> { + fn consume_hunk(&mut self, header: HunkHeader, _hunk: &[(DiffLineKind, &[u8])]) -> std::io::Result<()> { + let mut formatted_header = header.to_string(); + formatted_header.push_str(self.newline); + self.hunks.push(( - (before_hunk_start, before_hunk_len), - (after_hunk_start, after_hunk_len), - header.to_string(), + (header.before_hunk_start, header.before_hunk_len), + (header.after_hunk_start, header.after_hunk_len), + formatted_header, )); Ok(()) } @@ -507,3 +513,22 @@ impl ConsumeHunk for Recorder { self.hunks } } + +#[derive(Default)] +struct DiffLineKindRecorder { + line_kinds: Vec<Vec<DiffLineKind>>, +} + +impl ConsumeHunk for DiffLineKindRecorder { + type Out = Vec<Vec<DiffLineKind>>; + + fn consume_hunk(&mut self, _header: HunkHeader, hunk: &[(DiffLineKind, &[u8])]) -> std::io::Result<()> { + self.line_kinds + .push(hunk.iter().map(|(line_type, _)| *line_type).collect()); + Ok(()) + } + + fn finish(self) -> Self::Out { + self.line_kinds + } +}