diff --git a/Cargo.lock b/Cargo.lock index 9d92454251d..072b45faf98 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1755,11 +1755,11 @@ dependencies = [ "crc32fast", "crossbeam-channel", "document-features", - "flate2", "gix-path", "gix-trace", "gix-utils", "libc", + "libz-rs-sys", "once_cell", "parking_lot", "prodash", @@ -3322,9 +3322,9 @@ dependencies = [ [[package]] name = "libz-rs-sys" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "172a788537a2221661b480fee8dc5f96c580eb34fa88764d3205dc356c7e4221" +checksum = "840db8cf39d9ec4dd794376f38acc40d0fc65eec2a8f484f7fd375b84602becd" dependencies = [ "zlib-rs", ] @@ -5963,6 +5963,6 @@ dependencies = [ [[package]] name = "zlib-rs" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626bd9fa9734751fc50d6060752170984d7053f5a39061f524cda68023d4db8a" +checksum = "2f06ae92f42f5e5c42443fd094f245eb656abf56dd7cce9b8b263236565e00f2" diff --git a/gix-features/Cargo.toml b/gix-features/Cargo.toml index a955c1f2f0a..357a663d72e 100644 --- a/gix-features/Cargo.toml +++ b/gix-features/Cargo.toml @@ -55,22 +55,8 @@ io-pipe = ["dep:bytes"] crc32 = ["dep:crc32fast"] ## Enable the usage of zlib-related utilities to compress or decompress data. -## This enables the `flate2` crate, and always uses the high-performance `zlib-rs` backend. -## Note that the various past features for selecting zlib backends are now deprecated and do nothing. -zlib = ["dep:flate2", "dep:thiserror"] -## Deprecated: gix always uses zlib-rs. -zlib-ng = ["zlib"] -## Deprecated: gix always uses zlib-rs now. As of zlib-rs 0.5.0 (used by flate2 -## 1.1.1), this no longer exports C symbols # by default, so it doesn't -## conflict with any other zlib library that might be loaded into the same -## address space. -zlib-rs = ["zlib"] -## Deprecated: gix always uses zlib-rs. -zlib-ng-compat = ["zlib"] -## Deprecated: gix always uses zlib-rs. 
-zlib-stock = ["zlib"] -## Deprecated: gix always uses zlib-rs. -zlib-rust-backend = ["zlib"] +## This enables and uses the high-performance `zlib-rs` backend. +zlib = ["dep:libz-rs-sys", "dep:thiserror"] #! ### Other @@ -121,7 +107,7 @@ bytesize = { version = "2.0.1", optional = true } bytes = { version = "1.0.0", optional = true } # zlib module -flate2 = { version = "1.1.1", optional = true, default-features = false, features = ["zlib-rs"] } +libz-rs-sys = { version = "0.5.2", optional = true } thiserror = { version = "2.0.0", optional = true } once_cell = { version = "1.21.3", optional = true } diff --git a/gix-features/src/lib.rs b/gix-features/src/lib.rs index 18342b1d92e..fe4b2e1e4bd 100644 --- a/gix-features/src/lib.rs +++ b/gix-features/src/lib.rs @@ -12,7 +12,7 @@ doc = ::document_features::document_features!() )] #![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))] -#![deny(missing_docs, rust_2018_idioms, unsafe_code)] +#![deny(rust_2018_idioms, missing_docs)] /// pub mod cache; diff --git a/gix-features/src/zlib/mod.rs b/gix-features/src/zlib/mod.rs index f55660075eb..810715f3b15 100644 --- a/gix-features/src/zlib/mod.rs +++ b/gix-features/src/zlib/mod.rs @@ -1,4 +1,132 @@ -pub use flate2::{Decompress, Status}; +use std::ffi::c_int; + +/// A type to hold all state needed for decompressing a ZLIB encoded stream. +pub struct Decompress(libz_rs_sys::z_stream); + +unsafe impl Sync for Decompress {} +unsafe impl Send for Decompress {} + +impl Default for Decompress { + fn default() -> Self { + Self::new() + } +} + +impl Decompress { + /// The amount of bytes consumed from the input so far. + pub fn total_in(&self) -> u64 { + self.0.total_in as _ + } + + /// The amount of decompressed bytes that have been written to the output thus far. + pub fn total_out(&self) -> u64 { + self.0.total_out as _ + } + + /// Create a new instance. Note that it allocates in various ways and thus should be re-used. 
+ pub fn new() -> Self { + let mut this = libz_rs_sys::z_stream::default(); + + unsafe { + libz_rs_sys::inflateInit_( + &mut this, + libz_rs_sys::zlibVersion(), + core::mem::size_of::<libz_rs_sys::z_stream>() as core::ffi::c_int, + ); + } + + Self(this) + } + + /// Reset the state to allow handling a new stream. + pub fn reset(&mut self) { + unsafe { libz_rs_sys::inflateReset(&mut self.0) }; + } + + /// Decompress `input` and write all decompressed bytes into `output`, with `flush` defining some details about this. + pub fn decompress( + &mut self, + input: &[u8], + output: &mut [u8], + flush: FlushDecompress, + ) -> Result<Status, DecompressError> { + self.0.avail_in = input.len() as _; + self.0.avail_out = output.len() as _; + + self.0.next_in = input.as_ptr(); + self.0.next_out = output.as_mut_ptr(); + + match unsafe { libz_rs_sys::inflate(&mut self.0, flush as _) } { + libz_rs_sys::Z_OK => Ok(Status::Ok), + libz_rs_sys::Z_BUF_ERROR => Ok(Status::BufError), + libz_rs_sys::Z_STREAM_END => Ok(Status::StreamEnd), + + libz_rs_sys::Z_STREAM_ERROR => Err(DecompressError::StreamError), + libz_rs_sys::Z_DATA_ERROR => Err(DecompressError::DataError), + libz_rs_sys::Z_MEM_ERROR => Err(DecompressError::InsufficientMemory), + err => Err(DecompressError::Unknown { err }), + } + } +} + +impl Drop for Decompress { + fn drop(&mut self) { + unsafe { libz_rs_sys::inflateEnd(&mut self.0) }; + } +} + +/// The error produced by [`Decompress::decompress()`]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum DecompressError { + #[error("stream error")] + StreamError, + #[error("Not enough memory")] + InsufficientMemory, + #[error("Invalid input data")] + DataError, + #[error("An unknown error occurred: {err}")] + Unknown { err: c_int }, +} + +/// The status returned by [`Decompress::decompress()`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Status { + /// The decompress operation went well. Not to be confused with `StreamEnd`, so one can continue + /// the decompression.
+ Ok, + /// No progress was possible; more input or more output space is needed to continue. + BufError, + /// The stream was fully decompressed. + StreamEnd, +} + +/// Values which indicate the form of flushing to be used when +/// decompressing in-memory data. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[non_exhaustive] +#[allow(clippy::unnecessary_cast)] +pub enum FlushDecompress { + /// A typical parameter for passing to compression/decompression functions, + /// this leaves it to the underlying stream to decide how much data to + /// accumulate before producing output in order to maximize compression. + None = libz_rs_sys::Z_NO_FLUSH as isize, + + /// All pending output is flushed to the output buffer and the output is + /// aligned on a byte boundary so that the decompressor can get all input + /// data available so far. + /// + /// Flushing may degrade compression for some compression algorithms and so + /// it should only be used when necessary. This will complete the current + /// deflate block and follow it with an empty stored block. + Sync = libz_rs_sys::Z_SYNC_FLUSH as isize, + + /// Pending input is processed and pending output is flushed. + /// + /// The return value may indicate that the stream is not yet done and more + /// data has yet to be processed.
+ Finish = libz_rs_sys::Z_FINISH as isize, +} /// non-streaming interfaces for decompression pub mod inflate { @@ -8,33 +136,26 @@ pub mod inflate { pub enum Error { #[error("Could not write all bytes when decompressing content")] WriteInflated(#[from] std::io::Error), - #[error("Could not decode zip stream, status was '{0:?}'")] - Inflate(#[from] flate2::DecompressError), + #[error("Could not decode zip stream, status was '{0}'")] + Inflate(#[from] super::DecompressError), #[error("The zlib status indicated an error, status was '{0:?}'")] - Status(flate2::Status), + Status(super::Status), } } /// Decompress a few bytes of a zlib stream without allocation +#[derive(Default)] pub struct Inflate { /// The actual decompressor doing all the work. pub state: Decompress, } -impl Default for Inflate { - fn default() -> Self { - Inflate { - state: Decompress::new(true), - } - } -} - impl Inflate { /// Run the decompressor exactly once. Cannot be run multiple times - pub fn once(&mut self, input: &[u8], out: &mut [u8]) -> Result<(flate2::Status, usize, usize), inflate::Error> { + pub fn once(&mut self, input: &[u8], out: &mut [u8]) -> Result<(Status, usize, usize), inflate::Error> { let before_in = self.state.total_in(); let before_out = self.state.total_out(); - let status = self.state.decompress(input, out, flate2::FlushDecompress::None)?; + let status = self.state.decompress(input, out, FlushDecompress::None)?; Ok(( status, (self.state.total_in() - before_in) as usize, @@ -44,7 +165,7 @@ impl Inflate { /// Ready this instance for decoding another data stream. 
pub fn reset(&mut self) { - self.state.reset(true); + self.state.reset(); } } diff --git a/gix-features/src/zlib/stream/deflate/mod.rs b/gix-features/src/zlib/stream/deflate/mod.rs index 567e8fece76..d0c40399ad9 100644 --- a/gix-features/src/zlib/stream/deflate/mod.rs +++ b/gix-features/src/zlib/stream/deflate/mod.rs @@ -1,4 +1,5 @@ -use flate2::Compress; +use crate::zlib::Status; +use std::ffi::c_int; const BUF_SIZE: usize = 4096 * 8; @@ -24,15 +25,141 @@ where } } +/// Hold all state needed for compressing data. +pub struct Compress(libz_rs_sys::z_stream); + +unsafe impl Sync for Compress {} +unsafe impl Send for Compress {} + +impl Default for Compress { + fn default() -> Self { + Self::new() + } +} + +impl Compress { + /// The number of bytes that were read from the input. + pub fn total_in(&self) -> u64 { + self.0.total_in as _ + } + + /// The number of compressed bytes that were written to the output. + pub fn total_out(&self) -> u64 { + self.0.total_out as _ + } + + /// Create a new instance - this allocates so should be done with care. + pub fn new() -> Self { + let mut this = libz_rs_sys::z_stream::default(); + + unsafe { + libz_rs_sys::deflateInit_( + &mut this, + libz_rs_sys::Z_BEST_SPEED, + libz_rs_sys::zlibVersion(), + core::mem::size_of::<libz_rs_sys::z_stream>() as core::ffi::c_int, + ); + } + + Self(this) + } + + /// Prepare the instance for a new stream. + pub fn reset(&mut self) { + unsafe { libz_rs_sys::deflateReset(&mut self.0) }; + } + + /// Compress `input` and write compressed bytes to `output`, with `flush` controlling additional characteristics.
+ pub fn compress(&mut self, input: &[u8], output: &mut [u8], flush: FlushCompress) -> Result<Status, CompressError> { + self.0.avail_in = input.len() as _; + self.0.avail_out = output.len() as _; + + self.0.next_in = input.as_ptr(); + self.0.next_out = output.as_mut_ptr(); + + match unsafe { libz_rs_sys::deflate(&mut self.0, flush as _) } { + libz_rs_sys::Z_OK => Ok(Status::Ok), + libz_rs_sys::Z_BUF_ERROR => Ok(Status::BufError), + libz_rs_sys::Z_STREAM_END => Ok(Status::StreamEnd), + + libz_rs_sys::Z_STREAM_ERROR => Err(CompressError::StreamError), + libz_rs_sys::Z_MEM_ERROR => Err(CompressError::InsufficientMemory), + err => Err(CompressError::Unknown { err }), + } + } +} + +impl Drop for Compress { + fn drop(&mut self) { + unsafe { libz_rs_sys::deflateEnd(&mut self.0) }; + } +} + +/// The error produced by [`Compress::compress()`]. +#[derive(Debug, thiserror::Error)] +#[error("{msg}")] +#[allow(missing_docs)] +pub enum CompressError { + #[error("stream error")] + StreamError, + #[error("Not enough memory")] + InsufficientMemory, + #[error("An unknown error occurred: {err}")] + Unknown { err: c_int }, +} + +/// Values which indicate the form of flushing to be used when compressing +/// in-memory data. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[non_exhaustive] +#[allow(clippy::unnecessary_cast)] +pub enum FlushCompress { + /// A typical parameter for passing to compression/decompression functions, + /// this leaves it to the underlying stream to decide how much data to + /// accumulate before producing output in order to maximize compression. + None = libz_rs_sys::Z_NO_FLUSH as isize, + + /// All pending output is flushed to the output buffer, but the output is + /// not aligned to a byte boundary. + /// + /// All input data so far will be available to the decompressor (as with + /// `FlushCompress::Sync`).
This completes the current deflate block and follows it + /// with an empty fixed codes block that is 10 bits long, and it assures + /// that enough bytes are output in order for the decompressor to finish the + /// block before the empty fixed code block. + Partial = libz_rs_sys::Z_PARTIAL_FLUSH as isize, + + /// All pending output is flushed to the output buffer and the output is + /// aligned on a byte boundary so that the decompressor can get all input + /// data available so far. + /// + /// Flushing may degrade compression for some compression algorithms and so + /// it should only be used when necessary. This will complete the current + /// deflate block and follow it with an empty stored block. + Sync = libz_rs_sys::Z_SYNC_FLUSH as isize, + + /// All output is flushed as with `FlushCompress::Sync` and the compression state is + /// reset so decompression can restart from this point if previous + /// compressed data has been damaged or if random access is desired. + /// + /// Using this option too often can seriously degrade compression. + Full = libz_rs_sys::Z_FULL_FLUSH as isize, + + /// Pending input is processed and pending output is flushed. + /// + /// The return value may indicate that the stream is not yet done and more + /// data has yet to be processed.
+ Finish = libz_rs_sys::Z_FINISH as isize, +} + mod impls { use std::io; - use flate2::{Compress, Compression, FlushCompress, Status}; - - use crate::zlib::stream::deflate; + use crate::zlib::stream::deflate::{self, Compress, FlushCompress}; + use crate::zlib::Status; pub(crate) fn new_compress() -> Compress { - Compress::new(Compression::fast(), true) + Compress::new() } impl<W> deflate::Write<W> diff --git a/gix-features/src/zlib/stream/deflate/tests.rs b/gix-features/src/zlib/stream/deflate/tests.rs index 84cdb30138b..37e19e8e90c 100644 --- a/gix-features/src/zlib/stream/deflate/tests.rs +++ b/gix-features/src/zlib/stream/deflate/tests.rs @@ -5,9 +5,9 @@ mod deflate_stream { }; use bstr::ByteSlice; - use flate2::Decompress; use crate::zlib::stream::deflate; + use crate::zlib::Decompress; /// Provide streaming decompression using the `std::io::Read` trait. /// If `std::io::BufReader` is used, an allocation for the input buffer will be performed. @@ -22,7 +22,7 @@ mod deflate_stream { { pub fn from_read(read: R) -> InflateReader<R> { InflateReader { - decompressor: Decompress::new(true), + decompressor: Decompress::new(), inner: read, } } diff --git a/gix-features/src/zlib/stream/inflate.rs b/gix-features/src/zlib/stream/inflate.rs index 11dc9280019..3654fe6a538 100644 --- a/gix-features/src/zlib/stream/inflate.rs +++ b/gix-features/src/zlib/stream/inflate.rs @@ -1,6 +1,6 @@ use std::{io, io::BufRead}; -use flate2::{Decompress, FlushDecompress, Status}; +use crate::zlib::{Decompress, FlushDecompress, Status}; /// Read bytes from `rd` and decompress them using `state` into a pre-allocated fitting buffer `dst`, returning the amount of bytes written.
pub fn read(rd: &mut impl BufRead, state: &mut Decompress, mut dst: &mut [u8]) -> io::Result<usize> { diff --git a/gix-pack/src/data/input/bytes_to_entries.rs b/gix-pack/src/data/input/bytes_to_entries.rs index 7f54977504c..4f563621dfe 100644 --- a/gix-pack/src/data/input/bytes_to_entries.rs +++ b/gix-pack/src/data/input/bytes_to_entries.rs @@ -62,7 +62,7 @@ where ); Ok(BytesToEntriesIter { read, - decompressor: Decompress::new(true), + decompressor: Decompress::new(), compressed, offset: 12, had_error: false, @@ -101,7 +101,7 @@ where // Decompress object to learn its compressed bytes let compressed_buf = self.compressed_buf.take().unwrap_or_else(|| Vec::with_capacity(4096)); - self.decompressor.reset(true); + self.decompressor.reset(); let mut decompressed_reader = DecompressRead { inner: read_and_pass_to( &mut self.read, diff --git a/gix/tests/gix/repository/mod.rs b/gix/tests/gix/repository/mod.rs index 4253ff4b121..b186dec62a3 100644 --- a/gix/tests/gix/repository/mod.rs +++ b/gix/tests/gix/repository/mod.rs @@ -134,7 +134,7 @@ mod dirwalk { #[test] fn size_in_memory() { let actual_size = std::mem::size_of::<Repository>(); - let limit = 1200; + let limit = 1250; assert!( actual_size <= limit, "size of Repository shouldn't change without us noticing, it's meant to be cloned: should have been below {limit:?}, was {actual_size} (bigger on windows)" diff --git a/gix/tests/gix/revision/spec/from_bytes/mod.rs b/gix/tests/gix/revision/spec/from_bytes/mod.rs index 407227f2f6b..c728932d8d1 100644 --- a/gix/tests/gix/revision/spec/from_bytes/mod.rs +++ b/gix/tests/gix/revision/spec/from_bytes/mod.rs @@ -126,8 +126,8 @@ fn bad_objects_are_valid_until_they_are_actually_read_from_the_odb() { Spec::from_id(hex_to_id("cafea31147e840161a1860c50af999917ae1536b").attach(&repo)) ); assert_eq!( - &format!("{:?}", parse_spec("cafea^{object}", &repo).unwrap_err())[..80], - r#"FindObject(Find(Loose(DecompressFile { source: Inflate(DecompressError(General {"# + &format!("{:?}",
parse_spec("cafea^{object}", &repo).unwrap_err())[..65], + r#"FindObject(Find(Loose(DecompressFile { source: Inflate(DataError)"# ); } }