diff --git a/CHANGELOG.md b/CHANGELOG.md index 427c3f8a..aba4e73d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,9 +16,16 @@ The format is based on Keep a Changelog and this project adheres to - If there are breaking changes, put a short, actionable checklist here. -## [0.15.5-alpha] - 2025-10-27 +## [0.16.0-alpha] - 2025-11-19 +### Added +- Added feature-gated `bytes` integration for zero-copy conversion to `bytes::Bytes` (v1.11.0). + - New methods: `EntryHandle::as_bytes()` and `EntryHandle::into_bytes()`. + - Enable with `features = ["bytes"]` in your `Cargo.toml`. + - Perfect for network protocols and async I/O scenarios. + ### Changed -- Bumped Apache Arrow dependency to 57.0.0. (No other functional changes.) +- Downgraded Apache Arrow dependency to 56.2.0 for compatibility with the current [datafusion](https://crates.io/crates/datafusion) crate release. +- Updated `simd-r-drive-entry-handle` README with feature documentation and usage examples. --- diff --git a/Cargo.lock b/Cargo.lock index 15b12669..93db0760 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -119,9 +119,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4df8bb5b0bd64c0b9bc61317fcc480bad0f00e56d3bc32c69a4c8dada4786bae" +checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" dependencies = [ "arrow-arith", "arrow-array", @@ -137,23 +137,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1a640186d3bd30a24cb42264c2dafb30e236a6f50d510e56d40b708c9582491" +checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "num-traits", + "num", ] [[package]] name = "arrow-array" -version =
"57.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219fe420e6800979744c8393b687afb0252b3f8a89b91027d27887b72aa36d31" +checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" dependencies = [ "ahash", "arrow-buffer", @@ -162,28 +162,25 @@ dependencies = [ "chrono", "half", "hashbrown 0.16.0", - "num-complex", - "num-integer", - "num-traits", + "num", ] [[package]] name = "arrow-buffer" -version = "57.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76885a2697a7edf6b59577f568b456afc94ce0e2edc15b784ce3685b6c3c5c27" +checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" dependencies = [ "bytes", "half", - "num-bigint", - "num-traits", + "num", ] [[package]] name = "arrow-cast" -version = "57.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c9ebb4c987e6b3b236fb4a14b20b34835abfdd80acead3ccf1f9bf399e1f168" +checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" dependencies = [ "arrow-array", "arrow-buffer", @@ -195,28 +192,27 @@ dependencies = [ "chrono", "half", "lexical-core", - "num-traits", + "num", "ryu", ] [[package]] name = "arrow-data" -version = "57.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727681b95de313b600eddc2a37e736dcb21980a40f640314dcf360e2f36bc89b" +checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" dependencies = [ "arrow-buffer", "arrow-schema", "half", - "num-integer", - "num-traits", + "num", ] [[package]] name = "arrow-ord" -version = "57.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "141c05298b21d03e88062317a1f1a73f5ba7b6eb041b350015b1cd6aabc0519b" +checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" dependencies = [ "arrow-array", "arrow-buffer", @@ 
-227,9 +223,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f3c06a6abad6164508ed283c7a02151515cef3de4b4ff2cebbcaeb85533db2" +checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" dependencies = [ "arrow-array", "arrow-buffer", @@ -240,29 +236,29 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cfa7a03d1eee2a4d061476e1840ad5c9867a544ca6c4c59256496af5d0a8be5" +checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" [[package]] name = "arrow-select" -version = "57.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bafa595babaad59f2455f4957d0f26448fb472722c186739f4fac0823a1bdb47" +checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" dependencies = [ "ahash", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "num-traits", + "num", ] [[package]] name = "arrow-string" -version = "57.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f46457dbbb99f2650ff3ac23e46a929e0ab81db809b02aa5511c258348bef2" +checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" dependencies = [ "arrow-array", "arrow-buffer", @@ -270,7 +266,7 @@ dependencies = [ "arrow-schema", "arrow-select", "memchr", - "num-traits", + "num", "regex", "regex-syntax", ] @@ -447,9 +443,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" [[package]] 
name = "cast" @@ -1343,6 +1339,20 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -1371,6 +1381,28 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -1793,7 +1825,7 @@ dependencies = [ [[package]] name = "simd-r-drive" -version = "0.15.5-alpha" +version = "0.16.0-alpha" dependencies = [ "async-trait", "bincode", @@ -1821,16 +1853,17 @@ dependencies = [ [[package]] name = "simd-r-drive-entry-handle" -version = "0.15.5-alpha" +version = "0.16.0-alpha" dependencies = [ "arrow", + "bytes", "crc32fast", "memmap2", ] [[package]] name = "simd-r-drive-extensions" -version = "0.15.5-alpha" +version = "0.16.0-alpha" dependencies = [ "bincode", "doc-comment", @@ -1842,7 +1875,7 @@ dependencies = [ [[package]] name = "simd-r-drive-muxio-service-definition" -version = "0.15.5-alpha" +version = "0.16.0-alpha" dependencies = [ "bitcode", "muxio-rpc-service", @@ -1850,7 +1883,7 @@ dependencies = [ [[package]] name = "simd-r-drive-ws-client" -version = "0.15.5-alpha" +version = "0.16.0-alpha" dependencies = [ "async-trait", "muxio-rpc-service", @@ -1864,7 
+1897,7 @@ dependencies = [ [[package]] name = "simd-r-drive-ws-server" -version = "0.15.5-alpha" +version = "0.16.0-alpha" dependencies = [ "clap", "indoc", diff --git a/Cargo.toml b/Cargo.toml index 4ce4c54e..5886d81e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [workspace.package] authors = ["Jeremy Harris "] -version = "0.15.5-alpha" +version = "0.16.0-alpha" edition = "2024" repository = "https://github.com/jzombie/rust-simd-r-drive" license = "Apache-2.0" @@ -54,6 +54,9 @@ parallel = ["rayon"] # Proxy: when users enable simd-r-drive/arrow, it enables the dep's arrow. arrow = ["simd-r-drive-entry-handle/arrow"] +# Proxy: when users enable simd-r-drive/bytes, it enables the dep's bytes. +bytes = ["simd-r-drive-entry-handle/bytes"] + [[bench]] name = "storage_benchmark" harness = false @@ -79,18 +82,19 @@ resolver = "2" [workspace.dependencies] # Intra-workspace crates -simd-r-drive = { path = ".", version = "0.15.5-alpha" } -simd-r-drive-entry-handle = { path = "./simd-r-drive-entry-handle", version = "0.15.5-alpha" } -simd-r-drive-ws-client = { path = "./experiments/simd-r-drive-ws-client", version = "0.15.5-alpha" } -simd-r-drive-muxio-service-definition = { path = "./experiments/simd-r-drive-muxio-service-definition", version = "0.15.5-alpha" } +simd-r-drive = { path = ".", version = "0.16.0-alpha" } +simd-r-drive-entry-handle = { path = "./simd-r-drive-entry-handle", version = "0.16.0-alpha" } +simd-r-drive-ws-client = { path = "./experiments/simd-r-drive-ws-client", version = "0.16.0-alpha" } +simd-r-drive-muxio-service-definition = { path = "./experiments/simd-r-drive-muxio-service-definition", version = "0.16.0-alpha" } muxio-tokio-rpc-client = "0.9.0-alpha" muxio-tokio-rpc-server = "0.9.0-alpha" muxio-rpc-service = "0.9.0-alpha" muxio-rpc-service-caller = "0.9.0-alpha" # Third-party crates (note, not all dependencies are used in the base drive) -arrow = { version = "57.0.0", default-features = false } +arrow = { version = "56.2.0", 
default-features = false } async-trait = "0.1.88" +bytes = "1.11.0" bincode = "1.3.3" # TODO: Replace with `bitcode` bitcode = "0.6.6" clap = "4.5.40" diff --git a/experiments/bindings/python-ws-client/Cargo.lock b/experiments/bindings/python-ws-client/Cargo.lock index a9a46655..da386261 100644 --- a/experiments/bindings/python-ws-client/Cargo.lock +++ b/experiments/bindings/python-ws-client/Cargo.lock @@ -1048,7 +1048,7 @@ dependencies = [ [[package]] name = "simd-r-drive" -version = "0.15.5-alpha" +version = "0.16.0-alpha" dependencies = [ "async-trait", "clap", @@ -1064,7 +1064,7 @@ dependencies = [ [[package]] name = "simd-r-drive-entry-handle" -version = "0.15.5-alpha" +version = "0.16.0-alpha" dependencies = [ "crc32fast", "memmap2", @@ -1072,7 +1072,7 @@ dependencies = [ [[package]] name = "simd-r-drive-muxio-service-definition" -version = "0.15.5-alpha" +version = "0.16.0-alpha" dependencies = [ "bitcode", "muxio-rpc-service", @@ -1080,7 +1080,7 @@ dependencies = [ [[package]] name = "simd-r-drive-ws-client" -version = "0.15.5-alpha" +version = "0.16.0-alpha" dependencies = [ "async-trait", "muxio-rpc-service", diff --git a/experiments/bindings/python_(old_client)/pyproject.toml b/experiments/bindings/python_(old_client)/pyproject.toml index 4bc77e3b..8912bf92 100644 --- a/experiments/bindings/python_(old_client)/pyproject.toml +++ b/experiments/bindings/python_(old_client)/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "simd-r-drive-py" -version = "0.15.5-alpha" +version = "0.16.0-alpha" description = "SIMD-optimized append-only schema-less storage engine. Key-based binary storage in a single-file storage container." 
repository = "https://github.com/jzombie/rust-simd-r-drive" license = "Apache-2.0" diff --git a/simd-r-drive-entry-handle/Cargo.toml b/simd-r-drive-entry-handle/Cargo.toml index 1ed41726..fb07b3b2 100644 --- a/simd-r-drive-entry-handle/Cargo.toml +++ b/simd-r-drive-entry-handle/Cargo.toml @@ -12,9 +12,11 @@ publish.workspace = true [dependencies] arrow = { workspace = true, optional = true } +bytes = { workspace = true, optional = true } crc32fast = { workspace = true } memmap2 = { workspace = true } [features] expose-internal-api = [] arrow = ["dep:arrow"] +bytes = ["dep:bytes"] diff --git a/simd-r-drive-entry-handle/README.md b/simd-r-drive-entry-handle/README.md index 6e9c49fc..b4cd59fd 100644 --- a/simd-r-drive-entry-handle/README.md +++ b/simd-r-drive-entry-handle/README.md @@ -2,10 +2,37 @@ Standalone, storage agnostic*, `mmap`-friendly definitions of [EntryHandle](./src/entry_handle.rs) and [EntryMetadata](./src/entry_metadata.rs) compatible with [SIMD R Drive](https://crates.io/crates/simd-r-drive). -Use these types from other storage backends (e.g., in-memory stores, object storage, custom files) that need to read/write the same binary layout—without depending on the full `SIMD R Drive` crate or even a local filesystem. They support zero-copy via `mmap` when available, but don’t require it. +Use these types from other storage backends (e.g., in-memory stores, object storage, custom files) that need to read/write the same binary layout—without depending on the full `SIMD R Drive` crate or even a local filesystem. They support zero-copy via `mmap` when available, but don't require it. * Note: This crate has not been tested in WASM and is likely not yet compatible. +## Features + +### `arrow` +Enables zero-copy conversion to Apache Arrow `Buffer` types via `as_arrow_buffer()` and `into_arrow_buffer()`. + +### `bytes` +Enables zero-copy conversion to `bytes::Bytes` via `as_bytes()` and `into_bytes()`. Perfect for network protocols and async I/O. 
+
+### Usage Example
+
+```toml
+[dependencies]
+simd-r-drive-entry-handle = { version = "0.16.0-alpha", features = ["bytes"] }
+```
+
+```rust
+use simd_r_drive_entry_handle::EntryHandle;
+
+// Create an in-memory entry
+let data = b"Hello, zero-copy world!";
+let handle = EntryHandle::from_owned_bytes_anon(data, 12345)?;
+
+// Convert to bytes::Bytes without copying
+let bytes = handle.as_bytes();
+assert_eq!(&bytes[..], data);
+```
+
 ## License
 
 Licensed under the [Apache-2.0 License](./LICENSE).
diff --git a/simd-r-drive-entry-handle/src/entry_handle.rs b/simd-r-drive-entry-handle/src/entry_handle.rs
index 1fdec85c..10d2f5ca 100644
--- a/simd-r-drive-entry-handle/src/entry_handle.rs
+++ b/simd-r-drive-entry-handle/src/entry_handle.rs
@@ -449,3 +449,59 @@ impl AsRef<[u8]> for EntryHandle {
         self.as_slice()
     }
 }
+
+#[cfg(feature = "bytes")]
+impl EntryHandle {
+    /// Returns the payload as a `bytes::Bytes` without copying it.
+    ///
+    /// Feature: `bytes`
+    ///
+    /// The returned `Bytes` exposes the same memory as `self.as_slice()`. A clone
+    /// of this handle is moved into the `Bytes` as its owner, so whatever the
+    /// handle keeps alive stays alive for as long as the `Bytes` (or any clone
+    /// or sub-slice of it) exists.
+    ///
+    /// The payload is never copied; the only work is cloning the handle plus
+    /// whatever bookkeeping `Bytes::from_owner` performs internally.
+    ///
+    /// # Panics
+    ///
+    /// Same conditions as [`into_bytes`](Self::into_bytes).
+    pub fn as_bytes(&self) -> bytes::Bytes {
+        self.clone().into_bytes()
+    }
+
+    /// Converts this handle into a `bytes::Bytes` without copying the payload.
+    ///
+    /// Feature: `bytes`
+    ///
+    /// Like [`as_bytes`](Self::as_bytes), but consumes `self` and therefore
+    /// skips the handle clone.
+    ///
+    /// The handle itself is handed to `Bytes::from_owner`: it already
+    /// implements `AsRef<[u8]>`, so no wrapper type, extra `Arc`, or `unsafe`
+    /// code is needed.
+    ///
+    /// # Panics
+    ///
+    /// Test and debug builds run the crate's alignment assertions on the
+    /// payload pointer and file offset, which are expected to panic if the
+    /// payload is misaligned. Builds with neither cfg enabled skip them.
+    pub fn into_bytes(self) -> bytes::Bytes {
+        #[cfg(any(test, debug_assertions))]
+        {
+            use crate::{
+                constants::PAYLOAD_ALIGNMENT, debug_assert_aligned, debug_assert_aligned_offset,
+            };
+            let slice = self.as_slice();
+            // Assert actual pointer alignment.
+            debug_assert_aligned(slice.as_ptr(), PAYLOAD_ALIGNMENT as usize);
+            // Assert derived file offset alignment.
+            debug_assert_aligned_offset(self.range.start as u64);
+        }
+
+        // `EntryHandle` is its own owner: `from_owner` keeps it (and whatever it
+        // holds) alive until the last `Bytes` referencing the payload is dropped.
+        bytes::Bytes::from_owner(self)
+    }
+}