
Commit 1df1632

Merge remote-tracking branch 'upstream/main' into feat/metrics

2 parents 38ad6a5 + 3bcd274

33 files changed (+1045, -429 lines)

CHANGELOG.md

Lines changed: 63 additions & 0 deletions
@@ -1,5 +1,68 @@
 # Changelog
 
+## [v0.18.0](https://github.com/delta-io/delta-kernel-rs/tree/v0.18.0/) (2025-11-19)
+
+[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.17.1...v0.18.0)
+
+### 🏗️ Breaking changes
+1. New Engine StorageHandler head API ([#1465])
+   - Engine API implementers must add the `head` API to StorageHandler which fetches metadata about a file in storage
+2. Add remove_files API ([#1353])
+   - The schema for scan rows (from `Scan::scan_metadata`) has been updated to include two
+     new fields: `fileConstantValues.tags` and `fileConstantValues.defaultRowCommitVersion`.
+
+### 🚀 Features / new APIs
+
+1. Add parser for iceberg compat properties ([#1466])
+2. Pass ColumnMappingMode to physical_name ([#1403])
+3. Allow visiting entire domain metadata ([#1384])
+4. Add Table Feature Info ([#1462])
+5. *(FFI)* Snapshot log tail FFI ([#1379])
+6. Add generic is_feature_supported and is_feature_enabled methods to TableConfiguration ([#1405])
+7. Un-deprecate ArrayData.array_elements() ([#1493])
+8. Allow writes to CDF tables for add-only, remove-only, and non-data-change transactions ([#1490])
+9. *(catalog-managed)* UCCommitter ([#1418])
+
+### 🐛 Bug Fixes
+
+1. Eliminate endless busy looping in read_json_files on failed read ([#1489])
+2. Handle array/map types in ffi schema example and test ([#1497])
+
+### 📚 Documentation
+
+1. Fix docs for rustc 1.92+ ([#1470])
+
+### 🚜 Refactor
+
+1. Harmonize checkpoint and log compaction iterators ([#1436])
+2. Avoid overly complex itertools methods in log listing code ([#1434])
+3. Simplify creation of default engine in tests ([#1437])
+
+### 🧪 Testing
+
+1. Add tests for StructField.physical_name ([#1469])
+
+[#1466]: https://github.com/delta-io/delta-kernel-rs/pull/1466
+[#1403]: https://github.com/delta-io/delta-kernel-rs/pull/1403
+[#1465]: https://github.com/delta-io/delta-kernel-rs/pull/1465
+[#1436]: https://github.com/delta-io/delta-kernel-rs/pull/1436
+[#1470]: https://github.com/delta-io/delta-kernel-rs/pull/1470
+[#1384]: https://github.com/delta-io/delta-kernel-rs/pull/1384
+[#1462]: https://github.com/delta-io/delta-kernel-rs/pull/1462
+[#1474]: https://github.com/delta-io/delta-kernel-rs/pull/1474
+[#1379]: https://github.com/delta-io/delta-kernel-rs/pull/1379
+[#1434]: https://github.com/delta-io/delta-kernel-rs/pull/1434
+[#1437]: https://github.com/delta-io/delta-kernel-rs/pull/1437
+[#1353]: https://github.com/delta-io/delta-kernel-rs/pull/1353
+[#1489]: https://github.com/delta-io/delta-kernel-rs/pull/1489
+[#1405]: https://github.com/delta-io/delta-kernel-rs/pull/1405
+[#1469]: https://github.com/delta-io/delta-kernel-rs/pull/1469
+[#1493]: https://github.com/delta-io/delta-kernel-rs/pull/1493
+[#1497]: https://github.com/delta-io/delta-kernel-rs/pull/1497
+[#1490]: https://github.com/delta-io/delta-kernel-rs/pull/1490
+[#1418]: https://github.com/delta-io/delta-kernel-rs/pull/1418
+
+
 ## [v0.17.1](https://github.com/delta-io/delta-kernel-rs/tree/v0.17.1/) (2025-11-13)
 
 [Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.17.0...v0.17.1)
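For context on breaking change 1 in the v0.18.0 notes above: engines that implement their own `Engine` now have to supply a stat-like `head` call on their StorageHandler that fetches metadata about a file in storage. The exact trait signature is defined in [#1465] and is not reproduced here; the sketch below is only illustrative, assumes a local-filesystem engine, and uses a stand-in `FileMetadata` struct rather than the kernel's own type.

```rust
use std::time::UNIX_EPOCH;
use url::Url;

// Hypothetical stand-in for the metadata a `head`-style call returns; the real
// type and trait method are defined by delta-kernel in #1465, not here.
struct FileMetadata {
    location: Url,
    last_modified_ms: i64,
    size_bytes: u64,
}

// Sketch of a local-filesystem `head`: resolve the URL to a path and stat it.
fn head(location: &Url) -> std::io::Result<FileMetadata> {
    let path = location
        .to_file_path()
        .map_err(|_| std::io::Error::other(format!("not a file URL: {location}")))?;
    let meta = std::fs::metadata(&path)?;
    let last_modified_ms = meta
        .modified()?
        .duration_since(UNIX_EPOCH)
        .map(|d| d.as_millis() as i64)
        .unwrap_or(0);
    Ok(FileMetadata {
        location: location.clone(),
        last_modified_ms,
        size_bytes: meta.len(),
    })
}
```

An object-store-backed engine would typically map the same call onto its store's own head/stat operation instead of `std::fs`.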

Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -27,4 +27,4 @@ license = "Apache-2.0"
 repository = "https://github.com/delta-io/delta-kernel-rs"
 readme = "README.md"
 rust-version = "1.85"
-version = "0.17.1"
+version = "0.18.0"

README.md

Lines changed: 2 additions & 2 deletions
@@ -52,10 +52,10 @@ consumer's own `Engine` trait, the kernel has a feature flag to enable a default
 ```toml
 # fewer dependencies, requires consumer to implement Engine trait.
 # allows consumers to implement their own in-memory format
-delta_kernel = "0.17.1"
+delta_kernel = "0.18.0"
 
 # or turn on the default engine, based on latest arrow
-delta_kernel = { version = "0.17.1", features = ["default-engine", "arrow"] }
+delta_kernel = { version = "0.18.0", features = ["default-engine", "arrow"] }
 ```
 
 ### Feature flags

acceptance/src/data.rs

Lines changed: 3 additions & 6 deletions
@@ -4,11 +4,12 @@ use delta_kernel::arrow::array::{Array, RecordBatch};
 use delta_kernel::arrow::compute::{concat_batches, lexsort_to_indices, take, SortColumn};
 use delta_kernel::arrow::datatypes::{DataType, Schema};
 
+use delta_kernel::engine::arrow_data::EngineDataArrowExt as _;
 use delta_kernel::parquet::arrow::async_reader::{
     ParquetObjectReader, ParquetRecordBatchStreamBuilder,
 };
 use delta_kernel::snapshot::Snapshot;
-use delta_kernel::{engine::arrow_data::ArrowEngineData, DeltaResult, Engine, Error};
+use delta_kernel::{DeltaResult, Engine, Error};
 use futures::{stream::TryStreamExt, StreamExt};
 use itertools::Itertools;
 use object_store::{local::LocalFileSystem, ObjectStore};
@@ -119,11 +120,7 @@ pub async fn assert_scan_metadata(
     let batches: Vec<RecordBatch> = scan
         .execute(engine)?
         .map(|data| -> DeltaResult<_> {
-            let record_batch: RecordBatch = data?
-                .into_any()
-                .downcast::<ArrowEngineData>()
-                .unwrap()
-                .into();
+            let record_batch = data?.try_into_record_batch()?;
             if schema.is_none() {
                 schema = Some(record_batch.schema());
             }
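The change above is one instance of a pattern repeated across this commit: the hand-rolled downcast from `Box<dyn EngineData>` to `ArrowEngineData` is replaced by the new `EngineDataArrowExt::try_into_record_batch` helper. A before/after sketch, assuming (as the call sites in this diff suggest) that the trait method is callable on `Box<dyn EngineData>` and returns `DeltaResult<RecordBatch>`:

```rust
use delta_kernel::arrow::array::RecordBatch;
use delta_kernel::engine::arrow_data::{ArrowEngineData, EngineDataArrowExt as _};
use delta_kernel::{DeltaResult, EngineData};

// Old pattern (removed throughout this commit): manually downcast the erased
// EngineData to ArrowEngineData, then convert it into a RecordBatch.
fn to_record_batch_old(data: Box<dyn EngineData>) -> DeltaResult<RecordBatch> {
    Ok(data
        .into_any()
        .downcast::<ArrowEngineData>()
        .map_err(|_| delta_kernel::Error::EngineDataType("ArrowEngineData".to_string()))?
        .into())
}

// New pattern: the extension trait wraps the same downcast-and-convert
// in a single fallible call.
fn to_record_batch_new(data: Box<dyn EngineData>) -> DeltaResult<RecordBatch> {
    data.try_into_record_batch()
}
```

The extension-trait form also composes directly with iterator adapters, e.g. `.map(EngineDataArrowExt::try_into_record_batch).try_collect()` as in the ffi and example changes further down.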

ffi/Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ url = "2"
 delta_kernel = { path = "../kernel", default-features = false, features = [
     "internal-api",
 ] }
-delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.17.1" }
+delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.18.0" }
 
 [build-dependencies]
 cbindgen = "0.29.2"

ffi/src/engine_data.rs

Lines changed: 2 additions & 7 deletions
@@ -1,5 +1,4 @@
 //! EngineData related ffi code
-
 #[cfg(feature = "default-engine-base")]
 use delta_kernel::arrow;
 #[cfg(feature = "default-engine-base")]
@@ -8,7 +7,7 @@ use delta_kernel::arrow::array::{
     ArrayData, RecordBatch, StructArray,
 };
 #[cfg(feature = "default-engine-base")]
-use delta_kernel::engine::arrow_data::ArrowEngineData;
+use delta_kernel::engine::arrow_data::{ArrowEngineData, EngineDataArrowExt as _};
 #[cfg(feature = "default-engine-base")]
 use delta_kernel::DeltaResult;
 use delta_kernel::EngineData;
@@ -96,11 +95,7 @@ pub unsafe extern "C" fn get_raw_arrow_data(
 // TODO: This method leaks the returned pointer memory. How will the engine free it?
 #[cfg(feature = "default-engine-base")]
 fn get_raw_arrow_data_impl(data: Box<dyn EngineData>) -> DeltaResult<*mut ArrowFFIData> {
-    let record_batch: delta_kernel::arrow::array::RecordBatch = data
-        .into_any()
-        .downcast::<ArrowEngineData>()
-        .map_err(|_| delta_kernel::Error::EngineDataType("ArrowEngineData".to_string()))?
-        .into();
+    let record_batch = data.try_into_record_batch()?;
     let sa: StructArray = record_batch.into();
     let array_data: ArrayData = sa.into();
     // these call `clone`. is there a way to not copy anything and what exactly are they cloning?

ffi/src/table_changes.rs

Lines changed: 7 additions & 10 deletions
@@ -3,9 +3,9 @@
 use std::sync::Arc;
 use std::sync::Mutex;
 
-use delta_kernel::arrow::array::{Array, ArrayData, RecordBatch, StructArray};
+use delta_kernel::arrow::array::{Array, ArrayData, StructArray};
 use delta_kernel::arrow::ffi::to_ffi;
-use delta_kernel::engine::arrow_data::ArrowEngineData;
+use delta_kernel::engine::arrow_data::EngineDataArrowExt;
 use delta_kernel::table_changes::scan::TableChangesScan;
 use delta_kernel::table_changes::TableChanges;
 use delta_kernel::EngineData;
@@ -319,11 +319,7 @@ fn scan_table_changes_next_impl(data: &ScanTableChangesIterator) -> DeltaResult<
         return Ok(ArrowFFIData::empty());
     };
 
-    let record_batch: RecordBatch = data
-        .into_any()
-        .downcast::<ArrowEngineData>()
-        .map_err(|_| delta_kernel::Error::EngineDataType("ArrowEngineData".to_string()))?
-        .into();
+    let record_batch = data.try_into_record_batch()?;
 
     let batch_struct_array: StructArray = record_batch.into();
     let array_data: ArrayData = batch_struct_array.into_data();
@@ -346,6 +342,7 @@ mod tests {
     use delta_kernel::arrow::record_batch::RecordBatch;
     use delta_kernel::arrow::util::pretty::pretty_format_batches;
     use delta_kernel::engine::arrow_conversion::TryIntoArrow as _;
+    use delta_kernel::engine::arrow_data::ArrowEngineData;
     use delta_kernel::engine::default::DefaultEngine;
     use delta_kernel::schema::{DataType, StructField, StructType};
     use delta_kernel::Engine;
@@ -355,7 +352,7 @@ mod tests {
     use std::sync::Arc;
     use test_utils::{
         actions_to_string_with_metadata, add_commit, generate_batch, record_batch_to_bytes,
-        to_arrow, IntoArray as _, TestAction,
+        IntoArray as _, TestAction,
     };
 
     const PARQUET_FILE1: &str =
@@ -480,7 +477,7 @@
     ) -> DeltaResult<Vec<RecordBatch>> {
         let scan_results = scan.execute(engine)?;
         scan_results
-            .map(|data| -> DeltaResult<_> { to_arrow(data?) })
+            .map(EngineDataArrowExt::try_into_record_batch)
            .try_collect()
     }
 
@@ -699,7 +696,7 @@ mod tests {
            }
            let engine_data =
                ok_or_panic(unsafe { get_engine_data(data.array, &data.schema, allocate_err) });
-           let record_batch = unsafe { to_arrow(engine_data.into_inner()) }?;
+           let record_batch = unsafe { engine_data.into_inner().try_into_record_batch() }?;
 
            println!("Batch ({i}) num rows {:?}", record_batch.num_rows());
            batches.push(record_batch);
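Once a `RecordBatch` is recovered, both FFI call sites in this commit hand it across the boundary the same way: RecordBatch → StructArray → ArrayData, then arrow's `to_ffi`. A minimal sketch of that chain is below; the kernel's `ArrowFFIData` wrapper is omitted, and the `ArrowError` path is assumed to be reachable through the kernel's arrow re-export.

```rust
use delta_kernel::arrow::array::{Array, ArrayData, RecordBatch, StructArray};
use delta_kernel::arrow::error::ArrowError;
use delta_kernel::arrow::ffi::{to_ffi, FFI_ArrowArray, FFI_ArrowSchema};

// Mirrors the conversion steps in get_raw_arrow_data_impl and
// scan_table_changes_next_impl: view the batch as a single StructArray,
// take its ArrayData, and export it via the Arrow C Data Interface.
fn record_batch_to_c_data(
    batch: RecordBatch,
) -> Result<(FFI_ArrowArray, FFI_ArrowSchema), ArrowError> {
    let struct_array: StructArray = batch.into();
    let array_data: ArrayData = struct_array.into_data();
    // to_ffi produces the FFI array plus its schema, ready to hand to a caller.
    to_ffi(&array_data)
}
```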

kernel/Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@ pre-release-hook = [
 ]
 
 [dependencies]
-delta_kernel_derive = { path = "../derive-macros", version = "0.17.1" }
+delta_kernel_derive = { path = "../derive-macros", version = "0.18.0" }
 bytes = "1.10"
 chrono = "0.4.41"
 crc = "3.2.2"

kernel/examples/read-table-changes/src/main.rs

Lines changed: 2 additions & 9 deletions
@@ -4,7 +4,7 @@ use clap::Parser;
 use common::{LocationArgs, ParseWithExamples};
 use delta_kernel::arrow::array::RecordBatch;
 use delta_kernel::arrow::util::pretty::print_batches;
-use delta_kernel::engine::arrow_data::ArrowEngineData;
+use delta_kernel::engine::arrow_data::EngineDataArrowExt;
 use delta_kernel::table_changes::TableChanges;
 use delta_kernel::DeltaResult;
 use itertools::Itertools;
@@ -38,14 +38,7 @@ fn main() -> DeltaResult<()> {
     let table_changes_scan = table_changes.into_scan_builder().build()?;
     let batches: Vec<RecordBatch> = table_changes_scan
         .execute(Arc::new(engine))?
-        .map(|data| -> DeltaResult<_> {
-            let record_batch: RecordBatch = data?
-                .into_any()
-                .downcast::<ArrowEngineData>()
-                .map_err(|_| delta_kernel::Error::EngineDataType("ArrowEngineData".to_string()))?
-                .into();
-            Ok(record_batch)
-        })
+        .map(EngineDataArrowExt::try_into_record_batch)
         .try_collect()?;
     print_batches(&batches)?;
     Ok(())

kernel/examples/read-table-multi-threaded/src/main.rs

Lines changed: 3 additions & 12 deletions
@@ -9,10 +9,10 @@ use arrow::record_batch::RecordBatch;
 use arrow::util::pretty::print_batches;
 use common::{LocationArgs, ParseWithExamples, ScanArgs};
 use delta_kernel::actions::deletion_vector::split_vector;
-use delta_kernel::engine::arrow_data::ArrowEngineData;
+use delta_kernel::engine::arrow_data::EngineDataArrowExt as _;
 use delta_kernel::scan::state::{transform_to_logical, DvInfo, Stats};
 use delta_kernel::schema::SchemaRef;
-use delta_kernel::{DeltaResult, Engine, EngineData, ExpressionRef, FileMeta, Snapshot};
+use delta_kernel::{DeltaResult, Engine, ExpressionRef, FileMeta, Snapshot};
 
 use clap::Parser;
 use url::Url;
@@ -59,15 +59,6 @@ struct ScanFile {
     dv_info: DvInfo,
 }
 
-// we know we're using arrow under the hood, so cast an EngineData into something we can work with
-fn to_arrow(data: Box<dyn EngineData>) -> DeltaResult<RecordBatch> {
-    Ok(data
-        .into_any()
-        .downcast::<ArrowEngineData>()
-        .map_err(|_| delta_kernel::Error::EngineDataType("ArrowEngineData".to_string()))?
-        .into())
-}
-
 // This is the callback that will be called for each valid scan row
 fn send_scan_file(
     scan_tx: &mut spmc::Sender<ScanFile>,
@@ -231,7 +222,7 @@ fn do_work(
         )
         .unwrap();
 
-        let record_batch = to_arrow(logical).unwrap();
+        let record_batch = logical.try_into_record_batch().unwrap();
 
         // need to split the dv_mask. what's left in dv_mask covers this result, and rest
         // will cover the following results
