diff --git a/Cargo.lock b/Cargo.lock
index 4c82df79ff..816ee96782 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3773,6 +3773,7 @@ dependencies = [
  "enum-ordinalize",
  "env_logger",
  "iceberg",
+ "iceberg-catalog-loader",
  "iceberg-datafusion",
  "indicatif",
  "libtest-mimic",
diff --git a/Cargo.toml b/Cargo.toml
index 7ca365dce2..5d909ad4ab 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -79,6 +79,7 @@ hive_metastore = "0.2.0"
 home = "=0.5.11"
 http = "1.2"
 iceberg = { version = "0.7.0", path = "./crates/iceberg" }
+iceberg-catalog-loader = { version = "0.7.0", path = "./crates/catalog/loader" }
 iceberg-catalog-glue = { version = "0.7.0", path = "./crates/catalog/glue" }
 iceberg-catalog-hms = { version = "0.7.0", path = "./crates/catalog/hms" }
 iceberg-catalog-sql = { version = "0.7.0", path = "./crates/catalog/sql" }
diff --git a/crates/catalog/glue/src/error.rs b/crates/catalog/glue/src/error.rs
index c936ff4edd..2eadf9a5b6 100644
--- a/crates/catalog/glue/src/error.rs
+++ b/crates/catalog/glue/src/error.rs
@@ -22,7 +22,9 @@ use iceberg::{Error, ErrorKind};
 
 /// Format AWS SDK error into iceberg error
 pub(crate) fn from_aws_sdk_error<T>(error: aws_sdk_glue::error::SdkError<T>) -> Error
-where T: Debug {
+where
+    T: Debug,
+{
     Error::new(
         ErrorKind::Unexpected,
         "Operation failed for hitting aws sdk error".to_string(),
diff --git a/crates/catalog/hms/tests/hms_catalog_test.rs b/crates/catalog/hms/tests/hms_catalog_test.rs
index 9793b7f738..702000077e 100644
--- a/crates/catalog/hms/tests/hms_catalog_test.rs
+++ b/crates/catalog/hms/tests/hms_catalog_test.rs
@@ -269,10 +269,10 @@ async fn test_list_tables() -> Result<()> {
     catalog.create_table(ns.name(), creation).await?;
     let result = catalog.list_tables(ns.name()).await?;
 
-    assert_eq!(result, vec![TableIdent::new(
-        ns.name().clone(),
-        "my_table".to_string()
-    )]);
+    assert_eq!(
+        result,
+        vec![TableIdent::new(ns.name().clone(), "my_table".to_string())]
+    );
 
     Ok(())
 }
diff --git a/crates/catalog/loader/src/lib.rs b/crates/catalog/loader/src/lib.rs
index e118ef86a9..c0956119e7 100644
--- a/crates/catalog/loader/src/lib.rs
+++ b/crates/catalog/loader/src/lib.rs
@@ -19,6 +19,7 @@ use std::collections::HashMap;
 use std::sync::Arc;
 
 use async_trait::async_trait;
+use iceberg::memory::MemoryCatalogBuilder;
 use iceberg::{Catalog, CatalogBuilder, Error, ErrorKind, Result};
 use iceberg_catalog_glue::GlueCatalogBuilder;
 use iceberg_catalog_hms::HmsCatalogBuilder;
@@ -31,6 +32,7 @@ type CatalogBuilderFactory = fn() -> Box<dyn CatalogBuilder>;
 
 /// A registry of catalog builders.
 static CATALOG_REGISTRY: &[(&str, CatalogBuilderFactory)] = &[
+    ("memory", || Box::new(MemoryCatalogBuilder::default())),
     ("rest", || Box::new(RestCatalogBuilder::default())),
     ("glue", || Box::new(GlueCatalogBuilder::default())),
     ("s3tables", || Box::new(S3TablesCatalogBuilder::default())),
diff --git a/crates/catalog/s3tables/src/catalog.rs b/crates/catalog/s3tables/src/catalog.rs
index 3606fac99a..7a50939e1d 100644
--- a/crates/catalog/s3tables/src/catalog.rs
+++ b/crates/catalog/s3tables/src/catalog.rs
@@ -646,7 +646,9 @@ impl Catalog for S3TablesCatalog {
 
 /// Format AWS SDK error into iceberg error
 pub(crate) fn from_aws_sdk_error<T>(error: aws_sdk_s3tables::error::SdkError<T>) -> Error
-where T: std::fmt::Debug {
+where
+    T: std::fmt::Debug,
+{
     Error::new(
         ErrorKind::Unexpected,
         format!("Operation failed for hitting aws sdk error: {error:?}"),
diff --git a/crates/catalog/sql/src/catalog.rs b/crates/catalog/sql/src/catalog.rs
index 77b35a228f..bc1b70fa5c 100644
--- a/crates/catalog/sql/src/catalog.rs
+++ b/crates/catalog/sql/src/catalog.rs
@@ -362,10 +362,10 @@ impl Catalog for SqlCatalog {
         );
 
         let namespace_rows = self
-            .fetch_rows(&all_namespaces_stmt, vec![
-                Some(&self.name),
-                Some(&self.name),
-            ])
+            .fetch_rows(
+                &all_namespaces_stmt,
+                vec![Some(&self.name), Some(&self.name)],
+            )
             .await?;
 
         let mut namespaces = HashSet::<NamespaceIdent>::with_capacity(namespace_rows.len());
@@ -1299,11 +1299,10 @@ mod tests {
         let namespace_ident_1 = NamespaceIdent::new("a".into());
         let namespace_ident_2 = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
         let namespace_ident_3 = NamespaceIdent::new("b".into());
-        create_namespaces(&catalog, &vec![
-            &namespace_ident_1,
-            &namespace_ident_2,
-            &namespace_ident_3,
-        ])
+        create_namespaces(
+            &catalog,
+            &vec![&namespace_ident_1, &namespace_ident_2, &namespace_ident_3],
+        )
         .await;
 
         assert_eq!(
@@ -1336,11 +1335,10 @@ mod tests {
         let namespace_ident_1 = NamespaceIdent::new("a".into());
         let namespace_ident_2 = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
         let namespace_ident_3 = NamespaceIdent::new("c".into());
-        create_namespaces(&catalog, &vec![
-            &namespace_ident_1,
-            &namespace_ident_2,
-            &namespace_ident_3,
-        ])
+        create_namespaces(
+            &catalog,
+            &vec![&namespace_ident_1, &namespace_ident_2, &namespace_ident_3],
+        )
         .await;
 
         assert_eq!(
@@ -1366,13 +1364,16 @@ mod tests {
         let namespace_ident_3 = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
         let namespace_ident_4 = NamespaceIdent::from_strs(vec!["a", "c"]).unwrap();
         let namespace_ident_5 = NamespaceIdent::new("b".into());
-        create_namespaces(&catalog, &vec![
-            &namespace_ident_1,
-            &namespace_ident_2,
-            &namespace_ident_3,
-            &namespace_ident_4,
-            &namespace_ident_5,
-        ])
+        create_namespaces(
+            &catalog,
+            &vec![
+                &namespace_ident_1,
+                &namespace_ident_2,
+                &namespace_ident_3,
+                &namespace_ident_4,
+                &namespace_ident_5,
+            ],
+        )
         .await;
 
         assert_eq!(
@@ -1673,11 +1674,14 @@ mod tests {
         let namespace_ident_a = NamespaceIdent::new("a".into());
         let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap();
         let namespace_ident_a_b_c = NamespaceIdent::from_strs(vec!["a", "b", "c"]).unwrap();
-        create_namespaces(&catalog, &vec![
-            &namespace_ident_a,
-            &namespace_ident_a_b,
-            &namespace_ident_a_b_c,
-        ])
+        create_namespaces(
+            &catalog,
+            &vec![
+                &namespace_ident_a,
+                &namespace_ident_a_b,
+                &namespace_ident_a_b_c,
+            ],
+        )
         .await;
 
         catalog
@@ -2039,9 +2043,10 @@ mod tests {
             .await
             .unwrap();
 
-        assert_eq!(catalog.list_tables(&namespace_ident).await.unwrap(), vec![
-            dst_table_ident
-        ],);
+
assert_eq!( + catalog.list_tables(&namespace_ident).await.unwrap(), + vec![dst_table_ident], + ); } #[tokio::test] @@ -2085,9 +2090,10 @@ mod tests { .await .unwrap(); - assert_eq!(catalog.list_tables(&namespace_ident).await.unwrap(), vec![ - table_ident - ],); + assert_eq!( + catalog.list_tables(&namespace_ident).await.unwrap(), + vec![table_ident], + ); } #[tokio::test] @@ -2097,11 +2103,14 @@ mod tests { let namespace_ident_a = NamespaceIdent::new("a".into()); let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap(); let namespace_ident_a_b_c = NamespaceIdent::from_strs(vec!["a", "b", "c"]).unwrap(); - create_namespaces(&catalog, &vec![ - &namespace_ident_a, - &namespace_ident_a_b, - &namespace_ident_a_b_c, - ]) + create_namespaces( + &catalog, + &vec![ + &namespace_ident_a, + &namespace_ident_a_b, + &namespace_ident_a_b_c, + ], + ) .await; let src_table_ident = TableIdent::new(namespace_ident_a_b_c.clone(), "tbl1".into()); diff --git a/crates/iceberg/src/arrow/caching_delete_file_loader.rs b/crates/iceberg/src/arrow/caching_delete_file_loader.rs index 192ca390a8..7b874414ee 100644 --- a/crates/iceberg/src/arrow/caching_delete_file_loader.rs +++ b/crates/iceberg/src/arrow/caching_delete_file_loader.rs @@ -640,9 +640,10 @@ mod tests { Arc::new(arrow_schema::Schema::new(fields)) }; - let equality_deletes_to_write = RecordBatch::try_new(equality_delete_schema.clone(), vec![ - col_y, col_z, col_a, col_s, col_b, - ]) + let equality_deletes_to_write = RecordBatch::try_new( + equality_delete_schema.clone(), + vec![col_y, col_z, col_a, col_s, col_b], + ) .unwrap(); let path = format!("{}/equality-deletes-1.parquet", &table_location); @@ -839,12 +840,11 @@ mod tests { let file_path_col = Arc::new(StringArray::from_iter_values(&file_path_values)); let pos_col = Arc::new(Int64Array::from_iter_values(vec![0i64, 1, 2, 3])); - let positional_deletes_to_write = - RecordBatch::try_new(positional_delete_schema.clone(), vec![ - file_path_col, - pos_col, - ]) - .unwrap(); + let positional_deletes_to_write = RecordBatch::try_new( + positional_delete_schema.clone(), + vec![file_path_col, pos_col], + ) + .unwrap(); let props = WriterProperties::builder() .set_compression(Compression::SNAPPY) diff --git a/crates/iceberg/src/arrow/delete_filter.rs b/crates/iceberg/src/arrow/delete_filter.rs index 14b5124ee6..d24fad1956 100644 --- a/crates/iceberg/src/arrow/delete_filter.rs +++ b/crates/iceberg/src/arrow/delete_filter.rs @@ -281,12 +281,11 @@ pub(crate) mod tests { let pos_vals = pos_values.get(n - 1).unwrap(); let pos_col = Arc::new(Int64Array::from_iter_values(pos_vals.clone())); - let positional_deletes_to_write = - RecordBatch::try_new(positional_delete_schema.clone(), vec![ - file_path_col.clone(), - pos_col.clone(), - ]) - .unwrap(); + let positional_deletes_to_write = RecordBatch::try_new( + positional_delete_schema.clone(), + vec![file_path_col.clone(), pos_col.clone()], + ) + .unwrap(); let file = File::create(format!( "{}/pos-del-{}.parquet", diff --git a/crates/iceberg/src/arrow/partition_value_calculator.rs b/crates/iceberg/src/arrow/partition_value_calculator.rs index 3520f75ac5..9ed4d80f92 100644 --- a/crates/iceberg/src/arrow/partition_value_calculator.rs +++ b/crates/iceberg/src/arrow/partition_value_calculator.rs @@ -206,10 +206,13 @@ mod tests { Field::new("name", DataType::Utf8, false), ])); - let batch = RecordBatch::try_new(arrow_schema, vec![ - Arc::new(Int32Array::from(vec![10, 20, 30])), - Arc::new(StringArray::from(vec!["a", "b", "c"])), - ]) + let batch = 
RecordBatch::try_new( + arrow_schema, + vec![ + Arc::new(Int32Array::from(vec![10, 20, 30])), + Arc::new(StringArray::from(vec!["a", "b", "c"])), + ], + ) .unwrap(); // Calculate partition values diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs index ab5a96f751..1df6c53acb 100644 --- a/crates/iceberg/src/arrow/reader.rs +++ b/crates/iceberg/src/arrow/reader.rs @@ -1975,27 +1975,30 @@ message schema { async fn test_predicate_cast_literal() { let predicates = vec![ // a == 'foo' - (Reference::new("a").equal_to(Datum::string("foo")), vec![ - Some("foo".to_string()), - ]), + ( + Reference::new("a").equal_to(Datum::string("foo")), + vec![Some("foo".to_string())], + ), // a != 'foo' ( Reference::new("a").not_equal_to(Datum::string("foo")), vec![Some("bar".to_string())], ), // STARTS_WITH(a, 'foo') - (Reference::new("a").starts_with(Datum::string("f")), vec![ - Some("foo".to_string()), - ]), + ( + Reference::new("a").starts_with(Datum::string("f")), + vec![Some("foo".to_string())], + ), // NOT STARTS_WITH(a, 'foo') ( Reference::new("a").not_starts_with(Datum::string("f")), vec![Some("bar".to_string())], ), // a < 'foo' - (Reference::new("a").less_than(Datum::string("foo")), vec![ - Some("bar".to_string()), - ]), + ( + Reference::new("a").less_than(Datum::string("foo")), + vec![Some("bar".to_string())], + ), // a <= 'foo' ( Reference::new("a").less_than_or_equal_to(Datum::string("foo")), @@ -2306,17 +2309,20 @@ message schema { let file_path = format!("{}/multi_row_group.parquet", &table_location); // Force each batch into its own row group for testing byte range filtering. - let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from( - (0..100).collect::>(), - ))]) + let batch1 = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from((0..100).collect::>()))], + ) .unwrap(); - let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from( - (100..200).collect::>(), - ))]) + let batch2 = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from((100..200).collect::>()))], + ) .unwrap(); - let batch3 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from( - (200..300).collect::>(), - ))]) + let batch3 = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from((200..300).collect::>()))], + ) .unwrap(); let props = WriterProperties::builder() @@ -2615,14 +2621,16 @@ message schema { // Row group 1: rows 100-199 (ids 101-200) let data_file_path = format!("{}/data.parquet", &table_location); - let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( - Int32Array::from_iter_values(1..=100), - )]) + let batch1 = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from_iter_values(1..=100))], + ) .unwrap(); - let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( - Int32Array::from_iter_values(101..=200), - )]) + let batch2 = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from_iter_values(101..=200))], + ) .unwrap(); // Force each batch into its own row group @@ -2661,10 +2669,13 @@ message schema { ])); // Delete row at position 199 (0-indexed, so it's the last row: id=200) - let delete_batch = RecordBatch::try_new(delete_schema.clone(), vec![ - Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])), - Arc::new(Int64Array::from_iter_values(vec![199i64])), - ]) + let delete_batch = RecordBatch::try_new( + delete_schema.clone(), + vec![ + 
Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])), + Arc::new(Int64Array::from_iter_values(vec![199i64])), + ], + ) .unwrap(); let delete_props = WriterProperties::builder() @@ -2809,14 +2820,16 @@ message schema { // Row group 1: rows 100-199 (ids 101-200) let data_file_path = format!("{}/data.parquet", &table_location); - let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( - Int32Array::from_iter_values(1..=100), - )]) + let batch1 = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from_iter_values(1..=100))], + ) .unwrap(); - let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( - Int32Array::from_iter_values(101..=200), - )]) + let batch2 = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from_iter_values(101..=200))], + ) .unwrap(); // Force each batch into its own row group @@ -2855,10 +2868,13 @@ message schema { ])); // Delete row at position 199 (0-indexed, so it's the last row: id=200) - let delete_batch = RecordBatch::try_new(delete_schema.clone(), vec![ - Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])), - Arc::new(Int64Array::from_iter_values(vec![199i64])), - ]) + let delete_batch = RecordBatch::try_new( + delete_schema.clone(), + vec![ + Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])), + Arc::new(Int64Array::from_iter_values(vec![199i64])), + ], + ) .unwrap(); let delete_props = WriterProperties::builder() @@ -3031,14 +3047,16 @@ message schema { // Row group 1: rows 100-199 (ids 101-200) let data_file_path = format!("{}/data.parquet", &table_location); - let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( - Int32Array::from_iter_values(1..=100), - )]) + let batch1 = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from_iter_values(1..=100))], + ) .unwrap(); - let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( - Int32Array::from_iter_values(101..=200), - )]) + let batch2 = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(Int32Array::from_iter_values(101..=200))], + ) .unwrap(); // Force each batch into its own row group @@ -3077,10 +3095,13 @@ message schema { ])); // Delete row at position 0 (0-indexed, so it's the first row: id=1) - let delete_batch = RecordBatch::try_new(delete_schema.clone(), vec![ - Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])), - Arc::new(Int64Array::from_iter_values(vec![0i64])), - ]) + let delete_batch = RecordBatch::try_new( + delete_schema.clone(), + vec![ + Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])), + Arc::new(Int64Array::from_iter_values(vec![0i64])), + ], + ) .unwrap(); let delete_props = WriterProperties::builder() @@ -3297,9 +3318,10 @@ message schema { let col3_data = Arc::new(StringArray::from(vec!["c", "d"])) as ArrayRef; let col4_data = Arc::new(Int32Array::from(vec![30, 40])) as ArrayRef; - let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![ - col1_data, col2_data, col3_data, col4_data, - ]) + let to_write = RecordBatch::try_new( + arrow_schema.clone(), + vec![col1_data, col2_data, col3_data, col4_data], + ) .unwrap(); let props = WriterProperties::builder() diff --git a/crates/iceberg/src/arrow/record_batch_partition_splitter.rs b/crates/iceberg/src/arrow/record_batch_partition_splitter.rs index 7b83621f2d..1ddd89e3c2 100644 --- a/crates/iceberg/src/arrow/record_batch_partition_splitter.rs +++ 
b/crates/iceberg/src/arrow/record_batch_partition_splitter.rs @@ -270,10 +270,10 @@ mod tests { let arrow_schema = Arc::new(schema_to_arrow_schema(&schema).unwrap()); let id_array = Int32Array::from(vec![1, 2, 1, 3, 2, 3, 1]); let data_array = StringArray::from(vec!["a", "b", "c", "d", "e", "f", "g"]); - let batch = RecordBatch::try_new(arrow_schema.clone(), vec![ - Arc::new(id_array), - Arc::new(data_array), - ]) + let batch = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(id_array), Arc::new(data_array)], + ) .expect("Failed to create RecordBatch"); let mut partitioned_batches = partition_splitter @@ -296,10 +296,10 @@ mod tests { // check the first partition let expected_id_array = Int32Array::from(vec![1, 1, 1]); let expected_data_array = StringArray::from(vec!["a", "c", "g"]); - let expected_batch = RecordBatch::try_new(arrow_schema.clone(), vec![ - Arc::new(expected_id_array), - Arc::new(expected_data_array), - ]) + let expected_batch = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(expected_id_array), Arc::new(expected_data_array)], + ) .expect("Failed to create expected RecordBatch"); assert_eq!(partitioned_batches[0].1, expected_batch); } @@ -307,10 +307,10 @@ mod tests { // check the second partition let expected_id_array = Int32Array::from(vec![2, 2]); let expected_data_array = StringArray::from(vec!["b", "e"]); - let expected_batch = RecordBatch::try_new(arrow_schema.clone(), vec![ - Arc::new(expected_id_array), - Arc::new(expected_data_array), - ]) + let expected_batch = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(expected_id_array), Arc::new(expected_data_array)], + ) .expect("Failed to create expected RecordBatch"); assert_eq!(partitioned_batches[1].1, expected_batch); } @@ -318,10 +318,10 @@ mod tests { // check the third partition let expected_id_array = Int32Array::from(vec![3, 3]); let expected_data_array = StringArray::from(vec!["d", "f"]); - let expected_batch = RecordBatch::try_new(arrow_schema.clone(), vec![ - Arc::new(expected_id_array), - Arc::new(expected_data_array), - ]) + let expected_batch = RecordBatch::try_new( + arrow_schema.clone(), + vec![Arc::new(expected_id_array), Arc::new(expected_data_array)], + ) .expect("Failed to create expected RecordBatch"); assert_eq!(partitioned_batches[2].1, expected_batch); } @@ -331,11 +331,14 @@ mod tests { .map(|(partition_key, _)| partition_key.data().clone()) .collect::>(); // check partition value is struct(1), struct(2), struct(3) - assert_eq!(partition_values, vec![ - Struct::from_iter(vec![Some(Literal::int(1))]), - Struct::from_iter(vec![Some(Literal::int(2))]), - Struct::from_iter(vec![Some(Literal::int(3))]), - ]); + assert_eq!( + partition_values, + vec![ + Struct::from_iter(vec![Some(Literal::int(1))]), + Struct::from_iter(vec![Some(Literal::int(2))]), + Struct::from_iter(vec![Some(Literal::int(3))]), + ] + ); } #[test] @@ -411,11 +414,14 @@ mod tests { Arc::new(partition_values) as ArrayRef, )]); - let batch = RecordBatch::try_new(input_schema.clone(), vec![ - Arc::new(id_array), - Arc::new(data_array), - Arc::new(partition_struct), - ]) + let batch = RecordBatch::try_new( + input_schema.clone(), + vec![ + Arc::new(id_array), + Arc::new(data_array), + Arc::new(partition_struct), + ], + ) .expect("Failed to create RecordBatch"); // Split using the pre-computed partition column diff --git a/crates/iceberg/src/arrow/record_batch_transformer.rs b/crates/iceberg/src/arrow/record_batch_transformer.rs index a20adb6a5a..56fa1917e9 100644 --- 
a/crates/iceberg/src/arrow/record_batch_transformer.rs +++ b/crates/iceberg/src/arrow/record_batch_transformer.rs @@ -719,14 +719,17 @@ mod test { simple_field("name", DataType::Utf8, true, "2"), ])); - let file_batch = RecordBatch::try_new(file_schema, vec![ - Arc::new(Int32Array::from(vec![1, 2, 3])), - Arc::new(StringArray::from(vec![ - Some("Alice"), - Some("Bob"), - Some("Charlie"), - ])), - ]) + let file_batch = RecordBatch::try_new( + file_schema, + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec![ + Some("Alice"), + Some("Bob"), + Some("Charlie"), + ])), + ], + ) .unwrap(); let result = transformer.process_record_batch(file_batch).unwrap(); @@ -800,10 +803,13 @@ mod test { simple_field("data", DataType::Utf8, false, "2"), ])); - let file_batch = RecordBatch::try_new(file_schema, vec![ - Arc::new(Int32Array::from(vec![1, 2, 3])), - Arc::new(StringArray::from(vec!["a", "b", "c"])), - ]) + let file_batch = RecordBatch::try_new( + file_schema, + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["a", "b", "c"])), + ], + ) .unwrap(); let result = transformer.process_record_batch(file_batch).unwrap(); @@ -874,27 +880,30 @@ mod test { } pub fn expected_record_batch_migration_required() -> RecordBatch { - RecordBatch::try_new(arrow_schema_already_same_as_target(), vec![ - Arc::new(StringArray::from(Vec::>::from([ - None, None, None, - ]))), // a - Arc::new(Int64Array::from(vec![Some(1001), Some(1002), Some(1003)])), // b - Arc::new(Float64Array::from(vec![ - Some(12.125), - Some(23.375), - Some(34.875), - ])), // c - Arc::new(StringArray::from(vec![ - Some("Apache"), - Some("Iceberg"), - Some("Rocks"), - ])), // e (d skipped by projection) - Arc::new(StringArray::from(vec![ - Some("(╯°□°)╯"), - Some("(╯°□°)╯"), - Some("(╯°□°)╯"), - ])), // f - ]) + RecordBatch::try_new( + arrow_schema_already_same_as_target(), + vec![ + Arc::new(StringArray::from(Vec::>::from([ + None, None, None, + ]))), // a + Arc::new(Int64Array::from(vec![Some(1001), Some(1002), Some(1003)])), // b + Arc::new(Float64Array::from(vec![ + Some(12.125), + Some(23.375), + Some(34.875), + ])), // c + Arc::new(StringArray::from(vec![ + Some("Apache"), + Some("Iceberg"), + Some("Rocks"), + ])), // e (d skipped by projection) + Arc::new(StringArray::from(vec![ + Some("(╯°□°)╯"), + Some("(╯°□°)╯"), + Some("(╯°□°)╯"), + ])), // f + ], + ) .unwrap() } @@ -1012,10 +1021,13 @@ mod test { RecordBatchTransformerBuilder::new(snapshot_schema, &projected_field_ids).build(); // Create a Parquet RecordBatch with data for: name="John Doe", subdept="communications" - let parquet_batch = RecordBatch::try_new(parquet_schema, vec![ - Arc::new(StringArray::from(vec!["John Doe"])), - Arc::new(StringArray::from(vec!["communications"])), - ]) + let parquet_batch = RecordBatch::try_new( + parquet_schema, + vec![ + Arc::new(StringArray::from(vec!["John Doe"])), + Arc::new(StringArray::from(vec!["communications"])), + ], + ) .unwrap(); let result = transformer.process_record_batch(parquet_batch).unwrap(); @@ -1141,10 +1153,13 @@ mod test { // Create a Parquet RecordBatch with actual data // The id column MUST be read from here, not treated as a constant - let parquet_batch = RecordBatch::try_new(parquet_schema, vec![ - Arc::new(Int32Array::from(vec![100, 200, 300])), - Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), - ]) + let parquet_batch = RecordBatch::try_new( + parquet_schema, + vec![ + Arc::new(Int32Array::from(vec![100, 200, 300])), + 
Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), + ], + ) .unwrap(); let result = transformer.process_record_batch(parquet_batch).unwrap(); @@ -1259,10 +1274,13 @@ mod test { .with_partition(partition_spec, partition_data) .build(); - let parquet_batch = RecordBatch::try_new(parquet_schema, vec![ - Arc::new(Int32Array::from(vec![100, 200])), - Arc::new(StringArray::from(vec!["Alice", "Bob"])), - ]) + let parquet_batch = RecordBatch::try_new( + parquet_schema, + vec![ + Arc::new(Int32Array::from(vec![100, 200])), + Arc::new(StringArray::from(vec!["Alice", "Bob"])), + ], + ) .unwrap(); let result = transformer.process_record_batch(parquet_batch).unwrap(); @@ -1376,10 +1394,13 @@ mod test { // Create a Parquet RecordBatch with actual data // Despite column rename, data should be read via field_id=1 - let parquet_batch = RecordBatch::try_new(parquet_schema, vec![ - Arc::new(Int32Array::from(vec![100, 200, 300])), - Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), - ]) + let parquet_batch = RecordBatch::try_new( + parquet_schema, + vec![ + Arc::new(Int32Array::from(vec![100, 200, 300])), + Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), + ], + ) .unwrap(); let result = transformer.process_record_batch(parquet_batch).unwrap(); @@ -1478,10 +1499,13 @@ mod test { .with_partition(partition_spec, partition_data) .build(); - let parquet_batch = RecordBatch::try_new(parquet_schema, vec![ - Arc::new(Int32Array::from(vec![100, 200])), - Arc::new(StringArray::from(vec!["value1", "value2"])), - ]) + let parquet_batch = RecordBatch::try_new( + parquet_schema, + vec![ + Arc::new(Int32Array::from(vec![100, 200])), + Arc::new(StringArray::from(vec!["value1", "value2"])), + ], + ) .unwrap(); let result = transformer.process_record_batch(parquet_batch).unwrap(); diff --git a/crates/iceberg/src/arrow/value.rs b/crates/iceberg/src/arrow/value.rs index f1cf225bb4..84786251ae 100644 --- a/crates/iceberg/src/arrow/value.rs +++ b/crates/iceberg/src/arrow/value.rs @@ -841,39 +841,42 @@ mod test { let result = arrow_struct_to_literal(&struct_array, &iceberg_struct_type).unwrap(); - assert_eq!(result, vec![ - Some(Literal::Struct(Struct::from_iter(vec![ - Some(Literal::bool(true)), - Some(Literal::int(3)), - Some(Literal::long(5)), - Some(Literal::float(1.1)), - Some(Literal::double(3.3)), - Some(Literal::decimal(1000)), - Some(Literal::date(18628)), - Some(Literal::time(123456789)), - Some(Literal::timestamp(1622548800000000)), - Some(Literal::timestamp_nano(1622548800000000000)), - Some(Literal::string("a".to_string())), - Some(Literal::binary(b"abc".to_vec())), - ]))), - Some(Literal::Struct(Struct::from_iter(vec![ - Some(Literal::bool(false)), - Some(Literal::int(4)), - Some(Literal::long(6)), - Some(Literal::float(2.2)), - Some(Literal::double(4.4)), - Some(Literal::decimal(2000)), - Some(Literal::date(18629)), - Some(Literal::time(987654321)), - Some(Literal::timestamp(1622635200000000)), - Some(Literal::timestamp_nano(1622635200000000000)), - Some(Literal::string("b".to_string())), - Some(Literal::binary(b"def".to_vec())), - ]))), - Some(Literal::Struct(Struct::from_iter(vec![ - None, None, None, None, None, None, None, None, None, None, None, None, - ]))), - ]); + assert_eq!( + result, + vec![ + Some(Literal::Struct(Struct::from_iter(vec![ + Some(Literal::bool(true)), + Some(Literal::int(3)), + Some(Literal::long(5)), + Some(Literal::float(1.1)), + Some(Literal::double(3.3)), + Some(Literal::decimal(1000)), + Some(Literal::date(18628)), + Some(Literal::time(123456789)), + 
Some(Literal::timestamp(1622548800000000)), + Some(Literal::timestamp_nano(1622548800000000000)), + Some(Literal::string("a".to_string())), + Some(Literal::binary(b"abc".to_vec())), + ]))), + Some(Literal::Struct(Struct::from_iter(vec![ + Some(Literal::bool(false)), + Some(Literal::int(4)), + Some(Literal::long(6)), + Some(Literal::float(2.2)), + Some(Literal::double(4.4)), + Some(Literal::decimal(2000)), + Some(Literal::date(18629)), + Some(Literal::time(987654321)), + Some(Literal::timestamp(1622635200000000)), + Some(Literal::timestamp_nano(1622635200000000000)), + Some(Literal::string("b".to_string())), + Some(Literal::binary(b"def".to_vec())), + ]))), + Some(Literal::Struct(Struct::from_iter(vec![ + None, None, None, None, None, None, None, None, None, None, None, None, + ]))), + ] + ); } #[test] @@ -945,14 +948,17 @@ mod test { ]); let result = arrow_struct_to_literal(&struct_array, &iceberg_struct_type).unwrap(); - assert_eq!(result, vec![ - Some(Literal::Struct(Struct::from_iter(vec![None, None,]))), - Some(Literal::Struct(Struct::from_iter(vec![ - Some(Literal::int(1)), + assert_eq!( + result, + vec![ + Some(Literal::Struct(Struct::from_iter(vec![None, None,]))), + Some(Literal::Struct(Struct::from_iter(vec![ + Some(Literal::int(1)), + None, + ]))), None, - ]))), - None, - ]); + ] + ); } #[test] @@ -1405,66 +1411,69 @@ mod test { }; let result = arrow_struct_to_literal(&struct_array, &struct_type).unwrap(); - assert_eq!(result, vec![ - Some(Literal::Struct(Struct::from_iter(vec![ - Some(Literal::List(vec![ - Some(Literal::Struct(Struct::from_iter(vec![ - Some(Literal::int(10)), - Some(Literal::int(20)), - ]))), - Some(Literal::Struct(Struct::from_iter(vec![ - Some(Literal::int(11)), - Some(Literal::int(21)), - ]))), - ])), - Some(Literal::List(vec![ - Some(Literal::Map(Map::from_iter(vec![ - (Literal::int(1), Some(Literal::int(100))), - (Literal::int(3), Some(Literal::int(300))), - ]))), - Some(Literal::Map(Map::from_iter(vec![( - Literal::int(2), - Some(Literal::int(200)) - ),]))), - ])), - Some(Literal::List(vec![ + assert_eq!( + result, + vec![ + Some(Literal::Struct(Struct::from_iter(vec![ Some(Literal::List(vec![ - Some(Literal::int(100)), - Some(Literal::int(101)), - Some(Literal::int(102)), + Some(Literal::Struct(Struct::from_iter(vec![ + Some(Literal::int(10)), + Some(Literal::int(20)), + ]))), + Some(Literal::Struct(Struct::from_iter(vec![ + Some(Literal::int(11)), + Some(Literal::int(21)), + ]))), ])), Some(Literal::List(vec![ - Some(Literal::int(200)), - Some(Literal::int(201)), + Some(Literal::Map(Map::from_iter(vec![ + (Literal::int(1), Some(Literal::int(100))), + (Literal::int(3), Some(Literal::int(300))), + ]))), + Some(Literal::Map(Map::from_iter(vec![( + Literal::int(2), + Some(Literal::int(200)) + ),]))), ])), - ])), - ]))), - Some(Literal::Struct(Struct::from_iter(vec![ - Some(Literal::List(vec![ - Some(Literal::Struct(Struct::from_iter(vec![ - Some(Literal::int(12)), - Some(Literal::int(22)), - ]))), - Some(Literal::Struct(Struct::from_iter(vec![ - Some(Literal::int(13)), - Some(Literal::int(23)), - ]))), - ])), - Some(Literal::List(vec![Some(Literal::Map(Map::from_iter( - vec![(Literal::int(3), Some(Literal::int(300))),] - ))),])), - Some(Literal::List(vec![ Some(Literal::List(vec![ - Some(Literal::int(300)), - Some(Literal::int(301)), - Some(Literal::int(302)), + Some(Literal::List(vec![ + Some(Literal::int(100)), + Some(Literal::int(101)), + Some(Literal::int(102)), + ])), + Some(Literal::List(vec![ + Some(Literal::int(200)), + Some(Literal::int(201)), + 
])), ])), + ]))), + Some(Literal::Struct(Struct::from_iter(vec![ Some(Literal::List(vec![ - Some(Literal::int(400)), - Some(Literal::int(401)), + Some(Literal::Struct(Struct::from_iter(vec![ + Some(Literal::int(12)), + Some(Literal::int(22)), + ]))), + Some(Literal::Struct(Struct::from_iter(vec![ + Some(Literal::int(13)), + Some(Literal::int(23)), + ]))), ])), - ])), - ]))), - ]); + Some(Literal::List(vec![Some(Literal::Map(Map::from_iter( + vec![(Literal::int(3), Some(Literal::int(300))),] + ))),])), + Some(Literal::List(vec![ + Some(Literal::List(vec![ + Some(Literal::int(300)), + Some(Literal::int(301)), + Some(Literal::int(302)), + ])), + Some(Literal::List(vec![ + Some(Literal::int(400)), + Some(Literal::int(401)), + ])), + ])), + ]))), + ] + ); } } diff --git a/crates/iceberg/src/catalog/memory/catalog.rs b/crates/iceberg/src/catalog/memory/catalog.rs index cfa3dc6b52..b83cdf2fe0 100644 --- a/crates/iceberg/src/catalog/memory/catalog.rs +++ b/crates/iceberg/src/catalog/memory/catalog.rs @@ -523,9 +523,10 @@ pub(crate) mod tests { let namespace_ident = NamespaceIdent::new("abc".into()); create_namespace(&catalog, &namespace_ident).await; - assert_eq!(catalog.list_namespaces(None).await.unwrap(), vec![ - namespace_ident - ]); + assert_eq!( + catalog.list_namespaces(None).await.unwrap(), + vec![namespace_ident] + ); } #[tokio::test] @@ -547,11 +548,10 @@ pub(crate) mod tests { let namespace_ident_1 = NamespaceIdent::new("a".into()); let namespace_ident_2 = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap(); let namespace_ident_3 = NamespaceIdent::new("b".into()); - create_namespaces(&catalog, &vec![ - &namespace_ident_1, - &namespace_ident_2, - &namespace_ident_3, - ]) + create_namespaces( + &catalog, + &vec![&namespace_ident_1, &namespace_ident_2, &namespace_ident_3], + ) .await; assert_eq!( @@ -582,11 +582,10 @@ pub(crate) mod tests { let namespace_ident_1 = NamespaceIdent::new("a".into()); let namespace_ident_2 = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap(); let namespace_ident_3 = NamespaceIdent::new("c".into()); - create_namespaces(&catalog, &vec![ - &namespace_ident_1, - &namespace_ident_2, - &namespace_ident_3, - ]) + create_namespaces( + &catalog, + &vec![&namespace_ident_1, &namespace_ident_2, &namespace_ident_3], + ) .await; assert_eq!( @@ -611,13 +610,16 @@ pub(crate) mod tests { let namespace_ident_3 = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap(); let namespace_ident_4 = NamespaceIdent::from_strs(vec!["a", "c"]).unwrap(); let namespace_ident_5 = NamespaceIdent::new("b".into()); - create_namespaces(&catalog, &vec![ - &namespace_ident_1, - &namespace_ident_2, - &namespace_ident_3, - &namespace_ident_4, - &namespace_ident_5, - ]) + create_namespaces( + &catalog, + &vec![ + &namespace_ident_1, + &namespace_ident_2, + &namespace_ident_3, + &namespace_ident_4, + &namespace_ident_5, + ], + ) .await; assert_eq!( @@ -811,9 +813,10 @@ pub(crate) mod tests { ) ); - assert_eq!(catalog.list_namespaces(None).await.unwrap(), vec![ - namespace_ident_a.clone() - ]); + assert_eq!( + catalog.list_namespaces(None).await.unwrap(), + vec![namespace_ident_a.clone()] + ); assert_eq!( catalog @@ -861,11 +864,14 @@ pub(crate) mod tests { let namespace_ident_a = NamespaceIdent::new("a".into()); let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap(); let namespace_ident_a_b_c = NamespaceIdent::from_strs(vec!["a", "b", "c"]).unwrap(); - create_namespaces(&catalog, &vec![ - &namespace_ident_a, - &namespace_ident_a_b, - &namespace_ident_a_b_c, - ]) + create_namespaces( + 
&catalog, + &vec![ + &namespace_ident_a, + &namespace_ident_a_b, + &namespace_ident_a_b_c, + ], + ) .await; assert_eq!( @@ -937,11 +943,14 @@ pub(crate) mod tests { let namespace_ident_a = NamespaceIdent::new("a".into()); let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap(); let namespace_ident_a_b_c = NamespaceIdent::from_strs(vec!["a", "b", "c"]).unwrap(); - create_namespaces(&catalog, &vec![ - &namespace_ident_a, - &namespace_ident_a_b, - &namespace_ident_a_b_c, - ]) + create_namespaces( + &catalog, + &vec![ + &namespace_ident_a, + &namespace_ident_a_b, + &namespace_ident_a_b_c, + ], + ) .await; let mut new_properties = HashMap::new(); @@ -1010,11 +1019,14 @@ pub(crate) mod tests { let namespace_ident_a = NamespaceIdent::new("a".into()); let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap(); let namespace_ident_a_b_c = NamespaceIdent::from_strs(vec!["a", "b", "c"]).unwrap(); - create_namespaces(&catalog, &vec![ - &namespace_ident_a, - &namespace_ident_a_b, - &namespace_ident_a_b_c, - ]) + create_namespaces( + &catalog, + &vec![ + &namespace_ident_a, + &namespace_ident_a_b, + &namespace_ident_a_b_c, + ], + ) .await; catalog @@ -1402,9 +1414,10 @@ pub(crate) mod tests { let table_ident = TableIdent::new(namespace_ident.clone(), "tbl1".into()); create_table(&catalog, &table_ident).await; - assert_eq!(catalog.list_tables(&namespace_ident).await.unwrap(), vec![ - table_ident - ]); + assert_eq!( + catalog.list_tables(&namespace_ident).await.unwrap(), + vec![table_ident] + ); } #[tokio::test] @@ -1433,11 +1446,10 @@ pub(crate) mod tests { let table_ident_1 = TableIdent::new(namespace_ident_1.clone(), "tbl1".into()); let table_ident_2 = TableIdent::new(namespace_ident_1.clone(), "tbl2".into()); let table_ident_3 = TableIdent::new(namespace_ident_2.clone(), "tbl1".into()); - let _ = create_tables(&catalog, vec![ - &table_ident_1, - &table_ident_2, - &table_ident_3, - ]) + let _ = create_tables( + &catalog, + vec![&table_ident_1, &table_ident_2, &table_ident_3], + ) .await; assert_eq!( @@ -1627,9 +1639,10 @@ pub(crate) mod tests { .await .unwrap(); - assert_eq!(catalog.list_tables(&namespace_ident).await.unwrap(), vec![ - dst_table_ident - ],); + assert_eq!( + catalog.list_tables(&namespace_ident).await.unwrap(), + vec![dst_table_ident], + ); } #[tokio::test] @@ -1671,9 +1684,10 @@ pub(crate) mod tests { .await .unwrap(); - assert_eq!(catalog.list_tables(&namespace_ident).await.unwrap(), vec![ - table_ident - ],); + assert_eq!( + catalog.list_tables(&namespace_ident).await.unwrap(), + vec![table_ident], + ); } #[tokio::test] @@ -1682,11 +1696,14 @@ pub(crate) mod tests { let namespace_ident_a = NamespaceIdent::new("a".into()); let namespace_ident_a_b = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap(); let namespace_ident_a_b_c = NamespaceIdent::from_strs(vec!["a", "b", "c"]).unwrap(); - create_namespaces(&catalog, &vec![ - &namespace_ident_a, - &namespace_ident_a_b, - &namespace_ident_a_b_c, - ]) + create_namespaces( + &catalog, + &vec![ + &namespace_ident_a, + &namespace_ident_a_b, + &namespace_ident_a_b_c, + ], + ) .await; let src_table_ident = TableIdent::new(namespace_ident_a_b_c.clone(), "tbl1".into()); diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index 27d5edaedb..ca9bab98bd 100644 --- a/crates/iceberg/src/catalog/mod.rs +++ b/crates/iceberg/src/catalog/mod.rs @@ -774,7 +774,9 @@ pub(super) mod _serde { pub(super) fn deserialize_snapshot<'de, D>( deserializer: D, ) -> std::result::Result - where D: 
Deserializer<'de> { + where + D: Deserializer<'de>, + { let buf = CatalogSnapshot::deserialize(deserializer)?; Ok(buf.into()) } @@ -995,7 +997,9 @@ mod _serde_set_statistics { } pub fn deserialize<'de, D>(deserializer: D) -> std::result::Result - where D: Deserializer<'de> { + where + D: Deserializer<'de>, + { let SetStatistics { snapshot_id, statistics, diff --git a/crates/iceberg/src/expr/predicate.rs b/crates/iceberg/src/expr/predicate.rs index 35e88ff3e7..21fef5975b 100644 --- a/crates/iceberg/src/expr/predicate.rs +++ b/crates/iceberg/src/expr/predicate.rs @@ -43,14 +43,18 @@ pub struct LogicalExpression { impl Serialize for LogicalExpression { fn serialize(&self, serializer: S) -> std::result::Result - where S: serde::Serializer { + where + S: serde::Serializer, + { self.inputs.serialize(serializer) } } impl<'de, T: Deserialize<'de>, const N: usize> Deserialize<'de> for LogicalExpression { fn deserialize(deserializer: D) -> std::result::Result - where D: serde::Deserializer<'de> { + where + D: serde::Deserializer<'de>, + { let inputs = Vec::>::deserialize(deserializer)?; Ok(LogicalExpression::new( array_init::from_iter(inputs.into_iter()).ok_or_else(|| { @@ -84,7 +88,8 @@ impl LogicalExpression { } impl Bind for LogicalExpression -where T::Bound: Sized +where + T::Bound: Sized, { type Bound = LogicalExpression; diff --git a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs index 3745d94d18..7665787983 100644 --- a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs @@ -1403,15 +1403,18 @@ mod tests { PageLocation::new(4048, 48, 4048), ]; - Ok((vec![idx_float, idx_string], vec![ - OffsetIndexMetaData { - page_locations: page_locs_float, - unencoded_byte_array_data_bytes: None, - }, - OffsetIndexMetaData { - page_locations: page_locs_string, - unencoded_byte_array_data_bytes: None, - }, - ])) + Ok(( + vec![idx_float, idx_string], + vec![ + OffsetIndexMetaData { + page_locations: page_locs_float, + unencoded_byte_array_data_bytes: None, + }, + OffsetIndexMetaData { + page_locations: page_locs_string, + unencoded_byte_array_data_bytes: None, + }, + ], + )) } } diff --git a/crates/iceberg/src/inspect/manifests.rs b/crates/iceberg/src/inspect/manifests.rs index 60854b8bae..f5f5e96369 100644 --- a/crates/iceberg/src/inspect/manifests.rs +++ b/crates/iceberg/src/inspect/manifests.rs @@ -198,20 +198,23 @@ impl<'a> ManifestsTable<'a> { } } - let batch = RecordBatch::try_new(Arc::new(schema), vec![ - Arc::new(content.finish()), - Arc::new(path.finish()), - Arc::new(length.finish()), - Arc::new(partition_spec_id.finish()), - Arc::new(added_snapshot_id.finish()), - Arc::new(added_data_files_count.finish()), - Arc::new(existing_data_files_count.finish()), - Arc::new(deleted_data_files_count.finish()), - Arc::new(added_delete_files_count.finish()), - Arc::new(existing_delete_files_count.finish()), - Arc::new(deleted_delete_files_count.finish()), - Arc::new(partition_summaries.finish()), - ])?; + let batch = RecordBatch::try_new( + Arc::new(schema), + vec![ + Arc::new(content.finish()), + Arc::new(path.finish()), + Arc::new(length.finish()), + Arc::new(partition_spec_id.finish()), + Arc::new(added_snapshot_id.finish()), + Arc::new(added_data_files_count.finish()), + Arc::new(existing_data_files_count.finish()), + Arc::new(deleted_data_files_count.finish()), + Arc::new(added_delete_files_count.finish()), + Arc::new(existing_delete_files_count.finish()), + 
Arc::new(deleted_delete_files_count.finish()), + Arc::new(partition_summaries.finish()), + ], + )?; Ok(stream::iter(vec![Ok(batch)]).boxed()) } diff --git a/crates/iceberg/src/inspect/snapshots.rs b/crates/iceberg/src/inspect/snapshots.rs index 6081ec165b..a8d03466d4 100644 --- a/crates/iceberg/src/inspect/snapshots.rs +++ b/crates/iceberg/src/inspect/snapshots.rs @@ -121,14 +121,17 @@ impl<'a> SnapshotsTable<'a> { summary.append(true)?; } - let batch = RecordBatch::try_new(Arc::new(schema), vec![ - Arc::new(committed_at.finish()), - Arc::new(snapshot_id.finish()), - Arc::new(parent_id.finish()), - Arc::new(operation.finish()), - Arc::new(manifest_list.finish()), - Arc::new(summary.finish()), - ])?; + let batch = RecordBatch::try_new( + Arc::new(schema), + vec![ + Arc::new(committed_at.finish()), + Arc::new(snapshot_id.finish()), + Arc::new(parent_id.finish()), + Arc::new(operation.finish()), + Arc::new(manifest_list.finish()), + Arc::new(summary.finish()), + ], + )?; Ok(stream::iter(vec![Ok(batch)]).boxed()) } diff --git a/crates/iceberg/src/io/file_io.rs b/crates/iceberg/src/io/file_io.rs index 24e91ca8a4..25e1d7c2f8 100644 --- a/crates/iceberg/src/io/file_io.rs +++ b/crates/iceberg/src/io/file_io.rs @@ -174,7 +174,9 @@ impl Extensions { /// Fetch an extension. pub fn get(&self) -> Option> - where T: 'static + Send + Sync + Clone { + where + T: 'static + Send + Sync + Clone, + { let type_id = TypeId::of::(); self.0 .get(&type_id) @@ -256,7 +258,9 @@ impl FileIOBuilder { /// Fetch an extension from the file IO builder. pub fn extension(&self) -> Option> - where T: 'static + Send + Sync + Clone { + where + T: 'static + Send + Sync + Clone, + { self.extensions.get::() } diff --git a/crates/iceberg/src/io/object_cache.rs b/crates/iceberg/src/io/object_cache.rs index af297bebb5..7c2116622e 100644 --- a/crates/iceberg/src/io/object_cache.rs +++ b/crates/iceberg/src/io/object_cache.rs @@ -229,12 +229,15 @@ mod tests { env!("CARGO_MANIFEST_DIR") )) .unwrap(); - let metadata_json = render_template(&template_json_str, context! { - table_location => &table_location, - manifest_list_1_location => &manifest_list1_location, - manifest_list_2_location => &manifest_list2_location, - table_metadata_1_location => &table_metadata1_location, - }); + let metadata_json = render_template( + &template_json_str, + context! { + table_location => &table_location, + manifest_list_1_location => &manifest_list1_location, + manifest_list_2_location => &manifest_list2_location, + table_metadata_1_location => &table_metadata1_location, + }, + ); serde_json::from_str::(&metadata_json).unwrap() }; diff --git a/crates/iceberg/src/scan/mod.rs b/crates/iceberg/src/scan/mod.rs index 3e319ca062..9e6207e1f6 100644 --- a/crates/iceberg/src/scan/mod.rs +++ b/crates/iceberg/src/scan/mod.rs @@ -614,12 +614,15 @@ pub mod tests { env!("CARGO_MANIFEST_DIR") )) .unwrap(); - let metadata_json = render_template(&template_json_str, context! { - table_location => &table_location, - manifest_list_1_location => &manifest_list1_location, - manifest_list_2_location => &manifest_list2_location, - table_metadata_1_location => &table_metadata1_location, - }); + let metadata_json = render_template( + &template_json_str, + context! 
{ + table_location => &table_location, + manifest_list_1_location => &manifest_list1_location, + manifest_list_2_location => &manifest_list2_location, + table_metadata_1_location => &table_metadata1_location, + }, + ); serde_json::from_str::(&metadata_json).unwrap() }; @@ -654,10 +657,13 @@ pub mod tests { env!("CARGO_MANIFEST_DIR") )) .unwrap(); - let metadata_json = render_template(&template_json_str, context! { - table_location => &table_location, - table_metadata_1_location => &table_metadata1_location, - }); + let metadata_json = render_template( + &template_json_str, + context! { + table_location => &table_location, + table_metadata_1_location => &table_metadata1_location, + }, + ); serde_json::from_str::(&metadata_json).unwrap() }; @@ -693,12 +699,15 @@ pub mod tests { env!("CARGO_MANIFEST_DIR") )) .unwrap(); - let metadata_json = render_template(&template_json_str, context! { - table_location => &table_location, - manifest_list_1_location => &manifest_list1_location, - manifest_list_2_location => &manifest_list2_location, - table_metadata_1_location => &table_metadata1_location, - }); + let metadata_json = render_template( + &template_json_str, + context! { + table_location => &table_location, + manifest_list_1_location => &manifest_list1_location, + manifest_list_2_location => &manifest_list2_location, + table_metadata_1_location => &table_metadata1_location, + }, + ); serde_json::from_str::(&metadata_json).unwrap() }; @@ -924,9 +933,10 @@ pub mod tests { let values: BooleanArray = values.into(); let col8 = Arc::new(values) as ArrayRef; - let to_write = RecordBatch::try_new(schema.clone(), vec![ - col1, col2, col3, col4, col5, col6, col7, col8, - ]) + let to_write = RecordBatch::try_new( + schema.clone(), + vec![col1, col2, col3, col4, col5, col6, col7, col8], + ) .unwrap(); // Write the Parquet files @@ -1135,9 +1145,10 @@ pub mod tests { let values: BooleanArray = values.into(); let col8 = Arc::new(values) as ArrayRef; - let to_write = RecordBatch::try_new(schema.clone(), vec![ - col1, col2, col3, col4, col5, col6, col7, col8, - ]) + let to_write = RecordBatch::try_new( + schema.clone(), + vec![col1, col2, col3, col4, col5, col6, col7, col8], + ) .unwrap(); // Write the Parquet files diff --git a/crates/iceberg/src/scan/task.rs b/crates/iceberg/src/scan/task.rs index e1ef241a57..58d0a49162 100644 --- a/crates/iceberg/src/scan/task.rs +++ b/crates/iceberg/src/scan/task.rs @@ -33,7 +33,9 @@ pub type FileScanTaskStream = BoxStream<'static, Result>; /// Serialization helper that always returns NotImplementedError. /// Used for fields that should not be serialized but we want to be explicit about it. fn serialize_not_implemented(_: &T, _: S) -> std::result::Result -where S: Serializer { +where + S: Serializer, +{ Err(serde::ser::Error::custom( "Serialization not implemented for this field", )) @@ -42,7 +44,9 @@ where S: Serializer { /// Deserialization helper that always returns NotImplementedError. /// Used for fields that should not be deserialized but we want to be explicit about it. 
fn deserialize_not_implemented<'de, D, T>(_: D) -> std::result::Result -where D: serde::Deserializer<'de> { +where + D: serde::Deserializer<'de>, +{ Err(serde::de::Error::custom( "Deserialization not implemented for this field", )) diff --git a/crates/iceberg/src/spec/datatypes.rs b/crates/iceberg/src/spec/datatypes.rs index 456b754408..f5a8977400 100644 --- a/crates/iceberg/src/spec/datatypes.rs +++ b/crates/iceberg/src/spec/datatypes.rs @@ -276,7 +276,9 @@ impl PrimitiveType { impl Serialize for Type { fn serialize(&self, serializer: S) -> std::result::Result - where S: Serializer { + where + S: Serializer, + { let type_serde = _serde::SerdeType::from(self); type_serde.serialize(serializer) } @@ -284,7 +286,9 @@ impl Serialize for Type { impl<'de> Deserialize<'de> for Type { fn deserialize(deserializer: D) -> std::result::Result - where D: Deserializer<'de> { + where + D: Deserializer<'de>, + { let type_serde = _serde::SerdeType::deserialize(deserializer)?; Ok(Type::from(type_serde)) } @@ -292,7 +296,9 @@ impl<'de> Deserialize<'de> for Type { impl<'de> Deserialize<'de> for PrimitiveType { fn deserialize(deserializer: D) -> std::result::Result - where D: Deserializer<'de> { + where + D: Deserializer<'de>, + { let s = String::deserialize(deserializer)?; if s.starts_with("decimal") { deserialize_decimal(s.into_deserializer()) @@ -306,7 +312,9 @@ impl<'de> Deserialize<'de> for PrimitiveType { impl Serialize for PrimitiveType { fn serialize(&self, serializer: S) -> std::result::Result - where S: Serializer { + where + S: Serializer, + { match self { PrimitiveType::Decimal { precision, scale } => { serialize_decimal(precision, scale, serializer) @@ -318,7 +326,9 @@ impl Serialize for PrimitiveType { } fn deserialize_decimal<'de, D>(deserializer: D) -> std::result::Result -where D: Deserializer<'de> { +where + D: Deserializer<'de>, +{ let s = String::deserialize(deserializer)?; let (precision, scale) = s .trim_start_matches(r"decimal(") @@ -344,7 +354,9 @@ where } fn deserialize_fixed<'de, D>(deserializer: D) -> std::result::Result -where D: Deserializer<'de> { +where + D: Deserializer<'de>, +{ let fixed = String::deserialize(deserializer)? .trim_start_matches(r"fixed[") .trim_end_matches(']') @@ -357,7 +369,9 @@ where D: Deserializer<'de> { } fn serialize_fixed(value: &u64, serializer: S) -> std::result::Result -where S: Serializer { +where + S: Serializer, +{ serializer.serialize_str(&format!("fixed[{value}]")) } @@ -401,7 +415,9 @@ pub struct StructType { impl<'de> Deserialize<'de> for StructType { fn deserialize(deserializer: D) -> std::result::Result - where D: Deserializer<'de> { + where + D: Deserializer<'de>, + { #[derive(Deserialize)] #[serde(field_identifier, rename_all = "lowercase")] enum Field { @@ -419,7 +435,9 @@ impl<'de> Deserialize<'de> for StructType { } fn visit_map(self, mut map: V) -> std::result::Result - where V: MapAccess<'de> { + where + V: MapAccess<'de>, + { let mut fields = None; while let Some(key) = map.next_key()? 
{ match key { diff --git a/crates/iceberg/src/spec/encrypted_key.rs b/crates/iceberg/src/spec/encrypted_key.rs index 6908ade1d5..5ee9e24b97 100644 --- a/crates/iceberg/src/spec/encrypted_key.rs +++ b/crates/iceberg/src/spec/encrypted_key.rs @@ -106,7 +106,9 @@ pub(super) mod _serde { impl Serialize for EncryptedKey { fn serialize(&self, serializer: S) -> Result - where S: serde::Serializer { + where + S: serde::Serializer, + { let serde_key = _serde::EncryptedKeySerde::from(self); serde_key.serialize(serializer) } @@ -114,7 +116,9 @@ impl Serialize for EncryptedKey { impl<'de> Deserialize<'de> for EncryptedKey { fn deserialize(deserializer: D) -> Result - where D: serde::Deserializer<'de> { + where + D: serde::Deserializer<'de>, + { let serde_key = _serde::EncryptedKeySerde::deserialize(deserializer)?; Self::try_from(serde_key).map_err(serde::de::Error::custom) diff --git a/crates/iceberg/src/spec/manifest/_serde.rs b/crates/iceberg/src/spec/manifest/_serde.rs index 7738af46d4..0f09da8890 100644 --- a/crates/iceberg/src/spec/manifest/_serde.rs +++ b/crates/iceberg/src/spec/manifest/_serde.rs @@ -329,10 +329,10 @@ mod tests { #[test] fn test_parse_negative_manifest_entry() { - let entries = vec![I64Entry { key: 1, value: -1 }, I64Entry { - key: 2, - value: 3, - }]; + let entries = vec![ + I64Entry { key: 1, value: -1 }, + I64Entry { key: 2, value: 3 }, + ]; let ret = parse_i64_entry(entries).unwrap(); diff --git a/crates/iceberg/src/spec/name_mapping/mod.rs b/crates/iceberg/src/spec/name_mapping/mod.rs index db9e44c290..ea23367b2e 100644 --- a/crates/iceberg/src/spec/name_mapping/mod.rs +++ b/crates/iceberg/src/spec/name_mapping/mod.rs @@ -219,38 +219,41 @@ mod tests { "#; let name_mapping: NameMapping = serde_json::from_str(name_mapping).unwrap(); - assert_eq!(name_mapping, NameMapping { - root: vec![ - MappedField { - field_id: Some(1), - names: vec!["id".to_string(), "record_id".to_string()], - fields: vec![] - }, - MappedField { - field_id: Some(2), - names: vec!["data".to_string()], - fields: vec![] - }, - MappedField { - field_id: Some(3), - names: vec!["location".to_string()], - fields: vec![ - MappedField { - field_id: Some(4), - names: vec!["latitude".to_string(), "lat".to_string()], - fields: vec![] - } - .into(), - MappedField { - field_id: Some(5), - names: vec!["longitude".to_string(), "long".to_string()], - fields: vec![] - } - .into(), - ] - } - ], - }); + assert_eq!( + name_mapping, + NameMapping { + root: vec![ + MappedField { + field_id: Some(1), + names: vec!["id".to_string(), "record_id".to_string()], + fields: vec![] + }, + MappedField { + field_id: Some(2), + names: vec!["data".to_string()], + fields: vec![] + }, + MappedField { + field_id: Some(3), + names: vec!["location".to_string()], + fields: vec![ + MappedField { + field_id: Some(4), + names: vec!["latitude".to_string(), "lat".to_string()], + fields: vec![] + } + .into(), + MappedField { + field_id: Some(5), + names: vec!["longitude".to_string(), "long".to_string()], + fields: vec![] + } + .into(), + ] + } + ], + } + ); } #[test] diff --git a/crates/iceberg/src/spec/partition.rs b/crates/iceberg/src/spec/partition.rs index 255aabd476..9dc81ccd9d 100644 --- a/crates/iceberg/src/spec/partition.rs +++ b/crates/iceberg/src/spec/partition.rs @@ -1245,15 +1245,18 @@ mod tests { .build() .unwrap(); - assert_eq!(spec, PartitionSpec { - spec_id: 1, - fields: vec![PartitionField { - source_id: 1, - field_id: 1000, - name: "id_bucket[16]".to_string(), - transform: Transform::Bucket(16), - }], - }); + assert_eq!( + spec, + 
PartitionSpec { + spec_id: 1, + fields: vec![PartitionField { + source_id: 1, + field_id: 1000, + name: "id_bucket[16]".to_string(), + transform: Transform::Bucket(16), + }], + } + ); assert_eq!( spec.partition_type(&schema).unwrap(), StructType::new(vec![ @@ -1447,15 +1450,18 @@ mod tests { .unwrap() .build(); - assert_eq!(spec, UnboundPartitionSpec { - spec_id: Some(1), - fields: vec![UnboundPartitionField { - source_id: 1, - field_id: None, - name: "id_bucket[16]".to_string(), - transform: Transform::Bucket(16), - }] - }); + assert_eq!( + spec, + UnboundPartitionSpec { + spec_id: Some(1), + fields: vec![UnboundPartitionField { + source_id: 1, + field_id: None, + name: "id_bucket[16]".to_string(), + transform: Transform::Bucket(16), + }] + } + ); } #[test] diff --git a/crates/iceberg/src/spec/snapshot_summary.rs b/crates/iceberg/src/spec/snapshot_summary.rs index 4cd3715e06..49ea222eb5 100644 --- a/crates/iceberg/src/spec/snapshot_summary.rs +++ b/crates/iceberg/src/spec/snapshot_summary.rs @@ -323,7 +323,9 @@ impl UpdateMetrics { } fn set_if_positive(properties: &mut HashMap, value: T, property_name: &str) -where T: PartialOrd + Default + ToString { +where + T: PartialOrd + Default + ToString, +{ if value > T::default() { properties.insert(property_name.to_string(), value.to_string()); } diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index 06b32cc847..c1a2149f26 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -392,15 +392,17 @@ impl TableMetadata { fn construct_refs(&mut self) { if let Some(current_snapshot_id) = self.current_snapshot_id { if !self.refs.contains_key(MAIN_BRANCH) { - self.refs - .insert(MAIN_BRANCH.to_string(), SnapshotReference { + self.refs.insert( + MAIN_BRANCH.to_string(), + SnapshotReference { snapshot_id: current_snapshot_id, retention: SnapshotRetention::Branch { min_snapshots_to_keep: None, max_snapshot_age_ms: None, max_ref_age_ms: None, }, - }); + }, + ); } } } @@ -830,7 +832,9 @@ pub(super) mod _serde { impl Serialize for TableMetadata { fn serialize(&self, serializer: S) -> Result - where S: serde::Serializer { + where + S: serde::Serializer, + { // we must do a clone here let table_metadata_enum: TableMetadataEnum = self.clone().try_into().map_err(serde::ser::Error::custom)?; @@ -841,14 +845,18 @@ pub(super) mod _serde { impl Serialize for VersionNumber { fn serialize(&self, serializer: S) -> Result - where S: serde::Serializer { + where + S: serde::Serializer, + { serializer.serialize_u8(V) } } impl<'de, const V: u8> Deserialize<'de> for VersionNumber { fn deserialize(deserializer: D) -> Result - where D: serde::Deserializer<'de> { + where + D: serde::Deserializer<'de>, + { let value = u8::deserialize(deserializer)?; if value == V { Ok(VersionNumber::) @@ -971,14 +979,17 @@ pub(super) mod _serde { default_sort_order_id: value.default_sort_order_id, refs: value.refs.unwrap_or_else(|| { if let Some(snapshot_id) = current_snapshot_id { - HashMap::from_iter(vec![(MAIN_BRANCH.to_string(), SnapshotReference { - snapshot_id, - retention: SnapshotRetention::Branch { - min_snapshots_to_keep: None, - max_snapshot_age_ms: None, - max_ref_age_ms: None, + HashMap::from_iter(vec![( + MAIN_BRANCH.to_string(), + SnapshotReference { + snapshot_id, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: None, + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, }, - })]) + )]) } else { HashMap::new() } @@ -1084,14 +1095,17 @@ pub(super) mod _serde { 
default_sort_order_id: value.default_sort_order_id, refs: value.refs.unwrap_or_else(|| { if let Some(snapshot_id) = current_snapshot_id { - HashMap::from_iter(vec![(MAIN_BRANCH.to_string(), SnapshotReference { - snapshot_id, - retention: SnapshotRetention::Branch { - min_snapshots_to_keep: None, - max_snapshot_age_ms: None, - max_ref_age_ms: None, + HashMap::from_iter(vec![( + MAIN_BRANCH.to_string(), + SnapshotReference { + snapshot_id, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: None, + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, }, - })]) + )]) } else { HashMap::new() } @@ -1240,14 +1254,17 @@ pub(super) mod _serde { .default_sort_order_id .unwrap_or(SortOrder::UNSORTED_ORDER_ID), refs: if let Some(snapshot_id) = current_snapshot_id { - HashMap::from_iter(vec![(MAIN_BRANCH.to_string(), SnapshotReference { - snapshot_id, - retention: SnapshotRetention::Branch { - min_snapshots_to_keep: None, - max_snapshot_age_ms: None, - max_ref_age_ms: None, + HashMap::from_iter(vec![( + MAIN_BRANCH.to_string(), + SnapshotReference { + snapshot_id, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: None, + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, }, - })]) + )]) } else { HashMap::new() }, @@ -2665,29 +2682,35 @@ mod tests { properties: HashMap::new(), snapshot_log: Vec::new(), metadata_log: Vec::new(), - statistics: HashMap::from_iter(vec![(3055729675574597004, StatisticsFile { - snapshot_id: 3055729675574597004, - statistics_path: "s3://a/b/stats.puffin".to_string(), - file_size_in_bytes: 413, - file_footer_size_in_bytes: 42, - key_metadata: None, - blob_metadata: vec![BlobMetadata { + statistics: HashMap::from_iter(vec![( + 3055729675574597004, + StatisticsFile { snapshot_id: 3055729675574597004, - sequence_number: 1, - fields: vec![1], - r#type: "ndv".to_string(), - properties: HashMap::new(), - }], - })]), + statistics_path: "s3://a/b/stats.puffin".to_string(), + file_size_in_bytes: 413, + file_footer_size_in_bytes: 42, + key_metadata: None, + blob_metadata: vec![BlobMetadata { + snapshot_id: 3055729675574597004, + sequence_number: 1, + fields: vec![1], + r#type: "ndv".to_string(), + properties: HashMap::new(), + }], + }, + )]), partition_statistics: HashMap::new(), - refs: HashMap::from_iter(vec![("main".to_string(), SnapshotReference { - snapshot_id: 3055729675574597004, - retention: SnapshotRetention::Branch { - min_snapshots_to_keep: None, - max_snapshot_age_ms: None, - max_ref_age_ms: None, + refs: HashMap::from_iter(vec![( + "main".to_string(), + SnapshotReference { + snapshot_id: 3055729675574597004, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: None, + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, }, - })]), + )]), encryption_keys: HashMap::new(), next_row_id: INITIAL_ROW_ID, }; @@ -2816,14 +2839,17 @@ mod tests { file_size_in_bytes: 43, }, )]), - refs: HashMap::from_iter(vec![("main".to_string(), SnapshotReference { - snapshot_id: 3055729675574597004, - retention: SnapshotRetention::Branch { - min_snapshots_to_keep: None, - max_snapshot_age_ms: None, - max_ref_age_ms: None, + refs: HashMap::from_iter(vec![( + "main".to_string(), + SnapshotReference { + snapshot_id: 3055729675574597004, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: None, + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, }, - })]), + )]), encryption_keys: HashMap::new(), next_row_id: INITIAL_ROW_ID, }; @@ -3066,14 +3092,17 @@ mod tests { }, ], metadata_log: Vec::new(), - refs: 
HashMap::from_iter(vec![("main".to_string(), SnapshotReference { - snapshot_id: 3055729675574597004, - retention: SnapshotRetention::Branch { - min_snapshots_to_keep: None, - max_snapshot_age_ms: None, - max_ref_age_ms: None, + refs: HashMap::from_iter(vec![( + "main".to_string(), + SnapshotReference { + snapshot_id: 3055729675574597004, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: None, + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, }, - })]), + )]), statistics: HashMap::new(), partition_statistics: HashMap::new(), encryption_keys: HashMap::new(), diff --git a/crates/iceberg/src/spec/table_metadata_builder.rs b/crates/iceberg/src/spec/table_metadata_builder.rs index 6b8ce1e6a5..82d3626bd7 100644 --- a/crates/iceberg/src/spec/table_metadata_builder.rs +++ b/crates/iceberg/src/spec/table_metadata_builder.rs @@ -1686,40 +1686,43 @@ mod tests { .unwrap() .changes; - pretty_assertions::assert_eq!(changes, vec![ - TableUpdate::SetLocation { - location: TEST_LOCATION.to_string() - }, - TableUpdate::AddSchema { schema: schema() }, - TableUpdate::SetCurrentSchema { schema_id: -1 }, - TableUpdate::AddSpec { - // Because this is a new tables, field-ids are assigned - // partition_spec() has None set for field-id - spec: PartitionSpec::builder(schema()) - .with_spec_id(0) - .add_unbound_field(UnboundPartitionField { - name: "y".to_string(), - transform: Transform::Identity, - source_id: 2, - field_id: Some(1000) - }) - .unwrap() - .build() - .unwrap() - .into_unbound(), - }, - TableUpdate::SetDefaultSpec { spec_id: -1 }, - TableUpdate::AddSortOrder { - sort_order: sort_order(), - }, - TableUpdate::SetDefaultSortOrder { sort_order_id: -1 }, - TableUpdate::SetProperties { - updates: HashMap::from_iter(vec![( - "property 1".to_string(), - "value 1".to_string() - )]), - } - ]); + pretty_assertions::assert_eq!( + changes, + vec![ + TableUpdate::SetLocation { + location: TEST_LOCATION.to_string() + }, + TableUpdate::AddSchema { schema: schema() }, + TableUpdate::SetCurrentSchema { schema_id: -1 }, + TableUpdate::AddSpec { + // Because this is a new tables, field-ids are assigned + // partition_spec() has None set for field-id + spec: PartitionSpec::builder(schema()) + .with_spec_id(0) + .add_unbound_field(UnboundPartitionField { + name: "y".to_string(), + transform: Transform::Identity, + source_id: 2, + field_id: Some(1000) + }) + .unwrap() + .build() + .unwrap() + .into_unbound(), + }, + TableUpdate::SetDefaultSpec { spec_id: -1 }, + TableUpdate::AddSortOrder { + sort_order: sort_order(), + }, + TableUpdate::SetDefaultSortOrder { sort_order_id: -1 }, + TableUpdate::SetProperties { + updates: HashMap::from_iter(vec![( + "property 1".to_string(), + "value 1".to_string() + )]), + } + ] + ); } #[test] @@ -1738,29 +1741,32 @@ mod tests { .unwrap() .changes; - pretty_assertions::assert_eq!(changes, vec![ - TableUpdate::SetLocation { - location: TEST_LOCATION.to_string() - }, - TableUpdate::AddSchema { - schema: Schema::builder().build().unwrap(), - }, - TableUpdate::SetCurrentSchema { schema_id: -1 }, - TableUpdate::AddSpec { - // Because this is a new tables, field-ids are assigned - // partition_spec() has None set for field-id - spec: PartitionSpec::builder(schema) - .with_spec_id(0) - .build() - .unwrap() - .into_unbound(), - }, - TableUpdate::SetDefaultSpec { spec_id: -1 }, - TableUpdate::AddSortOrder { - sort_order: SortOrder::unsorted_order(), - }, - TableUpdate::SetDefaultSortOrder { sort_order_id: -1 }, - ]); + pretty_assertions::assert_eq!( + changes, + vec![ + 
TableUpdate::SetLocation { + location: TEST_LOCATION.to_string() + }, + TableUpdate::AddSchema { + schema: Schema::builder().build().unwrap(), + }, + TableUpdate::SetCurrentSchema { schema_id: -1 }, + TableUpdate::AddSpec { + // Because this is a new tables, field-ids are assigned + // partition_spec() has None set for field-id + spec: PartitionSpec::builder(schema) + .with_spec_id(0) + .build() + .unwrap() + .into_unbound(), + }, + TableUpdate::SetDefaultSpec { spec_id: -1 }, + TableUpdate::AddSortOrder { + sort_order: SortOrder::unsorted_order(), + }, + TableUpdate::SetDefaultSortOrder { sort_order_id: -1 }, + ] + ); } #[test] @@ -1822,9 +1828,12 @@ mod tests { ); assert_eq!(build_result.metadata.default_spec.spec_id(), 0); assert_eq!(build_result.metadata.last_partition_id, 1001); - pretty_assertions::assert_eq!(build_result.changes[0], TableUpdate::AddSpec { - spec: expected_change - }); + pretty_assertions::assert_eq!( + build_result.changes[0], + TableUpdate::AddSpec { + spec: expected_change + } + ); // Remove the spec let build_result = build_result @@ -1874,13 +1883,16 @@ mod tests { assert_eq!(build_result.changes.len(), 2); assert_eq!(build_result.metadata.default_spec, Arc::new(expected_spec)); - assert_eq!(build_result.changes, vec![ - TableUpdate::AddSpec { - // Should contain the actual ID that was used - spec: added_spec.with_spec_id(1) - }, - TableUpdate::SetDefaultSpec { spec_id: -1 } - ]); + assert_eq!( + build_result.changes, + vec![ + TableUpdate::AddSpec { + // Should contain the actual ID that was used + spec: added_spec.with_spec_id(1) + }, + TableUpdate::SetDefaultSpec { spec_id: -1 } + ] + ); } #[test] @@ -1897,12 +1909,16 @@ mod tests { .unwrap(); assert_eq!(build_result.changes.len(), 2); - assert_eq!(build_result.changes[0], TableUpdate::AddSpec { - spec: unbound_spec.clone() - }); - assert_eq!(build_result.changes[1], TableUpdate::SetDefaultSpec { - spec_id: -1 - }); + assert_eq!( + build_result.changes[0], + TableUpdate::AddSpec { + spec: unbound_spec.clone() + } + ); + assert_eq!( + build_result.changes[1], + TableUpdate::SetDefaultSpec { spec_id: -1 } + ); assert_eq!( build_result.metadata.default_spec, Arc::new( @@ -1924,9 +1940,10 @@ mod tests { .unwrap(); assert_eq!(build_result.changes.len(), 1); - assert_eq!(build_result.changes[0], TableUpdate::SetDefaultSpec { - spec_id: 0 - }); + assert_eq!( + build_result.changes[0], + TableUpdate::SetDefaultSpec { spec_id: 0 } + ); assert_eq!( build_result.metadata.default_spec, Arc::new( @@ -1966,9 +1983,12 @@ mod tests { build_result.metadata.sort_order_by_id(2), Some(&Arc::new(expected_sort_order.clone())) ); - pretty_assertions::assert_eq!(build_result.changes[0], TableUpdate::AddSortOrder { - sort_order: expected_sort_order - }); + pretty_assertions::assert_eq!( + build_result.changes[0], + TableUpdate::AddSortOrder { + sort_order: expected_sort_order + } + ); } #[test] @@ -1998,12 +2018,16 @@ mod tests { build_result.metadata.schema_by_id(1), Some(&Arc::new(added_schema.clone())) ); - pretty_assertions::assert_eq!(build_result.changes[0], TableUpdate::AddSchema { - schema: added_schema - }); - assert_eq!(build_result.changes[1], TableUpdate::SetCurrentSchema { - schema_id: -1 - }); + pretty_assertions::assert_eq!( + build_result.changes[0], + TableUpdate::AddSchema { + schema: added_schema + } + ); + assert_eq!( + build_result.changes[1], + TableUpdate::SetCurrentSchema { schema_id: -1 } + ); } #[test] @@ -2030,9 +2054,10 @@ mod tests { .unwrap(); assert_eq!(build_result.changes.len(), 2); - 
assert_eq!(build_result.changes[1], TableUpdate::SetCurrentSchema { - schema_id: -1 - }); + assert_eq!( + build_result.changes[1], + TableUpdate::SetCurrentSchema { schema_id: -1 } + ); } #[test] @@ -2133,28 +2158,34 @@ mod tests { assert!( builder .clone() - .set_ref(MAIN_BRANCH, SnapshotReference { - snapshot_id: 10, - retention: SnapshotRetention::Branch { - min_snapshots_to_keep: Some(10), - max_snapshot_age_ms: None, - max_ref_age_ms: None, - }, - }) + .set_ref( + MAIN_BRANCH, + SnapshotReference { + snapshot_id: 10, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: Some(10), + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, + } + ) .unwrap_err() .to_string() .contains("Cannot set 'main' to unknown snapshot: '10'") ); let build_result = builder - .set_ref(MAIN_BRANCH, SnapshotReference { - snapshot_id: 1, - retention: SnapshotRetention::Branch { - min_snapshots_to_keep: Some(10), - max_snapshot_age_ms: None, - max_ref_age_ms: None, + .set_ref( + MAIN_BRANCH, + SnapshotReference { + snapshot_id: 1, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: Some(10), + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, }, - }) + ) .unwrap() .build() .unwrap(); @@ -2163,10 +2194,13 @@ mod tests { build_result.metadata.snapshot_by_id(1), Some(&Arc::new(snapshot.clone())) ); - assert_eq!(build_result.metadata.snapshot_log, vec![SnapshotLog { - snapshot_id: 1, - timestamp_ms: snapshot.timestamp_ms() - }]) + assert_eq!( + build_result.metadata.snapshot_log, + vec![SnapshotLog { + snapshot_id: 1, + timestamp_ms: snapshot.timestamp_ms() + }] + ) } #[test] @@ -2216,24 +2250,30 @@ mod tests { let result = builder .add_snapshot(snapshot_1) .unwrap() - .set_ref(MAIN_BRANCH, SnapshotReference { - snapshot_id: 1, - retention: SnapshotRetention::Branch { - min_snapshots_to_keep: Some(10), - max_snapshot_age_ms: None, - max_ref_age_ms: None, + .set_ref( + MAIN_BRANCH, + SnapshotReference { + snapshot_id: 1, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: Some(10), + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, }, - }) + ) .unwrap() .set_branch_snapshot(snapshot_2.clone(), MAIN_BRANCH) .unwrap() .build() .unwrap(); - assert_eq!(result.metadata.snapshot_log, vec![SnapshotLog { - snapshot_id: 2, - timestamp_ms: snapshot_2.timestamp_ms() - }]); + assert_eq!( + result.metadata.snapshot_log, + vec![SnapshotLog { + snapshot_id: 2, + timestamp_ms: snapshot_2.timestamp_ms() + }] + ); assert_eq!(result.metadata.current_snapshot().unwrap().snapshot_id(), 2); } @@ -2273,13 +2313,16 @@ mod tests { build_result.metadata.refs.get("new_branch"), Some(&reference) ); - assert_eq!(build_result.changes, vec![ - TableUpdate::AddSnapshot { snapshot }, - TableUpdate::SetSnapshotRef { - ref_name: "new_branch".to_string(), - reference - } - ]); + assert_eq!( + build_result.changes, + vec![ + TableUpdate::AddSnapshot { snapshot }, + TableUpdate::SetSnapshotRef { + ref_name: "new_branch".to_string(), + reference + } + ] + ); } #[test] @@ -2422,14 +2465,17 @@ mod tests { let builder = builder .add_snapshot(snapshot.clone()) .unwrap() - .set_ref(MAIN_BRANCH, SnapshotReference { - snapshot_id: 1, - retention: SnapshotRetention::Branch { - min_snapshots_to_keep: Some(10), - max_snapshot_age_ms: None, - max_ref_age_ms: None, + .set_ref( + MAIN_BRANCH, + SnapshotReference { + snapshot_id: 1, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: Some(10), + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, }, - }) + ) .unwrap(); let snapshot = 
Snapshot::builder() @@ -2485,9 +2531,12 @@ mod tests { build_result.metadata.statistics, HashMap::from_iter(vec![(3055729675574597004, statistics.clone())]) ); - assert_eq!(build_result.changes, vec![TableUpdate::SetStatistics { - statistics: statistics.clone() - }]); + assert_eq!( + build_result.changes, + vec![TableUpdate::SetStatistics { + statistics: statistics.clone() + }] + ); // Remove let builder = build_result.metadata.into_builder(None); @@ -2497,9 +2546,12 @@ mod tests { .unwrap(); assert_eq!(build_result.metadata.statistics.len(), 0); - assert_eq!(build_result.changes, vec![TableUpdate::RemoveStatistics { - snapshot_id: statistics.snapshot_id - }]); + assert_eq!( + build_result.changes, + vec![TableUpdate::RemoveStatistics { + snapshot_id: statistics.snapshot_id + }] + ); // Remove again yields no changes let builder = build_result.metadata.into_builder(None); @@ -2529,11 +2581,12 @@ mod tests { build_result.metadata.partition_statistics, HashMap::from_iter(vec![(3055729675574597004, statistics.clone())]) ); - assert_eq!(build_result.changes, vec![ - TableUpdate::SetPartitionStatistics { + assert_eq!( + build_result.changes, + vec![TableUpdate::SetPartitionStatistics { partition_statistics: statistics.clone() - } - ]); + }] + ); // Remove let builder = build_result.metadata.into_builder(None); @@ -2542,11 +2595,12 @@ mod tests { .build() .unwrap(); assert_eq!(build_result.metadata.partition_statistics.len(), 0); - assert_eq!(build_result.changes, vec![ - TableUpdate::RemovePartitionStatistics { + assert_eq!( + build_result.changes, + vec![TableUpdate::RemovePartitionStatistics { snapshot_id: statistics.snapshot_id - } - ]); + }] + ); // Remove again yields no changes let builder = build_result.metadata.into_builder(None); @@ -3246,14 +3300,17 @@ mod tests { .into_builder(None) .add_snapshot(main_snapshot.clone()) .unwrap() - .set_ref(MAIN_BRANCH, SnapshotReference { - snapshot_id: main_snapshot.snapshot_id(), - retention: SnapshotRetention::Branch { - min_snapshots_to_keep: None, - max_snapshot_age_ms: None, - max_ref_age_ms: None, + .set_ref( + MAIN_BRANCH, + SnapshotReference { + snapshot_id: main_snapshot.snapshot_id(), + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: None, + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, }, - }) + ) .unwrap() .build() .unwrap() @@ -3335,9 +3392,12 @@ mod tests { build_result.metadata.encryption_key("key-1"), Some(&encryption_key_1) ); - assert_eq!(build_result.changes[0], TableUpdate::AddEncryptionKey { - encryption_key: encryption_key_1.clone() - }); + assert_eq!( + build_result.changes[0], + TableUpdate::AddEncryptionKey { + encryption_key: encryption_key_1.clone() + } + ); // Add second encryption key let build_result = build_result @@ -3359,9 +3419,12 @@ mod tests { build_result.metadata.encryption_key("key-2"), Some(&encryption_key_2) ); - assert_eq!(build_result.changes[0], TableUpdate::AddEncryptionKey { - encryption_key: encryption_key_2.clone() - }); + assert_eq!( + build_result.changes[0], + TableUpdate::AddEncryptionKey { + encryption_key: encryption_key_2.clone() + } + ); // Try to add duplicate key - should not create a change let build_result = build_result @@ -3393,9 +3456,12 @@ mod tests { build_result.metadata.encryption_key("key-2"), Some(&encryption_key_2) ); - assert_eq!(build_result.changes[0], TableUpdate::RemoveEncryptionKey { - key_id: "key-1".to_string() - }); + assert_eq!( + build_result.changes[0], + TableUpdate::RemoveEncryptionKey { + key_id: "key-1".to_string() + } + ); // Try to 
remove non-existent key - should not create a change let build_result = build_result @@ -3431,9 +3497,12 @@ mod tests { assert_eq!(build_result.changes.len(), 1); assert_eq!(build_result.metadata.encryption_keys.len(), 0); assert_eq!(build_result.metadata.encryption_key("key-2"), None); - assert_eq!(build_result.changes[0], TableUpdate::RemoveEncryptionKey { - key_id: "key-2".to_string() - }); + assert_eq!( + build_result.changes[0], + TableUpdate::RemoveEncryptionKey { + key_id: "key-2".to_string() + } + ); // Verify empty encryption_keys_iter() let keys = build_result.metadata.encryption_keys_iter(); diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 6068716eff..9b5f90824d 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -1039,14 +1039,18 @@ impl FromStr for Transform { impl Serialize for Transform { fn serialize(&self, serializer: S) -> std::result::Result - where S: Serializer { + where + S: Serializer, + { serializer.serialize_str(format!("{self}").as_str()) } } impl<'de> Deserialize<'de> for Transform { fn deserialize(deserializer: D) -> std::result::Result - where D: Deserializer<'de> { + where + D: Deserializer<'de>, + { let s = String::deserialize(deserializer)?; s.parse().map_err(::custom) } diff --git a/crates/iceberg/src/spec/values/datum.rs b/crates/iceberg/src/spec/values/datum.rs index cb60fb94e9..8ba110cc78 100644 --- a/crates/iceberg/src/spec/values/datum.rs +++ b/crates/iceberg/src/spec/values/datum.rs @@ -105,7 +105,9 @@ impl<'de> Deserialize<'de> for Datum { } fn visit_seq(self, mut seq: A) -> std::result::Result - where A: serde::de::SeqAccess<'de> { + where + A: serde::de::SeqAccess<'de>, + { let r#type = seq .next_element::()? .ok_or_else(|| serde::de::Error::invalid_length(0, &self))?; @@ -124,7 +126,9 @@ impl<'de> Deserialize<'de> for Datum { } fn visit_map(self, mut map: V) -> std::result::Result - where V: MapAccess<'de> { + where + V: MapAccess<'de>, + { let mut raw_primitive: Option = None; let mut r#type: Option = None; while let Some(key) = map.next_key()? 
{ diff --git a/crates/iceberg/src/spec/values/serde.rs b/crates/iceberg/src/spec/values/serde.rs index 053acca8b0..6ef34bc170 100644 --- a/crates/iceberg/src/spec/values/serde.rs +++ b/crates/iceberg/src/spec/values/serde.rs @@ -69,7 +69,9 @@ pub(crate) mod _serde { impl Serialize for Record { fn serialize(&self, serializer: S) -> Result - where S: serde::Serializer { + where + S: serde::Serializer, + { let len = self.required.len() + self.optional.len(); let mut record = serializer.serialize_struct("", len)?; for (k, v) in &self.required { @@ -90,7 +92,9 @@ pub(crate) mod _serde { impl Serialize for List { fn serialize(&self, serializer: S) -> Result - where S: serde::Serializer { + where + S: serde::Serializer, + { let mut seq = serializer.serialize_seq(Some(self.list.len()))?; for value in &self.list { if self.required { @@ -115,7 +119,9 @@ pub(crate) mod _serde { impl Serialize for StringMap { fn serialize(&self, serializer: S) -> Result - where S: serde::Serializer { + where + S: serde::Serializer, + { let mut map = serializer.serialize_map(Some(self.raw.len()))?; for (k, v) in &self.raw { if self.required { @@ -137,7 +143,9 @@ pub(crate) mod _serde { impl<'de> Deserialize<'de> for RawLiteralEnum { fn deserialize(deserializer: D) -> Result - where D: serde::Deserializer<'de> { + where + D: serde::Deserializer<'de>, + { struct RawLiteralVisitor; impl<'de> Visitor<'de> for RawLiteralVisitor { type Value = RawLiteralEnum; @@ -147,58 +155,80 @@ pub(crate) mod _serde { } fn visit_bool(self, v: bool) -> Result - where E: serde::de::Error { + where + E: serde::de::Error, + { Ok(RawLiteralEnum::Boolean(v)) } fn visit_i32(self, v: i32) -> Result - where E: serde::de::Error { + where + E: serde::de::Error, + { Ok(RawLiteralEnum::Int(v)) } fn visit_i64(self, v: i64) -> Result - where E: serde::de::Error { + where + E: serde::de::Error, + { Ok(RawLiteralEnum::Long(v)) } /// Used in json fn visit_u64(self, v: u64) -> Result - where E: serde::de::Error { + where + E: serde::de::Error, + { Ok(RawLiteralEnum::Long(v as i64)) } fn visit_f32(self, v: f32) -> Result - where E: serde::de::Error { + where + E: serde::de::Error, + { Ok(RawLiteralEnum::Float(v)) } fn visit_f64(self, v: f64) -> Result - where E: serde::de::Error { + where + E: serde::de::Error, + { Ok(RawLiteralEnum::Double(v)) } fn visit_str(self, v: &str) -> Result - where E: serde::de::Error { + where + E: serde::de::Error, + { Ok(RawLiteralEnum::String(v.to_string())) } fn visit_bytes(self, v: &[u8]) -> Result - where E: serde::de::Error { + where + E: serde::de::Error, + { Ok(RawLiteralEnum::Bytes(ByteBuf::from(v))) } fn visit_borrowed_str(self, v: &'de str) -> Result - where E: serde::de::Error { + where + E: serde::de::Error, + { Ok(RawLiteralEnum::String(v.to_string())) } fn visit_unit(self) -> Result - where E: serde::de::Error { + where + E: serde::de::Error, + { Ok(RawLiteralEnum::Null) } fn visit_map(self, mut map: A) -> Result - where A: serde::de::MapAccess<'de> { + where + A: serde::de::MapAccess<'de>, + { let mut required = Vec::new(); while let Some(key) = map.next_key::()? { let value = map.next_value::()?; @@ -211,7 +241,9 @@ pub(crate) mod _serde { } fn visit_seq(self, mut seq: A) -> Result - where A: serde::de::SeqAccess<'de> { + where + A: serde::de::SeqAccess<'de>, + { let mut list = Vec::new(); while let Some(value) = seq.next_element::()? 
{ list.push(Some(value)); diff --git a/crates/iceberg/src/spec/view_metadata.rs b/crates/iceberg/src/spec/view_metadata.rs index 3139c04081..432ee94774 100644 --- a/crates/iceberg/src/spec/view_metadata.rs +++ b/crates/iceberg/src/spec/view_metadata.rs @@ -276,7 +276,9 @@ pub(super) mod _serde { impl Serialize for ViewMetadata { fn serialize(&self, serializer: S) -> Result - where S: serde::Serializer { + where + S: serde::Serializer, + { // we must do a clone here let metadata_enum: ViewMetadataEnum = self.clone().try_into().map_err(serde::ser::Error::custom)?; diff --git a/crates/iceberg/src/spec/view_metadata_builder.rs b/crates/iceberg/src/spec/view_metadata_builder.rs index 9f542a7c61..e7eb7dbfbd 100644 --- a/crates/iceberg/src/spec/view_metadata_builder.rs +++ b/crates/iceberg/src/spec/view_metadata_builder.rs @@ -891,10 +891,10 @@ mod test { .unwrap() .metadata; - assert_eq!(metadata_v3.version_log[1..], vec![ - log_v2.clone(), - log_v3.clone() - ]); + assert_eq!( + metadata_v3.version_log[1..], + vec![log_v2.clone(), log_v3.clone()] + ); // Re-use Version 1, add a new log entry with a new timestamp let metadata_v4 = metadata_v3 @@ -930,9 +930,10 @@ mod test { .build() .unwrap(); assert_eq!(build_result.metadata.location, location); - assert_eq!(build_result.changes, vec![ViewUpdate::SetLocation { - location - }]); + assert_eq!( + build_result.changes, + vec![ViewUpdate::SetLocation { location }] + ); } #[test] @@ -954,14 +955,17 @@ mod test { Some(&"value1".to_string()) ); assert_eq!(build_result.metadata.properties.get("key2"), None); - assert_eq!(build_result.changes, vec![ - ViewUpdate::SetProperties { - updates: properties - }, - ViewUpdate::RemoveProperties { - removals: vec!["key2".to_string(), "key3".to_string()] - } - ]); + assert_eq!( + build_result.changes, + vec![ + ViewUpdate::SetProperties { + updates: properties + }, + ViewUpdate::RemoveProperties { + removals: vec!["key2".to_string(), "key3".to_string()] + } + ] + ); } #[test] @@ -974,18 +978,24 @@ mod test { .unwrap(); let build_result = builder.clone().add_schema(schema.clone()).build().unwrap(); assert_eq!(build_result.metadata.schemas.len(), 2); - assert_eq!(build_result.changes, vec![ViewUpdate::AddSchema { - schema: schema.clone().with_schema_id(2), - last_column_id: Some(0) - }]); + assert_eq!( + build_result.changes, + vec![ViewUpdate::AddSchema { + schema: schema.clone().with_schema_id(2), + last_column_id: Some(0) + }] + ); // Add schema again - id is reused let build_result = builder.clone().add_schema(schema.clone()).build().unwrap(); assert_eq!(build_result.metadata.schemas.len(), 2); - assert_eq!(build_result.changes, vec![ViewUpdate::AddSchema { - schema: schema.clone().with_schema_id(2), - last_column_id: Some(0) - }]); + assert_eq!( + build_result.changes, + vec![ViewUpdate::AddSchema { + schema: schema.clone().with_schema_id(2), + last_column_id: Some(0) + }] + ); } #[test] @@ -1024,21 +1034,24 @@ mod test { v2.clone().with_version_id(3).with_schema_id(2) ); assert_eq!(build_result.changes.len(), 4); - assert_eq!(build_result.changes, vec![ - ViewUpdate::AddViewVersion { - view_version: v1.clone().with_version_id(2).with_schema_id(1) - }, - ViewUpdate::AddSchema { - schema: v2_schema.clone().with_schema_id(2), - last_column_id: Some(0) - }, - ViewUpdate::AddViewVersion { - view_version: v2.clone().with_version_id(3).with_schema_id(-1) - }, - ViewUpdate::SetCurrentViewVersion { - view_version_id: -1 - } - ]); + assert_eq!( + build_result.changes, + vec![ + ViewUpdate::AddViewVersion { + view_version: 
v1.clone().with_version_id(2).with_schema_id(1) + }, + ViewUpdate::AddSchema { + schema: v2_schema.clone().with_schema_id(2), + last_column_id: Some(0) + }, + ViewUpdate::AddViewVersion { + view_version: v2.clone().with_version_id(3).with_schema_id(-1) + }, + ViewUpdate::SetCurrentViewVersion { + view_version_id: -1 + } + ] + ); assert_eq!( build_result .metadata @@ -1083,21 +1096,24 @@ mod test { v2.clone().with_version_id(3).with_schema_id(2) ); assert_eq!(build_result.changes.len(), 4); - assert_eq!(build_result.changes, vec![ - ViewUpdate::AddViewVersion { - view_version: v1.clone().with_version_id(2).with_schema_id(1) - }, - ViewUpdate::AddSchema { - schema: v2_schema.clone().with_schema_id(2), - last_column_id: Some(0) - }, - ViewUpdate::AddViewVersion { - view_version: v2.clone().with_version_id(3).with_schema_id(-1) - }, - ViewUpdate::SetCurrentViewVersion { - view_version_id: -1 - } - ]); + assert_eq!( + build_result.changes, + vec![ + ViewUpdate::AddViewVersion { + view_version: v1.clone().with_version_id(2).with_schema_id(1) + }, + ViewUpdate::AddSchema { + schema: v2_schema.clone().with_schema_id(2), + last_column_id: Some(0) + }, + ViewUpdate::AddViewVersion { + view_version: v2.clone().with_version_id(3).with_schema_id(-1) + }, + ViewUpdate::SetCurrentViewVersion { + view_version_id: -1 + } + ] + ); assert_eq!( build_result .metadata @@ -1306,14 +1322,17 @@ mod test { .changes; assert_eq!(changes.len(), 2); - assert_eq!(changes, vec![ - ViewUpdate::AddViewVersion { - view_version: v1.clone() - }, - ViewUpdate::AddViewVersion { - view_version: v2.clone() - } - ]); + assert_eq!( + changes, + vec![ + ViewUpdate::AddViewVersion { + view_version: v1.clone() + }, + ViewUpdate::AddViewVersion { + view_version: v2.clone() + } + ] + ); } #[test] diff --git a/crates/iceberg/src/spec/view_version.rs b/crates/iceberg/src/spec/view_version.rs index a02d58c4ef..af576b8bd4 100644 --- a/crates/iceberg/src/spec/view_version.rs +++ b/crates/iceberg/src/spec/view_version.rs @@ -334,9 +334,10 @@ mod tests { }, )]) ); - assert_eq!(result.default_namespace.inner(), vec![ - "default".to_string() - ]); + assert_eq!( + result.default_namespace.inner(), + vec!["default".to_string()] + ); } #[test] diff --git a/crates/iceberg/src/transaction/action.rs b/crates/iceberg/src/transaction/action.rs index aa0a05d0d9..289aa470de 100644 --- a/crates/iceberg/src/transaction/action.rs +++ b/crates/iceberg/src/transaction/action.rs @@ -68,7 +68,9 @@ pub trait ApplyTransactionAction { impl ApplyTransactionAction for T { fn apply(self, mut tx: Transaction) -> Result - where Self: Sized { + where + Self: Sized, + { tx.actions.push(Arc::new(self)); Ok(tx) } @@ -144,12 +146,18 @@ mod tests { let updates = action_commit.take_updates(); let requirements = action_commit.take_requirements(); - assert_eq!(updates[0], TableUpdate::SetLocation { - location: String::from("s3://bucket/prefix/table/") - }); - assert_eq!(requirements[0], TableRequirement::UuidMatch { - uuid: Uuid::from_str("9c12d441-03fe-4693-9a96-a0705ddf69c1").unwrap() - }); + assert_eq!( + updates[0], + TableUpdate::SetLocation { + location: String::from("s3://bucket/prefix/table/") + } + ); + assert_eq!( + requirements[0], + TableRequirement::UuidMatch { + uuid: Uuid::from_str("9c12d441-03fe-4693-9a96-a0705ddf69c1").unwrap() + } + ); } #[test] diff --git a/crates/iceberg/src/transaction/sort_order.rs b/crates/iceberg/src/transaction/sort_order.rs index dfa1328c09..887d9f926d 100644 --- a/crates/iceberg/src/transaction/sort_order.rs +++ 
b/crates/iceberg/src/transaction/sort_order.rs @@ -156,17 +156,20 @@ mod tests { .downcast_ref::() .unwrap(); - assert_eq!(replace_sort_order.pending_sort_fields, vec![ - PendingSortField { - name: String::from("x"), - direction: SortDirection::Ascending, - null_order: NullOrder::First, - }, - PendingSortField { - name: String::from("y"), - direction: SortDirection::Descending, - null_order: NullOrder::Last, - } - ]); + assert_eq!( + replace_sort_order.pending_sort_fields, + vec![ + PendingSortField { + name: String::from("x"), + direction: SortDirection::Ascending, + null_order: NullOrder::First, + }, + PendingSortField { + name: String::from("y"), + direction: SortDirection::Descending, + null_order: NullOrder::Last, + } + ] + ); } } diff --git a/crates/iceberg/src/transform/bucket.rs b/crates/iceberg/src/transform/bucket.rs index 8807fb1f79..e65021a6b0 100644 --- a/crates/iceberg/src/transform/bucket.rs +++ b/crates/iceberg/src/transform/bucket.rs @@ -373,18 +373,18 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::uuid(value), - Datum::uuid(another), - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::uuid(value), Datum::uuid(another)], + ), Some("name IN (4, 6)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::uuid(value), - Datum::uuid(another), - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::uuid(value), Datum::uuid(another)], + ), None, )?; @@ -440,18 +440,18 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::fixed(value.clone()), - Datum::fixed(another.clone()), - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::fixed(value.clone()), Datum::fixed(another.clone())], + ), Some("name IN (4, 6)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::fixed(value.clone()), - Datum::fixed(another.clone()), - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::fixed(value.clone()), Datum::fixed(another.clone())], + ), None, )?; @@ -500,18 +500,18 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::string(value), - Datum::string(another), - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::string(value), Datum::string(another)], + ), Some("name IN (9, 4)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::string(value), - Datum::string(another), - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::string(value), Datum::string(another)], + ), None, )?; @@ -577,19 +577,25 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::decimal_from_str(next)?, - Datum::decimal_from_str(curr)?, - Datum::decimal_from_str(prev)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::decimal_from_str(next)?, + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(prev)?, + ], + ), Some("name IN (2, 6)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::decimal_from_str(curr)?, - Datum::decimal_from_str(next)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), None, )?; @@ -636,19 +642,22 @@ mod test { )?; fixture.assert_projection( - 
&fixture.set_predicate(PredicateOperator::In, vec![ - Datum::long(value - 1), - Datum::long(value), - Datum::long(value + 1), - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::long(value - 1), + Datum::long(value), + Datum::long(value + 1), + ], + ), Some("name IN (8, 7, 6)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::long(value), - Datum::long(value + 1), - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::long(value), Datum::long(value + 1)], + ), None, )?; @@ -696,19 +705,22 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::int(value - 1), - Datum::int(value), - Datum::int(value + 1), - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::int(value - 1), + Datum::int(value), + Datum::int(value + 1), + ], + ), Some("name IN (8, 7, 6)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::int(value), - Datum::int(value + 1), - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::int(value), Datum::int(value + 1)], + ), None, )?; diff --git a/crates/iceberg/src/transform/temporal.rs b/crates/iceberg/src/transform/temporal.rs index d0a0da249b..a3dba2a4a6 100644 --- a/crates/iceberg/src/transform/temporal.rs +++ b/crates/iceberg/src/transform/temporal.rs @@ -657,18 +657,24 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), Some("name IN (420034, 412007)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), None, )?; @@ -731,18 +737,24 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), Some("name IN (411288, 420034)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), None, )?; @@ -803,18 +815,24 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), Some("name IN (47, 46)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), None, )?; @@ -875,18 +893,24 @@ mod 
test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), Some("name IN (47, 46)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), None, )?; @@ -947,18 +971,24 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), Some("name IN (0, -1)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), None, )?; @@ -1019,18 +1049,24 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), Some("name IN (575, 574)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), None, )?; Ok(()) @@ -1090,18 +1126,24 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), Some("name IN (-10, -9, -12, -11)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), None, )?; @@ -1162,18 +1204,24 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), Some("name IN (575)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), None, )?; @@ -1236,18 +1284,24 @@ mod test { )?; 
fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), Some("name IN (1970-01-01, 1969-12-31)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), None, )?; @@ -1310,18 +1364,24 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), Some("name IN (2017-12-02, 2017-12-01)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), None, )?; @@ -1384,18 +1444,24 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), Some("name IN (1969-01-02, 1969-01-01, 1969-01-03)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), None, )?; @@ -1458,18 +1524,24 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), Some("name IN (2017-12-02, 2017-12-01)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), None, )?; @@ -1532,18 +1604,24 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), Some("name IN (1970-01-01, 1970-01-02)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::timestamp_from_str(value)?, - Datum::timestamp_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::timestamp_from_str(value)?, + Datum::timestamp_from_str(another)?, + ], + ), None, 
)?; @@ -1600,18 +1678,18 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), Some("name IN (1969-12-28, 1969-12-30)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), None, )?; @@ -1668,18 +1746,18 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), Some("name IN (2017-01-01, 2017-12-31)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), None, )?; @@ -1736,18 +1814,18 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), Some("name IN (-1, -12, -11, 0)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), None, )?; @@ -1804,18 +1882,18 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), Some("name IN (575, 564)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), None, )?; @@ -1872,18 +1950,18 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), Some("name IN (-1, -12, -11, 0)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), None, )?; @@ -1940,18 +2018,18 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, 
Datum::date_from_str(another)?], + ), Some("name IN (575, 564)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), None, )?; @@ -2008,18 +2086,18 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), Some("name IN (0, -1)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), None, )?; @@ -2075,18 +2153,18 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), Some("name IN (0, -1)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), None, )?; @@ -2143,18 +2221,18 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), Some("name IN (47, 46)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), None, )?; @@ -2211,18 +2289,18 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), Some("name IN (0, -1)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), None, )?; @@ -2279,18 +2357,18 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::date_from_str(value)?, Datum::date_from_str(another)?], + ), Some("name IN (47, 46)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::date_from_str(value)?, - Datum::date_from_str(another)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::date_from_str(value)?, 
Datum::date_from_str(another)?], + ), None, )?; diff --git a/crates/iceberg/src/transform/truncate.rs b/crates/iceberg/src/transform/truncate.rs index 84ef7c0da3..1bbe7f58b9 100644 --- a/crates/iceberg/src/transform/truncate.rs +++ b/crates/iceberg/src/transform/truncate.rs @@ -320,18 +320,18 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::string(value), - Datum::string(format!("{value}abc")), - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![Datum::string(value), Datum::string(format!("{value}abc"))], + ), Some(r#"name IN ("abcde")"#), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::string(value), - Datum::string(format!("{value}abc")), - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::string(value), Datum::string(format!("{value}abc"))], + ), None, )?; @@ -389,19 +389,25 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::decimal_from_str(prev)?, - Datum::decimal_from_str(curr)?, - Datum::decimal_from_str(next)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::decimal_from_str(prev)?, + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), Some("name IN (9890, 9990, 10090)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::decimal_from_str(curr)?, - Datum::decimal_from_str(next)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), None, )?; @@ -459,19 +465,25 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::decimal_from_str(prev)?, - Datum::decimal_from_str(curr)?, - Datum::decimal_from_str(next)?, - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::decimal_from_str(prev)?, + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), Some("name IN (10000, 10100, 9900)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::decimal_from_str(curr)?, - Datum::decimal_from_str(next)?, - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![ + Datum::decimal_from_str(curr)?, + Datum::decimal_from_str(next)?, + ], + ), None, )?; @@ -514,19 +526,22 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::long(value - 1), - Datum::long(value), - Datum::long(value + 1), - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::long(value - 1), + Datum::long(value), + Datum::long(value + 1), + ], + ), Some("name IN (100, 90)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::long(value), - Datum::long(value + 1), - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::long(value), Datum::long(value + 1)], + ), None, )?; @@ -569,19 +584,22 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::long(value - 1), - Datum::long(value), - Datum::long(value + 1), - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::long(value - 1), + Datum::long(value), + Datum::long(value + 1), + ], + ), Some("name IN (100, 90)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::long(value), - Datum::long(value + 1), - ]), + 
&fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::long(value), Datum::long(value + 1)], + ), None, )?; @@ -624,19 +642,22 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::int(value - 1), - Datum::int(value), - Datum::int(value + 1), - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::int(value - 1), + Datum::int(value), + Datum::int(value + 1), + ], + ), Some("name IN (100, 90)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::int(value), - Datum::int(value + 1), - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::int(value), Datum::int(value + 1)], + ), None, )?; @@ -679,19 +700,22 @@ mod test { )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::In, vec![ - Datum::int(value - 1), - Datum::int(value), - Datum::int(value + 1), - ]), + &fixture.set_predicate( + PredicateOperator::In, + vec![ + Datum::int(value - 1), + Datum::int(value), + Datum::int(value + 1), + ], + ), Some("name IN (100, 90)"), )?; fixture.assert_projection( - &fixture.set_predicate(PredicateOperator::NotIn, vec![ - Datum::int(value), - Datum::int(value + 1), - ]), + &fixture.set_predicate( + PredicateOperator::NotIn, + vec![Datum::int(value), Datum::int(value + 1)], + ), None, )?; diff --git a/crates/iceberg/src/writer/base_writer/data_file_writer.rs b/crates/iceberg/src/writer/base_writer/data_file_writer.rs index dcaa56cc97..7648f79bc5 100644 --- a/crates/iceberg/src/writer/base_writer/data_file_writer.rs +++ b/crates/iceberg/src/writer/base_writer/data_file_writer.rs @@ -202,10 +202,13 @@ mod test { 4.to_string(), )])), ]); - let batch = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![ - Arc::new(Int32Array::from(vec![1, 2, 3])), - Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), - ])?; + let batch = RecordBatch::try_new( + Arc::new(arrow_schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), + ], + )?; data_file_writer.write(batch).await?; let data_files = data_file_writer.close().await.unwrap(); @@ -287,10 +290,13 @@ mod test { 6.to_string(), )])), ]); - let batch = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![ - Arc::new(Int32Array::from(vec![1, 2, 3])), - Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), - ])?; + let batch = RecordBatch::try_new( + Arc::new(arrow_schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), + ], + )?; data_file_writer.write(batch).await?; let data_files = data_file_writer.close().await.unwrap(); diff --git a/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs b/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs index 664ea84334..454bae2ad5 100644 --- a/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs +++ b/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs @@ -683,9 +683,13 @@ mod test { Some(b""), Some(b"zzzz"), ])) as ArrayRef; - let to_write = RecordBatch::try_new(delete_arrow_schema.clone(), vec![ - col0, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, - ]) + let to_write = RecordBatch::try_new( + delete_arrow_schema.clone(), + vec![ + col0, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, + col13, + ], + ) .unwrap(); equality_delete_writer.write(to_write.clone()).await?; let res = 
equality_delete_writer.close().await?; @@ -794,13 +798,15 @@ mod test { // check let to_write_projected = projector.project_batch(to_write)?; - let expect_batch = - RecordBatch::try_new(equality_config.projected_arrow_schema_ref().clone(), vec![ + let expect_batch = RecordBatch::try_new( + equality_config.projected_arrow_schema_ref().clone(), + vec![ Arc::new(Int32Array::from(vec![None, Some(2), Some(3)])) as ArrayRef, Arc::new(Int32Array::from(vec![Some(1), None, None])) as ArrayRef, Arc::new(Int32Array::from(vec![None, None, None])) as ArrayRef, - ]) - .unwrap(); + ], + ) + .unwrap(); assert_eq!(to_write_projected, expect_batch); Ok(()) } diff --git a/crates/iceberg/src/writer/file_writer/parquet_writer.rs b/crates/iceberg/src/writer/file_writer/parquet_writer.rs index 3e9d1715c9..c7cd4994bb 100644 --- a/crates/iceberg/src/writer/file_writer/parquet_writer.rs +++ b/crates/iceberg/src/writer/file_writer/parquet_writer.rs @@ -1042,9 +1042,10 @@ mod tests { ordered, ) }) as ArrayRef; - let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![ - col0, col1, col2, col3, col4, col5, - ]) + let to_write = RecordBatch::try_new( + arrow_schema.clone(), + vec![col0, col1, col2, col3, col4, col5], + ) .unwrap(); let output_file = file_io.new_output( location_gen.generate_location(None, &file_name_gen.generate_file_name()), @@ -1231,10 +1232,13 @@ mod tests { .with_precision_and_scale(38, 5) .unwrap(), ) as ArrayRef; - let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![ - col0, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, - col14, col15, col16, - ]) + let to_write = RecordBatch::try_new( + arrow_schema.clone(), + vec![ + col0, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, + col13, col14, col15, col16, + ], + ) .unwrap(); let output_file = file_io.new_output( location_gen.generate_location(None, &file_name_gen.generate_file_name()), @@ -1820,10 +1824,10 @@ mod tests { None, )) as ArrayRef; - let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![ - struct_float_field_col, - struct_nested_float_field_col, - ]) + let to_write = RecordBatch::try_new( + arrow_schema.clone(), + vec![struct_float_field_col, struct_nested_float_field_col], + ) .unwrap(); let output_file = file_io.new_output( location_gen.generate_location(None, &file_name_gen.generate_file_name()), @@ -1978,11 +1982,14 @@ mod tests { None, )) as ArrayRef; - let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![ - list_float_field_col, - struct_list_float_field_col, - // large_list_float_field_col, - ]) + let to_write = RecordBatch::try_new( + arrow_schema.clone(), + vec![ + list_float_field_col, + struct_list_float_field_col, + // large_list_float_field_col, + ], + ) .expect("Could not form record batch"); let output_file = file_io.new_output( location_gen.generate_location(None, &file_name_gen.generate_file_name()), @@ -2160,10 +2167,10 @@ mod tests { None, )) as ArrayRef; - let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![ - map_array, - struct_list_float_field_col, - ]) + let to_write = RecordBatch::try_new( + arrow_schema.clone(), + vec![map_array, struct_list_float_field_col], + ) .expect("Could not form record batch"); let output_file = file_io.new_output( location_gen.generate_location(None, &file_name_gen.generate_file_name()), diff --git a/crates/iceberg/src/writer/file_writer/rolling_writer.rs b/crates/iceberg/src/writer/file_writer/rolling_writer.rs index 8f03654786..96a0047856 100644 --- 
a/crates/iceberg/src/writer/file_writer/rolling_writer.rs +++ b/crates/iceberg/src/writer/file_writer/rolling_writer.rs @@ -337,10 +337,13 @@ mod tests { // Create test data let arrow_schema = make_test_arrow_schema(); - let batch = RecordBatch::try_new(Arc::new(arrow_schema), vec![ - Arc::new(Int32Array::from(vec![1, 2, 3])), - Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), - ])?; + let batch = RecordBatch::try_new( + Arc::new(arrow_schema), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), + ], + )?; // Write data writer.write(batch.clone()).await?; diff --git a/crates/iceberg/src/writer/partitioning/clustered_writer.rs b/crates/iceberg/src/writer/partitioning/clustered_writer.rs index 3587723965..6fd28fc0f7 100644 --- a/crates/iceberg/src/writer/partitioning/clustered_writer.rs +++ b/crates/iceberg/src/writer/partitioning/clustered_writer.rs @@ -226,17 +226,23 @@ mod tests { )])), ]); - let batch1 = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![ - Arc::new(Int32Array::from(vec![1, 2])), - Arc::new(StringArray::from(vec!["Alice", "Bob"])), - Arc::new(StringArray::from(vec!["US", "US"])), - ])?; - - let batch2 = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![ - Arc::new(Int32Array::from(vec![3, 4])), - Arc::new(StringArray::from(vec!["Charlie", "Dave"])), - Arc::new(StringArray::from(vec!["US", "US"])), - ])?; + let batch1 = RecordBatch::try_new( + Arc::new(arrow_schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![1, 2])), + Arc::new(StringArray::from(vec!["Alice", "Bob"])), + Arc::new(StringArray::from(vec!["US", "US"])), + ], + )?; + + let batch2 = RecordBatch::try_new( + Arc::new(arrow_schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![3, 4])), + Arc::new(StringArray::from(vec!["Charlie", "Dave"])), + Arc::new(StringArray::from(vec!["US", "US"])), + ], + )?; // Write data to the same partition (this should work) writer.write(partition_key.clone(), batch1).await?; @@ -345,23 +351,32 @@ mod tests { ]); // Create batches for different partitions (in sorted order) - let batch_asia = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![ - Arc::new(Int32Array::from(vec![1, 2])), - Arc::new(StringArray::from(vec!["Alice", "Bob"])), - Arc::new(StringArray::from(vec!["ASIA", "ASIA"])), - ])?; - - let batch_eu = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![ - Arc::new(Int32Array::from(vec![3, 4])), - Arc::new(StringArray::from(vec!["Charlie", "Dave"])), - Arc::new(StringArray::from(vec!["EU", "EU"])), - ])?; - - let batch_us = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![ - Arc::new(Int32Array::from(vec![5, 6])), - Arc::new(StringArray::from(vec!["Eve", "Frank"])), - Arc::new(StringArray::from(vec!["US", "US"])), - ])?; + let batch_asia = RecordBatch::try_new( + Arc::new(arrow_schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![1, 2])), + Arc::new(StringArray::from(vec!["Alice", "Bob"])), + Arc::new(StringArray::from(vec!["ASIA", "ASIA"])), + ], + )?; + + let batch_eu = RecordBatch::try_new( + Arc::new(arrow_schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![3, 4])), + Arc::new(StringArray::from(vec!["Charlie", "Dave"])), + Arc::new(StringArray::from(vec!["EU", "EU"])), + ], + )?; + + let batch_us = RecordBatch::try_new( + Arc::new(arrow_schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![5, 6])), + Arc::new(StringArray::from(vec!["Eve", "Frank"])), + Arc::new(StringArray::from(vec!["US", "US"])), + ], + )?; // Write data in 
sorted partition order (this should work) writer.write(partition_key_asia.clone(), batch_asia).await?; @@ -478,23 +493,32 @@ mod tests { ]); // Create batches for different partitions - let batch_us = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![ - Arc::new(Int32Array::from(vec![1, 2])), - Arc::new(StringArray::from(vec!["Alice", "Bob"])), - Arc::new(StringArray::from(vec!["US", "US"])), - ])?; - - let batch_eu = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![ - Arc::new(Int32Array::from(vec![3, 4])), - Arc::new(StringArray::from(vec!["Charlie", "Dave"])), - Arc::new(StringArray::from(vec!["EU", "EU"])), - ])?; - - let batch_us2 = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![ - Arc::new(Int32Array::from(vec![5])), - Arc::new(StringArray::from(vec!["Eve"])), - Arc::new(StringArray::from(vec!["US"])), - ])?; + let batch_us = RecordBatch::try_new( + Arc::new(arrow_schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![1, 2])), + Arc::new(StringArray::from(vec!["Alice", "Bob"])), + Arc::new(StringArray::from(vec!["US", "US"])), + ], + )?; + + let batch_eu = RecordBatch::try_new( + Arc::new(arrow_schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![3, 4])), + Arc::new(StringArray::from(vec!["Charlie", "Dave"])), + Arc::new(StringArray::from(vec!["EU", "EU"])), + ], + )?; + + let batch_us2 = RecordBatch::try_new( + Arc::new(arrow_schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![5])), + Arc::new(StringArray::from(vec!["Eve"])), + Arc::new(StringArray::from(vec!["US"])), + ], + )?; // Write data to US partition first writer.write(partition_key_us.clone(), batch_us).await?; diff --git a/crates/iceberg/src/writer/partitioning/fanout_writer.rs b/crates/iceberg/src/writer/partitioning/fanout_writer.rs index 796c1a4888..89f5487de0 100644 --- a/crates/iceberg/src/writer/partitioning/fanout_writer.rs +++ b/crates/iceberg/src/writer/partitioning/fanout_writer.rs @@ -202,17 +202,23 @@ mod tests { )])), ]); - let batch1 = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![ - Arc::new(Int32Array::from(vec![1, 2])), - Arc::new(StringArray::from(vec!["Alice", "Bob"])), - Arc::new(StringArray::from(vec!["US", "US"])), - ])?; - - let batch2 = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![ - Arc::new(Int32Array::from(vec![3, 4])), - Arc::new(StringArray::from(vec!["Charlie", "Dave"])), - Arc::new(StringArray::from(vec!["US", "US"])), - ])?; + let batch1 = RecordBatch::try_new( + Arc::new(arrow_schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![1, 2])), + Arc::new(StringArray::from(vec!["Alice", "Bob"])), + Arc::new(StringArray::from(vec!["US", "US"])), + ], + )?; + + let batch2 = RecordBatch::try_new( + Arc::new(arrow_schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![3, 4])), + Arc::new(StringArray::from(vec!["Charlie", "Dave"])), + Arc::new(StringArray::from(vec!["US", "US"])), + ], + )?; // Write data to the same partition writer.write(partition_key.clone(), batch1).await?; @@ -318,29 +324,41 @@ mod tests { ]); // Create batches for different partitions - let batch_us1 = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![ - Arc::new(Int32Array::from(vec![1, 2])), - Arc::new(StringArray::from(vec!["Alice", "Bob"])), - Arc::new(StringArray::from(vec!["US", "US"])), - ])?; - - let batch_eu1 = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![ - Arc::new(Int32Array::from(vec![3, 4])), - Arc::new(StringArray::from(vec!["Charlie", "Dave"])), - Arc::new(StringArray::from(vec!["EU", "EU"])), - ])?; - - let batch_us2 = 
RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![ - Arc::new(Int32Array::from(vec![5])), - Arc::new(StringArray::from(vec!["Eve"])), - Arc::new(StringArray::from(vec!["US"])), - ])?; - - let batch_asia1 = RecordBatch::try_new(Arc::new(arrow_schema.clone()), vec![ - Arc::new(Int32Array::from(vec![6, 7])), - Arc::new(StringArray::from(vec!["Frank", "Grace"])), - Arc::new(StringArray::from(vec!["ASIA", "ASIA"])), - ])?; + let batch_us1 = RecordBatch::try_new( + Arc::new(arrow_schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![1, 2])), + Arc::new(StringArray::from(vec!["Alice", "Bob"])), + Arc::new(StringArray::from(vec!["US", "US"])), + ], + )?; + + let batch_eu1 = RecordBatch::try_new( + Arc::new(arrow_schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![3, 4])), + Arc::new(StringArray::from(vec!["Charlie", "Dave"])), + Arc::new(StringArray::from(vec!["EU", "EU"])), + ], + )?; + + let batch_us2 = RecordBatch::try_new( + Arc::new(arrow_schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![5])), + Arc::new(StringArray::from(vec!["Eve"])), + Arc::new(StringArray::from(vec!["US"])), + ], + )?; + + let batch_asia1 = RecordBatch::try_new( + Arc::new(arrow_schema.clone()), + vec![ + Arc::new(Int32Array::from(vec![6, 7])), + Arc::new(StringArray::from(vec!["Frank", "Grace"])), + Arc::new(StringArray::from(vec!["ASIA", "ASIA"])), + ], + )?; // Write data in mixed partition order to demonstrate fanout capability // This is the key difference from ClusteredWriter - we can write to any partition at any time diff --git a/crates/iceberg/src/writer/partitioning/unpartitioned_writer.rs b/crates/iceberg/src/writer/partitioning/unpartitioned_writer.rs index 0fb9cba3f1..48c8c73a71 100644 --- a/crates/iceberg/src/writer/partitioning/unpartitioned_writer.rs +++ b/crates/iceberg/src/writer/partitioning/unpartitioned_writer.rs @@ -171,14 +171,20 @@ mod tests { let mut writer = UnpartitionedWriter::new(writer_builder); // Write two batches - let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![ - Arc::new(Int32Array::from(vec![1, 2])), - Arc::new(StringArray::from(vec!["Alice", "Bob"])), - ])?; - let batch2 = RecordBatch::try_new(arrow_schema, vec![ - Arc::new(Int32Array::from(vec![3, 4])), - Arc::new(StringArray::from(vec!["Charlie", "Dave"])), - ])?; + let batch1 = RecordBatch::try_new( + arrow_schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 2])), + Arc::new(StringArray::from(vec!["Alice", "Bob"])), + ], + )?; + let batch2 = RecordBatch::try_new( + arrow_schema, + vec![ + Arc::new(Int32Array::from(vec![3, 4])), + Arc::new(StringArray::from(vec!["Charlie", "Dave"])), + ], + )?; writer.write(batch1).await?; writer.write(batch2).await?; diff --git a/crates/integration_tests/tests/shared_tests/append_data_file_test.rs b/crates/integration_tests/tests/shared_tests/append_data_file_test.rs index bedc975102..0575b54a2c 100644 --- a/crates/integration_tests/tests/shared_tests/append_data_file_test.rs +++ b/crates/integration_tests/tests/shared_tests/append_data_file_test.rs @@ -87,11 +87,14 @@ async fn test_append_data_file() { let col1 = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]); let col2 = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4)]); let col3 = BooleanArray::from(vec![Some(true), Some(false), None, Some(false)]); - let batch = RecordBatch::try_new(schema.clone(), vec![ - Arc::new(col1) as ArrayRef, - Arc::new(col2) as ArrayRef, - Arc::new(col3) as ArrayRef, - ]) + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + 
Arc::new(col1) as ArrayRef, + Arc::new(col2) as ArrayRef, + Arc::new(col3) as ArrayRef, + ], + ) .unwrap(); data_file_writer.write(batch.clone()).await.unwrap(); let data_file = data_file_writer.close().await.unwrap(); diff --git a/crates/integration_tests/tests/shared_tests/append_partition_data_file_test.rs b/crates/integration_tests/tests/shared_tests/append_partition_data_file_test.rs index a305ec0842..121e8eb50a 100644 --- a/crates/integration_tests/tests/shared_tests/append_partition_data_file_test.rs +++ b/crates/integration_tests/tests/shared_tests/append_partition_data_file_test.rs @@ -118,11 +118,14 @@ async fn test_append_partition_data_file() { Some(first_partition_id_value), ]); let col3 = BooleanArray::from(vec![Some(true), Some(false)]); - let batch = RecordBatch::try_new(schema.clone(), vec![ - Arc::new(col1) as ArrayRef, - Arc::new(col2) as ArrayRef, - Arc::new(col3) as ArrayRef, - ]) + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(col1) as ArrayRef, + Arc::new(col2) as ArrayRef, + Arc::new(col3) as ArrayRef, + ], + ) .unwrap(); data_file_writer_valid.write(batch.clone()).await.unwrap(); diff --git a/crates/integration_tests/tests/shared_tests/conflict_commit_test.rs b/crates/integration_tests/tests/shared_tests/conflict_commit_test.rs index fc529cc3d2..cc6193882a 100644 --- a/crates/integration_tests/tests/shared_tests/conflict_commit_test.rs +++ b/crates/integration_tests/tests/shared_tests/conflict_commit_test.rs @@ -86,11 +86,14 @@ async fn test_append_data_file_conflict() { let col1 = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]); let col2 = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4)]); let col3 = BooleanArray::from(vec![Some(true), Some(false), None, Some(false)]); - let batch = RecordBatch::try_new(schema.clone(), vec![ - Arc::new(col1) as ArrayRef, - Arc::new(col2) as ArrayRef, - Arc::new(col3) as ArrayRef, - ]) + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(col1) as ArrayRef, + Arc::new(col2) as ArrayRef, + Arc::new(col3) as ArrayRef, + ], + ) .unwrap(); data_file_writer.write(batch.clone()).await.unwrap(); let data_file = data_file_writer.close().await.unwrap(); diff --git a/crates/integration_tests/tests/shared_tests/scan_all_type.rs b/crates/integration_tests/tests/shared_tests/scan_all_type.rs index 7a2907d4cb..3f66cee579 100644 --- a/crates/integration_tests/tests/shared_tests/scan_all_type.rs +++ b/crates/integration_tests/tests/shared_tests/scan_all_type.rs @@ -291,25 +291,28 @@ async fn test_scan_all_type() { MapArray::new(field, offsets, entries, nulls, ordered) }; - let batch = RecordBatch::try_new(schema.clone(), vec![ - Arc::new(col1) as ArrayRef, - Arc::new(col2) as ArrayRef, - Arc::new(col3) as ArrayRef, - Arc::new(col4) as ArrayRef, - Arc::new(col5) as ArrayRef, - Arc::new(col6) as ArrayRef, - Arc::new(col7) as ArrayRef, - Arc::new(col8) as ArrayRef, - Arc::new(col9) as ArrayRef, - Arc::new(col10) as ArrayRef, - Arc::new(col11) as ArrayRef, - Arc::new(col12) as ArrayRef, - Arc::new(col13) as ArrayRef, - Arc::new(col14) as ArrayRef, - Arc::new(col15) as ArrayRef, - Arc::new(col16) as ArrayRef, - Arc::new(col17) as ArrayRef, - ]) + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(col1) as ArrayRef, + Arc::new(col2) as ArrayRef, + Arc::new(col3) as ArrayRef, + Arc::new(col4) as ArrayRef, + Arc::new(col5) as ArrayRef, + Arc::new(col6) as ArrayRef, + Arc::new(col7) as ArrayRef, + Arc::new(col8) as ArrayRef, + Arc::new(col9) as ArrayRef, + 
Arc::new(col10) as ArrayRef, + Arc::new(col11) as ArrayRef, + Arc::new(col12) as ArrayRef, + Arc::new(col13) as ArrayRef, + Arc::new(col14) as ArrayRef, + Arc::new(col15) as ArrayRef, + Arc::new(col16) as ArrayRef, + Arc::new(col17) as ArrayRef, + ], + ) .unwrap(); data_file_writer.write(batch.clone()).await.unwrap(); let data_file = data_file_writer.close().await.unwrap(); diff --git a/crates/integrations/datafusion/src/physical_plan/commit.rs b/crates/integrations/datafusion/src/physical_plan/commit.rs index f876908ae6..9d07e68697 100644 --- a/crates/integrations/datafusion/src/physical_plan/commit.rs +++ b/crates/integrations/datafusion/src/physical_plan/commit.rs @@ -571,10 +571,13 @@ mod tests { let batches: Vec = (1..4) .map(|idx| { - RecordBatch::try_new(arrow_schema.clone(), vec![ - Arc::new(Int32Array::from(vec![idx])) as ArrayRef, - Arc::new(StringArray::from(vec![format!("Name{idx}")])) as ArrayRef, - ]) + RecordBatch::try_new( + arrow_schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![idx])) as ArrayRef, + Arc::new(StringArray::from(vec![format!("Name{idx}")])) as ArrayRef, + ], + ) }) .collect::>()?; diff --git a/crates/integrations/datafusion/src/physical_plan/project.rs b/crates/integrations/datafusion/src/physical_plan/project.rs index 17492176a4..5e96a0e1f2 100644 --- a/crates/integrations/datafusion/src/physical_plan/project.rs +++ b/crates/integrations/datafusion/src/physical_plan/project.rs @@ -271,12 +271,15 @@ mod tests { Field::new("data", DataType::Utf8, false), ])); - let batch = RecordBatch::try_new(arrow_schema.clone(), vec![ - Arc::new(Int32Array::from(vec![10, 20, 30])), - Arc::new(datafusion::arrow::array::StringArray::from(vec![ - "a", "b", "c", - ])), - ]) + let batch = RecordBatch::try_new( + arrow_schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![10, 20, 30])), + Arc::new(datafusion::arrow::array::StringArray::from(vec![ + "a", "b", "c", + ])), + ], + ) .unwrap(); let partition_spec = Arc::new(partition_spec); @@ -357,10 +360,13 @@ mod tests { ), ]); - let batch = RecordBatch::try_new(arrow_schema.clone(), vec![ - Arc::new(Int32Array::from(vec![1, 2])), - Arc::new(struct_array), - ]) + let batch = RecordBatch::try_new( + arrow_schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 2])), + Arc::new(struct_array), + ], + ) .unwrap(); let calculator = PartitionValueCalculator::try_new(&partition_spec, &table_schema).unwrap(); diff --git a/crates/integrations/datafusion/src/physical_plan/write.rs b/crates/integrations/datafusion/src/physical_plan/write.rs index 9eb53c235f..eed33b3eb3 100644 --- a/crates/integrations/datafusion/src/physical_plan/write.rs +++ b/crates/integrations/datafusion/src/physical_plan/write.rs @@ -515,9 +515,10 @@ mod tests { })?; // 3. Create mock input execution plan - let input_plan = Arc::new(MockExecutionPlan::new(arrow_schema.clone(), vec![ - batch.clone(), - ])); + let input_plan = Arc::new(MockExecutionPlan::new( + arrow_schema.clone(), + vec![batch.clone()], + )); // 4. 
Create IcebergWriteExec let write_exec = IcebergWriteExec::new(table.clone(), input_plan, arrow_schema); diff --git a/crates/integrations/datafusion/src/task_writer.rs b/crates/integrations/datafusion/src/task_writer.rs index 5329f26458..7b375c3705 100644 --- a/crates/integrations/datafusion/src/task_writer.rs +++ b/crates/integrations/datafusion/src/task_writer.rs @@ -379,11 +379,14 @@ mod tests { let mut task_writer = TaskWriter::try_new(writer_builder, false, schema, partition_spec)?; // Write data - let batch = RecordBatch::try_new(arrow_schema, vec![ - Arc::new(Int32Array::from(vec![1, 2, 3])), - Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), - Arc::new(StringArray::from(vec!["US", "EU", "US"])), - ])?; + let batch = RecordBatch::try_new( + arrow_schema, + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), + Arc::new(StringArray::from(vec!["US", "EU", "US"])), + ], + )?; task_writer.write(batch).await?; let data_files = task_writer.close().await?; @@ -454,12 +457,15 @@ mod tests { Arc::new(partition_values) as ArrayRef, )]); - let batch = RecordBatch::try_new(arrow_schema, vec![ - Arc::new(Int32Array::from(vec![1, 2, 3, 4])), - Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie", "Dave"])), - Arc::new(StringArray::from(vec!["US", "EU", "US", "EU"])), - Arc::new(partition_struct), - ])?; + let batch = RecordBatch::try_new( + arrow_schema, + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4])), + Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie", "Dave"])), + Arc::new(StringArray::from(vec!["US", "EU", "US", "EU"])), + Arc::new(partition_struct), + ], + )?; task_writer.write(batch).await?; let data_files = task_writer.close().await?; @@ -498,12 +504,15 @@ mod tests { )]); // ClusteredWriter expects data to be pre-sorted by partition - let batch = RecordBatch::try_new(arrow_schema, vec![ - Arc::new(Int32Array::from(vec![1, 2, 3, 4])), - Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie", "Dave"])), - Arc::new(StringArray::from(vec!["ASIA", "ASIA", "EU", "EU"])), - Arc::new(partition_struct), - ])?; + let batch = RecordBatch::try_new( + arrow_schema, + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4])), + Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie", "Dave"])), + Arc::new(StringArray::from(vec!["ASIA", "ASIA", "EU", "EU"])), + Arc::new(partition_struct), + ], + )?; task_writer.write(batch).await?; let data_files = task_writer.close().await?; diff --git a/crates/sqllogictest/Cargo.toml b/crates/sqllogictest/Cargo.toml index e826ad7ae0..0c3280ee70 100644 --- a/crates/sqllogictest/Cargo.toml +++ b/crates/sqllogictest/Cargo.toml @@ -32,6 +32,7 @@ datafusion-sqllogictest = { workspace = true } enum-ordinalize = { workspace = true } env_logger = { workspace = true } iceberg = { workspace = true } +iceberg-catalog-loader = { workspace = true } iceberg-datafusion = { workspace = true } indicatif = { workspace = true } log = { workspace = true } diff --git a/crates/sqllogictest/README.md b/crates/sqllogictest/README.md index ddcfe851c5..5c47b70dff 100644 --- a/crates/sqllogictest/README.md +++ b/crates/sqllogictest/README.md @@ -32,4 +32,68 @@ cargo test The tests are run against the following sql engines: * [Apache datafusion](https://crates.io/crates/datafusion) -* [Apache spark](https://github.com/apache/spark) \ No newline at end of file +* [Apache spark](https://github.com/apache/spark) + +## Catalog Configuration + +This crate now supports dynamic catalog configuration in test 
schedules. You can configure different types of catalogs (memory, rest, glue, sql, etc.) and share them across multiple engines. + +### Configuration Format + +```toml +# Define catalogs +[catalogs.memory_catalog] +type = "memory" +warehouse = "memory://warehouse" + +[catalogs.rest_catalog] +type = "rest" +uri = "http://localhost:8181" +warehouse = "s3://my-bucket/warehouse" +credential = "client_credentials" +token = "xxx" + +[catalogs.sql_catalog] +type = "sql" +uri = "postgresql://user:pass@localhost/iceberg" +warehouse = "s3://my-bucket/warehouse" +sql_bind_style = "DollarNumeric" + +# Define engines with optional catalog references +[engines.datafusion1] +type = "datafusion" +catalog = "memory_catalog" # Reference to a catalog + +[engines.datafusion2] +type = "datafusion" +catalog = "rest_catalog" # Multiple engines can share the same catalog + +[engines.datafusion3] +type = "datafusion" +# No catalog specified - uses default MemoryCatalog + +# Define test steps +[[steps]] +engine = "datafusion1" +slt = "path/to/test1.slt" + +[[steps]] +engine = "datafusion2" +slt = "path/to/test2.slt" +``` + +### Supported Catalog Types + +- `memory`: In-memory catalog for testing +- `rest`: REST catalog for integration testing +- `glue`: AWS Glue catalog +- `sql`: SQL database catalog +- `hms`: Hive Metastore catalog +- `s3tables`: S3 Tables catalog + +### Key Features + +- **Lazy Loading**: Catalogs are created only when first referenced +- **Sharing**: Multiple engines can share the same catalog instance +- **Backward Compatibility**: Existing schedules continue to work without modification +- **Type Safety**: Catalog configurations are validated at parse time \ No newline at end of file diff --git a/crates/sqllogictest/examples/example_catalog_config.toml.example b/crates/sqllogictest/examples/example_catalog_config.toml.example new file mode 100644 index 0000000000..bc410ccc9d --- /dev/null +++ b/crates/sqllogictest/examples/example_catalog_config.toml.example @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
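Before the example file body continues below, a note on the README section above: the `[catalogs.*]` entries it documents are resolved through the new `iceberg-catalog-loader` dependency. The sketch that follows is hypothetical and not part of this patch; it simply mirrors the `CatalogLoader::from(...).load(...)` call that `schedule.rs` makes, using a memory catalog and an assumed warehouse URI.

```rust
use std::collections::HashMap;

use iceberg::Catalog;
use iceberg_catalog_loader::CatalogLoader;

// Hypothetical standalone driver: a `[catalogs.my_mem]` entry with
// `type = "memory"` and `warehouse = "memory://warehouse"` boils down to a
// call like the one CatalogRegistry::get_or_create_catalog makes.
async fn load_from_schedule_entry() -> anyhow::Result<()> {
    let properties = HashMap::from([(
        "warehouse".to_string(),          // key taken from the TOML entry
        "memory://warehouse".to_string(), // assumed warehouse URI
    )]);

    // `from` picks the builder by the `type` string; `load` receives the
    // catalog name plus the remaining TOML keys as string properties.
    let catalog = CatalogLoader::from("memory")
        .load("my_mem".to_string(), properties)
        .await?;

    // The resulting catalog handle is what every engine that sets
    // `catalog = "my_mem"` would share.
    println!("namespaces: {:?}", catalog.list_namespaces(None).await?);
    Ok(())
}
```

Any other key in the catalog table (for example `uri`, `credential`, `token`) travels through the same flattened `properties` map untouched.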
+ +# Example configuration file: demonstrates various catalog configuration methods +# Note: This file is for example purposes only and will not be executed by the test framework +# (because the filename does not end with .toml, but with .example) + +# Memory catalog example +[catalogs.memory_example] +type = "memory" +warehouse = "memory://example" + +# REST catalog example (for integration testing) +[catalogs.rest_example] +type = "rest" +uri = "http://localhost:8181" +warehouse = "s3://test-bucket/warehouse" +credential = "client_credentials" +token = "your-token-here" + +# SQL catalog example +[catalogs.sql_example] +type = "sql" +uri = "postgresql://user:password@localhost:5432/iceberg_test" +warehouse = "s3://test-bucket/warehouse" +sql_bind_style = "DollarNumeric" + +# Glue catalog example +[catalogs.glue_example] +type = "glue" +warehouse = "s3://test-bucket/warehouse" +region = "us-east-1" + +# Engine configuration example +[engines.memory_engine] +type = "datafusion" +catalog = "memory_example" + +[engines.rest_engine] +type = "datafusion" +catalog = "rest_example" + +[engines.sql_engine] +type = "datafusion" +catalog = "sql_example" + +[engines.glue_engine] +type = "datafusion" +catalog = "glue_example" + +[engines.default_engine] +type = "datafusion" +# No catalog specified, uses default MemoryCatalog + +# Test steps example +[[steps]] +engine = "memory_engine" +slt = "df_test/show_tables.slt" + +[[steps]] +engine = "default_engine" +slt = "df_test/show_tables.slt" diff --git a/crates/sqllogictest/src/engine/datafusion.rs b/crates/sqllogictest/src/engine/datafusion.rs index b3e37d9206..12710d005b 100644 --- a/crates/sqllogictest/src/engine/datafusion.rs +++ b/crates/sqllogictest/src/engine/datafusion.rs @@ -22,8 +22,8 @@ use std::sync::Arc; use datafusion::catalog::CatalogProvider; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_sqllogictest::DataFusion; -use iceberg::CatalogBuilder; use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder}; +use iceberg::{Catalog, CatalogBuilder}; use iceberg_datafusion::IcebergCatalogProvider; use indicatif::ProgressBar; use toml::Table as TomlTable; @@ -36,6 +36,15 @@ pub struct DataFusionEngine { session_context: SessionContext, } +impl std::fmt::Debug for DataFusionEngine { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("DataFusionEngine") + .field("test_data_path", &self.test_data_path) + .field("session_context", &"") + .finish() + } +} + #[async_trait::async_trait] impl EngineRunner for DataFusionEngine { async fn run_slt_file(&mut self, path: &Path) -> Result<()> { @@ -58,12 +67,24 @@ impl EngineRunner for DataFusionEngine { } impl DataFusionEngine { - pub async fn new(config: TomlTable) -> Result { + pub async fn new(config: TomlTable, catalog: Option>) -> Result { let session_config = SessionConfig::new() .with_target_partitions(4) .with_information_schema(true); let ctx = SessionContext::new_with_config(session_config); - ctx.register_catalog("default", Self::create_catalog(&config).await?); + + let catalog_provider = match catalog { + Some(cat) => { + // Use the provided catalog + Arc::new(IcebergCatalogProvider::try_new(cat).await?) + } + None => { + // Fallback: create default MemoryCatalog + Self::create_default_catalog(&config).await? 
+ } + }; + + ctx.register_catalog("default", catalog_provider); Ok(Self { test_data_path: PathBuf::from("testdata"), @@ -71,9 +92,8 @@ impl DataFusionEngine { }) } - async fn create_catalog(_: &TomlTable) -> anyhow::Result> { - // TODO: support dynamic catalog configuration - // See: https://github.com/apache/iceberg-rust/issues/1780 + async fn create_default_catalog(_: &TomlTable) -> anyhow::Result> { + // Create default MemoryCatalog as fallback let catalog = MemoryCatalogBuilder::default() .load( "memory", diff --git a/crates/sqllogictest/src/engine/mod.rs b/crates/sqllogictest/src/engine/mod.rs index 724359fbe5..956f1da35f 100644 --- a/crates/sqllogictest/src/engine/mod.rs +++ b/crates/sqllogictest/src/engine/mod.rs @@ -18,8 +18,10 @@ mod datafusion; use std::path::Path; +use std::sync::Arc; use anyhow::anyhow; +use iceberg::Catalog; use sqllogictest::{AsyncDB, MakeConnection, Runner, parse_file}; use toml::Table as TomlTable; @@ -29,16 +31,17 @@ use crate::error::{Error, Result}; const TYPE_DATAFUSION: &str = "datafusion"; #[async_trait::async_trait] -pub trait EngineRunner: Send { +pub trait EngineRunner: Send + std::fmt::Debug { async fn run_slt_file(&mut self, path: &Path) -> Result<()>; } pub async fn load_engine_runner( engine_type: &str, cfg: TomlTable, + catalog: Option>, ) -> Result> { match engine_type { - TYPE_DATAFUSION => Ok(Box::new(DataFusionEngine::new(cfg).await?)), + TYPE_DATAFUSION => Ok(Box::new(DataFusionEngine::new(cfg, catalog).await?)), _ => Err(anyhow::anyhow!("Unsupported engine type: {engine_type}").into()), } } @@ -74,7 +77,7 @@ mod tests { random = { type = "random_engine", url = "http://localhost:8181" } "#; let tbl = toml::from_str(input).unwrap(); - let result = load_engine_runner("random_engine", tbl).await; + let result = load_engine_runner("random_engine", tbl, None).await; assert!(result.is_err()); } @@ -86,7 +89,7 @@ mod tests { df = { type = "datafusion" } "#; let tbl = toml::from_str(input).unwrap(); - let result = load_engine_runner(TYPE_DATAFUSION, tbl).await; + let result = load_engine_runner(TYPE_DATAFUSION, tbl, None).await; assert!(result.is_ok()); } diff --git a/crates/sqllogictest/src/error.rs b/crates/sqllogictest/src/error.rs index 2bf5a09d6b..c279493a8b 100644 --- a/crates/sqllogictest/src/error.rs +++ b/crates/sqllogictest/src/error.rs @@ -50,3 +50,9 @@ impl From for Error { Self(value.into()) } } + +impl From for Error { + fn from(value: iceberg::Error) -> Self { + Self(value.into()) + } +} diff --git a/crates/sqllogictest/src/schedule.rs b/crates/sqllogictest/src/schedule.rs index 7c13ad4d12..75e0f1f170 100644 --- a/crates/sqllogictest/src/schedule.rs +++ b/crates/sqllogictest/src/schedule.rs @@ -18,14 +18,80 @@ use std::collections::HashMap; use std::fs::read_to_string; use std::path::{Path, PathBuf}; +use std::sync::Arc; use anyhow::{Context, anyhow}; +use iceberg::Catalog; use serde::{Deserialize, Serialize}; use toml::{Table as TomlTable, Value}; use tracing::info; use crate::engine::{EngineRunner, load_engine_runner}; +/// Catalog configuration parsed from TOML +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CatalogConfig { + /// Catalog type (e.g., "memory", "rest", "glue", "sql") + pub r#type: String, + /// Additional properties passed to the catalog builder + #[serde(flatten)] + pub properties: HashMap, +} + +/// Registry for managing catalog instances with lazy loading and sharing +pub struct CatalogRegistry { + /// Cached catalog instances by name + catalogs: HashMap>, +} + +impl CatalogRegistry { + /// Create 
a new empty catalog registry + pub fn new() -> Self { + Self { + catalogs: HashMap::new(), + } + } +} + +impl Default for CatalogRegistry { + fn default() -> Self { + Self::new() + } +} + +impl CatalogRegistry { + /// Get or create a catalog instance + pub async fn get_or_create_catalog( + &mut self, + name: &str, + config: &CatalogConfig, + ) -> anyhow::Result> { + if let Some(catalog) = self.catalogs.get(name) { + return Ok(Arc::clone(catalog)); + } + + // Load catalog using catalog-loader + use iceberg_catalog_loader::CatalogLoader; + let catalog = CatalogLoader::from(config.r#type.as_str()) + .load(name.to_string(), config.properties.clone()) + .await + .with_context(|| { + format!( + "Failed to load catalog '{}' of type '{}'", + name, config.r#type + ) + })?; + + self.catalogs.insert(name.to_string(), Arc::clone(&catalog)); + Ok(catalog) + } + + /// Get all catalog names + pub fn catalog_names(&self) -> Vec { + self.catalogs.keys().cloned().collect() + } +} + pub struct Schedule { /// Engine names to engine instances engines: HashMap>, @@ -56,6 +122,31 @@ impl Schedule { } } + /// Parse catalogs from TOML table + async fn parse_catalogs(table: &TomlTable) -> anyhow::Result> { + let catalogs_tbl = match table.get("catalogs") { + Some(val) => val + .as_table() + .ok_or_else(|| anyhow!("'catalogs' must be a table"))?, + None => return Ok(HashMap::new()), // catalogs are optional + }; + + let mut catalogs = HashMap::new(); + + for (name, catalog_val) in catalogs_tbl { + let cfg: CatalogConfig = catalog_val + .clone() + .try_into() + .with_context(|| format!("Failed to parse catalog '{name}'"))?; + + if catalogs.insert(name.clone(), cfg).is_some() { + return Err(anyhow!("Duplicate catalog '{name}'")); + } + } + + Ok(catalogs) + } + pub async fn from_file>(path: P) -> anyhow::Result { let path_str = path.as_ref().to_string_lossy().to_string(); let content = read_to_string(path)?; @@ -64,7 +155,12 @@ impl Schedule { .as_table() .ok_or_else(|| anyhow!("Schedule file must be a TOML table"))?; - let engines = Schedule::parse_engines(toml_table).await?; + // Parse catalogs first + let catalogs = Schedule::parse_catalogs(toml_table).await?; + + // Then parse engines, passing in catalogs + let engines = Schedule::parse_engines(toml_table, &catalogs).await?; + let steps = Schedule::parse_steps(toml_table)?; Ok(Self::new(engines, steps, path_str)) @@ -108,6 +204,7 @@ impl Schedule { async fn parse_engines( table: &TomlTable, + catalog_configs: &HashMap, ) -> anyhow::Result>> { let engines_tbl = table .get("engines") @@ -116,6 +213,7 @@ impl Schedule { .ok_or_else(|| anyhow!("'engines' must be a table"))?; let mut engines = HashMap::new(); + let mut catalog_registry = CatalogRegistry::new(); for (name, engine_val) in engines_tbl { let cfg_tbl = engine_val @@ -129,7 +227,26 @@ impl Schedule { .as_str() .ok_or_else(|| anyhow::anyhow!("Engine {name} type must be a string"))?; - let engine = load_engine_runner(engine_type, cfg_tbl.clone()).await?; + // Get catalog reference (if configured) + let catalog = if let Some(catalog_name) = cfg_tbl.get("catalog") { + let catalog_name = catalog_name + .as_str() + .ok_or_else(|| anyhow!("Engine {name} catalog must be a string"))?; + + let catalog_config = catalog_configs + .get(catalog_name) + .ok_or_else(|| anyhow!("Catalog '{catalog_name}' not found"))?; + + Some( + catalog_registry + .get_or_create_catalog(catalog_name, catalog_config) + .await?, + ) + } else { + None + }; + + let engine = load_engine_runner(engine_type, cfg_tbl.clone(), catalog).await?; if 
engines.insert(name.clone(), engine).is_some() { return Err(anyhow!("Duplicate engine '{name}'")); @@ -155,6 +272,7 @@ impl Schedule { #[cfg(test)] mod tests { + use std::collections::HashMap; use toml::Table as TomlTable; use crate::schedule::Schedule; @@ -200,8 +318,258 @@ mod tests { "#; let table: TomlTable = toml::from_str(toml_content).unwrap(); - let result = Schedule::parse_engines(&table).await; + let catalogs = HashMap::new(); + let result = Schedule::parse_engines(&table, &catalogs).await; + + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_parse_catalogs() { + let input = r#" + [catalogs.memory_catalog] + type = "memory" + warehouse = "memory://test" + + [catalogs.rest_catalog] + type = "rest" + uri = "http://localhost:8181" + warehouse = "s3://my-bucket/warehouse" + credential = "client_credentials" + token = "xxx" + + [catalogs.sql_catalog] + type = "sql" + uri = "postgresql://user:pass@localhost/iceberg" + warehouse = "s3://my-bucket/warehouse" + sql_bind_style = "DollarNumeric" + "#; + + let tbl: TomlTable = toml::from_str(input).unwrap(); + let catalogs = Schedule::parse_catalogs(&tbl).await.unwrap(); + + assert_eq!(catalogs.len(), 3); + assert!(catalogs.contains_key("memory_catalog")); + assert!(catalogs.contains_key("rest_catalog")); + assert!(catalogs.contains_key("sql_catalog")); + + // Verify memory catalog configuration + let memory_cfg = &catalogs["memory_catalog"]; + assert_eq!(memory_cfg.r#type, "memory"); + assert_eq!( + memory_cfg.properties.get("warehouse").unwrap(), + "memory://test" + ); + + // Verify rest catalog configuration + let rest_cfg = &catalogs["rest_catalog"]; + assert_eq!(rest_cfg.r#type, "rest"); + assert_eq!( + rest_cfg.properties.get("uri").unwrap(), + "http://localhost:8181" + ); + assert_eq!( + rest_cfg.properties.get("warehouse").unwrap(), + "s3://my-bucket/warehouse" + ); + assert_eq!( + rest_cfg.properties.get("credential").unwrap(), + "client_credentials" + ); + assert_eq!(rest_cfg.properties.get("token").unwrap(), "xxx"); + } + + #[tokio::test] + async fn test_parse_catalogs_optional() { + let input = r#" + [engines.df] + type = "datafusion" + "#; + + let tbl: TomlTable = toml::from_str(input).unwrap(); + let catalogs = Schedule::parse_catalogs(&tbl).await.unwrap(); + + assert_eq!(catalogs.len(), 0); + } + + #[tokio::test] + async fn test_parse_catalogs_invalid_type() { + let input = r#" + [catalogs.invalid] + warehouse = "memory://test" + "#; + + let tbl: TomlTable = toml::from_str(input).unwrap(); + let result = Schedule::parse_catalogs(&tbl).await; assert!(result.is_err()); } + + #[tokio::test] + async fn test_engine_with_catalog_reference() { + let input = r#" + [catalogs.shared_catalog] + type = "memory" + warehouse = "memory://shared" + + [engines.df1] + type = "datafusion" + catalog = "shared_catalog" + + [engines.df2] + type = "datafusion" + catalog = "shared_catalog" + "#; + + let tbl: TomlTable = toml::from_str(input).unwrap(); + let catalog_configs = Schedule::parse_catalogs(&tbl).await.unwrap(); + let engines = Schedule::parse_engines(&tbl, &catalog_configs) + .await + .unwrap(); + + assert_eq!(engines.len(), 2); + assert!(engines.contains_key("df1")); + assert!(engines.contains_key("df2")); + } + + #[tokio::test] + async fn test_engine_with_missing_catalog() { + let input = r#" + [engines.df] + type = "datafusion" + catalog = "nonexistent" + "#; + + let tbl: TomlTable = toml::from_str(input).unwrap(); + let catalog_configs = Schedule::parse_catalogs(&tbl).await.unwrap(); + let result = 
Schedule::parse_engines(&tbl, &catalog_configs).await; + + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Catalog 'nonexistent' not found") + ); + } + + #[tokio::test] + async fn test_engine_without_catalog() { + let input = r#" + [engines.df] + type = "datafusion" + "#; + + let tbl: TomlTable = toml::from_str(input).unwrap(); + let catalog_configs = Schedule::parse_catalogs(&tbl).await.unwrap(); + let engines = Schedule::parse_engines(&tbl, &catalog_configs) + .await + .unwrap(); + + assert_eq!(engines.len(), 1); + assert!(engines.contains_key("df")); + } + + #[tokio::test] + async fn test_catalog_config_validation() { + // Test that catalog configuration must have a type field + let input = r#" + [catalogs.invalid] + warehouse = "memory://test" + "#; + + let tbl: TomlTable = toml::from_str(input).unwrap(); + let result = Schedule::parse_catalogs(&tbl).await; + + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Failed to parse catalog") + ); + } + + #[tokio::test] + async fn test_catalog_sharing_across_engines() { + let input = r#" + [catalogs.shared] + type = "memory" + warehouse = "memory://shared" + + [engines.engine1] + type = "datafusion" + catalog = "shared" + + [engines.engine2] + type = "datafusion" + catalog = "shared" + + [engines.engine3] + type = "datafusion" + catalog = "shared" + "#; + + let tbl: TomlTable = toml::from_str(input).unwrap(); + let catalog_configs = Schedule::parse_catalogs(&tbl).await.unwrap(); + let engines = Schedule::parse_engines(&tbl, &catalog_configs) + .await + .unwrap(); + + assert_eq!(engines.len(), 3); + assert!(engines.contains_key("engine1")); + assert!(engines.contains_key("engine2")); + assert!(engines.contains_key("engine3")); + } + + #[tokio::test] + async fn test_mixed_catalog_usage() { + // Test that some engines use custom catalog, others use default catalog + let input = r#" + [catalogs.custom] + type = "memory" + warehouse = "memory://custom" + + [engines.with_catalog] + type = "datafusion" + catalog = "custom" + + [engines.without_catalog] + type = "datafusion" + "#; + + let tbl: TomlTable = toml::from_str(input).unwrap(); + let catalog_configs = Schedule::parse_catalogs(&tbl).await.unwrap(); + let engines = Schedule::parse_engines(&tbl, &catalog_configs) + .await + .unwrap(); + + assert_eq!(engines.len(), 2); + assert!(engines.contains_key("with_catalog")); + assert!(engines.contains_key("without_catalog")); + } + + #[tokio::test] + async fn test_catalog_properties_preservation() { + let input = r#" + [catalogs.complex] + type = "memory" + warehouse = "memory://complex" + some_property = "value" + another_property = "42" + "#; + + let tbl: TomlTable = toml::from_str(input).unwrap(); + let catalogs = Schedule::parse_catalogs(&tbl).await.unwrap(); + + assert_eq!(catalogs.len(), 1); + let config = &catalogs["complex"]; + assert_eq!(config.r#type, "memory"); + assert_eq!( + config.properties.get("warehouse").unwrap(), + "memory://complex" + ); + assert_eq!(config.properties.get("some_property").unwrap(), "value"); + assert_eq!(config.properties.get("another_property").unwrap(), "42"); + } } diff --git a/crates/sqllogictest/testdata/schedules/catalog_config.toml b/crates/sqllogictest/testdata/schedules/catalog_config.toml new file mode 100644 index 0000000000..b1185a886a --- /dev/null +++ b/crates/sqllogictest/testdata/schedules/catalog_config.toml @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license 
agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Test basic catalog configuration functionality +[catalogs.test_memory] +type = "memory" +warehouse = "memory://test" + +[engines.df] +type = "datafusion" +catalog = "test_memory" + +[[steps]] +engine = "df" +slt = "df_test/show_tables.slt" diff --git a/crates/sqllogictest/testdata/schedules/error_cases.toml b/crates/sqllogictest/testdata/schedules/error_cases.toml new file mode 100644 index 0000000000..3ccf80fe77 --- /dev/null +++ b/crates/sqllogictest/testdata/schedules/error_cases.toml @@ -0,0 +1,31 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This file is used to test error cases and should not be executed under normal circumstances +# It will be automatically discovered by the integration test framework, but we need to ensure it handles errors correctly + +[catalogs.valid] +type = "memory" +warehouse = "memory://valid" + +[engines.df] +type = "datafusion" +catalog = "valid" + +[[steps]] +engine = "df" +slt = "df_test/show_tables.slt" diff --git a/crates/sqllogictest/testdata/schedules/shared_catalog.toml b/crates/sqllogictest/testdata/schedules/shared_catalog.toml new file mode 100644 index 0000000000..df3f780252 --- /dev/null +++ b/crates/sqllogictest/testdata/schedules/shared_catalog.toml @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
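Before the body of `shared_catalog.toml` below: that schedule exercises the sharing guarantee of `CatalogRegistry` in `schedule.rs`, which caches instances by name so that every engine referencing the same catalog name receives a clone of one `Arc`. A hypothetical unit test of that contract might look like the following; it is illustrative only, not part of the patch, and assumes `CatalogConfig` and `CatalogRegistry` are reachable from inside the crate.

```rust
use std::collections::HashMap;
use std::sync::Arc;

// Assumes the test lives inside the sqllogictest crate.
use crate::schedule::{CatalogConfig, CatalogRegistry};

// Hypothetical check of the caching contract: two engines that point at the
// same catalog name must receive the same underlying instance.
#[tokio::test]
async fn shared_catalog_is_created_once() -> anyhow::Result<()> {
    let config = CatalogConfig {
        r#type: "memory".to_string(),
        properties: HashMap::from([(
            "warehouse".to_string(),
            "memory://shared".to_string(),
        )]),
    };

    let mut registry = CatalogRegistry::new();
    let first = registry.get_or_create_catalog("shared", &config).await?;
    let second = registry.get_or_create_catalog("shared", &config).await?;

    // Same Arc, not a second catalog: the registry caches by name.
    assert!(Arc::ptr_eq(&first, &second));
    Ok(())
}
```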
+ +# Test multiple engines sharing the same catalog +[catalogs.shared] +type = "memory" +warehouse = "memory://shared" + +[engines.writer] +type = "datafusion" +catalog = "shared" + +[engines.reader] +type = "datafusion" +catalog = "shared" + +[[steps]] +engine = "writer" +slt = "shared_test/create_table.slt" + +[[steps]] +engine = "reader" +slt = "shared_test/query_table.slt" diff --git a/crates/sqllogictest/testdata/slts/shared_test/create_table.slt b/crates/sqllogictest/testdata/slts/shared_test/create_table.slt new file mode 100644 index 0000000000..f06c5005a2 --- /dev/null +++ b/crates/sqllogictest/testdata/slts/shared_test/create_table.slt @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Verify that the writer engine can access the catalog +statement ok +SHOW TABLES; diff --git a/crates/sqllogictest/testdata/slts/shared_test/query_table.slt b/crates/sqllogictest/testdata/slts/shared_test/query_table.slt new file mode 100644 index 0000000000..abb403eb73 --- /dev/null +++ b/crates/sqllogictest/testdata/slts/shared_test/query_table.slt @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+# Verify that the reader engine can access the same catalog
+statement ok
+SHOW TABLES;
diff --git a/crates/sqllogictest/tests/sqllogictests.rs b/crates/sqllogictest/tests/sqllogictests.rs
index 434d9d78cc..6b9bb1292f 100644
--- a/crates/sqllogictest/tests/sqllogictests.rs
+++ b/crates/sqllogictest/tests/sqllogictests.rs
@@ -85,3 +85,72 @@ pub(crate) async fn run_schedule(schedule_file: PathBuf) -> anyhow::Result<()> {
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use std::path::PathBuf;
+
+    use super::{collect_schedule_files, run_schedule};
+
+    #[tokio::test]
+    async fn test_catalog_config_schedule_execution() {
+        let schedule_path = PathBuf::from(format!(
+            "{}/testdata/schedules/catalog_config.toml",
+            env!("CARGO_MANIFEST_DIR")
+        ));
+
+        let result = run_schedule(schedule_path).await;
+        assert!(
+            result.is_ok(),
+            "Catalog config schedule should execute successfully"
+        );
+    }
+
+    #[tokio::test]
+    async fn test_shared_catalog_schedule_execution() {
+        let schedule_path = PathBuf::from(format!(
+            "{}/testdata/schedules/shared_catalog.toml",
+            env!("CARGO_MANIFEST_DIR")
+        ));
+
+        let result = run_schedule(schedule_path).await;
+        assert!(
+            result.is_ok(),
+            "Shared catalog schedule should execute successfully"
+        );
+    }
+
+    #[tokio::test]
+    async fn test_error_cases_schedule_execution() {
+        let schedule_path = PathBuf::from(format!(
+            "{}/testdata/schedules/error_cases.toml",
+            env!("CARGO_MANIFEST_DIR")
+        ));
+
+        let result = run_schedule(schedule_path).await;
+        assert!(
+            result.is_ok(),
+            "Error cases schedule should execute successfully"
+        );
+    }
+
+    #[test]
+    fn test_collect_schedule_files() {
+        let files = collect_schedule_files().unwrap();
+        assert!(
+            !files.is_empty(),
+            "Should find at least some schedule files"
+        );
+
+        // Verify that the newly added schedule files are discovered
+        let file_names: Vec<String> = files
+            .iter()
+            .filter_map(|p| p.file_name()?.to_str())
+            .map(|s| s.to_string())
+            .collect();
+
+        assert!(file_names.contains(&"catalog_config.toml".to_string()));
+        assert!(file_names.contains(&"shared_catalog.toml".to_string()));
+        assert!(file_names.contains(&"error_cases.toml".to_string()));
+    }
+}
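Finally, a sketch of the engine-side half of the feature: `DataFusionEngine::new` now either wraps the catalog handed to it in an `IcebergCatalogProvider` or falls back to a fresh in-memory catalog, as in the hunks above. The helper below is illustrative only; the function name and the `memory://warehouse` URI are assumptions, while the builder and provider calls follow the imports shown in `datafusion.rs`.

```rust
use std::collections::HashMap;
use std::sync::Arc;

use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
use iceberg::{Catalog, CatalogBuilder};
use iceberg_datafusion::IcebergCatalogProvider;

// Illustrative only: mirrors the Some/None branches in DataFusionEngine::new.
async fn catalog_provider_for(
    configured: Option<Arc<dyn Catalog>>,
) -> anyhow::Result<Arc<IcebergCatalogProvider>> {
    let catalog: Arc<dyn Catalog> = match configured {
        // The engine entry carried `catalog = "..."`: reuse the shared instance.
        Some(catalog) => catalog,
        // No reference: build a throwaway in-memory catalog, as before.
        None => Arc::new(
            MemoryCatalogBuilder::default()
                .load(
                    "memory",
                    HashMap::from([(
                        MEMORY_CATALOG_WAREHOUSE.to_string(),
                        "memory://warehouse".to_string(), // assumed warehouse URI
                    )]),
                )
                .await?,
        ),
    };
    Ok(Arc::new(IcebergCatalogProvider::try_new(catalog).await?))
}
```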