Skip to content

Commit c6cbbb5

Browse files
feat: implement builder for StructType (#1492)
## What changes are proposed in this pull request? Fixes #1284 ## How was this change tested? Added a couple of unit tests --------- Co-authored-by: aleksandarskrbic <aleksandarskrbic@users.noreply.github.com>
1 parent 0564bd7 commit c6cbbb5

File tree

2 files changed

+89
-16
lines changed

2 files changed

+89
-16
lines changed

kernel/src/schema/mod.rs

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,43 @@ pub struct StructType {
547547
metadata_columns: HashMap<MetadataColumnSpec, usize>,
548548
}
549549

550+
pub struct StructTypeBuilder {
551+
fields: IndexMap<String, StructField>,
552+
}
553+
554+
impl Default for StructTypeBuilder {
555+
fn default() -> Self {
556+
Self::new()
557+
}
558+
}
559+
560+
impl StructTypeBuilder {
561+
pub fn new() -> Self {
562+
Self {
563+
fields: IndexMap::new(),
564+
}
565+
}
566+
567+
pub fn from_schema(schema: &StructType) -> Self {
568+
Self {
569+
fields: schema.fields.clone(),
570+
}
571+
}
572+
573+
pub fn add_field(mut self, field: StructField) -> Self {
574+
self.fields.insert(field.name.clone(), field);
575+
self
576+
}
577+
578+
pub fn build(self) -> DeltaResult<StructType> {
579+
StructType::try_new(self.fields.into_values())
580+
}
581+
582+
pub fn build_arc_unchecked(self) -> Arc<StructType> {
583+
Arc::new(StructType::new_unchecked(self.fields.into_values()))
584+
}
585+
}
586+
550587
impl StructType {
551588
/// Creates a new [`StructType`] from the given fields.
552589
///
@@ -600,6 +637,10 @@ impl StructType {
600637
.process_results(|iter| Self::try_new(iter))?
601638
}
602639

640+
pub fn builder() -> StructTypeBuilder {
641+
StructTypeBuilder::new()
642+
}
643+
603644
/// Creates a new [`StructType`] from the given fields without validating them.
604645
///
605646
/// This should only be used when you are sure that the fields are valid.
@@ -3158,4 +3199,38 @@ mod tests {
31583199
");
31593200
Ok(())
31603201
}
3202+
3203+
#[test]
fn test_builder_empty() {
    // A builder that never received a field must still build, yielding a zero-field schema.
    let empty = StructType::builder().build().unwrap();
    assert_eq!(empty.num_fields(), 0);
}
3208+
3209+
#[test]
fn test_builder_add_fields() {
    let id_field = StructField::new("id", DataType::INTEGER, false);
    let name_field = StructField::new("name", DataType::STRING, true);

    let built = StructType::builder()
        .add_field(id_field)
        .add_field(name_field)
        .build()
        .unwrap();

    // Fields must come back in the order they were added.
    assert_eq!(built.num_fields(), 2);
    assert_eq!(built.field_at_index(0).unwrap().name(), "id");
    assert_eq!(built.field_at_index(1).unwrap().name(), "name");
}
3221+
3222+
#[test]
fn test_builder_from_schema() {
    let id_only = StructType::try_new([StructField::new("id", DataType::INTEGER, false)]).unwrap();
    let name_field = StructField::new("name", DataType::STRING, true);

    // Seeding from an existing schema keeps its fields first; new fields are appended after.
    let widened = StructTypeBuilder::from_schema(&id_only)
        .add_field(name_field)
        .build()
        .unwrap();

    assert_eq!(widened.num_fields(), 2);
    assert_eq!(widened.field_at_index(0).unwrap().name(), "id");
    assert_eq!(widened.field_at_index(1).unwrap().name(), "name");
}
31613236
}

kernel/src/transaction/mod.rs

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use crate::scan::log_replay::{
2424
BASE_ROW_ID_NAME, DEFAULT_ROW_COMMIT_VERSION_NAME, FILE_CONSTANT_VALUES_NAME, TAGS_NAME,
2525
};
2626
use crate::scan::scan_row_schema;
27-
use crate::schema::{ArrayType, MapType, SchemaRef, StructField, StructType};
27+
use crate::schema::{ArrayType, MapType, SchemaRef, StructField, StructType, StructTypeBuilder};
2828
use crate::snapshot::SnapshotRef;
2929
use crate::utils::{current_time_ms, require};
3030
use crate::{
@@ -66,9 +66,9 @@ pub(crate) static BASE_ADD_FILES_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(||
6666
DataType::struct_type_unchecked(vec![StructField::nullable("numRecords", DataType::LONG)]),
6767
);
6868

69-
Arc::new(StructType::new_unchecked(
70-
mandatory_add_file_schema().fields().cloned().chain([stats]),
71-
))
69+
StructTypeBuilder::from_schema(mandatory_add_file_schema())
70+
.add_field(stats)
71+
.build_arc_unchecked()
7272
});
7373

7474
static DATA_CHANGE_COLUMN: LazyLock<StructField> =
@@ -86,30 +86,28 @@ static ADD_FILES_SCHEMA_WITH_DATA_CHANGE: LazyLock<SchemaRef> = LazyLock::new(||
8686
Arc::new(StructType::new_unchecked(fields.into_iter().cloned()))
8787
});
8888

89-
// NOTE: The following two methods are a workaround for the fact that we do not have a proper SchemaBuilder yet.
90-
// See https://github.com/delta-io/delta-kernel-rs/issues/1284
9189
/// Extend a schema with a statistics column and return a new SchemaRef.
9290
///
9391
/// The stats column is of type string as required by the spec.
9492
///
9593
/// Note that this method is only useful to extend an Add action schema.
9694
fn with_stats_col(schema: &SchemaRef) -> SchemaRef {
97-
let fields = schema
98-
.fields()
99-
.cloned()
100-
.chain([StructField::nullable("stats", DataType::STRING)]);
101-
Arc::new(StructType::new_unchecked(fields))
95+
StructTypeBuilder::from_schema(schema)
96+
.add_field(StructField::nullable("stats", DataType::STRING))
97+
.build_arc_unchecked()
10298
}
10399

104100
/// Extend a schema with row tracking columns and return a new SchemaRef.
105101
///
106102
/// Note that this method is only useful to extend an Add action schema.
107103
fn with_row_tracking_cols(schema: &SchemaRef) -> SchemaRef {
108-
let fields = schema.fields().cloned().chain([
109-
StructField::nullable("baseRowId", DataType::LONG),
110-
StructField::nullable("defaultRowCommitVersion", DataType::LONG),
111-
]);
112-
Arc::new(StructType::new_unchecked(fields))
104+
StructTypeBuilder::from_schema(schema)
105+
.add_field(StructField::nullable("baseRowId", DataType::LONG))
106+
.add_field(StructField::nullable(
107+
"defaultRowCommitVersion",
108+
DataType::LONG,
109+
))
110+
.build_arc_unchecked()
113111
}
114112

115113
/// A transaction represents an in-progress write to a table. After creating a transaction, changes

0 commit comments

Comments
 (0)