@@ -382,10 +382,19 @@ fn filter_ancestor_fields(fields: Vec<FieldChange>) -> Vec<FieldChange> {
382382///
383383/// Returns true if any path in `added_ancestor_paths` is a prefix of `path`.
384384/// For example, "user" is an ancestor of "user.name" and "user.address.street".
385+ ///
386+ /// This implementation walks up the parent chain of `path`, checking at each level
387+ /// if that parent exists in the set. This is O(depth) instead of O(N * depth) where
388+ /// N is the number of ancestor paths.
385389fn has_added_ancestor ( path : & ColumnName , added_ancestor_paths : & HashSet < ColumnName > ) -> bool {
386- added_ancestor_paths
387- . iter ( )
388- . any ( |ancestor| path. is_descendant_of ( ancestor) )
390+ let mut curr = path. parent ( ) ;
391+ while let Some ( parent) = curr {
392+ if added_ancestor_paths. contains ( & parent) {
393+ return true ;
394+ }
395+ curr = parent. parent ( ) ;
396+ }
397+ false
389398}
390399
391400/// Gets the physical name of a field if present
@@ -396,6 +405,50 @@ fn physical_name(field: &StructField) -> Option<&str> {
396405 }
397406}
398407
408+ /// Validates that physical names are consistent between two versions of the same field.
409+ ///
410+ /// Since schema diffing requires column mapping (field IDs), physical names must be present
411+ /// and must not change for the same field ID across schema versions.
412+ ///
413+ /// # Errors
414+ /// - `PhysicalNameChanged`: Physical name differs between before and after
415+ /// - `MissingPhysicalName`: Physical name is missing in either version
416+ fn validate_physical_name (
417+ before : & FieldWithPath ,
418+ after : & FieldWithPath ,
419+ ) -> Result < ( ) , SchemaDiffError > {
420+ let before_physical = physical_name ( & before. field ) ;
421+ let after_physical = physical_name ( & after. field ) ;
422+
423+ match ( before_physical, after_physical) {
424+ ( Some ( b) , Some ( a) ) if b == a => {
425+ // Valid: physical name is present and unchanged
426+ Ok ( ( ) )
427+ }
428+ ( Some ( b) , Some ( a) ) => {
429+ // Invalid: physical name changed for the same field ID
430+ Err ( SchemaDiffError :: PhysicalNameChanged {
431+ field_id : before. field_id ,
432+ path : after. path . clone ( ) ,
433+ before : b. to_string ( ) ,
434+ after : a. to_string ( ) ,
435+ } )
436+ }
437+ ( Some ( _) , None ) | ( None , Some ( _) ) => {
438+ // Invalid: physical name was added or removed
439+ Err ( SchemaDiffError :: MissingPhysicalName {
440+ path : after. path . clone ( ) ,
441+ } )
442+ }
443+ ( None , None ) => {
444+ // Invalid: physical name must be present when column mapping is enabled
445+ Err ( SchemaDiffError :: MissingPhysicalName {
446+ path : after. path . clone ( ) ,
447+ } )
448+ }
449+ }
450+ }
451+
399452/// Recursively collects all struct fields from a data type with their paths
400453///
401454/// This helper function handles deep nesting like `array<array<struct<...>>>` or
@@ -522,49 +575,21 @@ fn compute_field_update(
522575 }
523576
524577 // Check for nullability change - distinguish between tightening and loosening
525- if let Some ( change) = check_container_nullability_change ( before. field . nullable , after. field . nullable ) {
578+ if let Some ( change) = check_field_nullability_change ( before. field . nullable , after. field . nullable ) {
526579 changes. push ( change) ;
527580 }
528581
529582 // Validate physical name consistency
530- // Since we require column mapping (field IDs) for schema diffing, physical names must be present
531- let bp = physical_name ( & before. field ) ;
532- let ap = physical_name ( & after. field ) ;
533- match ( bp, ap) {
534- ( Some ( b) , Some ( a) ) if b == a => {
535- // Valid: physical name is present and unchanged
536- }
537- ( Some ( b) , Some ( a) ) => {
538- // Invalid: physical name changed for the same field ID
539- return Err ( SchemaDiffError :: PhysicalNameChanged {
540- field_id : before. field_id ,
541- path : after. path . clone ( ) ,
542- before : b. to_string ( ) ,
543- after : a. to_string ( ) ,
544- } ) ;
545- }
546- ( Some ( _) , None ) | ( None , Some ( _) ) => {
547- // Invalid: physical name was added or removed
548- return Err ( SchemaDiffError :: MissingPhysicalName {
549- path : after. path . clone ( ) ,
550- } ) ;
551- }
552- ( None , None ) => {
553- // Invalid: physical name must be present when column mapping is enabled
554- return Err ( SchemaDiffError :: MissingPhysicalName {
555- path : after. path . clone ( ) ,
556- } ) ;
557- }
558- }
583+ validate_physical_name ( before, after) ?;
559584
560585 // Check for type change (including container changes)
561586 if let Some ( change_type) =
562587 classify_data_type_change ( before. field . data_type ( ) , after. field . data_type ( ) )
563588 {
564589 changes. push ( change_type) ;
565590 }
566- // If None is returned, the container structure is the same and nested changes
567- // are already captured via field IDs, so we don't report a change here
591+ // If None is returned, the types are identical or the container structure is the same
592+ // and nested changes are already captured via field IDs, so we don't report a change here
568593
569594 // Check for metadata changes (excluding column mapping metadata)
570595 if has_metadata_changes ( & before. field , & after. field ) {
@@ -585,6 +610,23 @@ fn compute_field_update(
585610 } ) )
586611}
587612
613+ /// Checks for field nullability changes.
614+ ///
615+ /// Returns:
616+ /// - `Some(FieldChangeType::NullabilityLoosened)` if nullability was relaxed (false -> true)
617+ /// - `Some(FieldChangeType::NullabilityTightened)` if nullability was restricted (true -> false)
618+ /// - `None` if nullability didn't change
619+ fn check_field_nullability_change (
620+ before_nullable : bool ,
621+ after_nullable : bool ,
622+ ) -> Option < FieldChangeType > {
623+ match ( before_nullable, after_nullable) {
624+ ( false , true ) => Some ( FieldChangeType :: NullabilityLoosened ) ,
625+ ( true , false ) => Some ( FieldChangeType :: NullabilityTightened ) ,
626+ ( true , true ) | ( false , false ) => None ,
627+ }
628+ }
629+
588630/// Checks for container nullability changes.
589631///
590632/// Returns:
@@ -617,6 +659,11 @@ fn check_container_nullability_change(
617659/// 3. **Map containers**: Similar logic to arrays, but for both key and value types
618660/// 4. **Different container types or primitives**: Type change
619661fn classify_data_type_change ( before : & DataType , after : & DataType ) -> Option < FieldChangeType > {
662+ // Early return if types are identical - no change to report
663+ if before == after {
664+ return None ;
665+ }
666+
620667 match ( before, after) {
621668 // Struct-to-struct: nested field changes are handled separately via field IDs
622669 ( DataType :: Struct ( _) , DataType :: Struct ( _) ) => None ,
@@ -628,7 +675,7 @@ fn classify_data_type_change(before: &DataType, after: &DataType) -> Option<Fiel
628675 match ( before_array. element_type ( ) , after_array. element_type ( ) ) {
629676 // Both have struct elements - nested changes handled via field IDs
630677 ( DataType :: Struct ( _) , DataType :: Struct ( _) ) => None ,
631- // For non-struct elements, recurse to check for changes (but only if types differ)
678+ // For non-struct elements, recurse to check for changes
632679 ( e1, e2) => classify_data_type_change ( e1, e2) ,
633680 } ;
634681
@@ -654,18 +701,16 @@ fn classify_data_type_change(before: &DataType, after: &DataType) -> Option<Fiel
654701 let key_type_change = match ( before_map. key_type ( ) , after_map. key_type ( ) ) {
655702 // Both have struct keys - nested changes handled via field IDs
656703 ( DataType :: Struct ( _) , DataType :: Struct ( _) ) => None ,
657- // For non-struct keys (including arrays/maps containing structs), recurse (but only if types differ)
658- ( k1, k2) if k1 != k2 => classify_data_type_change ( k1, k2) ,
659- _ => None ,
704+ // For non-struct keys (including arrays/maps containing structs), recurse
705+ ( k1, k2) => classify_data_type_change ( k1, k2) ,
660706 } ;
661707
662708 // Recursively check for value type changes
663709 let value_type_change = match ( before_map. value_type ( ) , after_map. value_type ( ) ) {
664710 // Both have struct values - nested changes handled via field IDs
665711 ( DataType :: Struct ( _) , DataType :: Struct ( _) ) => None ,
666- // For non-struct values (including arrays/maps containing structs), recurse (but only if types differ)
667- ( v1, v2) if v1 != v2 => classify_data_type_change ( v1, v2) ,
668- _ => None ,
712+ // For non-struct values (including arrays/maps containing structs), recurse
713+ ( v1, v2) => classify_data_type_change ( v1, v2) ,
669714 } ;
670715
671716 // Check container nullability change
@@ -2540,5 +2585,137 @@ mod tests {
25402585 ] ) ) ) ;
25412586
25422587 assert ! ( !diff5. has_breaking_changes( ) ) ; // Removal is safe, renames are safe
2588+
2589+ // Case 6: Nullability tightening in deeply nested structure - should be breaking
2590+ // Test: array<struct<items: array<struct<value int nullable>>>> -> array<struct<items: array<struct<value int not null>>>>
2591+ let before6 = StructType :: new_unchecked ( [ create_field_with_id (
2592+ "wrapper" ,
2593+ DataType :: Array ( Box :: new ( ArrayType :: new (
2594+ DataType :: try_struct_type ( [ create_field_with_id (
2595+ "items" ,
2596+ DataType :: Array ( Box :: new ( ArrayType :: new (
2597+ DataType :: try_struct_type ( [
2598+ create_field_with_id ( "value" , DataType :: INTEGER , true , 3 ) , // Nullable
2599+ ] )
2600+ . unwrap ( ) ,
2601+ false ,
2602+ ) ) ) ,
2603+ false ,
2604+ 2 ,
2605+ ) ] )
2606+ . unwrap ( ) ,
2607+ false ,
2608+ ) ) ) ,
2609+ false ,
2610+ 1 ,
2611+ ) ] ) ;
2612+
2613+ let after6 = StructType :: new_unchecked ( [ create_field_with_id (
2614+ "wrapper" ,
2615+ DataType :: Array ( Box :: new ( ArrayType :: new (
2616+ DataType :: try_struct_type ( [ create_field_with_id (
2617+ "items" ,
2618+ DataType :: Array ( Box :: new ( ArrayType :: new (
2619+ DataType :: try_struct_type ( [
2620+ create_field_with_id ( "value" , DataType :: INTEGER , false , 3 ) , // Non-nullable now - BREAKING!
2621+ ] )
2622+ . unwrap ( ) ,
2623+ false ,
2624+ ) ) ) ,
2625+ false ,
2626+ 2 ,
2627+ ) ] )
2628+ . unwrap ( ) ,
2629+ false ,
2630+ ) ) ) ,
2631+ false ,
2632+ 1 ,
2633+ ) ] ) ;
2634+
2635+ let diff6 = SchemaDiffArgs {
2636+ before : & before6,
2637+ after : & after6,
2638+ }
2639+ . compute_diff ( )
2640+ . unwrap ( ) ;
2641+
2642+ assert_eq ! ( diff6. added_fields. len( ) , 0 ) ;
2643+ assert_eq ! ( diff6. removed_fields. len( ) , 0 ) ;
2644+ assert_eq ! ( diff6. updated_fields. len( ) , 1 ) ;
2645+ assert_eq ! (
2646+ diff6. updated_fields[ 0 ] . path,
2647+ ColumnName :: new( [ "wrapper" , "element" , "items" , "element" , "value" ] )
2648+ ) ;
2649+ assert_eq ! (
2650+ diff6. updated_fields[ 0 ] . change_type,
2651+ FieldChangeType :: NullabilityTightened
2652+ ) ;
2653+ assert ! ( diff6. has_breaking_changes( ) ) ; // Nullability tightening is breaking
2654+
2655+ // Case 7: Array container nullability tightening in nested structure - should be breaking
2656+ // Test: array<struct<items: array<struct<value int> nullable>>> -> array<struct<items: array<struct<value int> not null>>>
2657+ let before7 = StructType :: new_unchecked ( [ create_field_with_id (
2658+ "wrapper" ,
2659+ DataType :: Array ( Box :: new ( ArrayType :: new (
2660+ DataType :: try_struct_type ( [ create_field_with_id (
2661+ "items" ,
2662+ DataType :: Array ( Box :: new ( ArrayType :: new (
2663+ DataType :: try_struct_type ( [
2664+ create_field_with_id ( "value" , DataType :: INTEGER , false , 3 ) ,
2665+ ] )
2666+ . unwrap ( ) ,
2667+ true , // Array elements are nullable
2668+ ) ) ) ,
2669+ false ,
2670+ 2 ,
2671+ ) ] )
2672+ . unwrap ( ) ,
2673+ false ,
2674+ ) ) ) ,
2675+ false ,
2676+ 1 ,
2677+ ) ] ) ;
2678+
2679+ let after7 = StructType :: new_unchecked ( [ create_field_with_id (
2680+ "wrapper" ,
2681+ DataType :: Array ( Box :: new ( ArrayType :: new (
2682+ DataType :: try_struct_type ( [ create_field_with_id (
2683+ "items" ,
2684+ DataType :: Array ( Box :: new ( ArrayType :: new (
2685+ DataType :: try_struct_type ( [
2686+ create_field_with_id ( "value" , DataType :: INTEGER , false , 3 ) ,
2687+ ] )
2688+ . unwrap ( ) ,
2689+ false , // Array elements now non-nullable - BREAKING!
2690+ ) ) ) ,
2691+ false ,
2692+ 2 ,
2693+ ) ] )
2694+ . unwrap ( ) ,
2695+ false ,
2696+ ) ) ) ,
2697+ false ,
2698+ 1 ,
2699+ ) ] ) ;
2700+
2701+ let diff7 = SchemaDiffArgs {
2702+ before : & before7,
2703+ after : & after7,
2704+ }
2705+ . compute_diff ( )
2706+ . unwrap ( ) ;
2707+
2708+ assert_eq ! ( diff7. added_fields. len( ) , 0 ) ;
2709+ assert_eq ! ( diff7. removed_fields. len( ) , 0 ) ;
2710+ assert_eq ! ( diff7. updated_fields. len( ) , 1 ) ;
2711+ assert_eq ! (
2712+ diff7. updated_fields[ 0 ] . path,
2713+ ColumnName :: new( [ "wrapper" , "element" , "items" ] )
2714+ ) ;
2715+ assert_eq ! (
2716+ diff7. updated_fields[ 0 ] . change_type,
2717+ FieldChangeType :: ContainerNullabilityTightened
2718+ ) ;
2719+ assert ! ( diff7. has_breaking_changes( ) ) ; // Container nullability tightening is breaking
25432720 }
25442721}
0 commit comments