@@ -38,6 +38,15 @@ use super::append::split_datafiles;
3838/// The target number of datafiles per manifest is dynamic, but we don't want to go below this number.
3939static MIN_DATAFILES_PER_MANIFEST : usize = 4 ;
4040
41+ #[ derive( Debug , Clone ) ]
42+ /// Group of writes sharing a single Data Sequence Number (DSN)
43+ pub struct DsnGroup {
44+ /// Delete files. These apply to insert files from previous DSN groups
45+ pub delete_files : Vec < DataFile > ,
46+ /// Insert (data) files written in this group
47+ pub data_files : Vec < DataFile > ,
48+ }
49+
4150#[ derive( Debug ) ]
4251///Table operations
4352pub enum Operation {
@@ -56,8 +65,7 @@ pub enum Operation {
5665 /// Append new files to the table
5766 Append {
5867 branch : Option < String > ,
59- data_files : Vec < DataFile > ,
60- delete_files : Vec < DataFile > ,
68+ dsn_groups : Vec < DsnGroup > ,
6169 additional_summary : Option < HashMap < String , String > > ,
6270 } ,
6371 // /// Quickly append new files to the table
@@ -99,8 +107,7 @@ impl Operation {
99107 match self {
100108 Operation :: Append {
101109 branch,
102- data_files,
103- delete_files,
110+ dsn_groups,
104111 additional_summary,
105112 } => {
106113 let old_snapshot = table_metadata. current_snapshot ( branch. as_deref ( ) ) ?;
@@ -110,14 +117,31 @@ impl Operation {
110117 FormatVersion :: V2 => manifest_list_schema_v2 ( ) ,
111118 } ;
112119
120+ let mut dsn_offset = 0 ;
121+ let mut data_files: Vec < ( DataFile , i64 /* DSN offset */ ) > = vec ! [ ] ;
122+ let mut delete_files: Vec < ( DataFile , i64 /* DSN offset */ ) > = vec ! [ ] ;
123+ for dsn_group in dsn_groups. into_iter ( ) {
124+ if !dsn_group. data_files . is_empty ( ) || !dsn_group. delete_files . is_empty ( ) {
125+ dsn_offset += 1 ;
126+ for data_file in dsn_group. data_files . into_iter ( ) {
127+ data_files. push ( ( data_file, dsn_offset) ) ;
128+ }
129+ for delete_file in dsn_group. delete_files . into_iter ( ) {
130+ delete_files. push ( ( delete_file, dsn_offset) ) ;
131+ }
132+ }
133+ }
134+
113135 let n_data_files = data_files. len ( ) ;
114136 let n_delete_files = delete_files. len ( ) ;
115137
116138 if n_data_files + n_delete_files == 0 {
117139 return Ok ( ( None , Vec :: new ( ) ) ) ;
118140 }
141+ let largest_dsn_offset = dsn_offset;
142+ assert ! ( largest_dsn_offset >= 1 , "Should have exited early" ) ;
119143
120- let data_files_iter = delete_files. iter ( ) . chain ( data_files. iter ( ) ) ;
144+ let data_files_iter = delete_files. iter ( ) . chain ( data_files. iter ( ) ) . map ( | ( x , _ ) | x ) ;
121145
122146 let mut manifest_list_writer = if let Some ( manifest_list_bytes) =
123147 prefetch_manifest_list ( old_snapshot, & object_store)
@@ -143,25 +167,41 @@ impl Operation {
143167 let n_delete_splits =
144168 manifest_list_writer. n_splits ( n_delete_files, Content :: Deletes ) ;
145169
146- let new_datafile_iter = data_files. into_iter ( ) . map ( |data_file| {
147- ManifestEntry :: builder ( )
170+ let new_datafile_iter = data_files. into_iter ( ) . map ( |( data_file, dsn_offset) | {
171+ let mut builder = ManifestEntry :: builder ( ) ;
172+ builder
148173 . with_format_version ( table_metadata. format_version )
149174 . with_status ( Status :: Added )
150- . with_data_file ( data_file)
175+ . with_data_file ( data_file) ;
176+ // If there is only one data sequence number in this commit, we can just use sequence number inheritance
177+ // If there are multiple data sequence numbers in this commit, we need to set the data sequence number explicitly on each manifest entry
178+ if largest_dsn_offset > 1 {
179+ builder
180+ . with_sequence_number ( table_metadata. last_sequence_number + dsn_offset) ;
181+ }
182+ builder
151183 . build ( )
152184 . map_err ( crate :: spec:: error:: Error :: from)
153185 . map_err ( Error :: from)
154186 } ) ;
155187
156- let new_deletefile_iter = delete_files. into_iter ( ) . map ( |data_file| {
157- ManifestEntry :: builder ( )
158- . with_format_version ( table_metadata. format_version )
159- . with_status ( Status :: Added )
160- . with_data_file ( data_file)
161- . build ( )
162- . map_err ( crate :: spec:: error:: Error :: from)
163- . map_err ( Error :: from)
164- } ) ;
188+ let new_deletefile_iter =
189+ delete_files. into_iter ( ) . map ( |( data_file, dsn_offset) | {
190+ let mut builder = ManifestEntry :: builder ( ) ;
191+ builder
192+ . with_format_version ( table_metadata. format_version )
193+ . with_status ( Status :: Added )
194+ . with_data_file ( data_file) ;
195+ if largest_dsn_offset > 1 {
196+ builder. with_sequence_number (
197+ table_metadata. last_sequence_number + dsn_offset,
198+ ) ;
199+ }
200+ builder
201+ . build ( )
202+ . map_err ( crate :: spec:: error:: Error :: from)
203+ . map_err ( Error :: from)
204+ } ) ;
165205
166206 let snapshot_id = generate_snapshot_id ( ) ;
167207
@@ -211,11 +251,13 @@ impl Operation {
211251 ( _, _) => Ok ( SnapshotOperation :: Overwrite ) ,
212252 } ?;
213253
254+ let snapshot_sequence_number =
255+ table_metadata. last_sequence_number + largest_dsn_offset;
214256 let mut snapshot_builder = SnapshotBuilder :: default ( ) ;
215257 snapshot_builder
216258 . with_snapshot_id ( snapshot_id)
217259 . with_manifest_list ( new_manifest_list_location)
218- . with_sequence_number ( table_metadata . last_sequence_number + 1 )
260+ . with_sequence_number ( snapshot_sequence_number )
219261 . with_summary ( Summary {
220262 operation : snapshot_operation,
221263 other : additional_summary. unwrap_or_default ( ) ,
0 commit comments