Skip to content

Commit 9fa3776

Browse files
c-thielXuanwo
andauthored
fix: Keep snapshot log on replace (apache#1896)
## Which issue does this PR close? Fixes remove_ref() to preserve snapshot log when removing MainBranch reference during CREATE OR REPLACE TABLE operations. Previously cleared entire snapshot history, causing testReplaceTableKeepsSnapshotLog RCK test to fail. Related Go Issue: apache/iceberg-go#638 Java also does not clear the log: https://github.com/apache/iceberg/blob/16e84356dae1975fa04d8c3ecce30a90df18ca9f/core/src/main/java/org/apache/iceberg/TableMetadata.java#L1342-L1352 ## What changes are included in this PR? - Do not clear `snapshot_log` if ref to `main` branch is removed ## Are these changes tested? Yes --------- Signed-off-by: Xuanwo <github@xuanwo.io> Co-authored-by: Xuanwo <github@xuanwo.io>
1 parent 3d47be5 commit 9fa3776

File tree

1 file changed

+67
-1
lines changed

1 file changed

+67
-1
lines changed

crates/iceberg/src/spec/table_metadata_builder.rs

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,6 @@ impl TableMetadataBuilder {
572572
pub fn remove_ref(mut self, ref_name: &str) -> Self {
573573
if ref_name == MAIN_BRANCH {
574574
self.metadata.current_snapshot_id = None;
575-
self.metadata.snapshot_log.clear();
576575
}
577576

578577
if self.metadata.refs.remove(ref_name).is_some() || ref_name == MAIN_BRANCH {
@@ -2237,6 +2236,73 @@ mod tests {
22372236
assert_eq!(result.metadata.current_snapshot().unwrap().snapshot_id(), 2);
22382237
}
22392238

2239+
#[test]
2240+
fn test_remove_main_ref_keeps_snapshot_log() {
2241+
let builder = builder_without_changes(FormatVersion::V2);
2242+
2243+
let snapshot = Snapshot::builder()
2244+
.with_snapshot_id(1)
2245+
.with_timestamp_ms(builder.metadata.last_updated_ms + 1)
2246+
.with_sequence_number(0)
2247+
.with_schema_id(0)
2248+
.with_manifest_list("/snap-1.avro")
2249+
.with_summary(Summary {
2250+
operation: Operation::Append,
2251+
additional_properties: HashMap::from_iter(vec![
2252+
(
2253+
"spark.app.id".to_string(),
2254+
"local-1662532784305".to_string(),
2255+
),
2256+
("added-data-files".to_string(), "4".to_string()),
2257+
("added-records".to_string(), "4".to_string()),
2258+
("added-files-size".to_string(), "6001".to_string()),
2259+
]),
2260+
})
2261+
.build();
2262+
2263+
let result = builder
2264+
.add_snapshot(snapshot.clone())
2265+
.unwrap()
2266+
.set_ref(MAIN_BRANCH, SnapshotReference {
2267+
snapshot_id: 1,
2268+
retention: SnapshotRetention::Branch {
2269+
min_snapshots_to_keep: Some(10),
2270+
max_snapshot_age_ms: None,
2271+
max_ref_age_ms: None,
2272+
},
2273+
})
2274+
.unwrap()
2275+
.build()
2276+
.unwrap();
2277+
2278+
// Verify snapshot log was created
2279+
assert_eq!(result.metadata.snapshot_log.len(), 1);
2280+
assert_eq!(result.metadata.snapshot_log[0].snapshot_id, 1);
2281+
assert_eq!(result.metadata.current_snapshot_id, Some(1));
2282+
2283+
// Remove the main ref
2284+
let result_after_remove = result
2285+
.metadata
2286+
.into_builder(Some(
2287+
"s3://bucket/test/location/metadata/metadata2.json".to_string(),
2288+
))
2289+
.remove_ref(MAIN_BRANCH)
2290+
.build()
2291+
.unwrap();
2292+
2293+
// Verify snapshot log is kept even after removing main ref
2294+
assert_eq!(result_after_remove.metadata.snapshot_log.len(), 1);
2295+
assert_eq!(result_after_remove.metadata.snapshot_log[0].snapshot_id, 1);
2296+
assert_eq!(result_after_remove.metadata.current_snapshot_id, None);
2297+
assert_eq!(result_after_remove.changes.len(), 1);
2298+
assert_eq!(
2299+
result_after_remove.changes[0],
2300+
TableUpdate::RemoveSnapshotRef {
2301+
ref_name: MAIN_BRANCH.to_string()
2302+
}
2303+
);
2304+
}
2305+
22402306
#[test]
22412307
fn test_set_branch_snapshot_creates_branch_if_not_exists() {
22422308
let builder = builder_without_changes(FormatVersion::V2);

0 commit comments

Comments
 (0)