Skip to content

Commit b36ac03

Browse files
committed
checkpoint deduplicator
1 parent 17553c3 commit b36ac03

File tree

1 file changed

+44
-0
lines changed

1 file changed

+44
-0
lines changed

kernel/src/log_replay/deduplicator.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,47 @@ pub(crate) trait Deduplicator {
6565
}
6666
}
6767
}
68+
69+
/// Deduplicator used for checkpoint batches.
///
/// Carries a set of file keys to test rows against, together with the position of the
/// `add.path` column inside the getters slice handed to `extract_file_action`.
// NOTE(review): the lint suppression suggests this type has no callers yet — confirm and
// drop the attribute once it is wired in.
#[allow(unused)]
pub(crate) struct CheckpointDeduplicator {
    /// File keys (path strings) consulted by `check_and_record_seen`. This type only reads
    /// the set; it never inserts into it.
    seen_file_keys: HashSet<String>,
    /// Index into the getters slice at which `extract_file_action` reads `add.path`.
    add_path_index: usize,
}
74+
impl CheckpointDeduplicator {
75+
#[allow(unused)]
76+
pub(crate) fn try_new(
77+
seen_file_keys: HashSet<String>,
78+
add_path_index: usize,
79+
) -> DeltaResult<Self> {
80+
Ok(Self {
81+
seen_file_keys,
82+
add_path_index,
83+
})
84+
}
85+
}
86+
87+
impl Deduplicator for CheckpointDeduplicator {
88+
type Key = String;
89+
90+
fn extract_file_action<'a>(
91+
&self,
92+
i: usize,
93+
getters: &[&'a dyn GetData<'a>],
94+
_skip_removes: bool,
95+
) -> DeltaResult<Option<(Self::Key, bool)>> {
96+
// Try to extract an add action by the required path column
97+
if let Some(path) = getters[self.add_path_index].get_str(i, "add.path")? {
98+
Ok(Some((path.to_string(), true)))
99+
} else {
100+
Ok(None)
101+
}
102+
}
103+
104+
fn check_and_record_seen(&mut self, key: Self::Key) -> bool {
105+
self.seen_file_keys.contains(&key)
106+
}
107+
108+
fn is_log_batch(&self) -> bool {
109+
false
110+
}
111+
}

0 commit comments

Comments
 (0)