File tree Expand file tree Collapse file tree 1 file changed +44
-0
lines changed
Expand file tree Collapse file tree 1 file changed +44
-0
lines changed Original file line number Diff line number Diff line change @@ -65,3 +65,47 @@ pub(crate) trait Deduplicator {
6565 }
6666 }
6767}
68+
/// Deduplication state used when filtering checkpoint batches.
///
/// Holds a set of already-seen file keys together with the position of the
/// `add.path` column inside the getter slice handed to
/// `extract_file_action`.
// NOTE(review): `allow(unused)` presumably silences dead-code warnings until
// callers are wired up; drop it once the type is referenced.
#[allow(unused)]
pub(crate) struct CheckpointDeduplicator {
    /// File keys (add-action paths) that are treated as already seen.
    seen_file_keys: HashSet<String>,
    /// Index into the getter slice where the `add.path` column lives.
    add_path_index: usize,
}
74+ impl CheckpointDeduplicator {
75+ #[ allow( unused) ]
76+ pub ( crate ) fn try_new (
77+ seen_file_keys : HashSet < String > ,
78+ add_path_index : usize ,
79+ ) -> DeltaResult < Self > {
80+ Ok ( Self {
81+ seen_file_keys,
82+ add_path_index,
83+ } )
84+ }
85+ }
86+
87+ impl Deduplicator for CheckpointDeduplicator {
88+ type Key = String ;
89+
90+ fn extract_file_action < ' a > (
91+ & self ,
92+ i : usize ,
93+ getters : & [ & ' a dyn GetData < ' a > ] ,
94+ _skip_removes : bool ,
95+ ) -> DeltaResult < Option < ( Self :: Key , bool ) > > {
96+ // Try to extract an add action by the required path column
97+ if let Some ( path) = getters[ self . add_path_index ] . get_str ( i, "add.path" ) ? {
98+ Ok ( Some ( ( path. to_string ( ) , true ) ) )
99+ } else {
100+ Ok ( None )
101+ }
102+ }
103+
104+ fn check_and_record_seen ( & mut self , key : Self :: Key ) -> bool {
105+ self . seen_file_keys . contains ( & key)
106+ }
107+
108+ fn is_log_batch ( & self ) -> bool {
109+ false
110+ }
111+ }
You can’t perform that action at this time.
0 commit comments