|
1 | 1 | import os |
2 | | -from tqdm import tqdm |
| 2 | +import itertools |
3 | 3 | from .settings import config |
4 | 4 | from .errors import DataJointError |
5 | 5 | from .hash import long_hash |
@@ -165,22 +165,35 @@ def delete_garbage(self): |
165 | 165 | for ref in self.references) or "TRUE") |
166 | 166 | print('Deleted %d items' % self.connection.query("SELECT ROW_COUNT()").fetchone()[0]) |
167 | 167 |
|
def clean_store(self, store, verbose=True):
    """
    Remove blobs that are no longer referenced from an external storage repository.

    This must be performed after delete_garbage during low-usage periods to reduce risks of data loss.

    :param store: name of the external store; its spec is obtained from config.get_store_spec(store)
    :param verbose: if True, print the path of each file removed from a 'file' store;
        the flag is also passed through to the s3 folder's clean()
    :raises DataJointError: if a 'file' repository holds files in a folder that also has
        subfolders, or if the s3 call raises TypeError (reported as incomplete configuration)
    """
    spec = config.get_store_spec(store)
    # Hashes still tracked by this table whose value ends with the store name.
    # NOTE(review): the doubled %% presumably survives a later %-formatting pass
    # applied to the restriction string -- confirm against the query layer.
    in_use = set((self & '`hash` LIKE "%%{store}"'.format(store=store)).fetch('hash'))
    if spec['protocol'] == 'file':
        deleted_count = 0
        print('Deleting...')
        deleted_folders = set()
        root = os.path.join(spec['location'], self.database)
        # Bottom-up walk: subfolders are visited (and possibly removed) before their parent.
        for folder, dirs, files in os.walk(root, topdown=False):
            if dirs and files:
                raise DataJointError('Invalid repository with files in non-terminal folder %s' % folder)
            # `dirs` was listed before the children were visited, so discount the
            # subfolders this run has already removed.
            remaining_dirs = [d for d in dirs if os.path.join(folder, d) not in deleted_folders]
            if remaining_dirs:
                continue
            files_not_in_use = [f for f in files if f not in in_use]
            for f in files_not_in_use:
                filename = os.path.join(folder, f)
                deleted_count += 1
                if verbose:
                    print(filename)
                os.remove(filename)
            # Every file was unused (or there were none): the folder is now empty.
            if len(files_not_in_use) == len(files):
                os.rmdir(folder)
                deleted_folders.add(folder)
        print('Deleted %d objects' % deleted_count)
    elif spec['protocol'] == 's3':
        try:
            # The return value of clean() is not used here.
            s3.Folder(database=self.database, **spec).clean(in_use, verbose=verbose)
        except TypeError:
            raise DataJointError('External store {store} configuration is incomplete.'.format(store=store))
    # Any other protocol value falls through without doing anything.
0 commit comments