148148# NOTE: even if filename has number that is 0-padded, internally no padding
149149# is done
150150fix_accession2run = {
151- 'A000005' : ['^1-' ],
152- 'A000035' : ['^8-' , '^9-' ],
153- 'A000067' : ['^9-' ],
154- 'A000072' : ['^5-' ],
155- 'A000081' : ['^5-' ],
156- 'A000082' : ['^5-' ],
157- 'A000088' : ['^9-' ],
158- 'A000090' : ['^5-' ],
159- 'A000127' : ['^21-' ],
160- 'A000130' : ['^15-' ],
161- 'A000137' : ['^9-' , '^11-' ],
162- 'A000297' : ['^12-' ],
163- 'A000326' : ['^15-' ],
164- 'A000376' : ['^15-' ],
165- 'A000384' : ['^8-' , '^11-' ],
166- 'A000467' : ['^15-' ],
167- 'A000490' : ['^15-' ],
168- 'A000511' : ['^15-' ],
169- 'A000797' : ['^[1-7]-' ],
151+ # e.g.:
152+ # 'A000035': ['^8-', '^9-'],
170153}
171154
172- # dictionary containing fixes, keys are md5sum of study_description from
173- # dicoms, in the form of PI-Experimenter^protocolname
174- # values are list of tuples in the form (regex_pattern, substitution)
155+ # A dictionary containing fixes/remapping for sequence names per study.
156+ # Keys are md5sum of study_description from DICOMs, in the form of PI-Experimenter^protocolname
157+ # You can use `heudiconv -f reproin --command ls --files PATH`
158+ # to list the "study hash".
159+ # Values are list of tuples in the form (regex_pattern, substitution).
160+ # If the key is an empty string `''`, it would apply to any study.
175161protocols2fix = {
176- # QA
177- '43b67d9139e8c7274578b7451ab21123' :
178- [
179- # ('anat-scout.*', 'anat-scout_ses-{date}'),
180- # do not change it so we retain _ses-{date}
181- # ('anat-scout.*', 'anat-scout'),
182- ('BOLD_p2_s4_3\.5mm' , 'func_task-rest_acq-p2-s4-3.5mm' ),
183- ('BOLD_p2_s4' , 'func_task-rest_acq-p2-s4' ),
184- ('BOLD_p2_noprescannormalize' , 'func-bold_task-rest_acq-p2noprescannormalize' ),
185- ('BOLD_p2' , 'func-bold_task-rest_acq-p2' ),
186- ('BOLD_' , 'func_task-rest' ),
187- ('DTI_30_p2_s4_3\.5mm' , 'dwi_acq-DTI-30-p2-s4-3.5mm' ),
188- ('DTI_30_p2_s4' , 'dwi_acq-DTI-30-p2-s4' ),
189- ('DTI_30_p2' , 'dwi_acq-DTI-30-p2' ),
190- ('_p2_s4_3\.5mm' , '_acq-p2-s4-3.5mm' ),
191- ('_p2_s4' , '_acq-p2-s4' ),
192- ('_p2' , '_acq-p2' ),
193- ],
194- '9d148e2a05f782273f6343507733309d' :
195- [('anat_' , 'anat-' ),
196- ('run-life[0-9]' , 'run+_task-life' ),
197- ('scout_run\+' , 'scout' ),
198- ('T2w' , 'T2w_run+' ),
199- # substitutions for old protocol names
200- ('AAHead_Scout_32ch-head-coil' , 'anat-scout' ),
201- ('MPRAGE' , 'anat-T1w_acq-MPRAGE_run+' ),
202- ('gre_field_mapping_2mm' , 'fmap_run+_acq-2mm' ),
203- ('gre_field_mapping_3mm' , 'fmap_run+_acq-3mm' ),
204- ('epi_bold_sms_p2_s4_2mm_life1_748' ,
205- 'func_run+_task-life_acq-2mm748' ),
206- ('epi_bold_sms_p2_s4_2mm_life2_692' ,
207- 'func_run+_task-life_acq-2mm692' ),
208- ('epi_bold_sms_p2_s4_2mm_life3_754' ,
209- 'func_run+_task-life_acq-2mm754' ),
210- ('epi_bold_sms_p2_s4_2mm_life4_824' ,
211- 'func_run+_task-life_acq-2mm824' ),
212- ('epi_bold_p2_3mm_nofs_life1_374' ,
213- 'func_run+_task-life_acq-3mmnofs374' ),
214- ('epi_bold_p2_3mm_nofs_life2_346' ,
215- 'func_run+_task-life_acq-3mmnofs346' ),
216- ('epi_bold_p2_3mm_nofs_life3_377' ,
217- 'func_run+_task-life_acq-3mmnofs377' ),
218- ('epi_bold_p2_3mm_nofs_life4_412' ,
219- 'func_run+_task-life_acq-3mmnofs412' ),
220- ('t2_space_sag_p4_iso' , 'anat-T2w_run+' ),
221- ('gre_field_mapping_2.4mm' , 'fmap_run+_acq-2.4mm' ),
222- ('rest_p2_sms4_2.4mm_64sl_1000tr_32te_600dyn' ,
223- 'func_run+_task-rest_acq-2.4mm64sl1000tr32te600dyn' ),
224- ('DTI_30' , 'dwi_run+_acq-30' ),
225- ('t1_space_sag_p2_iso' , 'anat-T1w_acq-060mm_run+' )],
226- '76b36c80231b0afaf509e2d52046e964' :
227- [('fmap_run\+_2mm' , 'fmap_run+_acq-2mm' )],
228- 'c6d8fbccc72990bee61d28e73b2618a4' :
229- [('run=' , 'run+' )],
230- 'a751cc977f1e354fcafcb0ea2de123bd' :
231- [
232- ('_unlabeled' , '_task-unlabeled' ),
233- ('_mSense' , '_acq-mSense' ),
234- ('_p1_sms4_2.5mm' , '_acq-p1-sms4-2.5mm' ),
235- ('_p1_sms4_3mm' , '_acq-p1-sms4-3mm' ),
236- ],
237- 'd160113cf5ea8c5d0cbbbe14ef625e76' :
238- [
239- ('_run0' , '_run-0' ),
240- ],
241- '1bd62e10672fe0b435a9aa8d75b45425' :
242- [
243- # need to add incrementing session -- study should have 2
244- # and no need for run+ for the scout!
245- ('scout(_run\+)?$' , 'scout_ses+' ),
246- ],
247- 'da218a66de902adb3ad9407d514e3639' :
248- [
249- # those sequences renamed later to include DTI- in their acq-
250- # so for consistency
251- ('hardi_64' , 'dwi_acq-DTI-hardi64' ),
252- ('acq-hardi' , 'acq-DTI-hardi' ),
253- ],
254- 'ed20c1ad4a0861b2b65768e159258eec' :
255- [
256- ('fmap_acq-discorr-dti-' , 'fmap_acq-dwi_dir-' ),
257- ('_test' , '' ),
258- ],
259- '1996f745c30c1df1d3851844e56d294f' :
260- [
261- ('fmap_acq-discorr-dti-' , 'fmap_acq-dwi_dir-' ),
262- ],
263- # '022969bfde39c2940c114edf1db3fabc':
264- # [ # should be applied only for ses-03!
265- # ('_acq-MPRAGE_ses-02', '_acq-MPRAGE_ses-03'),
266- # ],
267- # to be used only once for one interrupted accession but we cannot
268- # fix per accession yet
269- # '23763823d2b9b4b09dafcadc8e8edf21':
270- # [
271- # ('anat-T1w_acq-MPRAGE', 'anat-T1w_acq-MPRAGE_run-06'),
272- # ('anat_T2w', 'anat_T2w_run-06'),
273- # ('fmap_acq-3mm', 'fmap_acq-3mm_run-06'),
274- # ],
162+ # e.g., QA:
163+ # '43b67d9139e8c7274578b7451ab21123':
164+ # [
165+ # ('BOLD_p2_s4_3\.5mm', 'func_task-rest_acq-p2-s4-3.5mm'),
166+ # ('BOLD_', 'func_task-rest'),
167+ # ('_p2_s4', '_acq-p2-s4'),
168+ # ('_p2', '_acq-p2'),
169+ # ],
170+ # '': # for any study example with regexes used
171+ # [
172+ # ('AAHead_Scout_.*', 'anat-scout'),
173+ # ('^dti_.*', 'dwi'),
174+ # ('^.*_distortion_corr.*_([ap]+)_([12])', r'fmap-epi_dir-\1_run-\2'),
175+ # ('^(.+)_ap.*_r(0[0-9])', r'func_task-\1_run-\2'),
176+ # ('^t1w_.*', 'anat-T1w'),
177+ # # problematic case -- multiple identically named pepolar fieldmap runs
178+ # # I guess we will just sacrifice ability to detect canceled runs here.
179+ # # And we cannot just use _run+ since it would increment independently
180+ # # for ap and then for pa. We will rely on having ap preceding pa.
181+ # # Added _acq-mb8 so they match the one in funcs
182+ # ('func_task-discorr_acq-ap', r'fmap-epi_dir-ap_acq-mb8_run+'),
183+ # ('func_task-discorr_acq-pa', r'fmap-epi_dir-pa_acq-mb8_run='),
184+ # ]
275185}
276- # there was also screw up in the locator specification
277- # so we need to fix in both
278- # protocols2fix['67ae5e641ea9d487b6fdf56fb91aeb93'] = protocols2fix['022969bfde39c2940c114edf1db3fabc']
279186
280187# list containing StudyInstanceUID to skip -- hopefully doesn't happen too often
281188dicoms2skip = [
282- '1.3.12.2.1107.5.2.43.66112.30000016110117002435700000001' ,
283- '1.3.12.2.1107.5.2.43.66112.30000016102813152550600000004' , # double scout
189+ # e.g.
190+ # '1.3.12.2.1107.5.2.43.66112.30000016110117002435700000001',
284191]
285192
286193DEFAULT_FIELDS = {
287194 # Let it just be in each json file extracted
288- # 'Manufacturer': "Siemens",
289- # 'ManufacturersModelName': "Prisma",
290195 "Acknowledgements" :
291196 "We thank Terry Sacket and the rest of the DBIC (Dartmouth Brain Imaging "
292197 "Center) personnel for assistance in data collection, and "
293- "Yaroslav Halchenko and Matteo Visconti for preparing BIDS dataset. "
294- "TODO: more " ,
198+ "Yaroslav O. Halchenko for preparing BIDS dataset. "
199+ "TODO: adjust to your case. " ,
295200}
296201
297202
@@ -311,38 +216,10 @@ def filter_dicom(dcmdata):
311216
312217def filter_files (fn ):
313218 """Return True if a file should be kept, else False.
314- We're using it to filter out files that do not start with a number."""
315219
316- # do not check for these accession numbers because they haven't been
317- # recopied with the initial number
318- donotfilter = ['A000012' , 'A000013' , 'A000020' , 'A000041' ]
319-
320- split = os .path .split (fn )
321- split2 = os .path .split (split [0 ])
322- sequence_dir = split2 [1 ]
323- split3 = os .path .split (split2 [0 ])
324- accession_number = split3 [1 ]
220+ ATM reproin does not do any filtering. Override if you need to add some.
221+ """
325222 return True
326- if accession_number == 'A000043' :
327- # crazy one that got copied for some runs but not for others,
328- # so we are going to discard those that got copied and let heudiconv
329- # figure out the rest
330- return False if re .match ('^[0-9]+-' , sequence_dir ) else True
331- elif accession_number == 'unknown' :
332- # this one had some stuff without study description, filter stuff before
333- # collecting info, so it doesn't crash completely
334- return False if re .match ('^[34][07-9]-sn' , sequence_dir ) else True
335- elif accession_number in donotfilter :
336- return True
337- elif accession_number .startswith ('phantom-' ):
338- # Accessions on phantoms, e.g. in dartmouth-phantoms/bids_test4-20161014
339- return True
340- elif accession_number .startswith ('heudiconvdcm' ):
341- # we were given some tarball with dicoms which was extracted so we
342- # better obey
343- return True
344- else :
345- return True if re .match ('^[0-9]+-' , sequence_dir ) else False
346223
347224
348225def create_key (subdir , file_suffix , outtype = ('nii.gz' , 'dicom' ),
0 commit comments