 import base64
 import hashlib
 import subprocess as sp
-from io import open
 from builtins import str
 
 try:
 
 PY3 = sys.version_info[0] > 2
 MAX_RETRIES = 20
-NIWORKFLOWS_CACHE_DIR = (Path.home() / '.cache' / 'stanford-crn').resolve()
+NIWORKFLOWS_CACHE_DIR = Path.home() / '.cache' / 'stanford-crn'
 
 
 def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
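
A likely motive for dropping `.resolve()` here: on Python 3.5, `Path.resolve()` is strict and raises `FileNotFoundError` when the path does not exist yet, so resolving the cache directory at import time could fail on a fresh machine. A minimal sketch of the distinction, using the same cache path:

    from pathlib import Path

    # Pure path arithmetic; never touches the filesystem.
    cache = Path.home() / '.cache' / 'stanford-crn'

    # .resolve() does touch the filesystem; on Python 3.5 it raises
    # FileNotFoundError for a not-yet-created directory (the lenient
    # strict=False default only arrived in Python 3.6).
    # cache.resolve()
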
@@ -69,11 +68,10 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
     data_dir = final_path.parent
 
     if temp_downloads is None:
-        temp_downloads = str(NIWORKFLOWS_CACHE_DIR / 'downloads')
+        temp_downloads = NIWORKFLOWS_CACHE_DIR / 'downloads'
+    temp_downloads = Path(temp_downloads)
 
-    # Determine data path
-    if not op.exists(temp_downloads):
-        os.makedirs(temp_downloads)
+    temp_downloads.mkdir(parents=True, exist_ok=True)
 
     # Determine filename using URL
     parse = urlparse(url)
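
Aside: the removed exists-then-makedirs pair was also racy (another process can create the directory between the check and the call); `Path.mkdir(parents=True, exist_ok=True)` is the single idempotent equivalent. A minimal standalone sketch:

    from pathlib import Path

    downloads = Path.home() / '.cache' / 'stanford-crn' / 'downloads'
    # Creates any missing ancestors and is a no-op if the directory
    # already exists, so there is no exists()/makedirs() race window.
    downloads.mkdir(parents=True, exist_ok=True)
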
@@ -84,15 +82,14 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
     if filetype is not None:
         file_name += filetype
 
-    temp_full_name = op.join(temp_downloads, file_name)
-    temp_part_name = temp_full_name + ".part"
+    temp_full_path = temp_downloads / file_name
+    temp_part_path = temp_full_path.with_name(file_name + '.part')
 
     if overwrite:
-        shutil.rmtree(dataset_dir, ignore_errors=True)
+        shutil.rmtree(str(dataset_dir), ignore_errors=True)
 
-    if op.exists(temp_full_name):
-        if overwrite:
-            os.remove(temp_full_name)
+    if temp_full_path.exists():
+        temp_full_path.unlink()
 
     t_0 = time.time()
     local_file = None
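
`with_name()` swaps only the final path component, so the `.part` file lands next to its final destination. Note that `with_suffix('.part')` would not do: it replaces the last extension rather than appending. A sketch with a hypothetical file name:

    from pathlib import Path

    full = Path('/tmp/downloads/template.tar.gz')  # hypothetical name
    part = full.with_name(full.name + '.part')
    assert str(part) == '/tmp/downloads/template.tar.gz.part'
    # with_suffix() would clobber '.gz' instead of appending:
    assert str(full.with_suffix('.part')) == '/tmp/downloads/template.tar.part'
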
@@ -116,9 +113,9 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
         if verbose > 0:
             displayed_url = url.split('?')[0] if verbose == 1 else url
             NIWORKFLOWS_LOG.info('Downloading data from %s ...', displayed_url)
-        if resume and op.exists(temp_part_name):
+        if resume and temp_part_path.exists():
             # Download has been interrupted, we try to resume it.
-            local_file_size = op.getsize(temp_part_name)
+            local_file_size = temp_part_path.stat().st_size
             # If the file exists, then only download the remainder
             request.add_header("Range", "bytes={}-".format(local_file_size))
             try:
@@ -139,8 +136,8 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
                     resume=False, overwrite=overwrite,
                     md5sum=md5sum, username=username, password=password,
                     verbose=verbose)
-            local_file = open(temp_part_name, "ab")
             initial_size = local_file_size
+            mode = 'ab'
         else:
             try:
                 data = urlopen(request)
@@ -156,28 +153,25 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
                         verbose=verbose, retry=retry + 1)
                 else:
                     raise
+            mode = 'wb'
 
-            local_file = open(temp_part_name, "wb")
-
-        _chunk_read_(data, local_file, report_hook=(verbose > 0),
-                     initial_size=initial_size, verbose=verbose)
-        # temp file must be closed prior to the move
-        if not local_file.closed:
-            local_file.close()
-        shutil.move(temp_part_name, temp_full_name)
+        with temp_part_path.open(mode) as local_file:
+            _chunk_read_(data, local_file, report_hook=(verbose > 0),
+                         initial_size=initial_size, verbose=verbose)
+        temp_part_path.replace(temp_full_path)
         delta_t = time.time() - t_0
         if verbose > 0:
             # Complete the reporting hook
             sys.stderr.write(' ...done. ({0:.0f} seconds, {1:.0f} min)\n'
                              .format(delta_t, delta_t // 60))
 
     if md5sum is not None:
-        if _md5_sum_file(temp_full_name) != md5sum:
-            raise ValueError("File {} checksum verification has failed."
-                             " Dataset fetching aborted.".format(local_file))
+        if _md5_sum_file(temp_full_path) != md5sum:
+            raise ValueError("File {!s} checksum verification has failed."
+                             " Dataset fetching aborted.".format(temp_full_path))
 
     if filetype is None:
-        fname, filetype = op.splitext(op.basename(temp_full_name))
+        fname, filetype = op.splitext(temp_full_path.name)
         if filetype == '.gz':
             fname, ext = op.splitext(fname)
             filetype = ext + filetype
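
Folding the download into a single `with temp_part_path.open(mode)` block guarantees the handle is closed before the rename, which the old code had to check by hand. `Path.replace()` wraps `os.replace()`, an atomic rename that overwrites a stale destination; source and target share a directory here, so no cross-device copy is involved. A minimal sketch (names are hypothetical):

    from pathlib import Path

    part = Path('archive.bin.part')  # hypothetical names
    final = Path('archive.bin')
    mode = 'ab' if part.exists() else 'wb'  # resume appends, fresh download truncates
    with part.open(mode) as fh:
        fh.write(b'payload')
    # Atomic on POSIX; silently replaces an existing final file.
    part.replace(final)
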
@@ -187,15 +181,15 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
 
     if filetype.startswith('tar'):
         args = 'xf' if not filetype.endswith('gz') else 'xzf'
-        sp.check_call(['tar', args, temp_full_name], cwd=data_dir)
-        os.remove(temp_full_name)
+        sp.check_call(['tar', args, str(temp_full_path)], cwd=data_dir)
+        temp_full_path.unlink()
         return final_path
 
     if filetype == 'zip':
         import zipfile
         sys.stderr.write('Unzipping package (%s) to data path (%s)...' % (
-            temp_full_name, data_dir))
-        with zipfile.ZipFile(temp_full_name, 'r') as zip_ref:
+            temp_full_path, data_dir))
+        with zipfile.ZipFile(str(temp_full_path), 'r') as zip_ref:
             zip_ref.extractall(data_dir)
         sys.stderr.write('done.\n')
         return final_path
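
The `str()` casts around `Path` objects are presumably needed because path-like support in `subprocess`, `zipfile`, and friends only arrived with PEP 519 in Python 3.6; on Python 2 and 3.5 the conversion must be explicit. A sketch of the pattern (archive name is hypothetical):

    import subprocess as sp
    from pathlib import Path

    archive = Path('bundle.tar.gz')  # hypothetical
    # Pre-3.6 interpreters reject Path objects here, so cast explicitly.
    sp.check_call(['tar', 'xzf', str(archive)], cwd=str(archive.parent))
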
@@ -225,7 +219,7 @@ def _get_data_path(data_dir=None):
                         if d.strip()]
         default_dirs += [NIWORKFLOWS_CACHE_DIR]
 
-    return [Path(d).expanduser().resolve()
+    return [Path(d).expanduser()
             for d in data_dir.split(os.pathsep) if d.strip()] or default_dirs
 
 
@@ -297,7 +291,7 @@ def readlinkabs(link):
 def _md5_sum_file(path):
     """Calculates the MD5 sum of a file.
     """
-    with open(path, 'rb') as fhandle:
+    with Path(path).open('rb') as fhandle:
         md5sum = hashlib.md5()
         while True:
             data = fhandle.read(8192)
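
The hunk cuts off mid-loop; for reference, a sketch of how the full helper plausibly reads after the patch, assuming it digests 8 KiB chunks until EOF and returns the hex digest:

    import hashlib
    from pathlib import Path

    def _md5_sum_file(path):
        """Calculates the MD5 sum of a file."""
        with Path(path).open('rb') as fhandle:
            md5sum = hashlib.md5()
            while True:
                data = fhandle.read(8192)
                if not data:  # EOF
                    break
                md5sum.update(data)
        return md5sum.hexdigest()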