Commit dd5bd37

merge changes from #272 to fix fMRIPrep/master

1 parent: 24ef30f
1 file changed: +27 -33 lines

niworkflows/data/utils.py

Lines changed: 27 additions & 33 deletions
@@ -16,7 +16,6 @@
 import base64
 import hashlib
 import subprocess as sp
-from io import open
 from builtins import str
 
 try:
@@ -31,7 +30,7 @@
 
 PY3 = sys.version_info[0] > 2
 MAX_RETRIES = 20
-NIWORKFLOWS_CACHE_DIR = (Path.home() / '.cache' / 'stanford-crn').resolve()
+NIWORKFLOWS_CACHE_DIR = Path.home() / '.cache' / 'stanford-crn'
 
 
 def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
@@ -69,11 +68,10 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
     data_dir = final_path.parent
 
     if temp_downloads is None:
-        temp_downloads = str(NIWORKFLOWS_CACHE_DIR / 'downloads')
+        temp_downloads = NIWORKFLOWS_CACHE_DIR / 'downloads'
+    temp_downloads = Path(temp_downloads)
 
-    # Determine data path
-    if not op.exists(temp_downloads):
-        os.makedirs(temp_downloads)
+    temp_downloads.mkdir(parents=True, exist_ok=True)
 
     # Determine filename using URL
     parse = urlparse(url)
@@ -84,15 +82,14 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
     if filetype is not None:
         file_name += filetype
 
-    temp_full_name = op.join(temp_downloads, file_name)
-    temp_part_name = temp_full_name + ".part"
+    temp_full_path = temp_downloads / file_name
+    temp_part_path = temp_full_path.with_name(file_name + '.part')
 
     if overwrite:
-        shutil.rmtree(dataset_dir, ignore_errors=True)
+        shutil.rmtree(str(dataset_dir), ignore_errors=True)
 
-    if op.exists(temp_full_name):
-        if overwrite:
-            os.remove(temp_full_name)
+        if temp_full_path.exists():
+            temp_full_path.unlink()
 
     t_0 = time.time()
     local_file = None
@@ -116,9 +113,9 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
         if verbose > 0:
             displayed_url = url.split('?')[0] if verbose == 1 else url
             NIWORKFLOWS_LOG.info('Downloading data from %s ...', displayed_url)
-        if resume and op.exists(temp_part_name):
+        if resume and temp_part_path.exists():
             # Download has been interrupted, we try to resume it.
-            local_file_size = op.getsize(temp_part_name)
+            local_file_size = temp_part_path.stat().st_size
             # If the file exists, then only download the remainder
             request.add_header("Range", "bytes={}-".format(local_file_size))
             try:
@@ -139,8 +136,8 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
                     resume=False, overwrite=overwrite,
                     md5sum=md5sum, username=username, password=password,
                     verbose=verbose)
-            local_file = open(temp_part_name, "ab")
             initial_size = local_file_size
+            mode = 'ab'
         else:
             try:
                 data = urlopen(request)
@@ -156,28 +153,25 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
                         verbose=verbose, retry=retry + 1)
                 else:
                     raise
+            mode = 'wb'
 
-            local_file = open(temp_part_name, "wb")
-
-        _chunk_read_(data, local_file, report_hook=(verbose > 0),
-                     initial_size=initial_size, verbose=verbose)
-        # temp file must be closed prior to the move
-        if not local_file.closed:
-            local_file.close()
-        shutil.move(temp_part_name, temp_full_name)
+        with temp_part_path.open(mode) as local_file:
+            _chunk_read_(data, local_file, report_hook=(verbose > 0),
+                         initial_size=initial_size, verbose=verbose)
+        temp_part_path.replace(temp_full_path)
         delta_t = time.time() - t_0
         if verbose > 0:
             # Complete the reporting hook
             sys.stderr.write(' ...done. ({0:.0f} seconds, {1:.0f} min)\n'
                              .format(delta_t, delta_t // 60))
 
         if md5sum is not None:
-            if _md5_sum_file(temp_full_name) != md5sum:
-                raise ValueError("File {} checksum verification has failed."
-                                 " Dataset fetching aborted.".format(local_file))
+            if _md5_sum_file(temp_full_path) != md5sum:
+                raise ValueError("File {!s} checksum verification has failed."
+                                 " Dataset fetching aborted.".format(temp_full_path))
 
         if filetype is None:
-            fname, filetype = op.splitext(op.basename(temp_full_name))
+            fname, filetype = op.splitext(temp_full_path.name)
             if filetype == '.gz':
                 fname, ext = op.splitext(fname)
                 filetype = ext + filetype
@@ -187,15 +181,15 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
 
         if filetype.startswith('tar'):
             args = 'xf' if not filetype.endswith('gz') else 'xzf'
-            sp.check_call(['tar', args, temp_full_name], cwd=data_dir)
-            os.remove(temp_full_name)
+            sp.check_call(['tar', args, str(temp_full_path)], cwd=data_dir)
+            temp_full_path.unlink()
             return final_path
 
         if filetype == 'zip':
            import zipfile
            sys.stderr.write('Unzipping package (%s) to data path (%s)...' % (
-                temp_full_name, data_dir))
-            with zipfile.ZipFile(temp_full_name, 'r') as zip_ref:
+                temp_full_path, data_dir))
+            with zipfile.ZipFile(str(temp_full_path), 'r') as zip_ref:
                 zip_ref.extractall(data_dir)
             sys.stderr.write('done.\n')
             return final_path
@@ -225,7 +219,7 @@ def _get_data_path(data_dir=None):
                         if d.strip()]
         default_dirs += [NIWORKFLOWS_CACHE_DIR]
 
-    return [Path(d).expanduser().resolve()
+    return [Path(d).expanduser()
             for d in data_dir.split(os.pathsep) if d.strip()] or default_dirs
 
 
@@ -297,7 +291,7 @@ def readlinkabs(link):
 def _md5_sum_file(path):
     """ Calculates the MD5 sum of a file.
     """
-    with open(path, 'rb') as fhandle:
+    with Path(path).open('rb') as fhandle:
         md5sum = hashlib.md5()
         while True:
             data = fhandle.read(8192)
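
Taken together, the hunks replace every string-path call in the fetcher (op.join, op.exists, op.getsize, os.makedirs, os.remove, shutil.move) with its pathlib equivalent (the / operator, .exists(), .stat().st_size, .mkdir(parents=True, exist_ok=True), .unlink(), .replace()). Dropping .resolve() from NIWORKFLOWS_CACHE_DIR is plausibly the actual fMRIPrep/master fix: before Python 3.6, Path.resolve() raises FileNotFoundError for a path that does not exist yet, which is exactly the state of a fresh cache directory. The sketch below condenses the resulting download flow; it is a minimal illustration, not the real fetch_file: download, CACHE_DIR, and the single .read() are stand-ins for the retry, authentication, and chunked-progress machinery of the actual function.

from pathlib import Path
from urllib.request import Request, urlopen

# Hypothetical stand-in for NIWORKFLOWS_CACHE_DIR; note: no .resolve().
CACHE_DIR = Path.home() / '.cache' / 'stanford-crn'


def download(url, file_name, resume=True):
    """Fetch ``url`` into the cache, resuming from a '.part' file."""
    temp_downloads = CACHE_DIR / 'downloads'
    # One call replaces the old op.exists() + os.makedirs() pair.
    temp_downloads.mkdir(parents=True, exist_ok=True)

    temp_full_path = temp_downloads / file_name          # was op.join(...)
    temp_part_path = temp_full_path.with_name(file_name + '.part')

    request = Request(url)
    if resume and temp_part_path.exists():               # was op.exists(...)
        initial_size = temp_part_path.stat().st_size     # was op.getsize(...)
        request.add_header('Range', 'bytes={}-'.format(initial_size))
        mode = 'ab'                                      # append to the stub
    else:
        mode = 'wb'                                      # start from scratch

    data = urlopen(request)
    # The context manager closes the handle before the rename, which the old
    # code did by hand ("temp file must be closed prior to the move").
    with temp_part_path.open(mode) as local_file:
        local_file.write(data.read())
    temp_part_path.replace(temp_full_path)               # was shutil.move(...)
    return temp_full_path

Path.replace() overwrites an existing destination (unlike a bare os.rename on Windows), so the finished .part file can be renamed over any stale download without a prior existence check.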

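_get_data_path gets the same .resolve() removal, and its return line leans on a small idiom worth spelling out: an empty list comprehension is falsy, so the trailing "or default_dirs" hands back the defaults whenever data_dir contributes no usable entries. A minimal sketch of that idiom, assuming a hypothetical get_data_paths helper (the real function builds default_dirs from more sources, as the "default_dirs +=" context line suggests):

import os
from pathlib import Path


def get_data_paths(data_dir=None):
    """Split an os.pathsep-separated string into Paths, falling back
    to a default cache location when nothing usable is supplied."""
    default_dirs = [Path.home() / '.cache' / 'stanford-crn']
    if data_dir is None:
        return default_dirs
    # 'if d.strip()' drops empty fragments (doubled or trailing separators);
    # an empty result is falsy, so 'or' falls through to the defaults.
    return [Path(d).expanduser()
            for d in data_dir.split(os.pathsep) if d.strip()] or default_dirs


print(get_data_paths(os.pathsep.join(['~/data', '', '/tmp/data'])))  # two Paths
print(get_data_paths(os.pathsep))  # only separators, so default_dirs is returned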

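Finally, _md5_sum_file now opens the file through Path(path).open('rb'), which accepts either a str or a Path, so callers holding temp_full_path no longer need to convert. The hunk cuts off mid-loop; completed with the standard tail, the chunked-hashing idiom looks like this (a sketch; only the lines visible in the diff are from the source):

import hashlib
from pathlib import Path


def md5_sum_file(path):
    """Calculate the MD5 sum of a file in 8 KiB chunks so that even a
    multi-gigabyte template archive never has to fit in memory."""
    with Path(path).open('rb') as fhandle:
        md5sum = hashlib.md5()
        while True:
            data = fhandle.read(8192)
            if not data:  # read() returns b'' at end-of-file
                break
            md5sum.update(data)
    return md5sum.hexdigest()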