 import base64
 import hashlib
 import subprocess as sp
-from io import open
 from builtins import str
 
 try:
 
 PY3 = sys.version_info[0] > 2
 MAX_RETRIES = 20
-NIWORKFLOWS_CACHE_DIR = (Path.home() / '.cache' / 'stanford-crn').resolve()
+NIWORKFLOWS_CACHE_DIR = Path.home() / '.cache' / 'stanford-crn'
 
 
 def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
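
A likely motive for dropping `.resolve()` here: on Python 3.5, `Path.resolve()` is strict and raises `FileNotFoundError` when the path does not exist yet, so resolving the cache directory at import time could fail on a fresh machine. A minimal sketch of the distinction, using the same cache path:

    from pathlib import Path

    # Pure path arithmetic; never touches the filesystem.
    cache = Path.home() / '.cache' / 'stanford-crn'

    # .resolve() does touch the filesystem; on Python 3.5 it raises
    # FileNotFoundError for a not-yet-created directory (the lenient
    # strict=False default only arrived in Python 3.6).
    # cache.resolve()
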
@@ -69,11 +68,10 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
     data_dir = final_path.parent
 
     if temp_downloads is None:
-        temp_downloads = str(NIWORKFLOWS_CACHE_DIR / 'downloads')
+        temp_downloads = NIWORKFLOWS_CACHE_DIR / 'downloads'
+    temp_downloads = Path(temp_downloads)
 
-    # Determine data path
-    if not op.exists(temp_downloads):
-        os.makedirs(temp_downloads)
+    temp_downloads.mkdir(parents=True, exist_ok=True)
 
     # Determine filename using URL
     parse = urlparse(url)
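
Aside: the removed exists-then-makedirs pair was also racy (another process can create the directory between the check and the call); `Path.mkdir(parents=True, exist_ok=True)` is the single idempotent equivalent. A minimal standalone sketch:

    from pathlib import Path

    downloads = Path.home() / '.cache' / 'stanford-crn' / 'downloads'
    # Creates any missing ancestors and is a no-op if the directory
    # already exists, so there is no exists()/makedirs() race window.
    downloads.mkdir(parents=True, exist_ok=True)
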
@@ -84,15 +82,14 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
     if filetype is not None:
         file_name += filetype
 
-    temp_full_name = op.join(temp_downloads, file_name)
-    temp_part_name = temp_full_name + ".part"
+    temp_full_path = temp_downloads / file_name
+    temp_part_path = temp_full_path.with_name(file_name + '.part')
 
     if overwrite:
-        shutil.rmtree(dataset_dir, ignore_errors=True)
+        shutil.rmtree(str(dataset_dir), ignore_errors=True)
 
-    if op.exists(temp_full_name):
-        if overwrite:
-            os.remove(temp_full_name)
+    if temp_full_path.exists():
+        temp_full_path.unlink()
 
     t_0 = time.time()
     local_file = None
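
`with_name()` swaps only the final path component, so the `.part` file lands next to its final destination. Note that `with_suffix('.part')` would not do: it replaces the last extension rather than appending. A sketch with a hypothetical file name:

    from pathlib import Path

    full = Path('/tmp/downloads/template.tar.gz')  # hypothetical name
    part = full.with_name(full.name + '.part')
    assert str(part) == '/tmp/downloads/template.tar.gz.part'
    # with_suffix() would clobber '.gz' instead of appending:
    assert str(full.with_suffix('.part')) == '/tmp/downloads/template.tar.part'
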
@@ -116,9 +113,9 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
         if verbose > 0:
             displayed_url = url.split('?')[0] if verbose == 1 else url
             NIWORKFLOWS_LOG.info('Downloading data from %s ...', displayed_url)
-        if resume and op.exists(temp_part_name):
+        if resume and temp_part_path.exists():
             # Download has been interrupted, we try to resume it.
-            local_file_size = op.getsize(temp_part_name)
+            local_file_size = temp_part_path.stat().st_size
             # If the file exists, then only download the remainder
             request.add_header("Range", "bytes={}-".format(local_file_size))
             try:
@@ -139,8 +136,8 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
                     resume=False, overwrite=overwrite,
                     md5sum=md5sum, username=username, password=password,
                     verbose=verbose)
-            local_file = open(temp_part_name, "ab")
             initial_size = local_file_size
+            mode = 'ab'
         else:
             try:
                 data = urlopen(request)
@@ -156,28 +153,25 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
                         verbose=verbose, retry=retry + 1)
                 else:
                     raise
+            mode = 'wb'
 
-            local_file = open(temp_part_name, "wb")
-
-        _chunk_read_(data, local_file, report_hook=(verbose > 0),
-                     initial_size=initial_size, verbose=verbose)
-        # temp file must be closed prior to the move
-        if not local_file.closed:
-            local_file.close()
-        shutil.move(temp_part_name, temp_full_name)
+        with temp_part_path.open(mode) as local_file:
+            _chunk_read_(data, local_file, report_hook=(verbose > 0),
+                         initial_size=initial_size, verbose=verbose)
+        temp_part_path.replace(temp_full_path)
         delta_t = time.time() - t_0
         if verbose > 0:
             # Complete the reporting hook
             sys.stderr.write(' ...done. ({0:.0f} seconds, {1:.0f} min)\n'
                              .format(delta_t, delta_t // 60))
 
     if md5sum is not None:
-        if _md5_sum_file(temp_full_name) != md5sum:
-            raise ValueError("File {} checksum verification has failed."
-                             " Dataset fetching aborted.".format(local_file))
+        if _md5_sum_file(temp_full_path) != md5sum:
+            raise ValueError("File {!s} checksum verification has failed."
+                             " Dataset fetching aborted.".format(temp_full_path))
 
     if filetype is None:
-        fname, filetype = op.splitext(op.basename(temp_full_name))
+        fname, filetype = op.splitext(temp_full_path.name)
         if filetype == '.gz':
             fname, ext = op.splitext(fname)
             filetype = ext + filetype
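
Folding the download into a single `with temp_part_path.open(mode)` block guarantees the handle is closed before the rename, which the old code had to check by hand. `Path.replace()` wraps `os.replace()`, an atomic rename that overwrites a stale destination; source and target share a directory here, so no cross-device copy is involved. A minimal sketch (names are hypothetical):

    from pathlib import Path

    part = Path('archive.bin.part')  # hypothetical names
    final = Path('archive.bin')
    mode = 'ab' if part.exists() else 'wb'  # resume appends, fresh download truncates
    with part.open(mode) as fh:
        fh.write(b'payload')
    # Atomic on POSIX; silently replaces an existing final file.
    part.replace(final)
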
@@ -187,15 +181,15 @@ def fetch_file(dataset_name, url, dataset_dir, dataset_prefix=None,
 
     if filetype.startswith('tar'):
         args = 'xf' if not filetype.endswith('gz') else 'xzf'
-        sp.check_call(['tar', args, temp_full_name], cwd=data_dir)
-        os.remove(temp_full_name)
+        sp.check_call(['tar', args, str(temp_full_path)], cwd=data_dir)
+        temp_full_path.unlink()
         return final_path
 
     if filetype == 'zip':
         import zipfile
         sys.stderr.write('Unzipping package (%s) to data path (%s)...' % (
-            temp_full_name, data_dir))
-        with zipfile.ZipFile(temp_full_name, 'r') as zip_ref:
+            temp_full_path, data_dir))
+        with zipfile.ZipFile(str(temp_full_path), 'r') as zip_ref:
             zip_ref.extractall(data_dir)
         sys.stderr.write('done.\n')
         return final_path
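
The `str()` casts around `Path` objects are presumably needed because path-like support in `subprocess`, `zipfile`, and friends only arrived with PEP 519 in Python 3.6; on Python 2 and 3.5 the conversion must be explicit. A sketch of the pattern (archive name is hypothetical):

    import subprocess as sp
    from pathlib import Path

    archive = Path('bundle.tar.gz')  # hypothetical
    # Pre-3.6 interpreters reject Path objects here, so cast explicitly.
    sp.check_call(['tar', 'xzf', str(archive)], cwd=str(archive.parent))
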
@@ -225,7 +219,7 @@ def _get_data_path(data_dir=None):
                         if d.strip()]
         default_dirs += [NIWORKFLOWS_CACHE_DIR]
 
-    return [Path(d).expanduser().resolve()
+    return [Path(d).expanduser()
             for d in data_dir.split(os.pathsep) if d.strip()] or default_dirs
 
 
@@ -297,7 +291,7 @@ def readlinkabs(link):
 def _md5_sum_file(path):
     """Calculates the MD5 sum of a file.
     """
-    with open(path, 'rb') as fhandle:
+    with Path(path).open('rb') as fhandle:
         md5sum = hashlib.md5()
         while True:
             data = fhandle.read(8192)
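
The hunk cuts off mid-loop; for reference, a sketch of how the full helper plausibly reads after the patch, assuming it digests 8 KiB chunks until EOF and returns the hex digest:

    import hashlib
    from pathlib import Path

    def _md5_sum_file(path):
        """Calculates the MD5 sum of a file."""
        with Path(path).open('rb') as fhandle:
            md5sum = hashlib.md5()
            while True:
                data = fhandle.read(8192)
                if not data:  # EOF
                    break
                md5sum.update(data)
        return md5sum.hexdigest()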