1616
1717from ftplib import FTP
1818from mimetypes import MimeTypes
19- import os
20- import tempfile
19+ from pathlib import Path
20+ from pathlib import PurePosixPath
2121from urllib .parse import urlparse
22+ from kiss_headers import parse_it
2223
2324import requests
25+ import tempfile
2426
2527
2628class Response :
@@ -41,14 +43,35 @@ def __init__(self, location, content_type, size, url):
def _decode_ext_value(value):
    """
    Return the text carried by an RFC 5987 extended header value such as `UTF-8''na%C3%AFve.txt`.
    Return `value` unchanged when it is not a string, does not have the
    `charset'language'percent-encoded` shape, names an unknown charset or cannot be decoded.
    """
    import codecs
    from urllib.parse import unquote

    if not isinstance(value, str):
        return value

    charset, first_quote, remainder = value.partition("'")
    _language, second_quote, encoded = remainder.partition("'")
    if not (charset and first_quote and second_quote):
        return value

    try:
        codecs.lookup(charset)
        return unquote(encoded, encoding=charset, errors="strict")
    except (LookupError, UnicodeDecodeError):
        return value


def _safe_filename(candidate):
    """
    Return `candidate` reduced to a bare file name, or None when nothing usable is left.
    Only the last path component is kept so that a name such as `../../etc/passwd`
    cannot place the download outside of the requested directory.
    """
    if not isinstance(candidate, str):
        return None

    # Treat backslashes as separators too: the name comes from a remote server.
    name = PurePosixPath(candidate.replace("\\", "/")).name
    if name in ("", ".", ".."):
        return None
    return name


def fetch_http(url, location):
    """
    Return a `Response` object built from fetching the content at a HTTP/HTTPS based `url` URL string
    Saving the content in a file at `location`
    If `location` is an existing directory - try to deduce the filename from, in this order:
    the `filename*` and `filename` parameters of the Content-Disposition header, then the
    last segment of the URL path.
    If deduction failed, save the content in a temporary file created in the `location` directory

    `location` can be a string or a path-like object. It is passed to the `Response` as-is,
    unless it is a directory: then the `Response` location is a `Path` to the file written.
    The `Response` size is the Content-Length header as an int, or None without that header.
    """
    # NOTE(review): no timeout and no status check here; an HTTP error body is saved like
    # any other content. Left unchanged because callers may depend on it.
    r = requests.get(url)

    # Wrap in a Path only for the directory test so a plain string `location` works too.
    target = Path(location)
    if target.is_dir():
        content_disposition = parse_it(r.headers).get("content-disposition") or {}
        # NOTE(review): assumes kiss_headers returns the raw `filename*` value - confirm.
        candidates = (
            _decode_ext_value(content_disposition.get("filename*")),
            content_disposition.get("filename"),
            PurePosixPath(urlparse(url).path).name,
        )
        filename = next(filter(None, map(_safe_filename, candidates)), None)

        if filename is not None:
            location = target / filename
        else:
            # delete=False keeps the file on disk; the `with` only releases the handle
            # before the file is reopened for writing below.
            with tempfile.NamedTemporaryFile(dir=target, delete=False) as tmp:
                location = Path(tmp.name)

    with open(location, "wb") as f:
        f.write(r.content)

    content_type = r.headers.get("content-type")
    size = r.headers.get("content-length")
    size = int(size) if size else None

    resp = Response(location=location, content_type=content_type, size=size, url=url)

    return resp
@@ -59,49 +82,57 @@ def fetch_http(url, location):
def fetch_ftp(url, location):
    """
    Return a `Response` object built from fetching the content at a FTP based `url` URL string
    Saving the content in a file at `location`
    If `location` is an existing directory - deduce the filename from the URL

    `location` can be a string or a path-like object. It is passed to the `Response` as-is,
    unless it is a directory: then the `Response` location is a `Path` to the file written.
    The `Response` content type is guessed from the file name and is None when unknown;
    its size is whatever the server reports for the remote path.
    The login is done without credentials and the connection is always closed, even when
    the transfer fails.
    """
    url_parts = urlparse(url)

    netloc = url_parts.netloc
    path = PurePosixPath(url_parts.path)
    directory = path.parent
    filename = path.name

    # Wrap in a Path only for the directory test so a plain string `location` works too.
    target = Path(location)
    if target.is_dir():
        location = target / filename

    # guess_type() always returns a (type, encoding) pair; the type is None when unknown.
    content_type, _encoding = MimeTypes().guess_type(filename)

    # The context manager closes the connection on every exit path.
    with FTP(netloc) as ftp:
        ftp.login()
        size = ftp.size(str(path))
        ftp.cwd(str(directory))
        with open(location, "wb") as f:
            ftp.retrbinary("RETR {}".format(filename), f.write)

    resp = Response(location=location, content_type=content_type, size=size, url=url)
    return resp
89117
90118
def fetch(url, location=None):
    """
    Return a `Response` object built from fetching the content at the `url` URL string and store content at a provided `location`
    If `location` is None, save the content in a newly created temporary file
    If `location` is an existing directory - try to deduce the filename

    Supported URL schemes are `ftp`, `http` and `https`; any other scheme raises an
    Exception before anything is created on disk.
    The fetcher always receives `location` as a `Path`.
    """
    scheme = urlparse(url).scheme

    # Pick the fetcher first so an unsupported scheme cannot leave an orphan temporary file.
    if scheme in ("http", "https"):
        fetcher = fetch_http
    elif scheme == "ftp":
        fetcher = fetch_ftp
    else:
        raise Exception("Not a supported/known scheme.")

    if location is None:
        # delete=False keeps the file on disk; the `with` only releases the handle so
        # the fetcher can reopen the file for writing.
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            location = Path(temp.name)
    else:
        # The fetchers rely on Path operations, so accept plain strings here.
        location = Path(location)

    return fetcher(url, location)
0 commit comments