1- from typing import Any , Iterable , List , Optional , Tuple , AsyncIterator , Set
2- from bs4 import BeautifulSoup
1+ from typing import Any , Iterable , List , Optional , Tuple , AsyncIterator , Union
2+ from filesender . download import files_from_page , DownloadFile
33import filesender .response_types as response
44import filesender .request_types as request
55from urllib .parse import urlparse , urlunparse , unquote
1010import aiofiles
1111from aiostream import stream
1212from contextlib import contextmanager
13- from tenacity import retry , stop_after_attempt , wait_fixed , retry_if_exception
13+ from tenacity import RetryCallState , retry , stop_after_attempt , wait_fixed , retry_if_exception
1414import logging
1515from tqdm .asyncio import tqdm
1616
@@ -25,9 +25,13 @@ def should_retry(e: BaseException) -> bool:
2525 # Seems to be just a bug in the backend
2626 # https://github.com/encode/httpx/discussions/2941
2727 return True
28- elif isinstance (e , HTTPStatusError ) and e .response .status_code == 500 and e .response .json ()["message" ] == "auth_remote_too_late" :
28+ elif isinstance (e , HTTPStatusError ) and e .response .status_code == 500 :
29+ message = e .response .json ()["message" ]
30+ if message == "auth_remote_too_late" :
31+ return True
32+ if message == "auth_remote_signature_check_failed" :
33+ return True
2934 # These errors are caused by lag between creating the response and it being received
30- return True
3135 return False
3236
3337
def url_without_scheme(url: str) -> str:
    """
    Strips the scheme from ``url`` and returns the percent-decoded remainder,
    e.g. ``https://host/a%20b`` becomes ``host/a b``.
    """
    schemeless = urlparse(url)._replace(scheme="")
    return unquote(urlunparse(schemeless).lstrip("/"))
4246
def exception_to_message(e: BaseException) -> str:
    """
    Renders an exception as a human-readable message, adding the HTTP
    method/URL context for httpx errors.
    """
    # Guard-clause style: most specific httpx error first, since
    # HTTPStatusError carries a response while RequestError does not.
    if isinstance(e, HTTPStatusError):
        req = e.request
        return f"Request failed with content {e.response.text} for request {req.method} {req.url}."
    if isinstance(e, RequestError):
        req = e.request
        return f"Request failed for request {req.method} {req.url}. {repr(e)}"
    return repr(e)
4354
@contextmanager
def raise_status():
    """
    Context manager that wraps a block of HTTP work and re-raises any
    failure as a plain ``Exception`` with a readable message (see
    ``exception_to_message``), preserving the original as ``__cause__``.
    """
    try:
        yield
    # Catch Exception, NOT BaseException: converting KeyboardInterrupt or
    # asyncio.CancelledError into a plain Exception would swallow
    # interrupts and break async task cancellation.
    except Exception as e:
        raise Exception(exception_to_message(e)) from e
6265
6366async def yield_chunks (path : Path , chunk_size : int ) -> AsyncIterator [Tuple [bytes , int ]]:
6467 """
@@ -166,11 +169,21 @@ async def _sign_send(self, request: Request) -> Any:
166169 with raise_status ():
167170 return await self ._sign_send_inner (request )
168171
172+ @staticmethod
173+ def on_retry (state : RetryCallState ) -> None :
174+ message = str (state .outcome )
175+ if state .outcome is not None :
176+ e = state .outcome .exception ()
177+ if e is not None :
178+ message = exception_to_message (e )
179+
180+ logger .warn (f"Attempt { state .attempt_number } . { message } " )
181+
169182 @retry (
170183 retry = retry_if_exception (should_retry ),
171184 wait = wait_fixed (0.1 ),
172185 stop = stop_after_attempt (5 ),
173- before_sleep = lambda x : logger . warn ( f"Attempt { x . attempt_number } . { x . outcome } " )
186+ before_sleep = on_retry
174187 )
175188 async def _sign_send_inner (self , request : Request ) -> Any :
176189 # Needs to be a separate function to handle retry policy correctly
@@ -313,19 +326,14 @@ async def create_guest(self, body: request.Guest) -> response.Guest:
313326 self .http_client .build_request ("POST" , f"{ self .base_url } /guest" , json = body )
314327 )
315328
316- async def _files_from_token (self , token : str ) -> Set [ int ]:
329+ async def _files_from_token (self , token : str ) -> Iterable [ DownloadFile ]:
317330 """
318331 Internal function that returns a list of file IDs for a given guest token
319332 """
320333 download_page = await self .http_client .get (
321334 "https://filesender.aarnet.edu.au" , params = {"s" : "download" , "token" : token }
322335 )
323- files : Set [int ] = set ()
324- for file in BeautifulSoup (download_page .content , "html.parser" ).find_all (
325- class_ = "file"
326- ):
327- files .add (int (file .attrs ["data-id" ]))
328- return files
336+ return files_from_page (download_page .content )
329337
330338 async def download_files (
331339 self ,
@@ -342,12 +350,12 @@ async def download_files(
342350 out_dir: The path to write the downloaded files.
343351 """
344352
345- file_ids = await self ._files_from_token (token )
353+ file_meta = await self ._files_from_token (token )
346354
347- async def _download_args () -> AsyncIterator [Tuple [str , Any , Path ]]:
355+ async def _download_args () -> AsyncIterator [Tuple [str , Any , Path , int , str ]]:
348356 "Yields tuples of arguments to pass to download_file"
349- for file_id in file_ids :
350- yield token , file_id , out_dir
357+ for file in file_meta :
358+ yield token , file [ "id" ] , out_dir , file [ "size" ], file [ "name" ]
351359
352360 # Each file is downloaded in parallel
353361 # Pyright messes this up
@@ -358,8 +366,8 @@ async def download_file(
358366 token : str ,
359367 file_id : int ,
360368 out_dir : Path ,
361- key : Optional [ bytes ] = None ,
362- algorithm : Optional [str ] = None ,
369+ file_size : Union [ int , float , None ] = None ,
370+ file_name : Optional [str ] = None
363371 ) -> None :
364372 """
365373 Downloads a single file.
@@ -368,23 +376,35 @@ async def download_file(
368376 token: Obtained from the transfer email. The same as [`GuestAuth`][filesender.GuestAuth]'s `guest_token`.
369377 file_id: A single file ID indicating the file to be downloaded.
370378 out_dir: The path to write the downloaded file.
379+ file_size: The file size in bytes, optionally.
380+ file_name: The file name of the file being downloaded. This will impact the name by which it's saved.
371381 """
372382 download_endpoint = urlunparse (
373383 urlparse (self .base_url )._replace (path = "/download.php" )
374384 )
375385 async with self .http_client .stream (
376386 "GET" , download_endpoint , params = {"files_ids" : file_id , "token" : token }
377387 ) as res :
378- for content_param in res .headers ["Content-Disposition" ].split (";" ):
379- if "filename" in content_param :
380- filename = content_param .split ("=" )[1 ].lstrip ('"' ).rstrip ('"' )
381- break
382- else :
383- raise Exception ("No filename found" )
384-
385- async with aiofiles .open (out_dir / filename , "wb" ) as fp :
386- async for chunk in res .aiter_raw (chunk_size = 8192 ):
387- await fp .write (chunk )
388+ # Determine filename from response, if not provided
389+ if file_name is None :
390+ for content_param in res .headers ["Content-Disposition" ].split (";" ):
391+ if "filename" in content_param :
392+ file_name = content_param .split ("=" )[1 ].lstrip ('"' ).rstrip ('"' )
393+ break
394+ else :
395+ raise Exception ("No filename found" )
396+
397+ file_path = out_dir / file_name
398+ file_path .parent .mkdir (parents = True , exist_ok = True )
399+ chunk_size = 8192
400+ chunk_size_mb = chunk_size / 1024 / 1024
401+ with tqdm (desc = file_name , unit = "MB" , total = None if file_size is None else int (file_size / 1024 / 1024 )) as progress :
402+ async with aiofiles .open (out_dir / file_name , "wb" ) as fp :
403+ # We can't add the total here, because we don't know it:
404+ # https://github.com/filesender/filesender/issues/1555
405+ async for chunk in res .aiter_raw (chunk_size = chunk_size ):
406+ await fp .write (chunk )
407+ progress .update (chunk_size_mb )
388408
389409 async def get_server_info (self ) -> response .ServerInfo :
390410 """
0 commit comments