Skip to content

Commit a494f6d

Browse files
authored
Merge pull request #27 from WEHI-ResearchComputing/logging
Logging
2 parents 99d6c73 + 86a0d03 commit a494f6d

File tree

7 files changed

+205
-62
lines changed

7 files changed

+205
-62
lines changed

docs/changelog.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
# Changelog
22

3+
## Version 2.1.0
4+
5+
### Added
6+
7+
* A progress bar for file downloads
8+
9+
### Changed
10+
11+
* All terminal output is now through the `logging` module. You can use the new `--log-level` CLI parameter to configure the amount of info that is printed out.
12+
* Update the CLI default concurrency to 2 for chunks and 1 for files. This seems to be moderately performant without ever failing.
13+
314
## Version 2.0.0
415

516
### Added

filesender/api.py

Lines changed: 59 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
from typing import Any, Iterable, List, Optional, Tuple, AsyncIterator, Set
2-
from bs4 import BeautifulSoup
1+
from typing import Any, Iterable, List, Optional, Tuple, AsyncIterator, Union
2+
from filesender.download import files_from_page, DownloadFile
33
import filesender.response_types as response
44
import filesender.request_types as request
55
from urllib.parse import urlparse, urlunparse, unquote
@@ -10,7 +10,7 @@
1010
import aiofiles
1111
from aiostream import stream
1212
from contextlib import contextmanager
13-
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception
13+
from tenacity import RetryCallState, retry, stop_after_attempt, wait_fixed, retry_if_exception
1414
import logging
1515
from tqdm.asyncio import tqdm
1616

@@ -25,9 +25,13 @@ def should_retry(e: BaseException) -> bool:
2525
# Seems to be just a bug in the backend
2626
# https://github.com/encode/httpx/discussions/2941
2727
return True
28-
elif isinstance(e, HTTPStatusError) and e.response.status_code == 500 and e.response.json()["message"] == "auth_remote_too_late":
28+
elif isinstance(e, HTTPStatusError) and e.response.status_code == 500:
29+
message = e.response.json()["message"]
30+
if message == "auth_remote_too_late":
31+
return True
32+
if message == "auth_remote_signature_check_failed":
33+
return True
2934
# These errors are caused by lag between creating the response and it being received
30-
return True
3135
return False
3236

3337

@@ -40,6 +44,13 @@ def url_without_scheme(url: str) -> str:
4044
"""
4145
return unquote(urlunparse(urlparse(url)._replace(scheme="")).lstrip("/"))
4246

47+
def exception_to_message(e: BaseException) -> str:
    """
    Converts an exception into a human-readable message suitable for logging
    or re-raising.

    Params:
        e: The exception to describe. HTTP errors get request/response
            details; anything else falls back to its ``repr``.
    """
    # Guard clauses: handle the two HTTP-specific cases, then fall through.
    if isinstance(e, HTTPStatusError):
        return f"Request failed with content {e.response.text} for request {e.request.method} {e.request.url}."
    if isinstance(e, RequestError):
        return f"Request failed for request {e.request.method} {e.request.url}. {repr(e)}"
    return repr(e)
4354

4455
@contextmanager
4556
def raise_status():
@@ -49,16 +60,8 @@ def raise_status():
4960
"""
5061
try:
5162
yield
52-
except HTTPStatusError as e:
53-
raise Exception(
54-
f"Request failed with content {e.response.text} for request {e.request.method} {e.request.url}"
55-
) from e
56-
except RequestError as e:
57-
# TODO: check for SSL read error
58-
raise Exception(
59-
f"Request failed for request {e.request.method} {e.request.url}"
60-
) from e
61-
63+
except BaseException as e:
64+
raise Exception(exception_to_message(e)) from e
6265

6366
async def yield_chunks(path: Path, chunk_size: int) -> AsyncIterator[Tuple[bytes, int]]:
6467
"""
@@ -166,11 +169,21 @@ async def _sign_send(self, request: Request) -> Any:
166169
with raise_status():
167170
return await self._sign_send_inner(request)
168171

172+
@staticmethod
def on_retry(state: RetryCallState) -> None:
    """
    Logs a warning describing a failed attempt, for use as a tenacity
    ``before_sleep`` callback.

    Params:
        state: The tenacity retry state for the attempt that just failed.
    """
    # Fall back to the outcome's string form; replace it with a friendlier
    # message whenever an actual exception is available.
    message = str(state.outcome)
    if state.outcome is not None:
        e = state.outcome.exception()
        if e is not None:
            message = exception_to_message(e)

    # Fixed: logger.warn() is a deprecated alias of logger.warning()
    logger.warning(f"Attempt {state.attempt_number}. {message}")
181+
169182
@retry(
170183
retry=retry_if_exception(should_retry),
171184
wait=wait_fixed(0.1),
172185
stop=stop_after_attempt(5),
173-
before_sleep=lambda x: logger.warn(f"Attempt {x.attempt_number}.{x.outcome}")
186+
before_sleep=on_retry
174187
)
175188
async def _sign_send_inner(self, request: Request) -> Any:
176189
# Needs to be a separate function to handle retry policy correctly
@@ -313,19 +326,14 @@ async def create_guest(self, body: request.Guest) -> response.Guest:
313326
self.http_client.build_request("POST", f"{self.base_url}/guest", json=body)
314327
)
315328

316-
async def _files_from_token(self, token: str) -> Set[int]:
329+
async def _files_from_token(self, token: str) -> Iterable[DownloadFile]:
317330
"""
318331
Internal function that returns metadata for each file available under a given guest token
319332
"""
320333
download_page = await self.http_client.get(
321334
"https://filesender.aarnet.edu.au", params={"s": "download", "token": token}
322335
)
323-
files: Set[int] = set()
324-
for file in BeautifulSoup(download_page.content, "html.parser").find_all(
325-
class_="file"
326-
):
327-
files.add(int(file.attrs["data-id"]))
328-
return files
336+
return files_from_page(download_page.content)
329337

330338
async def download_files(
331339
self,
@@ -342,12 +350,12 @@ async def download_files(
342350
out_dir: The path to write the downloaded files.
343351
"""
344352

345-
file_ids = await self._files_from_token(token)
353+
file_meta = await self._files_from_token(token)
346354

347-
async def _download_args() -> AsyncIterator[Tuple[str, Any, Path]]:
355+
async def _download_args() -> AsyncIterator[Tuple[str, Any, Path, int, str]]:
348356
"Yields tuples of arguments to pass to download_file"
349-
for file_id in file_ids:
350-
yield token, file_id, out_dir
357+
for file in file_meta:
358+
yield token, file["id"], out_dir, file["size"], file["name"]
351359

352360
# Each file is downloaded in parallel
353361
# Pyright messes this up
@@ -358,8 +366,8 @@ async def download_file(
358366
token: str,
359367
file_id: int,
360368
out_dir: Path,
361-
key: Optional[bytes] = None,
362-
algorithm: Optional[str] = None,
369+
file_size: Union[int, float, None] = None,
370+
file_name: Optional[str] = None
363371
) -> None:
364372
"""
365373
Downloads a single file.
@@ -368,23 +376,35 @@ async def download_file(
368376
token: Obtained from the transfer email. The same as [`GuestAuth`][filesender.GuestAuth]'s `guest_token`.
369377
file_id: A single file ID indicating the file to be downloaded.
370378
out_dir: The path to write the downloaded file.
379+
file_size: The file size in bytes, optionally.
380+
file_name: The file name of the file being downloaded. This will impact the name by which it's saved.
371381
"""
372382
download_endpoint = urlunparse(
373383
urlparse(self.base_url)._replace(path="/download.php")
374384
)
375385
async with self.http_client.stream(
376386
"GET", download_endpoint, params={"files_ids": file_id, "token": token}
377387
) as res:
378-
for content_param in res.headers["Content-Disposition"].split(";"):
379-
if "filename" in content_param:
380-
filename = content_param.split("=")[1].lstrip('"').rstrip('"')
381-
break
382-
else:
383-
raise Exception("No filename found")
384-
385-
async with aiofiles.open(out_dir / filename, "wb") as fp:
386-
async for chunk in res.aiter_raw(chunk_size=8192):
387-
await fp.write(chunk)
388+
# Determine filename from response, if not provided
389+
if file_name is None:
390+
for content_param in res.headers["Content-Disposition"].split(";"):
391+
if "filename" in content_param:
392+
file_name = content_param.split("=")[1].lstrip('"').rstrip('"')
393+
break
394+
else:
395+
raise Exception("No filename found")
396+
397+
file_path = out_dir / file_name
398+
file_path.parent.mkdir(parents=True, exist_ok=True)
399+
chunk_size = 8192
400+
chunk_size_mb = chunk_size / 1024 / 1024
401+
with tqdm(desc=file_name, unit="MB", total=None if file_size is None else int(file_size / 1024 / 1024)) as progress:
402+
async with aiofiles.open(out_dir / file_name, "wb") as fp:
403+
# NOTE(review): the total passed to tqdm comes from the page metadata (file_size)
# when available; the response itself doesn't report it — see:
404+
# https://github.com/filesender/filesender/issues/1555
405+
async for chunk in res.aiter_raw(chunk_size=chunk_size):
406+
await fp.write(chunk)
407+
progress.update(chunk_size_mb)
388408

389409
async def get_server_info(self) -> response.ServerInfo:
390410
"""

filesender/download.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from typing import Iterable, TypedDict
2+
3+
from bs4 import BeautifulSoup
4+
5+
6+
# Metadata describing a single downloadable file, as exposed by the
# ``data-*`` attributes on a FileSender download page (see files_from_page).
# ``name`` is the filename.
DownloadFile = TypedDict(
    "DownloadFile",
    {
        "client_entropy": str,
        "encrypted": str,
        "encrypted_size": int,
        "fileaead": str,
        "fileiv": str,
        "id": int,
        "key_salt": str,
        "key_version": int,
        "mime": str,
        "name": str,
        "password_encoding": str,
        "password_hash_iterations": int,
        "password_version": int,
        "size": int,
        "transferid": int,
    },
)
23+
24+
def files_from_page(content: bytes) -> Iterable[DownloadFile]:
    """
    Yields dictionaries describing the files listed on a FileSender web page

    Params:
        content: The HTML content of the FileSender download page

    Yields:
        One ``DownloadFile`` dict per element carrying the ``file`` CSS
        class, built from that element's ``data-*`` attributes.
    """
    for file in BeautifulSoup(content, "html.parser").find_all(
        class_="file"
    ):
        yield {
            # Fixed: this key was an f-string with no placeholders (F541)
            "client_entropy": file.attrs["data-client-entropy"],
            "encrypted": file.attrs["data-encrypted"],
            "encrypted_size": int(file.attrs["data-encrypted-size"]),
            "fileaead": file.attrs["data-fileaead"],
            "fileiv": file.attrs["data-fileiv"],
            "id": int(file.attrs["data-id"]),
            "key_salt": file.attrs["data-key-salt"],
            "key_version": int(file.attrs["data-key-version"]),
            "mime": file.attrs["data-mime"],
            "name": file.attrs["data-name"],
            "password_encoding": file.attrs["data-password-encoding"],
            "password_hash_iterations": int(file.attrs["data-password-hash-iterations"]),
            "password_version": int(file.attrs["data-password-version"]),
            "size": int(file.attrs["data-size"]),
            "transferid": int(file.attrs["data-transferid"]),
        }

filesender/log.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from typing import Union
2+
from click import ParamType, Context, Parameter
3+
from enum import Enum
4+
import logging
5+
6+
class LogLevel(Enum):
    """
    Log levels accepted by the CLI, with the standard ``logging`` numeric
    values plus two custom levels (VERBOSE, FEEDBACK) that must first be
    registered via `configure_extra_levels`.
    """
    NOTSET = 0
    DEBUG = 10
    #: Used for verbose logging that the average user wouldn't want
    VERBOSE = 15
    INFO = 20
    #: Used for basic feedback that a CLI user would expect
    FEEDBACK = 25
    WARNING = 30
    ERROR = 40
    CRITICAL = 50

    def configure_label(self):
        """
        Configures the logging module to understand this log level
        """
        # Registers this level's name so records render as e.g. "VERBOSE"
        # rather than "Level 15"
        logging.addLevelName(self.value, self.name)
23+
24+
def configure_extra_levels():
    """
    Registers the non-standard log levels (VERBOSE and FEEDBACK) with the
    logging module so their names are understood
    """
    # Register each custom level explicitly
    LogLevel.VERBOSE.configure_label()
    LogLevel.FEEDBACK.configure_label()
31+
class LogParam(ParamType):
    """
    Click parameter type that accepts either a log level name (e.g. "INFO")
    or a raw integer level, and converts it to its numeric value.
    """
    name = "LogParam"

    def convert(self, value: Union[int, str], param: Union[Parameter, None], ctx: Union[Context, None]) -> int:
        # Integers (e.g. a numeric default) are already valid logging levels
        if isinstance(value, int):
            return value

        # Convert string representation to int.
        # Fixed: the previous hasattr(LogLevel, value) check also accepted
        # non-member class attributes (e.g. "configure_label"), after which
        # LogLevel[value] raised an uncaught KeyError instead of failing
        # with a usage error.
        try:
            return LogLevel[value].value
        except KeyError:
            self.fail(f"{value!r} is not a valid log level", param, ctx)

    def get_metavar(self, param: Parameter) -> Union[str, None]:
        # Print out the choices, using the public Enum member mapping
        return "|".join(LogLevel.__members__)

0 commit comments

Comments
 (0)