Skip to content

Commit 1ed3d66

Browse files
committed
refactor: Add atomic writes for attachment files and manifests
1 parent a194fa4 commit 1ed3d66

File tree

1 file changed

+45
-49
lines changed

1 file changed

+45
-49
lines changed

github_backup/github_backup.py

Lines changed: 45 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -948,6 +948,8 @@ def download_attachment_file(url, path, auth, as_app=False, fine=False):
948948
# Reuse S3HTTPRedirectHandler from download_file()
949949
opener = build_opener(S3HTTPRedirectHandler)
950950

951+
temp_path = path + ".temp"
952+
951953
try:
952954
response = opener.open(request)
953955
metadata["http_status"] = response.getcode()
@@ -986,17 +988,20 @@ def download_attachment_file(url, path, auth, as_app=False, fine=False):
986988
if "." in filename_from_url:
987989
metadata["original_filename"] = filename_from_url
988990

989-
# Download file
991+
# Download file to temporary location
990992
chunk_size = 16 * 1024
991993
bytes_downloaded = 0
992-
with open(path, "wb") as f:
994+
with open(temp_path, "wb") as f:
993995
while True:
994996
chunk = response.read(chunk_size)
995997
if not chunk:
996998
break
997999
f.write(chunk)
9981000
bytes_downloaded += len(chunk)
9991001

1002+
# Atomic rename to final location
1003+
os.rename(temp_path, path)
1004+
10001005
metadata["size_bytes"] = bytes_downloaded
10011006
metadata["success"] = True
10021007

@@ -1027,6 +1032,12 @@ def download_attachment_file(url, path, auth, as_app=False, fine=False):
10271032
logger.warning(
10281033
"Skipping download of attachment {0} due to error: {1}".format(url, str(e))
10291034
)
1035+
# Clean up temp file if it was partially created
1036+
if os.path.exists(temp_path):
1037+
try:
1038+
os.remove(temp_path)
1039+
except Exception:
1040+
pass
10301041

10311042
return metadata
10321043

@@ -1222,40 +1233,6 @@ def redirect_request(self, req, fp, code, msg, headers, newurl):
12221233
return regex_urls
12231234

12241235

1225-
def extract_and_apply_extension(filepath, original_filename):
1226-
"""Extract extension from original filename and rename file if needed.
1227-
1228-
Args:
1229-
filepath: Current file path (may have no extension)
1230-
original_filename: Original filename from Content-Disposition (has extension)
1231-
1232-
Returns:
1233-
Final filepath with extension applied
1234-
"""
1235-
if not original_filename or not os.path.exists(filepath):
1236-
return filepath
1237-
1238-
# Get extension from original filename
1239-
original_ext = os.path.splitext(original_filename)[1]
1240-
if not original_ext:
1241-
return filepath
1242-
1243-
# Check if current file already has this extension
1244-
current_ext = os.path.splitext(filepath)[1]
1245-
if current_ext == original_ext:
1246-
return filepath
1247-
1248-
# Rename file to add extension
1249-
new_filepath = filepath + original_ext
1250-
try:
1251-
os.rename(filepath, new_filepath)
1252-
logger.debug("Renamed {0} to {1}".format(filepath, new_filepath))
1253-
return new_filepath
1254-
except Exception as e:
1255-
logger.warning("Could not rename {0}: {1}".format(filepath, str(e)))
1256-
return filepath
1257-
1258-
12591236
def get_attachment_filename(url):
12601237
"""Get filename from attachment URL, handling all GitHub formats.
12611238
@@ -1333,7 +1310,9 @@ def resolve_filename_collision(filepath):
13331310
counter += 1
13341311

13351312

1336-
def download_attachments(args, item_cwd, item_data, number, repository, item_type="issue"):
1313+
def download_attachments(
1314+
args, item_cwd, item_data, number, repository, item_type="issue"
1315+
):
13371316
"""Download user-attachments from issue/PR body and comments with manifest.
13381317
13391318
Args:
@@ -1428,20 +1407,36 @@ def download_attachments(args, item_cwd, item_data, number, repository, item_typ
14281407
fine=args.token_fine is not None,
14291408
)
14301409

1431-
# Apply extension from Content-Disposition if available
1410+
# If download succeeded but we got an extension from Content-Disposition,
1411+
# we may need to rename the file to add the extension
14321412
if metadata["success"] and metadata.get("original_filename"):
1433-
final_filepath = extract_and_apply_extension(
1434-
filepath, metadata["original_filename"]
1435-
)
1436-
# Check for collision again ONLY if filename changed (extension was added)
1437-
if final_filepath != filepath:
1413+
original_ext = os.path.splitext(metadata["original_filename"])[1]
1414+
current_ext = os.path.splitext(filepath)[1]
1415+
1416+
# Add extension if not present
1417+
if original_ext and current_ext != original_ext:
1418+
final_filepath = filepath + original_ext
1419+
# Check for collision again with new extension
14381420
final_filepath = resolve_filename_collision(final_filepath)
1439-
# Update saved_as to reflect actual filename
1440-
metadata["saved_as"] = os.path.basename(final_filepath)
1421+
logger.debug(
1422+
"Adding extension {0} to {1}".format(original_ext, filepath)
1423+
)
1424+
1425+
# Rename to add extension (already atomic from download)
1426+
try:
1427+
os.rename(filepath, final_filepath)
1428+
metadata["saved_as"] = os.path.basename(final_filepath)
1429+
except Exception as e:
1430+
logger.warning(
1431+
"Could not add extension to {0}: {1}".format(filepath, str(e))
1432+
)
1433+
metadata["saved_as"] = os.path.basename(filepath)
1434+
else:
1435+
metadata["saved_as"] = os.path.basename(filepath)
1436+
elif metadata["success"]:
1437+
metadata["saved_as"] = os.path.basename(filepath)
14411438
else:
1442-
metadata["saved_as"] = (
1443-
os.path.basename(filepath) if metadata["success"] else None
1444-
)
1439+
metadata["saved_as"] = None
14451440

14461441
attachment_metadata_list.append(metadata)
14471442

@@ -1458,8 +1453,9 @@ def download_attachments(args, item_cwd, item_data, number, repository, item_typ
14581453
}
14591454

14601455
manifest_path = os.path.join(attachments_dir, "manifest.json")
1461-
with open(manifest_path, "w") as f:
1456+
with open(manifest_path + ".temp", "w") as f:
14621457
json.dump(manifest, f, indent=2)
1458+
os.rename(manifest_path + ".temp", manifest_path) # Atomic write
14631459
logger.debug(
14641460
"Wrote manifest for {0} #{1}: {2} attachments".format(
14651461
item_type_display, number, len(attachment_metadata_list)

0 commit comments

Comments
 (0)