PR2: Load Tensor Compression Support (#157)

FindHao · meta-codesync[bot] · commit 625c3f46c3e4 · 2025-10-07T13:47:02.000-07:00
Summary:

## Overview

This PR adds gzip compression support to the `load_tensor` function while maintaining backward compatibility with existing uncompressed tensor files.

## Key Changes

### Modified Files

1. `tritonparse/tools/load_tensor.py`
2. `tritonparse/reproducer/templates/example.py`

### Features

**Compression Format Support**

- Added support for `.bin.gz` format (gzip compressed tensors)
- Maintains backward compatibility with existing `.bin` format
- Auto-detects compression based on file extension

**Hash Verification Updates**

- Hash is computed on **decompressed data** for compressed files
- Filename format:
  - Compressed: `{hash}.bin.gz`
  - Uncompressed: `{hash}.bin` (backward compatible)

**Loading Process Improvements**

- Read file contents first
- Decompress if needed
- Load tensor from memory buffer using `io.BytesIO`
- Enhanced error handling with clear messages for decompression failures

## Technical Details

### New Dependencies

- `gzip`: For decompression
- `io`: For memory buffer operations

### Implementation Logic

```python
# 1. Detect file format
is_compressed = str(blob_path).endswith('.bin.gz')

# 2. Read and decompress if needed
with open(blob_path, "rb") as f:
    file_contents = f.read()
if is_compressed:
    file_contents = gzip.decompress(file_contents)

# 3. Verify hash (based on decompressed data)
computed_hash = hashlib.blake2b(file_contents).hexdigest()

# 4. Load from memory
buffer = io.BytesIO(file_contents)
tensor = torch.load(buffer, map_location=device)
```

## Benefits

- **Storage Optimization**: Compression significantly reduces tensor file sizes
- **Backward Compatible**: Existing `.bin` files continue to work without changes
- **Data Integrity**: Hash verification ensures data correctness
- **Transparent**: Users don't need to worry about compression, API remains unchanged

## Impact

- ✅ Fully backward compatible, no breaking changes
- ✅ Applies to all scenarios using `load_tensor`
- ✅ Reproducer templates automatically gain compression support

Pull Request resolved: #157
Reviewed By: wychi
Differential Revision: D84068071
Pulled By: FindHao
fbshipit-source-id: 32fe673401e553b6d5d2c26ae19e8ac7229f0b7a
diff --git a/tritonparse/reproducer/templates/example.py b/tritonparse/reproducer/templates/example.py
@@ -3,13 +3,16 @@
 It contains a smallest testing example for a Triton kernel.
 """
 
+import gzip
 import hashlib
 import importlib
+import io
 import json
 import logging
 import sys
 from functools import lru_cache
 from pathlib import Path
+from typing import Union
 
 import torch
 
@@ -42,13 +45,14 @@ def _get_triton_tensor_types():
     )
 
 
-def load_tensor(tensor_file_path: str, device: str = None) -> torch.Tensor:
+def load_tensor(tensor_file_path: Union[str, Path], device: str = None) -> torch.Tensor:
     """
     Load a tensor from its file path and verify its integrity using the hash in the filename.
 
     Args:
-        tensor_file_path (str): Direct path to the tensor .bin file. The filename should be
-                               the hash of the file contents followed by .bin extension.
+        tensor_file_path (str | Path): Direct path to the tensor file. Supports both:
+                               - .bin.gz: gzip-compressed tensor (hash is of uncompressed data)
+                               - .bin: uncompressed tensor (for backward compatibility)
         device (str, optional): Device to load the tensor to (e.g., 'cuda:0', 'cpu').
                                If None, keeps the tensor on its original device.
 
@@ -65,13 +69,26 @@ def load_tensor(tensor_file_path: str, device: str = None) -> torch.Tensor:
     if not blob_path.exists():
         raise FileNotFoundError(f"Tensor blob not found: {blob_path}")
 
-    # Extract expected hash from filename (remove .bin extension)
-    expected_hash = blob_path.stem
+    # Detect compression by file extension
+    is_compressed = blob_path.name.endswith(".bin.gz")
 
-    # Compute actual hash of file contents
-    with open(blob_path, "rb") as f:
-        file_contents = f.read()
-        computed_hash = hashlib.blake2b(file_contents).hexdigest()
+    # Read file contents (decompress if needed)
+    try:
+        with open(blob_path, "rb") as f:
+            file_obj = gzip.GzipFile(fileobj=f, mode="rb") if is_compressed else f
+            file_contents = file_obj.read()
+    except (OSError, gzip.BadGzipFile) as e:
+        if is_compressed:
+            raise RuntimeError(f"Failed to decompress gzip file {blob_path}: {str(e)}")
+        else:
+            raise RuntimeError(f"Failed to read file {blob_path}: {str(e)}")
+
+    # Extract expected hash from filename
+    # abc123.bin.gz -> abc123 or abc123.bin -> abc123
+    expected_hash = blob_path.name.removesuffix(".bin.gz" if is_compressed else ".bin")
+
+    # Compute hash of uncompressed data
+    computed_hash = hashlib.blake2b(file_contents).hexdigest()
 
     # Verify hash matches filename
     if computed_hash != expected_hash:
@@ -80,12 +97,11 @@ def load_tensor(tensor_file_path: str, device: str = None) -> torch.Tensor:
         )
 
     try:
-        # Load the tensor using torch.load (tensors are saved with torch.save)
-        # If device is None, keep tensor on its original device, otherwise move to specified device
-        tensor = torch.load(blob_path, map_location=device)
+        # Load the tensor from memory buffer
+        tensor = torch.load(io.BytesIO(file_contents), map_location=device)
         return tensor
     except Exception as e:
-        raise RuntimeError(f"Failed to load tensor from {blob_path}: {str(e)}") from e
+        raise RuntimeError(f"Failed to load tensor from {blob_path}: {str(e)}")
 
 
 def create_args_from_json_file(json_path):
diff --git a/tritonparse/tools/load_tensor.py b/tritonparse/tools/load_tensor.py
@@ -6,19 +6,23 @@
 tensor = load_tensor.load_tensor(tensor_file_path, device)
 """
 
+import gzip
 import hashlib
+import io
 from pathlib import Path
+from typing import Union
 
 import torch
 
 
-def load_tensor(tensor_file_path: str, device: str = None) -> torch.Tensor:
+def load_tensor(tensor_file_path: Union[str, Path], device: str = None) -> torch.Tensor:
     """
     Load a tensor from its file path and verify its integrity using the hash in the filename.
 
     Args:
-        tensor_file_path (str): Direct path to the tensor .bin file. The filename should be
-                               the hash of the file contents followed by .bin extension.
+        tensor_file_path (str | Path): Direct path to the tensor file. Supports both:
+                               - .bin.gz: gzip-compressed tensor (hash is of uncompressed data)
+                               - .bin: uncompressed tensor (for backward compatibility)
         device (str, optional): Device to load the tensor to (e.g., 'cuda:0', 'cpu').
                                If None, keeps the tensor on its original device.
 
@@ -35,13 +39,26 @@ def load_tensor(tensor_file_path: str, device: str = None) -> torch.Tensor:
     if not blob_path.exists():
         raise FileNotFoundError(f"Tensor blob not found: {blob_path}")
 
-    # Extract expected hash from filename (remove .bin extension)
-    expected_hash = blob_path.stem
+    # Detect compression by file extension
+    is_compressed = blob_path.name.endswith(".bin.gz")
 
-    # Compute actual hash of file contents
-    with open(blob_path, "rb") as f:
-        file_contents = f.read()
-        computed_hash = hashlib.blake2b(file_contents).hexdigest()
+    # Read file contents (decompress if needed)
+    try:
+        with open(blob_path, "rb") as f:
+            file_obj = gzip.GzipFile(fileobj=f, mode="rb") if is_compressed else f
+            file_contents = file_obj.read()
+    except (OSError, gzip.BadGzipFile) as e:
+        if is_compressed:
+            raise RuntimeError(f"Failed to decompress gzip file {blob_path}: {str(e)}")
+        else:
+            raise RuntimeError(f"Failed to read file {blob_path}: {str(e)}")
+
+    # Extract expected hash from filename
+    # abc123.bin.gz -> abc123 or abc123.bin -> abc123
+    expected_hash = blob_path.name.removesuffix(".bin.gz" if is_compressed else ".bin")
+
+    # Compute hash of uncompressed data
+    computed_hash = hashlib.blake2b(file_contents).hexdigest()
 
     # Verify hash matches filename
     if computed_hash != expected_hash:
@@ -50,9 +67,8 @@ def load_tensor(tensor_file_path: str, device: str = None) -> torch.Tensor:
         )
 
     try:
-        # Load the tensor using torch.load (tensors are saved with torch.save)
-        # If device is None, keep tensor on its original device, otherwise move to specified device
-        tensor = torch.load(blob_path, map_location=device)
+        # Load the tensor from memory buffer
+        tensor = torch.load(io.BytesIO(file_contents), map_location=device)
         return tensor
     except Exception as e:
         raise RuntimeError(f"Failed to load tensor from {blob_path}: {str(e)}")