From ad4092537c7e83d0eaa43150e459fa9465c39c60 Mon Sep 17 00:00:00 2001 From: Prithvi Dixit Date: Tue, 11 Nov 2025 07:17:07 +0000 Subject: [PATCH 1/2] Fail fast on invalid DoubleSparsity configs; surface Ray task failures --- benchmark/raytune/run_config_dir.py | 21 +++++++++++++++++++-- benchmark/raytune/utility.py | 22 ++++++++++++---------- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/benchmark/raytune/run_config_dir.py b/benchmark/raytune/run_config_dir.py index d47dc76e..371ebf3e 100755 --- a/benchmark/raytune/run_config_dir.py +++ b/benchmark/raytune/run_config_dir.py @@ -29,6 +29,11 @@ current_dir = Path(__file__).parent root_path = current_dir.parent.parent sys.path.extend([str(current_dir), str(root_path)]) +existing_pythonpath = os.environ.get("PYTHONPATH", "") +if existing_pythonpath: + os.environ["PYTHONPATH"] = f"{existing_pythonpath}:{current_dir}:{root_path}" +else: + os.environ["PYTHONPATH"] = f"{current_dir}:{root_path}" import ray from ray.util.queue import Queue as RayQueue @@ -323,9 +328,12 @@ def main( print(f"RAY BENCHMARK RUNNER") print(f"{'='*80}") - # Initialize Ray + # Initialize Ray with runtime environment so workers can import modules if not ray.is_initialized(): - ray.init(ignore_reinit_error=True) + ray.init( + ignore_reinit_error=True, + runtime_env={"working_dir": str(root_path)} + ) # Get GPU info num_gpus = int(ray.available_resources().get("GPU", 0)) @@ -427,13 +435,17 @@ def main( pool.submit(lambda actor, task: actor.run_benchmark.remote(task), task) # Collect results + results: List[BenchmarkResult] = [] while pool.has_next(): result = pool.get_next() result_queue.put(result) + results.append(result) # Wait for progress reporter ray.get(progress_task) + failed_results: List[BenchmarkResult] = [r for r in results if not r.success] + # Get actor statistics print("\nActor statistics:") for actor in actors: @@ -455,6 +467,11 @@ def main( ray.shutdown() + if failed_results: + for result in failed_results: + 
print(f"[ERROR] Task {result.task_id} failed: {result.error}") + raise RuntimeError("One or more benchmark tasks failed. See errors above.") + if __name__ == "__main__": fire.Fire(main) diff --git a/benchmark/raytune/utility.py b/benchmark/raytune/utility.py index b84c0d83..a7890d1e 100644 --- a/benchmark/raytune/utility.py +++ b/benchmark/raytune/utility.py @@ -165,19 +165,21 @@ def deserialize_sparse_config(data: Optional[Dict]) -> Optional[Any]: # Dynamically discover all available masker config classes config_map = get_all_masker_config_classes() - # Reconstruct masker configs with error handling + #reconstruct masker configs so we let errors propagate for critical failures + #this ensures missing files or invalid configurations cause immediate failures instead of silently skipping maskers and producing misleading results masker_configs = [] for masker_data in data.get("masker_configs", []): config_class = config_map.get(masker_data["type"]) if config_class: - try: - # Create instance with parameters - params = masker_data.get("params", {}) - masker_configs.append(config_class(**params)) - except Exception as e: - logging.warning(f"Failed to create {masker_data['type']}: {e}") - continue - - # Import ResearchAttentionConfig here to avoid circular imports + #create instance with parameters so we let ValueError/FileNotFoundError propagate + params = masker_data.get("params", {}) + masker_configs.append(config_class(**params)) + else: + raise ValueError( + f"Unknown masker config type: {masker_data['type']}. 
" + f"Available types: {list(config_map.keys())}" + ) + + #import ResearchAttentionConfig here to avoid circular imports from sparse_attention_hub.sparse_attention.research_attention import ResearchAttentionConfig return ResearchAttentionConfig(masker_configs=masker_configs) if masker_configs else None From 3d6ffe9dc3313975b851e1a3a43ab0a0ef59b8db Mon Sep 17 00:00:00 2001 From: Prithvi Dixit Date: Tue, 11 Nov 2025 07:28:23 +0000 Subject: [PATCH 2/2] removing explicit path from dir --- benchmark/raytune/run_config_dir.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/benchmark/raytune/run_config_dir.py b/benchmark/raytune/run_config_dir.py index 371ebf3e..9b7ecd9f 100755 --- a/benchmark/raytune/run_config_dir.py +++ b/benchmark/raytune/run_config_dir.py @@ -29,11 +29,6 @@ current_dir = Path(__file__).parent root_path = current_dir.parent.parent sys.path.extend([str(current_dir), str(root_path)]) -existing_pythonpath = os.environ.get("PYTHONPATH", "") -if existing_pythonpath: - os.environ["PYTHONPATH"] = f"{existing_pythonpath}:{current_dir}:{root_path}" -else: - os.environ["PYTHONPATH"] = f"{current_dir}:{root_path}" import ray from ray.util.queue import Queue as RayQueue @@ -327,13 +322,10 @@ def main( print(f"\n{'='*80}") print(f"RAY BENCHMARK RUNNER") print(f"{'='*80}") - - # Initialize Ray with runtime environment so workers can import modules + + # Initialize Ray if not ray.is_initialized(): - ray.init( - ignore_reinit_error=True, - runtime_env={"working_dir": str(root_path)} - ) + ray.init(ignore_reinit_error=True) # Get GPU info num_gpus = int(ray.available_resources().get("GPU", 0))