Commit 62573f8

Added live UI updates
1 parent 5790373 commit 62573f8

3 files changed: +128 additions, -42 deletions

app.py

Lines changed: 12 additions & 0 deletions
@@ -70,6 +70,7 @@ def browse_folder():
     st.session_state.setdefault("max_detections", 100)
     st.session_state.setdefault("device", "cpu")
     st.session_state.setdefault("batch_size", 1)
+    st.session_state.setdefault("evaluation_step", 5)
     st.session_state.setdefault("detection_model", None)
     st.session_state.setdefault("detection_model_loaded", False)

@@ -182,6 +183,15 @@ def browse_folder():
         step=1,
         key="batch_size",
     )
+    st.number_input(
+        "Evaluation Step",
+        min_value=0,
+        max_value=1000,
+        value=st.session_state.get("evaluation_step", 10),
+        step=1,
+        key="evaluation_step",
+        help="Update UI with intermediate metrics every N images (0 = disable intermediate updates)"
+    )

     # Load model action in sidebar
     from detectionmetrics.models.torch_detection import TorchImageDetectionModel
@@ -220,12 +230,14 @@ def browse_folder():
     max_detections = int(st.session_state.get('max_detections', 100))
     device = st.session_state.get('device', 'cpu')
     batch_size = int(st.session_state.get('batch_size', 1))
+    evaluation_step = int(st.session_state.get('evaluation_step', 5))
     config_data = {
         "confidence_threshold": confidence_threshold,
         "nms_threshold": nms_threshold,
         "max_detections_per_image": max_detections,
         "device": device,
         "batch_size": batch_size,
+        "evaluation_step": evaluation_step,
     }
     with tempfile.NamedTemporaryFile(delete=False, suffix='.json', mode='w') as tmp_cfg:
         json.dump(config_data, tmp_cfg)

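For reference, the sidebar values above are serialized into a temporary JSON model config that the evaluation code later reads. Below is a minimal sketch of what that file ends up containing; only the field names come from the diff above, the concrete values are illustrative:

```python
import json
import tempfile

# Illustrative values; in app.py these come from st.session_state.
config_data = {
    "confidence_threshold": 0.5,
    "nms_threshold": 0.5,
    "max_detections_per_image": 100,
    "device": "cpu",
    "batch_size": 1,
    "evaluation_step": 5,  # emit intermediate metrics every 5 images; 0 disables
}

with tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w") as tmp_cfg:
    json.dump(config_data, tmp_cfg)
    print("model config written to", tmp_cfg.name)
```
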
detectionmetrics/models/torch_detection.py

Lines changed: 25 additions & 2 deletions
@@ -315,6 +315,7 @@ def eval(
         predictions_outdir: Optional[str] = None,
         results_per_sample: bool = False,
         progress_callback=None,
+        metrics_callback=None,
     ) -> pd.DataFrame:
         """Evaluate model over a detection dataset and compute metrics

@@ -330,6 +331,8 @@ def eval(
         :type results_per_sample: bool
         :param progress_callback: Optional callback function for progress updates in Streamlit UI
         :type progress_callback: Optional[Callable[[int, int], None]]
+        :param metrics_callback: Optional callback function for intermediate metrics updates in Streamlit UI
+        :type metrics_callback: Optional[Callable[[pd.DataFrame, int, int], None]]
         :return: DataFrame containing evaluation results
         :rtype: pd.DataFrame
         """
@@ -353,16 +356,28 @@ def eval(
             splits=[split] if isinstance(split, str) else split,
         )

+        # This ensures compatibility with Streamlit and callback functions
+        if progress_callback is not None and metrics_callback is not None:
+            num_workers = 0
+        else:
+            num_workers = self.model_cfg.get("num_workers")
+
         dataloader = DataLoader(
             dataset,
             batch_size=self.model_cfg.get("batch_size", 1),
-            num_workers=self.model_cfg.get("num_workers", 1),
-            collate_fn=lambda x: tuple(zip(*x)),  # handles variable-size targets
+            num_workers=num_workers,
+            collate_fn=lambda batch: tuple(zip(*batch)),  # handles variable-size targets
         )

         # Get iou_threshold from model config, default to 0.5 if not present
         iou_threshold = self.model_cfg.get("iou_threshold", 0.5)

+        # Get evaluation_step from model config, default to None (no intermediate updates)
+        evaluation_step = self.model_cfg.get("evaluation_step", None)
+        # If evaluation_step is 0, treat as None (disabled)
+        if evaluation_step == 0:
+            evaluation_step = None
+
         # Init metrics
         metrics_factory = um.DetectionMetricsFactory(
             iou_threshold=iou_threshold, num_classes=self.n_classes
@@ -472,6 +487,14 @@ def eval(
            # Call progress callback if provided
            if progress_callback is not None:
                progress_callback(processed_samples, total_samples)
+
+           # Call metrics callback if provided and evaluation_step is reached
+           if (metrics_callback is not None and
+                   evaluation_step is not None and
+                   processed_samples % evaluation_step == 0):
+               # Get intermediate metrics
+               intermediate_metrics = metrics_factory.get_metrics_dataframe(self.ontology)
+               metrics_callback(intermediate_metrics, processed_samples, total_samples)

         # Return both the DataFrame and the metrics factory for access to precision-recall curves
         return {

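Outside Streamlit, the two callbacks accepted by eval() are plain callables with the signatures documented in the docstring above: (processed, total) for progress and (metrics_df, processed, total) for intermediate metrics. A minimal sketch of such callbacks, exercised here against a fabricated metrics table rather than a real model.eval(...) run:

```python
import pandas as pd

def progress_callback(processed: int, total: int) -> None:
    # Matches Optional[Callable[[int, int], None]] from the eval() docstring.
    print(f"{processed}/{total} images evaluated")

def metrics_callback(metrics_df: pd.DataFrame, processed: int, total: int) -> None:
    # Matches Optional[Callable[[pd.DataFrame, int, int], None]]; the table has
    # one column per class plus a 'mean' summary column, with metric names as rows.
    if "mean" in metrics_df.columns:
        print(f"after {processed}/{total} images, mAP so far: {metrics_df['mean']['AP']:.3f}")

# Fabricated stand-in for metrics_factory.get_metrics_dataframe(...); in a real run
# these functions are passed as eval(..., progress_callback=..., metrics_callback=...).
fake_metrics = pd.DataFrame(
    {"car": [0.61, 0.70, 0.55], "mean": [0.61, 0.70, 0.55]},
    index=["AP", "Precision", "Recall"],
)
progress_callback(5, 100)
metrics_callback(fake_metrics, 5, 100)
```

Note that when both callbacks are supplied, the diff forces num_workers = 0, presumably so the DataLoader stays in the main process and the callbacks can safely touch the Streamlit UI.
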
tabs/evaluator.py

Lines changed: 91 additions & 40 deletions
@@ -113,6 +113,10 @@ def evaluator_tab():
        progress_bar = st.progress(0)
        status_text = st.empty()

+       # Create placeholders for intermediate metrics that will be updated in place
+       intermediate_metrics_placeholder = st.empty()
+       intermediate_table_placeholder = st.empty()
+
        def progress_callback(processed, total):
            """Progress callback for Streamlit UI"""
            try:
@@ -122,6 +126,42 @@ def progress_callback(processed, total):
            except Exception as e:
                st.error(f"Progress callback error: {e}")

+       def metrics_callback(metrics_df, processed, total):
+           """Metrics callback for intermediate results display"""
+           try:
+               # Update the metrics placeholder with current summary metrics
+               if 'mean' in metrics_df.columns:
+                   mean_metrics = metrics_df['mean']
+
+                   with intermediate_metrics_placeholder.container():
+                       st.markdown(f"#### 📊 Intermediate Results (after {processed} images)")
+
+                       col1, col2, col3 = st.columns(3)
+                       with col1:
+                           st.metric("mAP", f"{mean_metrics.get('AP', 0):.3f}")
+                       with col2:
+                           st.metric("Mean Precision", f"{mean_metrics.get('Precision', 0):.3f}")
+                       with col3:
+                           st.metric("Mean Recall", f"{mean_metrics.get('Recall', 0):.3f}")
+
+               # Update the table placeholder with current per-class results
+               per_class_results = metrics_df.drop(columns=['mean']) if 'mean' in metrics_df.columns else metrics_df
+               per_class_results = per_class_results.drop(['AUC-PR', 'mAP@[0.5:0.95]'], errors='ignore')
+
+               # Round for display
+               display_df = per_class_results.copy()
+               numeric_columns = display_df.select_dtypes(include=['float64', 'int64']).columns
+               for col in numeric_columns:
+                   if col in display_df.columns:
+                       display_df[col] = display_df[col].round(3)
+
+               with intermediate_table_placeholder.container():
+                   st.markdown("#### Per-Class Metrics (Intermediate)")
+                   st.dataframe(display_df, use_container_width=True)
+
+           except Exception as e:
+               st.error(f"Metrics callback error: {e}")
+
        # Run evaluation with progress tracking
        # Use full dataset for evaluation

@@ -131,7 +171,7 @@ def progress_callback(processed, total):
            # Create a shallow copy of the dataset object with only first 10 rows
            import copy
            dataset_subset = copy.copy(dataset)
-           dataset_subset.dataset = dataset.dataset.iloc[:10].copy()
+           dataset_subset.dataset = dataset.dataset.iloc[:100].copy()
        else:
            st.warning("Dataset object does not have a 'dataset' attribute; using as is.")
            dataset_subset = dataset
@@ -142,17 +182,20 @@ def progress_callback(processed, total):
                ontology_translation=ontology_translation_path,
                predictions_outdir=predictions_outdir,
                results_per_sample=save_predictions,
-               progress_callback=progress_callback
+               progress_callback=progress_callback,
+               metrics_callback=metrics_callback
            )
        except Exception as e:
            st.error(f"Error in model.eval(): {e}")
            return

        # Results ready

-       # Clear progress elements
+       # Clear progress elements and intermediate results
        progress_bar.empty()
        status_text.empty()
+       intermediate_metrics_placeholder.empty()
+       intermediate_table_placeholder.empty()

        # Store results in session state
        st.session_state['evaluation_results'] = results
@@ -201,33 +244,41 @@ def display_evaluation_results(results):
    if 'mean' in metrics_df.columns:
        mean_metrics = metrics_df['mean']

-       col1, col2, col3, col4 = st.columns(4)
+       col1, col2, col3, col4, col5 = st.columns(5)
        with col1:
            st.metric("mAP", f"{mean_metrics.get('AP', 0):.3f}")
        with col2:
            st.metric("Mean Precision", f"{mean_metrics.get('Precision', 0):.3f}")
        with col3:
            st.metric("Mean Recall", f"{mean_metrics.get('Recall', 0):.3f}")
        with col4:
-           total_detections = mean_metrics.get('TP', 0) + mean_metrics.get('FP', 0)
-           st.metric("Total Detections", f"{total_detections:.0f}")
-
-       # Add COCO mAP and AUC-PR in a second row
-       col5, col6, col7, col8 = st.columns(4)
-       with col5:
            coco_map = mean_metrics.get('mAP@[0.5:0.95]', 0)
            st.metric("mAP@[0.5:0.95]", f"{coco_map:.3f}")
-       with col6:
+       with col5:
            auc_pr = mean_metrics.get('AUC-PR', 0)
            st.metric("AUC-PR", f"{auc_pr:.3f}")
-       with col7:
-           # Empty column for spacing
-           st.empty()
-       with col8:
-           # Empty column for spacing
-           st.empty()

-   # Display Precision-Recall Curve
+   # Display per-class metrics first
+   st.markdown("#### Per-Class Metrics")
+
+   # Filter out the 'mean' column for per-class display
+   per_class_results = metrics_df.drop(columns=['mean']) if 'mean' in metrics_df.columns else metrics_df
+
+   # Remove overall metrics rows (AUC-PR and mAP@[0.5:0.95]) from per-class display
+   per_class_results = per_class_results.drop(['AUC-PR', 'mAP@[0.5:0.95]'], errors='ignore')
+
+   # Create a more readable display
+   display_df = per_class_results.copy()
+
+   # Round numeric columns for better display
+   numeric_columns = display_df.select_dtypes(include=['float64', 'int64']).columns
+   for col in numeric_columns:
+       if col in display_df.columns:
+           display_df[col] = display_df[col].round(3)
+
+   st.dataframe(display_df, use_container_width=True)
+
+   # Now display Precision-Recall Curve
    if metrics_factory is not None:
        st.markdown("#### Precision-Recall Curve")

@@ -287,38 +338,38 @@ def display_evaluation_results(results):
            st.error(f"Error plotting precision-recall curve: {e}")
            st.info("Precision-recall curve data not available.")

-   # Display per-class metrics
-   st.markdown("#### Per-Class Metrics")
-
-   # Filter out the 'mean' column for per-class display
-   per_class_results = metrics_df.drop(columns=['mean']) if 'mean' in metrics_df.columns else metrics_df
-
-   # Remove overall metrics rows (AUC-PR and mAP@[0.5:0.95]) from per-class display
-   per_class_results = per_class_results.drop(['AUC-PR', 'mAP@[0.5:0.95]'], errors='ignore')
-
-   # Create a more readable display
-   display_df = per_class_results.copy()
-
-   # Round numeric columns for better display
-   numeric_columns = display_df.select_dtypes(include=['float64', 'int64']).columns
-   for col in numeric_columns:
-       if col in display_df.columns:
-           display_df[col] = display_df[col].round(3)
-
-   st.dataframe(display_df, use_container_width=True)
-
    # Download results
    st.markdown("#### Download Results")

    # Convert to CSV for download
    csv = metrics_df.to_csv(index=True)
    st.download_button(
-       label="📥 Download Results as CSV",
+       label="📥 Download per class metrics",
        data=csv,
        file_name="evaluation_results.csv",
        mime="text/csv"
    )
-
+   try:
+       curve_data = metrics_factory.get_overall_precision_recall_curve() if metrics_factory is not None else None
+       if curve_data is not None:
+           import io
+           import pandas as pd
+           pr_points_df = pd.DataFrame({
+               "recall": curve_data["recall"],
+               "precision": curve_data["precision"]
+           })
+           pr_csv = pr_points_df.to_csv(index=False)
+           st.download_button(
+               label="📈 Download precision-recall points",
+               data=pr_csv,
+               file_name="precision_recall_points.csv",
+               mime="text/csv"
+           )
+       else:
+           st.write("No precision-recall data available.")
+   except Exception as e:
+       st.write(f"Error preparing precision-recall points: {e}")
+
    # Show detailed statistics
    with st.expander("📊 Detailed Statistics"):
        st.markdown("**Results Shape:**")

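The live-update behaviour in evaluator.py relies on Streamlit placeholders created with st.empty(): each metrics_callback invocation re-renders the same two slots instead of appending new widgets, and the slots are cleared once the run finishes. A standalone sketch of that pattern, with the loop, the step of 5, and all metric values made up for illustration:

```python
import time

import pandas as pd
import streamlit as st

st.title("Live metrics demo")

progress_bar = st.progress(0)
status_text = st.empty()
metrics_placeholder = st.empty()  # summary numbers, re-rendered in place
table_placeholder = st.empty()    # per-class table, re-rendered in place

TOTAL, STEP = 20, 5  # stand-ins for total_samples and evaluation_step
for processed in range(1, TOTAL + 1):
    progress_bar.progress(processed / TOTAL)
    status_text.text(f"Processed {processed}/{TOTAL} images")

    if processed % STEP == 0:
        with metrics_placeholder.container():
            st.markdown(f"#### Intermediate Results (after {processed} images)")
            st.metric("mAP", f"{0.40 + processed / 100:.3f}")  # fabricated value
        with table_placeholder.container():
            st.dataframe(pd.DataFrame({"class": ["car", "person"], "AP": [0.61, 0.47]}))
    time.sleep(0.2)

# Once the run is done, clear the live widgets, as evaluator.py does.
progress_bar.empty()
status_text.empty()
metrics_placeholder.empty()
table_placeholder.empty()
```
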