h2o related fixes

SamoraHunter · SamoraHunter · commit ab1e6d7b8024 · 2025-10-29T19:10:49.000Z
diff --git a/ml_grid/model_classes/H2OBaseClassifier.py b/ml_grid/model_classes/H2OBaseClassifier.py
@@ -427,17 +427,22 @@ def predict(self, X: pd.DataFrame) -> np.ndarray:
         # Ensure the model is loaded (critical for cross-validation)
         self._ensure_model_is_loaded()
 
-        # Create H2O frame with explicit column names
-        # --- ROBUSTNESS FIX for java.lang.NullPointerException ---
-        # Instead of creating the frame directly, upload the data and then assign it.
-        # This seems to create a more 'stable' frame in the H2O cluster, preventing
-        # internal errors during prediction with some models like GLM.
         try:
-            # Create a temporary H2OFrame by uploading the pandas DataFrame
-            tmp_frame = h2o.H2OFrame(X, column_names=self.feature_names_, column_types=self.feature_types_)
+            # --- ROBUSTNESS FIX for java.lang.NullPointerException ---
+            # Instead of creating the frame directly, upload the data and then assign it.
+            # This seems to create a more 'stable' frame in the H2O cluster, preventing
+            # internal errors during prediction with some models like GLM.
+            
+            # Create a temporary H2OFrame by uploading the pandas DataFrame.
+            # We ensure column names and types match what the model was trained on.
+            tmp_frame = h2o.H2OFrame(
+                X, 
+                column_names=self.feature_names_, 
+                column_types=self.feature_types_
+            )
             
             # Assign it to a unique key in the H2O cluster. This is more reliable.
-            frame_id = f"predict_frame_{self.model_id}_{pd.Timestamp.now().strftime('%Y%m%d%H%M%S%f')}"
+            frame_id = f"predict_frame_{self.model_id}_{pd.Timestamp.now().strftime('%Y%m%d%H%M%S%f')}" # noqa
             h2o.assign(tmp_frame, frame_id)
             
             # Get a handle to the newly created frame
diff --git a/ml_grid/model_classes/H2OGAMClassifier.py b/ml_grid/model_classes/H2OGAMClassifier.py
@@ -123,9 +123,15 @@ def _prepare_fit(self, X: pd.DataFrame, y: pd.Series):
                 try:
                     quantiles = np.linspace(0, 1, required_knots)
                     knot_values = X[col].quantile(quantiles)
-                    if knot_values.nunique() < required_knots:
+                    # Check for enough unique values AND that they are monotonically increasing
+                    # The diff() will be > 0 for all elements in a strictly increasing series.
+                    are_knots_valid = (knot_values.nunique() >= required_knots) and \
+                                      (np.all(np.diff(knot_values.to_numpy()) > 0))
+
+                    if not are_knots_valid:
                         self.logger.warning(
-                            f"Excluding GAM column '{col}': Not enough unique values to generate distinct knots."
+                            f"Excluding GAM column '{col}': Not enough unique values to generate distinct, "
+                            f"monotonically increasing knots."
                         )
                         continue
                 except Exception as e:
diff --git a/ml_grid/pipeline/grid_search_cross_validate.py b/ml_grid/pipeline/grid_search_cross_validate.py
@@ -452,21 +452,6 @@ def __init__(
                 if not getattr(self.global_parameters, 'test_mode', False):
                     # Fit on the full training data first
                     current_algorithm.fit(self.X_train, self.y_train)
-                
-                # --- TENSORFLOW PERFORMANCE FIX (Corrected Position) ---
-                # Pre-compile the predict function for Keras/TF models to avoid retracing warnings.
-                # This is done AFTER fitting and before cross-validation.
-                if isinstance(current_algorithm, (KerasClassifier, kerasClassifier_class, NeuralNetworkClassifier)):
-                    try:
-                        self.logger.debug("Pre-compiling TensorFlow predict function to avoid retracing.")
-                        n_features = self.X_train.shape[1]
-                        # Define an input signature that allows for variable batch size.
-                        input_signature = [tf.TensorSpec(shape=(None, n_features), dtype=tf.float32)]
-                        # Access the underlying Keras model via .model_
-                        current_algorithm.model_.predict.get_concrete_function(input_signature)
-                    except Exception as e:
-                        self.logger.warning(f"Could not pre-compile TF function. Performance may be impacted. Error: {e}")
-                
                 # --- CRITICAL FIX: Pass the pandas Series, not the numpy array ---
                 # Passing the numpy array (y_train.to_numpy()) causes index misalignment
                 # with the pandas DataFrame (X_train_final) inside sklearn's CV,
@@ -481,6 +466,21 @@ def __init__(
                     pre_dispatch=80,
                     error_score=self.error_raise,  # Raise error if cross-validation fails
                 )
+                
+                # --- TENSORFLOW PERFORMANCE FIX (Corrected Position) ---
+                # Pre-compile the predict function for Keras/TF models to avoid retracing warnings.
+                # NOTE: after this move, this runs AFTER fitting and AFTER the cross-validation call above.
+                if isinstance(current_algorithm, (KerasClassifier, kerasClassifier_class, NeuralNetworkClassifier)):
+                    try:
+                        self.logger.debug("Pre-compiling TensorFlow predict function to avoid retracing.")
+                        n_features = self.X_train.shape[1]
+                        # Define an input signature that allows for variable batch size.
+                        input_signature = [tf.TensorSpec(shape=(None, n_features), dtype=tf.float32)]
+                        # Access the underlying Keras model via .model_
+                        current_algorithm.model_.predict.get_concrete_function(input_signature)
+                    except Exception as e:
+                        self.logger.warning(f"Could not pre-compile TF function. Performance may be impacted. Error: {e}")
+
 
 
         except XGBoostError as e:
diff --git a/ml_grid/pipeline/main.py b/ml_grid/pipeline/main.py
@@ -162,11 +162,17 @@ def __init__(self, local_param_dict: Dict[str, Any], **kwargs):
                         
 
         # sample from mean of all param space n
-        self.mean_parameter_space_val = np.mean(self.pg_list)
-
-        self.sub_sample_parameter_val = int(
-            self.sub_sample_param_space_pct * self.mean_parameter_space_val
-        )
+        if self.pg_list:
+            self.mean_parameter_space_val = np.mean(self.pg_list)
+            self.sub_sample_parameter_val = int(
+                self.sub_sample_param_space_pct * self.mean_parameter_space_val
+            )
+        else:
+            self.logger.warning(
+                "Parameter grid list is empty; no models were loaded. Setting parameter space values to 0."
+            )
+            self.mean_parameter_space_val = 0
+            self.sub_sample_parameter_val = 0
 
         # Initialize the project_score_save_class instance once per run
         # The ml_grid_object should have the experiment_dir set
@@ -283,8 +289,8 @@ def multi_run_wrapper(args: Tuple) -> Any:
                     self.logger.error(f"An exception occurred during grid search for {self.arg_list[k][2]}: {e}", exc_info=True)
                     
                     self.model_error_list.append(
-                        [self.arg_list[k][0], e, traceback.print_exc()]
-                    ) # traceback is printed to stderr, not captured here.
+                        [self.arg_list[k][0], e, traceback.format_exc()]
+                    )
                     
                     # Based on the 'error_raise' flag, either halt execution or log and continue.
                     if self.error_raise: