Skip to content

Commit ab1e6d7

Browse files
committed
H2O-related fixes
1 parent ec3cf20 commit ab1e6d7

File tree

4 files changed

+49
-32
lines changed

4 files changed

+49
-32
lines changed

ml_grid/model_classes/H2OBaseClassifier.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -427,17 +427,22 @@ def predict(self, X: pd.DataFrame) -> np.ndarray:
427427
# Ensure the model is loaded (critical for cross-validation)
428428
self._ensure_model_is_loaded()
429429

430-
# Create H2O frame with explicit column names
431-
# --- ROBUSTNESS FIX for java.lang.NullPointerException ---
432-
# Instead of creating the frame directly, upload the data and then assign it.
433-
# This seems to create a more 'stable' frame in the H2O cluster, preventing
434-
# internal errors during prediction with some models like GLM.
435430
try:
436-
# Create a temporary H2OFrame by uploading the pandas DataFrame
437-
tmp_frame = h2o.H2OFrame(X, column_names=self.feature_names_, column_types=self.feature_types_)
431+
# --- ROBUSTNESS FIX for java.lang.NullPointerException ---
432+
# Instead of creating the frame directly, upload the data and then assign it.
433+
# This seems to create a more 'stable' frame in the H2O cluster, preventing
434+
# internal errors during prediction with some models like GLM.
435+
436+
# Create a temporary H2OFrame by uploading the pandas DataFrame.
437+
# We ensure column names and types match what the model was trained on.
438+
tmp_frame = h2o.H2OFrame(
439+
X,
440+
column_names=self.feature_names_,
441+
column_types=self.feature_types_
442+
)
438443

439444
# Assign it to a unique key in the H2O cluster. This is more reliable.
440-
frame_id = f"predict_frame_{self.model_id}_{pd.Timestamp.now().strftime('%Y%m%d%H%M%S%f')}"
445+
frame_id = f"predict_frame_{self.model_id}_{pd.Timestamp.now().strftime('%Y%m%d%H%M%S%f')}" # noqa
441446
h2o.assign(tmp_frame, frame_id)
442447

443448
# Get a handle to the newly created frame

ml_grid/model_classes/H2OGAMClassifier.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,15 @@ def _prepare_fit(self, X: pd.DataFrame, y: pd.Series):
123123
try:
124124
quantiles = np.linspace(0, 1, required_knots)
125125
knot_values = X[col].quantile(quantiles)
126-
if knot_values.nunique() < required_knots:
126+
# Check for enough unique values AND that they are strictly increasing.
127+
# The diff() will be > 0 for all elements in a strictly increasing series.
128+
are_knots_valid = (knot_values.nunique() >= required_knots) and \
129+
(np.all(np.diff(knot_values.to_numpy()) > 0))
130+
131+
if not are_knots_valid:
127132
self.logger.warning(
128-
f"Excluding GAM column '{col}': Not enough unique values to generate distinct knots."
133+
f"Excluding GAM column '{col}': Not enough unique values to generate distinct, "
134+
f"monotonically increasing knots."
129135
)
130136
continue
131137
except Exception as e:

ml_grid/pipeline/grid_search_cross_validate.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -452,21 +452,6 @@ def __init__(
452452
if not getattr(self.global_parameters, 'test_mode', False):
453453
# Fit on the full training data first
454454
current_algorithm.fit(self.X_train, self.y_train)
455-
456-
# --- TENSORFLOW PERFORMANCE FIX (Corrected Position) ---
457-
# Pre-compile the predict function for Keras/TF models to avoid retracing warnings.
458-
# This is done AFTER fitting and before cross-validation.
459-
if isinstance(current_algorithm, (KerasClassifier, kerasClassifier_class, NeuralNetworkClassifier)):
460-
try:
461-
self.logger.debug("Pre-compiling TensorFlow predict function to avoid retracing.")
462-
n_features = self.X_train.shape[1]
463-
# Define an input signature that allows for variable batch size.
464-
input_signature = [tf.TensorSpec(shape=(None, n_features), dtype=tf.float32)]
465-
# Access the underlying Keras model via .model_
466-
current_algorithm.model_.predict.get_concrete_function(input_signature)
467-
except Exception as e:
468-
self.logger.warning(f"Could not pre-compile TF function. Performance may be impacted. Error: {e}")
469-
470455
# --- CRITICAL FIX: Pass the pandas Series, not the numpy array ---
471456
# Passing the numpy array (y_train.to_numpy()) causes index misalignment
472457
# with the pandas DataFrame (X_train_final) inside sklearn's CV,
@@ -481,6 +466,21 @@ def __init__(
481466
pre_dispatch=80,
482467
error_score=self.error_raise, # Raise error if cross-validation fails
483468
)
469+
470+
# --- TENSORFLOW PERFORMANCE FIX (Corrected Position) ---
471+
# Pre-compile the predict function for Keras/TF models to avoid retracing warnings.
472+
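# NOTE(review): this now runs AFTER fitting and after the call that ends just above (apparently the cross-validation call) — confirm this ordering is intended.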
473+
if isinstance(current_algorithm, (KerasClassifier, kerasClassifier_class, NeuralNetworkClassifier)):
474+
try:
475+
self.logger.debug("Pre-compiling TensorFlow predict function to avoid retracing.")
476+
n_features = self.X_train.shape[1]
477+
# Define an input signature that allows for variable batch size.
478+
input_signature = [tf.TensorSpec(shape=(None, n_features), dtype=tf.float32)]
479+
# Access the underlying Keras model via .model_
480+
current_algorithm.model_.predict.get_concrete_function(input_signature)
481+
except Exception as e:
482+
self.logger.warning(f"Could not pre-compile TF function. Performance may be impacted. Error: {e}")
483+
484484

485485

486486
except XGBoostError as e:

ml_grid/pipeline/main.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -162,11 +162,17 @@ def __init__(self, local_param_dict: Dict[str, Any], **kwargs):
162162

163163

164164
# sample from mean of all param space n
165-
self.mean_parameter_space_val = np.mean(self.pg_list)
166-
167-
self.sub_sample_parameter_val = int(
168-
self.sub_sample_param_space_pct * self.mean_parameter_space_val
169-
)
165+
if self.pg_list:
166+
self.mean_parameter_space_val = np.mean(self.pg_list)
167+
self.sub_sample_parameter_val = int(
168+
self.sub_sample_param_space_pct * self.mean_parameter_space_val
169+
)
170+
else:
171+
self.logger.warning(
172+
"Parameter grid list is empty; no models were loaded. Setting parameter space values to 0."
173+
)
174+
self.mean_parameter_space_val = 0
175+
self.sub_sample_parameter_val = 0
170176

171177
# Initialize the project_score_save_class instance once per run
172178
# The ml_grid_object should have the experiment_dir set
@@ -283,8 +289,8 @@ def multi_run_wrapper(args: Tuple) -> Any:
283289
self.logger.error(f"An exception occurred during grid search for {self.arg_list[k][2]}: {e}", exc_info=True)
284290

285291
self.model_error_list.append(
286-
[self.arg_list[k][0], e, traceback.print_exc()]
287-
) # traceback is printed to stderr, not captured here.
292+
[self.arg_list[k][0], e, traceback.format_exc()]
293+
)
288294

289295
# Based on the 'error_raise' flag, either halt execution or log and continue.
290296
if self.error_raise:

0 commit comments

Comments
 (0)