Skip to content

Commit 47379f3

Browse files
committed
lgbm test fixes
1 parent cf62dee commit 47379f3

File tree

3 files changed

+63
-22
lines changed

3 files changed

+63
-22
lines changed

.github/workflows/notebook-test.yml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,11 @@ jobs:
3737
sudo apt-get install -y tzdata
3838
sudo dpkg-reconfigure -f noninteractive tzdata
3939
40+
- name: Install libgomp1 for LightGBM
41+
run: |
42+
sudo apt-get update
43+
sudo apt-get install -y libgomp1
44+
4045
- name: Install Python 3.10, Git, and set CA environment
4146
run: |
4247
sudo apt-get update

ml_grid/model_classes/knn_wrapper_class.py

Lines changed: 45 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,8 @@
44
import numpy as np
55
import pandas as pd
66
import torch
7-
from simbsig.neighbors import KNeighborsClassifier
7+
from simbsig.neighbors import KNeighborsClassifier as SimbsigKNeighborsClassifier
8+
from sklearn.neighbors import KNeighborsClassifier as SklearnKNeighborsClassifier
89
from sklearn import metrics
910
import logging
1011

@@ -48,18 +49,22 @@ def __init__(
4849
self.p = p
4950
self.metric = metric
5051
self.metric_params = metric_params
52+
self._init_device = device # Store the original device parameter
53+
self.device = device
54+
55+
# Auto-detect device if not specified, or validate if specified
56+
self._set_device(device)
5157

52-
# Auto-detect device
58+
self.model: Optional[Union[SimbsigKNeighborsClassifier, SklearnKNeighborsClassifier]] = None
59+
60+
def _set_device(self, device: Optional[str]):
61+
"""Helper to set the device, falling back to CPU if GPU is not available."""
5362
gpu_available = torch.cuda.is_available()
5463
if device == "gpu" and not gpu_available:
55-
logging.getLogger('ml_grid').warning("GPU requested for KNNWrapper, but torch.cuda.is_available() is False. Falling back to CPU.")
64+
logging.getLogger('ml_grid').warning("GPU requested for KNNWrapper, but torch.cuda is not available. Falling back to CPU.")
5665
self.device = "cpu"
57-
elif device:
58-
self.device = device
5966
else:
60-
self.device = "gpu" if gpu_available else "cpu"
61-
62-
self.model: Optional[KNeighborsClassifier] = None
67+
self.device = device if device else ("gpu" if gpu_available else "cpu")
6368

6469
def fit(
6570
self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray]
@@ -75,17 +80,31 @@ def fit(
7580
Returns:
7681
KNNWrapper: The fitted estimator.
7782
"""
78-
self.model = KNeighborsClassifier(
79-
n_neighbors=self.n_neighbors,
80-
weights=self.weights,
81-
algorithm=self.algorithm,
82-
leaf_size=self.leaf_size,
83-
p=self.p,
84-
metric=self.metric,
85-
metric_params=self.metric_params,
86-
device=self.device,
87-
)
88-
83+
# If the device is CPU, use the standard scikit-learn implementation
84+
# to completely avoid any simbsig/torch/cuda calls.
85+
if self.device == 'cpu':
86+
logging.getLogger('ml_grid').info("Using scikit-learn's KNeighborsClassifier for CPU execution.")
87+
self.model = SklearnKNeighborsClassifier(
88+
n_neighbors=self.n_neighbors,
89+
weights=self.weights,
90+
algorithm=self.algorithm,
91+
leaf_size=self.leaf_size,
92+
p=self.p,
93+
metric=self.metric,
94+
metric_params=self.metric_params,
95+
)
96+
else:
97+
# Otherwise (any device other than 'cpu'), use the simbsig implementation, which takes a device argument.
98+
self.model = SimbsigKNeighborsClassifier(
99+
n_neighbors=self.n_neighbors,
100+
weights=self.weights,
101+
algorithm=self.algorithm,
102+
leaf_size=self.leaf_size,
103+
p=self.p,
104+
metric=self.metric,
105+
metric_params=self.metric_params,
106+
device=self.device,
107+
)
89108
self.model.fit(X, y)
90109
return self
91110

@@ -97,18 +116,17 @@ def get_params(self, deep: bool = False) -> Dict[str, Any]:
97116
contained subobjects that are estimators.
98117
99118
Returns:
100-
Dict[str, Any]: Parameter names mapped to their values.
119+
Dict[str, Any]: Parameter names mapped to their values; "device" is the value originally passed in, not the resolved device.
101120
"""
102121
return {
103-
"device": self.device,
122+
"device": self._init_device,
104123
"n_neighbors": self.n_neighbors,
105124
"weights": self.weights,
106125
"algorithm": self.algorithm,
107126
"leaf_size": self.leaf_size,
108127
"p": self.p,
109128
"metric": self.metric,
110129
"metric_params": self.metric_params,
111-
"n_neighbors": self.n_neighbors,
112130
}
113131

114132
def predict(self, X: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
@@ -158,5 +176,10 @@ def set_params(self, **parameters: Any) -> "KNNWrapper":
158176
KNNWrapper: The instance with updated parameters.
159177
"""
160178
for parameter, value in parameters.items():
179+
# Special handling for device to re-validate availability
180+
if parameter == 'device':
181+
# Update both the initial and current device setting
182+
self._init_device = value
183+
self._set_device(value)
161184
setattr(self, parameter, value)
162185
return self

ml_grid/pipeline/grid_search_cross_validate.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
import numpy as np
99
import pandas as pd
1010
import tensorflow as tf
11+
import torch
1112
from IPython.display import clear_output
1213
from numpy import absolute, mean, std
1314
from scikeras.wrappers import KerasClassifier
@@ -150,6 +151,15 @@ def __init__(
150151
if "catboost" in method_name.lower() and hasattr(current_algorithm, 'set_params'):
151152
ml_grid_object.logger.info("Silencing CatBoost verbose output.")
152153
current_algorithm.set_params(verbose=0)
154+
155+
# Check for GPU availability and set device for torch-based models
156+
if "simbsig" in str(type(algorithm_implementation)):
157+
if not torch.cuda.is_available():
158+
self.logger.info("No CUDA GPU detected. Forcing simbsig model to use CPU.")
159+
if hasattr(current_algorithm, 'set_params'):
160+
current_algorithm.set_params(device='cpu')
161+
else:
162+
self.logger.info("CUDA GPU detected. Allowing simbsig model to use GPU.")
153163

154164
self.logger.info(f"Algorithm implementation: {algorithm_implementation}")
155165

@@ -250,6 +260,9 @@ def __init__(
250260
# Dynamically adjust KNN parameter space for small datasets
251261
if "kneighbors" in method_name.lower():
252262
self._adjust_knn_parameters(parameter_space)
263+
self.logger.info(
264+
"Adjusted KNN n_neighbors parameter space to prevent errors on small CV folds."
265+
)
253266

254267
# Instantiate and run the hyperparameter grid/random search
255268
search = HyperparameterSearch(

0 commit comments

Comments
 (0)