Merge pull request #219 from automl/polynom_surrogate

mwever · web-flow · commit c6360208d3c0 · 2025-04-16T18:16:37.000+02:00
Polynomial test for ablation path
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,12 @@
+# Version 1.3.5
+
+## Access Specifier
+- Added a public 'fit' method to the RF surrogate that delegates to '_fit'
+
+## Ablation tests
+- Added Polynomial "model" class for testing ablation path behavior
+- Added extra test case using the polynomial to validate ablation output correctness
+
 # Version 1.3.4
 
 ## Bug-Fixes
diff --git a/deepcave/evaluators/ablation.py b/deepcave/evaluators/ablation.py
@@ -75,25 +75,24 @@ def calculate(
         self,
         objectives: Optional[Union[Objective, List[Objective]]],  # noqa
         budget: Optional[Union[int, float]] = None,  # noqa
-        n_trees: int = 50,  # noqa
-        seed: int = 0,  # noqa
+        model: Any = None,
     ) -> None:
         """
         Calculate the ablation path performances and improvements.
 
+        To use the standard Random Forest surrogate, do not pass a model.
+        The option to pass another model is for testing purposes only.
+
         Parameters
         ----------
         objectives : Optional[Union[Objective, List[Objective]]]
             The objective(s) to be considered.
         budget : Optional[Union[int, float]]
             The budget to be considered. If None, all budgets of the run are considered.
             Default is None.
-        n_trees : int
-            The number of trees for the surrogate model.
-            Default is 50.
-        seed : int
-            The seed for the surrogate model.
-            Default is 0.
+        model : Any
+            The surrogate model to use for the prediction of the performances.
+            By default None.
         """
         if isinstance(objectives, list) and len(objectives) > 1:
             raise ValueError("Only one objective is supported for ablation paths.")
@@ -103,25 +102,30 @@ def calculate(
         performances: OrderedDict = OrderedDict()
         improvements: OrderedDict = OrderedDict()
 
+        self._model = model
+
         df = self.run.get_encoded_data(objective, budget, specific=True)
 
         # Obtain all configurations with theirs costs
         df = df.dropna(subset=[objective.name])
         X = df[list(self.run.configspace.keys())].to_numpy()
         Y = df[objective.name].to_numpy()
 
-        # A Random Forest Regressor is used as surrogate model
-        self._model = RandomForestSurrogate(self.cs, seed=seed, n_trees=n_trees)
-        self._model._fit(X, Y)
-
         # Get the incumbent configuration
         incumbent_config, _ = self.run.get_incumbent(budget=budget, objectives=objective)
         incumbent_encode = self.run.encode_config(incumbent_config)
 
         # Get the default configuration
+
         self.default_config = self.cs.get_default_configuration()
         default_encode = self.run.encode_config(self.default_config)
 
+        # The default model is an RF surrogate, but it can't be passed as a parameter directly
+        # because it needs access to its config space
+        if self._model is None:
+            self._model = RandomForestSurrogate(self.cs, seed=0, n_trees=50)
+
+        self._model.fit(X, Y)
         # Obtain the predicted cost of the default and incumbent configuration
         def_cost, def_std = self._model.predict(np.array([default_encode]))
         def_cost, def_std = def_cost[0], def_std[0]
@@ -166,6 +170,7 @@ def calculate(
                     performances[max_hp] = (max_hp_cost, max_hp_std)
                 impr_std = np.sqrt(def_std**2 + max_hp_std**2)
                 improvements[max_hp] = ((def_cost - max_hp_cost), impr_std)
+
                 # New 'default' cost and std
                 def_cost = max_hp_cost
                 def_std = max_hp_std
@@ -248,7 +253,6 @@ def _ablation(
             if hp in incumbent_config.keys() and hp in self.default_config.keys():
                 config_copy = copy.copy(self.default_config)
                 config_copy[hp] = incumbent_config[hp]
-
                 new_cost, _ = self._model.predict(np.array([self.run.encode_config(config_copy)]))
                 if objective.optimize == "upper":
                     new_cost = -new_cost
@@ -262,6 +266,7 @@ def _ablation(
             else:
                 continue
         hp_count = len(list(self.cs.keys()))
+
         if max_hp != "":
             # For the maximum impact hyperparameter, switch the default with the incumbent value
             self.default_config[max_hp] = incumbent_config[max_hp]
@@ -270,6 +275,7 @@ def _ablation(
             )
             if objective.optimize == "upper":
                 max_hp_cost = -max_hp_cost
+
             return True, max_hp, max_hp_cost[0], max_hp_std[0]
         else:
             self.logger.info(
diff --git a/deepcave/evaluators/epm/random_forest_surrogate.py b/deepcave/evaluators/epm/random_forest_surrogate.py
@@ -78,3 +78,16 @@ def _fit(self, X: np.ndarray, y: np.ndarray) -> None:
             Corresponding target values.
         """
         self._model.train(X, y)
+
+    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
+        """
+        Train the surrogate model.
+
+        Parameters
+        ----------
+        X : np.ndarray
+            Input data points.
+        y : np.ndarray
+            Corresponding target values.
+        """
+        self._fit(X, y)
diff --git a/deepcave/evaluators/mo_ablation.py b/deepcave/evaluators/mo_ablation.py
@@ -122,8 +122,7 @@ def calculate(
         self,
         objectives: Optional[Union[Objective, List[Objective]]],  # noqa
         budget: Optional[Union[int, float]] = None,  # noqa
-        n_trees: int = 50,  # noqa
-        seed: int = 0,  # noqa
+        model: Any = None,
     ) -> None:
         """
         Calculate the MO ablation path performances and improvements.
@@ -135,12 +134,9 @@ def calculate(
         budget : Optional[Union[int, float]]
             The budget to be considered. If None, all budgets of the run are considered.
             Default is None.
-        n_trees : int
-            The number of trees for the surrogate model.
-            Default is 50.
-        seed : int
-            The seed for the surrogate model.
-            Default is 0.
+        model : Any
+            Surrogate model to fit for each objective. If None, a Random Forest
+            surrogate is created. By default None.
         """
         assert isinstance(objectives, list)
         for objective in objectives:
@@ -166,8 +162,9 @@ def calculate(
 
             # train one model per objective
             Y = df[normed].to_numpy()
-            model = RandomForestSurrogate(self.cs, seed=seed, n_trees=n_trees)
-            model._fit(X, Y)
+            if model is None:
+                model = RandomForestSurrogate(self.cs, seed=0, n_trees=50)
+            model.fit(X, Y)
             self.models.append(model)
 
         weightings = get_weightings(objectives_normed, df)
diff --git a/deepcave/plugins/hyperparameter/ablation_paths.py b/deepcave/plugins/hyperparameter/ablation_paths.py
@@ -338,7 +338,7 @@ def process(run: AbstractRun, inputs: Dict[str, Any]) -> Dict[str, Any]:
         data: Dict[Any, Any] = {}
         for budget_id, budget in enumerate(budgets):
             assert isinstance(budget, (int, float))
-            evaluator.calculate(objective, budget, n_trees=n_trees, seed=0)
+            evaluator.calculate(objective, budget)
             if isinstance(objective, list):
                 assert isinstance(evaluator, MOAblation)
                 data[budget_id] = evaluator.get_importances()
diff --git a/deepcave/runs/__init__.py b/deepcave/runs/__init__.py
@@ -1317,6 +1317,7 @@ def get_encoded_data(
         config_ids = []
 
         results = self.get_all_costs(budget, statuses, seed)
+
         for config_id, config_costs in results.items():
             config = self.configs[config_id]
             for seed, costs in config_costs.items():
diff --git a/deepcave/worker.py b/deepcave/worker.py
@@ -18,7 +18,7 @@
 This module can create a redis queue worker.
 """
 
-from rq import Connection, Worker
+from rq import Connection, Worker  # type: ignore
 
 from deepcave import queue
 
diff --git a/tests/test_evaluators/dummy_run/configs.json b/tests/test_evaluators/dummy_run/configs.json
@@ -0,0 +1,12 @@
+{
+    "0": {
+        "alpha": 0.0,
+        "beta": 0.0,
+        "gamma": 0.0
+    },
+    "1": {
+        "alpha": 1.0,
+        "beta": 1.0,
+        "gamma": 1.0
+    }
+}
diff --git a/tests/test_evaluators/dummy_run/configspace.json b/tests/test_evaluators/dummy_run/configspace.json
@@ -0,0 +1 @@
+{"name": null, "hyperparameters": [{"type": "uniform_float", "name": "alpha", "lower": 0.0, "upper": 1.0, "default_value": 0.0, "log": false, "meta": null}, {"type": "uniform_float", "name": "beta", "lower": 0.0, "upper": 1.0, "default_value": 0.0, "log": false, "meta": null}, {"type": "uniform_float", "name": "gamma", "lower": 0.0, "upper": 1.0, "default_value": 0.0, "log": false, "meta": null}], "conditions": [], "forbiddens": [], "python_module_version": "1.2.0", "format_version": 0.4}
diff --git a/tests/test_evaluators/dummy_run/history.jsonl b/tests/test_evaluators/dummy_run/history.jsonl
@@ -0,0 +1,6 @@
+[0, 20, -1, [0.0, 0.0], 0.0, 0.0, 1, {}]
+[0, 40, -1, [0.0, 0.0], 0.0, 0.0, 1, {}]
+[0, 60, -1, [0.0, 0.0], 0.0, 0.0, 1, {}]
+[1, 20, -1, [1.0, 1.0], 0.0, 0.0, 1, {}]
+[1, 40, -1, [1.0, 1.0], 0.0, 0.0, 1, {}]
+[1, 60, -1, [1.0, 1.0], 0.0, 0.0, 1, {}]
diff --git a/tests/test_evaluators/dummy_run/meta.json b/tests/test_evaluators/dummy_run/meta.json
@@ -0,0 +1,20 @@
+{
+    "objectives": [
+        {
+            "name": "test",
+            "lower": 0,
+            "upper": 1,
+            "lock_lower": true,
+            "lock_upper": true,
+            "optimize": "upper"
+        }
+    ],
+    "budgets": [
+        20,
+        40,
+        60
+    ],
+    "seeds": [
+        -1
+    ]
+}
diff --git a/tests/test_evaluators/dummy_run/origins.json b/tests/test_evaluators/dummy_run/origins.json
@@ -0,0 +1,4 @@
+{
+    "0": null,
+    "1": null
+}
diff --git a/tests/test_evaluators/test_ablation.py b/tests/test_evaluators/test_ablation.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+{"name": null, "hyperparameters": [{"type": "uniform_float", "name": "alpha", "lower": 0.0, "upper": 1.0, "default_value": 0.0, "log": false, "meta": null}, {"type": "uniform_float", "name": "beta", "lower": 0.0, "upper": 1.0, "default_value": 0.0, "log": false, "meta": null}, {"type": "uniform_float", "name": "gamma", "lower": 0.0, "upper": 1.0, "default_value": 0.0, "log": false, "meta": null}], "conditions": [], "forbiddens": [], "python_module_version": "1.2.0", "format_version": 0.4}`