From 8dfe2ddcc5ee19bd5e317029fb2979b08de5f628 Mon Sep 17 00:00:00 2001
From: Satwik Sai Prakash Sahoo
Date: Mon, 17 Nov 2025 10:52:52 +0530
Subject: [PATCH 1/2] Fix zero-distance instability in Hidalgo

---
Review notes (commentary only; not part of the commit message or the diff):
  * The masked divide and nan_to_num fallback added here are removed again
    by PATCH 2/2, which replaces them with a single eps-shifted divide.
  * In this version, rows whose distances[:, 1] is zero get num / eps via
    the `out=` array; all other rows get the plain num / den ratio.

 aeon/segmentation/_hidalgo.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/aeon/segmentation/_hidalgo.py b/aeon/segmentation/_hidalgo.py
index c298873ab9..3ed29c22cd 100644
--- a/aeon/segmentation/_hidalgo.py
+++ b/aeon/segmentation/_hidalgo.py
@@ -170,7 +170,13 @@ def _get_neighbourhood_params(self, X):
             n_neighbors=q + 1, algorithm="ball_tree", metric=metric
         ).fit(X)
         distances, Iin = nbrs.kneighbors(X)
-        mu = np.divide(distances[:, 2], distances[:, 1])
+        num = distances[:, 2]
+        den = distances[:, 1]
+        eps = 1e-12
+        mu = np.divide(num, den, out=num / eps, where=den != 0)
+        mu = np.nan_to_num(
+            np.asarray(mu), nan=num / eps, posinf=num / eps, neginf=num / eps
+        )
 
         nbrmat = np.zeros((m, m))
         for n in range(q):

From 6bb76a792c71553c02b50a59a0a8dd336c5a58d1 Mon Sep 17 00:00:00 2001
From: Satwik Sai Prakash Sahoo
Date: Thu, 4 Dec 2025 09:17:53 +0530
Subject: [PATCH 2/2] Add a regression test to ensure Hidalgo handles duplicate rows without errors

---
Review notes (commentary only; not part of the commit message or the diff):
  * Besides adding the regression test, this commit also rewrites the mu
    computation in _get_neighbourhood_params; the subject line mentions
    only the test.
  * eps is now added to every denominator, not just zero ones, so mu is
    distances[:, 2] / (distances[:, 1] + 1e-12) for all rows.
  * If distances[:, 1] and distances[:, 2] are both zero for a row, this
    expression evaluates to 0 / eps == 0 for that row.
    NOTE(review): the code consuming mu is not visible in this patch;
    confirm it tolerates mu == 0.
  * The new test uses one duplicated pair of rows and asserts only that
    the output is a non-None np.ndarray of length len(X); it does not
    check the returned values themselves.

 aeon/segmentation/_hidalgo.py           |  8 ++-----
 aeon/segmentation/tests/test_hidalgo.py | 28 ++++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/aeon/segmentation/_hidalgo.py b/aeon/segmentation/_hidalgo.py
index 3ed29c22cd..2362dbc44e 100644
--- a/aeon/segmentation/_hidalgo.py
+++ b/aeon/segmentation/_hidalgo.py
@@ -170,13 +170,9 @@ def _get_neighbourhood_params(self, X):
             n_neighbors=q + 1, algorithm="ball_tree", metric=metric
         ).fit(X)
         distances, Iin = nbrs.kneighbors(X)
-        num = distances[:, 2]
-        den = distances[:, 1]
         eps = 1e-12
-        mu = np.divide(num, den, out=num / eps, where=den != 0)
-        mu = np.nan_to_num(
-            np.asarray(mu), nan=num / eps, posinf=num / eps, neginf=num / eps
-        )
+        # stabilise r2/r1 ratio; protect against zero or near-zero r1
+        mu = np.divide(distances[:, 2], distances[:, 1] + eps)
 
         nbrmat = np.zeros((m, m))
         for n in range(q):
diff --git a/aeon/segmentation/tests/test_hidalgo.py b/aeon/segmentation/tests/test_hidalgo.py
index f216dcc54a..e055407b45 100644
--- a/aeon/segmentation/tests/test_hidalgo.py
+++ b/aeon/segmentation/tests/test_hidalgo.py
@@ -1,6 +1,8 @@
 """Test Hidalgo segmenter."""
 
-from aeon.segmentation._hidalgo import _binom, _partition_function
+import numpy as np
+
+from aeon.segmentation._hidalgo import HidalgoSegmenter, _binom, _partition_function
 
 
 def test_partition_function():
@@ -9,3 +11,27 @@ def test_partition_function():
     assert p == 8.0
     b = _binom(10, 2)
     assert b == 45.0
+
+
+def test_hidalgo_zero_distance_duplicate_rows():
+    """Test that Hidalgo handles duplicate rows without numerical errors."""
+    X = np.array(
+        [
+            [1.0, 2.0, 3.0],
+            [1.0, 2.0, 3.0],
+            [4.0, 5.0, 6.0],
+            [7.0, 8.0, 9.0],
+        ]
+    )
+    seg = HidalgoSegmenter(
+        K=2,
+        q=2,
+        n_iter=50,
+        burn_in=0.2,
+        sampling_rate=5,
+        seed=1,
+    )
+    out = seg.fit_predict(X, axis=0)
+    assert out is not None
+    assert isinstance(out, np.ndarray)
+    assert len(out) == len(X)