Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions aeon/segmentation/_hidalgo.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,11 @@ def _get_neighbourhood_params(self, X):
n_neighbors=q + 1, algorithm="ball_tree", metric=metric
).fit(X)
distances, Iin = nbrs.kneighbors(X)
mu = np.divide(distances[:, 2], distances[:, 1])

# Add numerical stability: prevent division by zero when duplicate points exist
# Use epsilon to handle cases where r1 (distances[:, 1]) is zero or near-zero
eps = 1e-12
mu = np.divide(distances[:, 2], distances[:, 1] + eps)

nbrmat = np.zeros((m, m))
for n in range(q):
Expand Down Expand Up @@ -354,7 +358,14 @@ def sample_p(K, p, pp, c1, _rng):
r1 = _rng.random() # random sample for p[k]
r2 = _rng.random() # random number for accepting

rmax = (c1[k] - 1) / (c1[k] - 1 + c1[K - 1] - 1)
# Add numerical stability for edge cases
eps = 1e-12
denom = max(c1[k] - 1 + c1[K - 1] - 1, eps)
rmax = (c1[k] - 1) / denom

# Prevent division by zero when rmax is 0 or 1
rmax = np.clip(rmax, eps, 1.0 - eps)

frac = ((r1 / rmax) ** (c1[k] - 1)) * (
((1 - r1) / (1 - rmax)) ** (c1[K - 1] - 1)
)
Expand Down
53 changes: 52 additions & 1 deletion aeon/segmentation/tests/test_hidalgo.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Test Hidalgo segmenter."""

from aeon.segmentation._hidalgo import _binom, _partition_function
import numpy as np

from aeon.segmentation._hidalgo import HidalgoSegmenter, _binom, _partition_function


def test_partition_function():
Expand All @@ -9,3 +11,52 @@ def test_partition_function():
assert p == 8.0
b = _binom(10, 2)
assert b == 45.0


def test_hidalgo_zero_distance_stability():
"""
Test Hidalgo segmenter with duplicate/near-duplicate points.

Regression test for issue #3068: AssertionError when data contains
identical rows, causing zero distances in nearest neighbor search.
This should not crash but handle duplicates gracefully.
"""
# Create data with exact duplicates (causes zero distances)
X = np.array(
[
[0.1, 0.2, 0.3],
[0.1, 0.2, 0.3], # Exact duplicate
[0.4, 0.5, 0.6],
[0.7, 0.8, 0.9],
[0.7, 0.8, 0.9],
]
) # Another duplicate

# This should not raise AssertionError or divide-by-zero warnings
hidalgo = HidalgoSegmenter(K=2, q=2, n_iter=100, burn_in=0.5)

# Should complete without errors
result = hidalgo.fit_predict(X, axis=0)
Copy link

Copilot AI Dec 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The assertion `assert len(result) >= 0` is always true, since `len()` always returns a non-negative integer, so it provides no meaningful test coverage. Consider removing it or replacing it with a more meaningful check, such as verifying that the result is a valid array or checking specific properties of the returned changepoints.

Copilot uses AI. Check for mistakes.

# Basic sanity checks
assert result is not None
assert len(result) >= 0 # May return empty array if no changepoints
assert isinstance(result, np.ndarray)


def test_hidalgo_normal_data():
    """Check that Hidalgo still runs on ordinary random data.

    Guards against the numerical-stability changes breaking normal
    operation: fitting and predicting on duplicate-free random input must
    still produce a numpy array.
    """
    # Seeded generator: 50 random 3-dimensional points without duplicates.
    random_state = np.random.RandomState(42)
    data = random_state.rand(50, 3)

    segmenter = HidalgoSegmenter(K=3, q=3, n_iter=200, burn_in=0.8)
    predicted = segmenter.fit_predict(data, axis=0)

    # Same expectations as before the fix: a result exists and is an array.
    assert predicted is not None
    assert isinstance(predicted, np.ndarray)