Skip to content

Commit 7787394

Browse files
authored
Merge pull request #182 from lucasimi/develop
Improved class hierarchies
2 parents 363a1f2 + f53b702 commit 7787394

File tree

2 files changed

+133
-87
lines changed

2 files changed

+133
-87
lines changed

src/tdamapper/cover.py

Lines changed: 104 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -229,44 +229,7 @@ def search(self, x):
229229
return [x for (x, _) in neighs]
230230

231231

232-
class ProximityCubicalCover(Proximity):
233-
"""
234-
Cover algorithm based on the `cubical proximity function`, which covers
235-
data with open hypercubes of uniform size and overlap. The cubical cover is
236-
obtained by selecting a subset of all the hypercubes that intersect the
237-
dataset using proximity net (see :class:`tdamapper.core.Proximity`).
238-
For an open cover containing all the hypercubes intersecting the dataset
239-
use :class:`tdamapper.core.StandardCubicalCover`.
240-
241-
A hypercube is a multidimensional generalization of a square or a cube.
242-
The size and overlap of the hypercubes are determined by the number of
243-
intervals and the overlap fraction parameters. This class maps each point
244-
to the hypercube with the nearest center.
245-
246-
:param n_intervals: The number of intervals to use for each dimension.
247-
Must be positive and less than or equal to the length of the dataset.
248-
Defaults to 1.
249-
:type n_intervals: int
250-
:param overlap_frac: The fraction of overlap between adjacent intervals on
251-
each dimension, must be in the range (0.0, 0.5]. If not specified, the
252-
overlap_frac is computed such that the volume of the overlap within
253-
each hypercube is half the total volume. Defaults to None.
254-
:type overlap_frac: float
255-
:param kind: Specifies whether to use a flat or a hierarchical vantage
256-
point tree. Acceptable values are 'flat' or 'hierarchical'. Defaults to
257-
'flat'.
258-
:type kind: str
259-
:param leaf_capacity: The maximum number of points in a leaf node of the
260-
vantage point tree. Must be a positive value. Defaults to 1.
261-
:type leaf_capacity: int
262-
:param leaf_radius: The radius of the leaf nodes. If not specified, it
263-
defaults to the value of `radius`. Must be a positive value. Defaults
264-
to None.
265-
:type leaf_radius: float, optional
266-
:param pivoting: The method used for pivoting in the vantage point tree.
267-
Acceptable values are None, 'random', or 'furthest'. Defaults to None.
268-
:type pivoting: str or callable, optional
269-
"""
232+
class BaseCubicalCover:
270233

271234
def __init__(
272235
self,
@@ -284,6 +247,39 @@ def __init__(
284247
self.leaf_radius = leaf_radius
285248
self.pivoting = pivoting
286249

250+
def _get_center(self, x):
251+
offset = self._offset(x)
252+
center = self._phi(x)
253+
return tuple(offset), center
254+
255+
def _get_overlap_frac(self, dim, overlap_vol_frac):
256+
beta = math.pow(1.0 - overlap_vol_frac, 1.0 / dim)
257+
return 1.0 - 1.0 / (2.0 - beta)
258+
259+
def _offset(self, x):
260+
return np.minimum(self._n_intervals - 1, np.floor(self._gamma_n(x)))
261+
262+
def _phi(self, x):
263+
offset = self._offset(x)
264+
return self._gamma_n_inv(0.5 + offset)
265+
266+
def _gamma_n(self, x):
267+
return self._n_intervals * (x - self._min) / self._delta
268+
269+
def _gamma_n_inv(self, x):
270+
return self._min + self._delta * x / self._n_intervals
271+
272+
def _get_bounds(self, X):
273+
if (X is None) or len(X) == 0:
274+
return
275+
_min, _max = X[0], X[0]
276+
eps = np.finfo(np.float64).eps
277+
_min = np.min(X, axis=0)
278+
_max = np.max(X, axis=0)
279+
_delta = _max - _min
280+
_delta[(_delta >= -eps) & (_delta <= eps)] = self._n_intervals
281+
return _min, _max, _delta
282+
287283
def fit(self, X):
288284
"""
289285
Train internal parameters.
@@ -299,30 +295,30 @@ def fit(self, X):
299295
X = np.asarray(X).reshape(len(X), -1).astype(float)
300296
if self.overlap_frac is None:
301297
dim = 1 if X.ndim == 1 else X.shape[1]
302-
self.__overlap_frac = self._get_overlap_frac(dim, 0.5)
298+
self._overlap_frac = self._get_overlap_frac(dim, 0.5)
303299
else:
304-
self.__overlap_frac = self.overlap_frac
305-
self.__n_intervals = self.n_intervals
306-
if self.__overlap_frac <= 0.0:
300+
self._overlap_frac = self.overlap_frac
301+
self._n_intervals = self.n_intervals
302+
if self._overlap_frac <= 0.0:
307303
raise ValueError(
308304
'The parameter overlap_frac is expected to be '
309305
'> 0.0'
310306
)
311-
if self.__overlap_frac > 0.5:
307+
if self._overlap_frac > 0.5:
312308
warn_user(
313309
'The parameter overlap_frac is expected to be <= 0.5'
314310
)
315-
self.__min, self.__max, self.__delta = self._get_bounds(X)
316-
radius = 1.0 / (2.0 - 2.0 * self.__overlap_frac)
317-
self.__cover = BallCover(
311+
self._min, self._max, self._delta = self._get_bounds(X)
312+
radius = 1.0 / (2.0 - 2.0 * self._overlap_frac)
313+
self._cover = BallCover(
318314
radius,
319315
metric=_Pullback(self._gamma_n, chebyshev()),
320316
kind=self.kind,
321317
leaf_capacity=self.leaf_capacity,
322318
leaf_radius=self.leaf_radius,
323319
pivoting=self.pivoting,
324320
)
325-
self.__cover.fit(X)
321+
self._cover.fit(X)
326322
return self
327323

328324
def search(self, x):
@@ -338,43 +334,68 @@ def search(self, x):
338334
:rtype: list[int]
339335
"""
340336
center = self._phi(x)
341-
return self.__cover.search(center)
342-
343-
def _get_center(self, x):
344-
offset = self._offset(x)
345-
center = self._phi(x)
346-
return tuple(offset), center
337+
return self._cover.search(center)
347338

348-
def _get_overlap_frac(self, dim, overlap_vol_frac):
349-
beta = math.pow(1.0 - overlap_vol_frac, 1.0 / dim)
350-
return 1.0 - 1.0 / (2.0 - beta)
351-
352-
def _offset(self, x):
353-
return np.minimum(self.__n_intervals - 1, np.floor(self._gamma_n(x)))
354339

355-
def _phi(self, x):
356-
offset = self._offset(x)
357-
return self._gamma_n_inv(0.5 + offset)
340+
class ProximityCubicalCover(BaseCubicalCover, Proximity):
341+
"""
342+
Cover algorithm based on the `cubical proximity function`, which covers
343+
data with open hypercubes of uniform size and overlap. The cubical cover is
344+
obtained by selecting a subset of all the hypercubes that intersect the
345+
dataset using proximity net (see :class:`tdamapper.core.Proximity`).
346+
For an open cover containing all the hypercubes intersecting the dataset
347+
use :class:`tdamapper.core.StandardCubicalCover`.
358348
359-
def _gamma_n(self, x):
360-
return self.__n_intervals * (x - self.__min) / self.__delta
349+
A hypercube is a multidimensional generalization of a square or a cube.
350+
The size and overlap of the hypercubes are determined by the number of
351+
intervals and the overlap fraction parameters. This class maps each point
352+
to the hypercube with the nearest center.
361353
362-
def _gamma_n_inv(self, x):
363-
return self.__min + self.__delta * x / self.__n_intervals
354+
:param n_intervals: The number of intervals to use for each dimension.
355+
Must be positive and less than or equal to the length of the dataset.
356+
Defaults to 1.
357+
:type n_intervals: int
358+
:param overlap_frac: The fraction of overlap between adjacent intervals on
359+
each dimension, must be in the range (0.0, 0.5]. If not specified, the
360+
overlap_frac is computed such that the volume of the overlap within
361+
each hypercube is half the total volume. Defaults to None.
362+
:type overlap_frac: float
363+
:param kind: Specifies whether to use a flat or a hierarchical vantage
364+
point tree. Acceptable values are 'flat' or 'hierarchical'. Defaults to
365+
'flat'.
366+
:type kind: str
367+
:param leaf_capacity: The maximum number of points in a leaf node of the
368+
vantage point tree. Must be a positive value. Defaults to 1.
369+
:type leaf_capacity: int
370+
:param leaf_radius: The radius of the leaf nodes. If not specified, it
371+
defaults to the value of `radius`. Must be a positive value. Defaults
372+
to None.
373+
:type leaf_radius: float, optional
374+
:param pivoting: The method used for pivoting in the vantage point tree.
375+
Acceptable values are None, 'random', or 'furthest'. Defaults to None.
376+
:type pivoting: str or callable, optional
377+
"""
364378

365-
def _get_bounds(self, X):
366-
if (X is None) or len(X) == 0:
367-
return
368-
_min, _max = X[0], X[0]
369-
eps = np.finfo(np.float64).eps
370-
_min = np.min(X, axis=0)
371-
_max = np.max(X, axis=0)
372-
_delta = _max - _min
373-
_delta[(_delta >= -eps) & (_delta <= eps)] = self.__n_intervals
374-
return _min, _max, _delta
379+
def __init__(
380+
self,
381+
n_intervals=1,
382+
overlap_frac=None,
383+
kind='flat',
384+
leaf_capacity=1,
385+
leaf_radius=None,
386+
pivoting=None,
387+
):
388+
super().__init__(
389+
n_intervals=n_intervals,
390+
overlap_frac=overlap_frac,
391+
kind=kind,
392+
leaf_capacity=leaf_capacity,
393+
leaf_radius=leaf_radius,
394+
pivoting=pivoting,
395+
)
375396

376397

377-
class StandardCubicalCover(ProximityCubicalCover):
398+
class StandardCubicalCover(BaseCubicalCover, Cover):
378399
"""
379400
Cover algorithm based on the standard open cover, which covers data with
380401
open hypercubes of uniform size and overlap. The standard cover is
@@ -521,7 +542,7 @@ def __init__(
521542
self.leaf_radius = leaf_radius
522543
self.pivoting = pivoting
523544

524-
def __get_cubical_cover(self):
545+
def _get_cubical_cover(self):
525546
params = dict(
526547
n_intervals=self.n_intervals,
527548
overlap_frac=self.overlap_frac,
@@ -552,8 +573,8 @@ def fit(self, X):
552573
:return: The object itself.
553574
:rtype: self
554575
"""
555-
self.__cubical_cover = self.__get_cubical_cover()
556-
self.__cubical_cover.fit(X)
576+
self._cubical_cover = self._get_cubical_cover()
577+
self._cubical_cover.fit(X)
557578
return self
558579

559580
def search(self, x):
@@ -568,7 +589,7 @@ def search(self, x):
568589
:return: The indices of the neighbors contained in the dataset.
569590
:rtype: list[int]
570591
"""
571-
return self.__cubical_cover.search(x)
592+
return self._cubical_cover.search(x)
572593

573594
def apply(self, X):
574595
"""
@@ -582,5 +603,5 @@ def apply(self, X):
582603
:return: A generator of lists of ids.
583604
:rtype: generator of lists of ints
584605
"""
585-
self.__cubical_cover = self.__get_cubical_cover()
586-
return self.__cubical_cover.apply(X)
606+
self._cubical_cover = self._get_cubical_cover()
607+
return self._cubical_cover.apply(X)

tests/ball_tree.py

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,39 @@
33

44
class SkBallTree:
55

6-
def __init__(self, X, metric='euclidean', leaf_capacity=1, leaf_radius=0.0, pivoting=None, **kwargs):
6+
def __init__(
7+
self,
8+
X,
9+
metric='euclidean',
10+
leaf_capacity=1,
11+
leaf_radius=0.0,
12+
pivoting=None,
13+
**kwargs,
14+
):
715
self.__dataset = X
8-
self.__ball_tree = BallTree(X, leaf_size=leaf_capacity, metric=metric, **kwargs)
16+
self.__ball_tree = BallTree(
17+
X,
18+
leaf_size=leaf_capacity,
19+
metric=metric,
20+
**kwargs,
21+
)
922

1023
def ball_search(self, point, eps, inclusive=True):
11-
ids = self.__ball_tree.query_radius([point], eps, return_distance=False, count_only=False, sort_results=False)
24+
ids = self.__ball_tree.query_radius(
25+
[point],
26+
eps,
27+
return_distance=False,
28+
count_only=False,
29+
sort_results=False,
30+
)
1231
return [self.__dataset[i] for i in ids[0]]
1332

1433
def knn_search(self, point, k):
15-
ids = self.__ball_tree.query([point], k=k, return_distance=False, dualtree=False, breadth_first=False)
34+
ids = self.__ball_tree.query(
35+
[point],
36+
k=k,
37+
return_distance=False,
38+
dualtree=False,
39+
breadth_first=False,
40+
)
1641
return [self.__dataset[i] for i in ids[0]]

0 commit comments

Comments
 (0)