Skip to content

Commit 4d89406

Browse files
authored
Merge pull request #50 from lucasimi/feature/alignement
Feature/alignement
2 parents b01d22c + 64ffafa commit 4d89406

File tree

6 files changed

+166
-106
lines changed

6 files changed

+166
-106
lines changed

src/tdamapper/clustering.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1+
'''A module containing the logic related to clustering for the Mapper algorithm.'''
12
import logging
23

3-
import numpy as np
4-
54
from tdamapper.core import mapper_connected_components
65
from tdamapper.cover import TrivialCover
76

@@ -14,11 +13,10 @@
1413
level = logging.INFO)
1514

1615

17-
def euclidean(x, y):
18-
return np.linalg.norm(x - y)
19-
20-
2116
class TrivialClustering:
17+
'''
18+
A clustering algorithm that returns a single cluster.
19+
'''
2220

2321
def __init__(self):
2422
self.labels_ = None
@@ -28,7 +26,17 @@ def fit(self, X, y=None):
2826
return self
2927

3028

31-
class PermissiveClustering:
29+
class FailSafeClustering:
30+
'''
31+
A delegating clustering algorithm that prevents failure.
32+
When clustering fails, instead of throwing an exception,
33+
a single cluster, containing all points, is returned.
34+
35+
:param clustering: A clustering algorithm to delegate to.
36+
:type clustering: Anything compatible with a `sklearn.cluster` class.
37+
:param verbose: Set to `True` to log exceptions.
38+
:type verbose: `bool`
39+
'''
3240

3341
def __init__(self, clustering, verbose=True):
3442
self.__clustering = clustering
@@ -47,6 +55,19 @@ def fit(self, X, y=None):
4755

4856

4957
class MapperClustering:
58+
'''
59+
A clustering algorithm based on the Mapper graph.
60+
The Mapper algorithm returns a graph where each point may be contained
61+
in multiple nodes. In this case all those nodes are connected in the Mapper graph,
62+
therefore they share the same connected component. For this reason the notion of
63+
connected component is well-defined for any point of the dataset. This class
64+
clusters points according to their connected component in the Mapper graph.
65+
66+
:param cover: A cover algorithm.
67+
:type cover: Anything compatible with a `tdamapper.cover` class.
68+
:param clustering: A clustering algorithm.
69+
:type clustering: Anything compatible with a `sklearn.cluster` class.
70+
'''
5071

5172
def __init__(self, cover=None, clustering=None):
5273
self.cover = cover

src/tdamapper/core.py

Lines changed: 73 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
'''A module containing the main implementation logic for the Mapper algorithm.'''
2+
13
import networkx as nx
24

35
from tdamapper.utils.unionfind import UnionFind
@@ -11,18 +13,18 @@ def mapper_labels(X, y, cover, clustering):
1113
'''
1214
Computes the open cover, then performs local clustering on each open set from the cover.
1315
14-
:param X: A dataset
15-
:type X: numpy.ndarray or list-like
16-
:param y: lens values
17-
:type y: numpy.ndarray or list-like
18-
:param cover: A cover algorithm
19-
:type cover: A class from tdamapper.cover
20-
:param clustering: A clustering algorithm
21-
:type clustering: A class from tdamapper.clustering or a class from sklearn.cluster
16+
:param X: A dataset.
17+
:type X: `numpy.ndarray` or list-like.
18+
:param y: Lens values.
19+
:type y: `numpy.ndarray` or list-like.
20+
:param cover: A cover algorithm.
21+
:type cover: A class from `tdamapper.cover`.
22+
:param clustering: A clustering algorithm.
23+
:type clustering: A class from `tdamapper.clustering` or a class from `sklearn.cluster`.
2224
:return: A list where each item is a sorted list of ints with no duplicate.
23-
The list at position i contains the cluster labels to which the point at position i in X
24-
belongs to. If i < j, the labels at position i are strictly less then those at position j.
25-
:rtype: list[list[int]]
25+
The list at position `i` contains the cluster labels to which the point at position `i` in `X`
26+
belongs. If `i < j`, the labels at position `i` are strictly less than those at position `j`.
27+
:rtype: `list[list[int]]`.
2628
'''
2729
itm_lbls = [[] for _ in X]
2830
max_lbl = 0
@@ -39,12 +41,30 @@ def mapper_labels(X, y, cover, clustering):
3941

4042

4143
def mapper_connected_components(X, y, cover, clustering):
44+
'''
45+
Computes the connected components of the Mapper graph.
46+
The algorithm computes the connected components using a union-find data structure.
47+
This approach should be faster than computing the Mapper graph by first calling
48+
`tdamapper.core.mapper_graph` and then calling `networkx.connected_components` on it.
49+
50+
:param X: A dataset.
51+
:type X: `numpy.ndarray` or list-like.
52+
:param y: Lens values.
53+
:type y: `numpy.ndarray` or list-like.
54+
:param cover: A cover algorithm.
55+
:type cover: A class from `tdamapper.cover`.
56+
:param clustering: A clustering algorithm.
57+
:type clustering: A class from `tdamapper.clustering` or a class from `sklearn.cluster`.
58+
:return: A list of labels, where the value at position `i` identifies
59+
the connected component of the point `X[i]`.
60+
:rtype: `list[int]`.
61+
'''
4262
itm_lbls = mapper_labels(X, y, cover, clustering)
4363
label_values = set()
4464
for lbls in itm_lbls:
4565
label_values.update(lbls)
4666
uf = UnionFind(label_values)
47-
labels = []
67+
labels = [-1 for _ in X]
4868
for lbls in itm_lbls:
4969
len_lbls = len(lbls)
5070
# noise points
@@ -61,18 +81,18 @@ def mapper_connected_components(X, y, cover, clustering):
6181

6282
def mapper_graph(X, y, cover, clustering):
6383
'''
64-
Computes the Mapper graph
65-
66-
:param X: A dataset
67-
:type X: numpy.ndarray or list-like
68-
:param y: Lens values
69-
:type y: numpy.ndarray or list-like
70-
:param cover: A cover algorithm
71-
:type cover: A class from tdamapper.cover
72-
:param clustering: A clustering algorithm
73-
:type clustering: A class from tdamapper.clustering or a class from sklearn.cluster
74-
:return: The Mapper graph
75-
:rtype: networkx.Graph
84+
Computes the Mapper graph.
85+
86+
:param X: A dataset.
87+
:type X: `numpy.ndarray` or list-like.
88+
:param y: Lens values.
89+
:type y: `numpy.ndarray` or list-like.
90+
:param cover: A cover algorithm.
91+
:type cover: A class from `tdamapper.cover`.
92+
:param clustering: A clustering algorithm.
93+
:type clustering: A class from `tdamapper.clustering` or a class from `sklearn.cluster`.
94+
:return: The Mapper graph.
95+
:rtype: `networkx.Graph`.
7696
'''
7797
itm_lbls = mapper_labels(X, y, cover, clustering)
7898
graph = nx.Graph()
@@ -95,6 +115,18 @@ def mapper_graph(X, y, cover, clustering):
95115

96116

97117
def aggregate_graph(y, graph, agg):
118+
'''
119+
Computes an aggregation on the nodes of a graph.
120+
121+
:param y: A dataset.
122+
:type y: `numpy.ndarray` or list-like.
123+
:param graph: A graph.
124+
:type graph: `networkx.Graph`.
125+
:param agg: An aggregation function.
126+
:type agg: Callable.
127+
:return: A dict of values, where each node is mapped to its aggregation.
128+
:rtype: `dict`.
129+
'''
98130
agg_values = {}
99131
nodes = graph.nodes()
100132
for node_id in nodes:
@@ -108,10 +140,10 @@ class MapperAlgorithm:
108140
'''
109141
Main class for performing the Mapper Algorithm.
110142
111-
:param cover: A cover algorithm
112-
:type cover: A class from tdamapper.cover
113-
:param clustering: A clustering algorithm
114-
:type clustering: A class from tdamapper.clustering or a class from sklearn.cluster
143+
:param cover: A cover algorithm.
144+
:type cover: A class from `tdamapper.cover`.
145+
:param clustering: A clustering algorithm.
146+
:type clustering: A class from `tdamapper.clustering` or a class from `sklearn.cluster`.
115147
'''
116148

117149
def __init__(self, cover, clustering):
@@ -123,24 +155,24 @@ def fit(self, X, y=None):
123155
'''
124156
Computes the Mapper graph.
125157
126-
:param X: A dataset
127-
:type X: numpy.ndarray or list-like
128-
:param y: Lens values
129-
:type y: numpy.ndarray or list-like
130-
:return: self
158+
:param X: A dataset.
159+
:type X: `numpy.ndarray` or list-like.
160+
:param y: Lens values.
161+
:type y: `numpy.ndarray` or list-like.
162+
:return: `self`.
131163
'''
132164
self.graph_ = self.fit_transform(X, y)
133165
return self
134166

135167
def fit_transform(self, X, y):
136168
'''
137-
Computes the Mapper Graph
138-
139-
:param X: A dataset
140-
:type X: numpy.ndarray or list-like
141-
:param y: Lens values
142-
:type y: numpy.ndarray or list-like
143-
:return: The Mapper Graph
144-
:rtype: networkx.Graph
169+
Computes the Mapper Graph.
170+
171+
:param X: A dataset.
172+
:type X: `numpy.ndarray` or list-like.
173+
:param y: Lens values.
174+
:type y: `numpy.ndarray` or list-like.
175+
:return: The Mapper graph.
176+
:rtype: `networkx.Graph`
145177
'''
146178
return mapper_graph(X, y, self.__cover, self.__clustering)

src/tdamapper/cover.py

Lines changed: 39 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,28 @@
1+
'''A module containing the logic for building open covers for the Mapper algorithm.'''
12
import numpy as np
23

34
from tdamapper.utils.vptree_flat import VPTree
45

56

6-
class __ProximityNetCover:
7+
class ProximityNetCover:
8+
'''
9+
This class serves as a blueprint for proximity-based cover algorithms
10+
and implements proximity-net in the `ProximityNetCover.apply` method.
11+
Subclasses are expected to override the methods `ProximityNetCover.fit`
12+
and `ProximityNetCover.search`.
13+
'''
714

815
def __init__(self):
916
pass
1017

1118
def apply(self, X):
1219
'''
13-
Compute the proximity-net for a given open cover.
20+
Computes the proximity-net for a given open cover.
21+
Returns a generator where each item is a subset of ids
22+
of points from `X`.
1423
15-
:param X: A dataset
16-
:type X: numpy.ndarray or list-like
17-
:param proximity: A proximity function
18-
:type proximity: A class from tdamapper.proximity
24+
:param X: A dataset.
25+
:type X: `numpy.ndarray` or list-like.
1926
'''
2027
covered_ids = set()
2128
self.fit(X)
@@ -33,16 +40,16 @@ def search(self, x):
3340
return []
3441

3542

36-
class BallCover(__ProximityNetCover):
43+
class BallCover(ProximityNetCover):
3744
'''
38-
Create an open cover made of overlapping open balls of fixed radius.
45+
Creates an open cover made of overlapping open balls of fixed radius.
3946
This class implements the Ball Proximity function: after calling fit on X,
40-
the search method returns all the points within a ball centered in the target point.
47+
the `BallCover.search` method returns all the points within a ball centered at the target point.
4148
4249
:param radius: The radius of open balls
43-
:type radius: float
44-
:param metric: The metric used to define open balls
45-
:type metric: function
50+
:type radius: float.
51+
:param metric: The metric used to define open balls.
52+
:type metric: Callable.
4653
'''
4754

4855
def __init__(self, radius, metric):
@@ -58,22 +65,22 @@ def fit(self, X):
5865
return self
5966

6067
def search(self, x):
61-
if self.__vptree:
62-
neighs = self.__vptree.ball_search((-1, x), self.__radius)
63-
return [x for (x, _) in neighs]
64-
return []
68+
if self.__vptree is None:
69+
return []
70+
neighs = self.__vptree.ball_search((-1, x), self.__radius)
71+
return [x for (x, _) in neighs]
6572

6673

67-
class KNNCover(__ProximityNetCover):
74+
class KNNCover(ProximityNetCover):
6875
'''
69-
Create an open cover where each open set containes a fixed number of neighbors, using KNN.
76+
Creates an open cover where each open set contains a fixed number of neighbors, using KNN.
7077
This class implements the KNN Proximity function: after calling fit on X,
71-
the search method returns the k nearest points to the target point.
78+
the `KNNCover.search` method returns the k nearest points to the target point.
7279
73-
:param neighbors: The number of neighbors
74-
:type neighbors: int
75-
:param metric: The metric used to search neighbors
76-
:type metric: function
80+
:param neighbors: The number of neighbors.
81+
:type neighbors: int.
82+
:param metric: The metric used to search neighbors.
83+
:type metric: Callable.
7784
'''
7885

7986
def __init__(self, neighbors, metric):
@@ -94,18 +101,18 @@ def search(self, x):
94101
return [x for (x, _) in neighs]
95102

96103

97-
class CubicalCover(__ProximityNetCover):
104+
class CubicalCover(ProximityNetCover):
98105
'''
99-
Create an open cover of hypercubes of evenly-sized sides and overlap.
106+
Creates an open cover of hypercubes of evenly-sized sides and overlap.
100107
This class implements the Cubical Proximity function: after calling fit on X,
101-
the search method returns the hypercube whose center is nearest to
108+
the `CubicalCover.search` method returns the hypercube whose center is nearest to
102109
the target point. Each hypercube is the product of 1-dimensional intervals
103110
with the same length and overlap.
104111
105-
:param n_intervals: The number of intervals on each dimension
106-
:type n_intervals: int
107-
:param overlap_frac: The overlap fracion
108-
:type overlap_frac: float in (0.0, 1.0)
112+
:param n_intervals: The number of intervals on each dimension.
113+
:type n_intervals: int.
114+
:param overlap_frac: The overlap fraction.
115+
:type overlap_frac: float in (0.0, 1.0).
109116
'''
110117

111118
def __init__(self, n_intervals, overlap_frac):
@@ -159,9 +166,9 @@ def search(self, x):
159166
return self.__ball_proximity.search(self._phi(x))
160167

161168

162-
class TrivialCover(__ProximityNetCover):
169+
class TrivialCover(ProximityNetCover):
163170
'''
164-
Create an open cover made of a single open set that contains the whole dataset.
171+
Creates an open cover made of a single open set that contains the whole dataset.
165172
'''
166173

167174
def fit(self, X):

0 commit comments

Comments
 (0)