Skip to content

Commit 165e052

Browse files
authored
Merge pull request #36 from lucasimi/feature/remove-grid-cover
Feature/remove grid cover
2 parents 3d2ca3a + c4fee5c commit 165e052

File tree

7 files changed

+62
-122
lines changed

7 files changed

+62
-122
lines changed

README.md

Lines changed: 3 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -39,56 +39,13 @@ pip install git+https://github.com/lucasimi/tda-mapper-python.git@develop
3939

4040
## A worked out example
4141

42-
In order to show how to use this package, we perform some analysis on the well-known dataset of handwritten digits (more info [here](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html)), consisting of fewer than 2000 8x8 pictures represented as arrays of 64 elements.
43-
44-
```python
45-
import numpy as np
46-
47-
from sklearn.datasets import load_digits
48-
from sklearn.cluster import AgglomerativeClustering
49-
from sklearn.decomposition import PCA
50-
51-
from tdamapper.core import *
52-
from tdamapper.cover import *
53-
from tdamapper.clustering import *
54-
from tdamapper.plot import *
55-
56-
import matplotlib
57-
58-
digits = load_digits()
59-
X, y = [np.array(x) for x in digits.data], digits.target
60-
lens = PCA(2).fit_transform(X)
61-
62-
mapper_algo = MapperAlgorithm(
63-
cover=GridCover(n_intervals=10, overlap_frac=0.65),
64-
clustering=AgglomerativeClustering(10),
65-
verbose=True,
66-
n_jobs=8)
67-
mapper_graph = mapper_algo.fit_transform(X, lens)
68-
69-
mapper_plot = MapperPlot(X, mapper_graph,
70-
colors=y,
71-
cmap='jet',
72-
agg=np.nanmean,
73-
dim=2,
74-
iterations=400)
75-
fig_mean = mapper_plot.plot(title='digit (mean)', width=600, height=600)
76-
fig_mean.show(config={'scrollZoom': True})
77-
```
42+
[In this file](tests/example.py) you can find a worked-out example that shows how to use this package.
43+
We perform some analysis on the well-known dataset of [handwritten digits](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html), consisting of fewer than 2000 8x8 pictures represented as arrays of 64 elements.
7844

7945
![The mapper graph of the digits dataset, colored according to mean value](resources/digits_mean.png)
8046

8147
It's also possible to obtain a new plot colored according to different values, while keeping the same computed geometry. For example, if we want to visualize how much dispersion we have in each cluster, we could plot colors according to the standard deviation.
8248

83-
```python
84-
fig_std = mapper_plot.with_colors(
85-
colors=y,
86-
cmap='viridis',
87-
agg=np.nanstd,
88-
).plot(title='digit (std)', width=600, height=600)
89-
fig_std.show(config={'scrollZoom': True})
90-
```
91-
9249
![The mapper graph of the digits dataset, colored according to std](resources/digits_std.png)
9350

9451
The mapper graph of the digits dataset shows a few interesting patterns. For example, we can make the following observations:
@@ -107,7 +64,7 @@ The mapper graph of the digits dataset shows a few interesting patterns. For exa
10764
- [x] custom metrics
10865

10966
- [x] Cover algorithms:
110-
- [x] `GridCover`
67+
- [x] `CubicalCover`
11168
- [x] `BallCover`
11269
- [x] `KnnCover`
11370

src/tdamapper/clustering.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import numpy as np
33
from tdamapper.core import build_labels_par, build_connected_components, MapperAlgorithm
44
from tdamapper.utils.unionfind import UnionFind
5-
from tdamapper.cover import TrivialCover, GridCover, BallCover, KNNCover
5+
from tdamapper.cover import TrivialCover, CubicalCover, BallCover, KNNCover
66

77

88
_logger = logging.getLogger(__name__)
@@ -82,7 +82,7 @@ def fit(self, X, y=None):
8282
class MapperGraphClustering:
8383

8484
def __init__(self,
85-
cover='grid',
85+
cover='cubical',
8686
n_intervals=10,
8787
overlap_frac=0.25,
8888
radius=0.5,
@@ -104,8 +104,8 @@ def fit(self, X, y=None):
104104
def __get_cover(self):
105105
if self.cover == 'trivial':
106106
return TrivialCover()
107-
elif self.cover == 'grid':
108-
return GridCover(n_intervals=self.n_intervals, overlap_frac=self.overlap_frac)
107+
elif self.cover == 'cubical':
108+
return CubicalCover(n_intervals=self.n_intervals, overlap_frac=self.overlap_frac)
109109
elif self.cover == 'ball':
110110
return BallCover(radius=self.radius, metric=self.metric)
111111
elif self.cover == 'knn':

src/tdamapper/cover.py

Lines changed: 1 addition & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -64,69 +64,6 @@ def search(self, x):
6464
return [x for (x, _) in neighs]
6565

6666

67-
class GridCover:
68-
69-
def __init__(self, n_intervals, overlap_frac):
70-
self.n_intervals = n_intervals
71-
self.overlap_frac = overlap_frac
72-
73-
def proximity(self):
74-
return GridProximity(self.n_intervals, self.overlap_frac)
75-
76-
77-
class GridProximity:
78-
79-
def __init__(self, n_intervals, overlap_frac):
80-
self.__n_intervals = n_intervals
81-
self.__overlap_frac = overlap_frac
82-
self.__radius = (1.0 + self.__overlap_frac) / 2.0
83-
self.__minimum = None
84-
self.__maximum = None
85-
self.__delta = None
86-
metric = self._pullback(self._gamma_n, self._l_infty)
87-
self.__ball_proximity = BallCover(self.__radius, metric).proximity()
88-
89-
def _l_infty(self, x, y):
90-
return np.max(np.abs(x - y)) # in alternative: np.linalg.norm(x - y, ord=np.inf)
91-
92-
def _gamma_n(self, x):
93-
return self.__n_intervals * (x - self.__minimum) / self.__delta
94-
95-
def _gamma_n_inv(self, x):
96-
return self.__minimum + self.__delta * x / self.__n_intervals
97-
98-
def _rho(self, x):
99-
return x.round()
100-
101-
def _phi(self, x):
102-
return self._gamma_n_inv(self._rho(self._gamma_n(x)))
103-
104-
def _pullback(self, fun, dist):
105-
return lambda x, y: dist(fun(x), fun(y))
106-
107-
def _set_bounds(self, data):
108-
if (data is None) or len(data) == 0:
109-
return
110-
minimum, maximum = data[0], data[0]
111-
eps = np.finfo(np.float64).eps
112-
for w in data:
113-
minimum = np.minimum(minimum, np.array(w))
114-
maximum = np.maximum(maximum, np.array(w))
115-
self.__minimum = np.nan_to_num(minimum, nan=-eps)
116-
self.__maximum = np.nan_to_num(maximum, nan=eps)
117-
delta = self.__maximum - self.__minimum
118-
eps = np.finfo(np.float64).eps
119-
self.__delta = np.maximum(eps, delta)
120-
121-
def fit(self, X):
122-
self._set_bounds(X)
123-
self.__ball_proximity.fit(X)
124-
return
125-
126-
def search(self, x):
127-
return self.__ball_proximity.search(self._phi(x))
128-
129-
13067
class CubicalCover:
13168

13269
def __init__(self, n_intervals, overlap_frac):
@@ -159,7 +96,7 @@ def _gamma_n_inv(self, x):
15996
return self.__minimum + self.__delta * x / self.__n_intervals
16097

16198
def _rho(self, x):
162-
return x.round() + 0.5
99+
return np.floor(x) + 0.5
163100

164101
def _phi(self, x):
165102
return self._gamma_n_inv(self._rho(self._gamma_n(x)))

tests/example.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import numpy as np
2+
3+
from sklearn.datasets import load_digits
4+
from sklearn.cluster import AgglomerativeClustering
5+
from sklearn.decomposition import PCA
6+
7+
from tdamapper.core import MapperAlgorithm
8+
from tdamapper.cover import CubicalCover
9+
from tdamapper.clustering import PermissiveClustering
10+
from tdamapper.plot import MapperPlot
11+
12+
X, y = load_digits(return_X_y=True) # We load a labelled dataset
13+
lens = PCA(2).fit_transform(X) # We compute the lens values
14+
15+
mapper_algo = MapperAlgorithm(
16+
cover=CubicalCover(
17+
n_intervals=10,
18+
overlap_frac=0.65),
19+
clustering=PermissiveClustering( # We prevent clustering failures
20+
clustering=AgglomerativeClustering(10),
21+
verbose=False),
22+
n_jobs=1)
23+
mapper_graph = mapper_algo.fit_transform(X, lens)
24+
25+
mapper_plot = MapperPlot(X, mapper_graph,
26+
colors=y, # We color according to digit values
27+
cmap='jet', # Jet colormap, used for classes
28+
agg=np.nanmean, # We aggregate on graph nodes according to mean
29+
dim=2,
30+
iterations=400)
31+
fig_mean = mapper_plot.plot(title='digit (mean)', width=600, height=600)
32+
#fig_mean.show(config={'scrollZoom': True}) # Uncomment to show the plot
33+
34+
fig_std = mapper_plot.with_colors( # We reuse the graph plot with the same positions
35+
colors=y,
36+
cmap='viridis', # Viridis colormap, used for ranges
37+
agg=np.nanstd, # We aggregate on graph nodes according to std
38+
).plot(title='digit (std)', width=600, height=600)
39+
#fig_std.show(config={'scrollZoom': True}) # Uncomment to show the plot

tests/test_cover.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import unittest
22

33
import numpy as np
4-
from tdamapper.cover import TrivialCover, BallCover, KNNCover, GridCover
4+
from tdamapper.cover import TrivialCover, BallCover, KNNCover, CubicalCover
55
from tdamapper.core import ProximityNet
66

77

@@ -37,10 +37,10 @@ def testKnnCover(self):
3737
charts = list(ProximityNet(cover).proximity_net(data))
3838
self.assertEqual(2, len(charts))
3939

40-
def testGridCover(self):
40+
def testCubicalCover(self):
4141
data = [
4242
np.array([0.0, 1.0]), np.array([1.1, 0.0]),
4343
np.array([0.0, 0.0]), np.array([1.1, 1.0])]
44-
cover = GridCover(2, 0.5)
44+
cover = CubicalCover(2, 0.5)
4545
charts = list(ProximityNet(cover).proximity_net(data))
4646
self.assertEqual(4, len(charts))

tests/test_readme.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import unittest
2+
3+
4+
class TestReadme(unittest.TestCase):
5+
6+
def testRun(self):
7+
import tests.example

tests/test_sklearn.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from sklearn.cluster import KMeans
88

99
from tdamapper.clustering import TrivialClustering, CoverClustering, PermissiveClustering, MapperGraphClustering
10-
from tdamapper.cover import TrivialCover, BallCover, KNNCover, GridCover
10+
from tdamapper.cover import TrivialCover, BallCover, KNNCover, CubicalCover
1111

1212

1313
def euclidean(x, y):
@@ -77,14 +77,14 @@ def get_cover(self):
7777
return BallCover(radius=self.radius, metric=self.metric)
7878

7979

80-
class GridCoverEstimator(CoverClusteringEstimator):
80+
class CubicalCoverEstimator(CoverClusteringEstimator):
8181

8282
def __init__(self, n_intervals=10, overlap_frac=0.25):
8383
self.n_intervals = n_intervals
8484
self.overlap_frac = overlap_frac
8585

8686
def get_cover(self):
87-
return GridCover(n_intervals=self.n_intervals, overlap_frac=self.overlap_frac)
87+
return CubicalCover(n_intervals=self.n_intervals, overlap_frac=self.overlap_frac)
8888

8989

9090
class KNNCoverEstimator(CoverClusteringEstimator):
@@ -114,8 +114,8 @@ def testClustering(self):
114114
def testBall(self):
115115
check_estimator(BallCoverEstimator())
116116

117-
def testGrid(self):
118-
check_estimator(GridCoverEstimator())
117+
def testCubical(self):
118+
check_estimator(CubicalCoverEstimator())
119119

120120
def testKNN(self):
121121
check_estimator(KNNCoverEstimator())

0 commit comments

Comments
 (0)