Skip to content

Commit 6374e9e

Browse files
authored
Merge pull request #170 from lucasimi/develop
Added new learn submodule
2 parents 31b2a5b + 15174b4 commit 6374e9e

File tree

5 files changed

+264
-90
lines changed

5 files changed

+264
-90
lines changed

docs/source/apiref.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,14 @@ tdamapper.cover
1717
:undoc-members:
1818
:show-inheritance:
1919

20+
tdamapper.learn
21+
---------------
22+
23+
.. automodule:: tdamapper.learn
24+
:members:
25+
:undoc-members:
26+
:show-inheritance:
27+
2028
tdamapper.clustering
2129
--------------------
2230

src/tdamapper/clustering.py

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -38,31 +38,7 @@ def __init__(self, clustering=None, verbose=True):
3838
super().__init__(clustering, verbose)
3939

4040

41-
class MapperClustering(EstimatorMixin, ParamsMixin):
42-
"""
43-
A clustering algorithm based on the Mapper graph.
44-
45-
The Mapper algorithm constructs a graph from a dataset, where each node
46-
represents a cluster of points and each edge represents an overlap between
47-
clusters. Each point in the dataset belongs to one or more nodes in the
48-
graph. These nodes are therefore all connected and share the same connected
49-
component in the Mapper graph. This class builds clusters of points
50-
according to their connected component in the Mapper graph.
51-
52-
This class does not compute the Mapper graph but calls the function
53-
:func:`tdamapper.core.mapper_connected_components`, which is faster.
54-
55-
:param cover: A cover algorithm.
56-
:type cover: A class compatible with :class:`tdamapper.core.Cover`
57-
:param clustering: The clustering algorithm to apply to each subset of the
58-
dataset.
59-
:type clustering: A class compatible with scikit-learn estimators from
60-
:mod:`sklearn.cluster`
61-
:param n_jobs: The maximum number of parallel clustering jobs. This
62-
parameter is passed to the constructor of :class:`joblib.Parallel`.
63-
Defaults to 1.
64-
:type n_jobs: int
65-
"""
41+
class _MapperClustering(EstimatorMixin, ParamsMixin):
6642

6743
def __init__(self, cover=None, clustering=None, n_jobs=1):
6844
self.cover = cover
@@ -89,3 +65,21 @@ def fit(self, X, y=None):
8965
self.labels_ = [itm_lbls[i] for i, _ in enumerate(X)]
9066
self._set_n_features_in(X)
9167
return self
68+
69+
70+
class MapperClustering(_MapperClustering):
71+
"""
72+
**DEPRECATED**: This class is deprecated and will be removed in a future
73+
release. Use :class:`tdamapper.learn.MapperClustering`.
74+
"""
75+
76+
def __init__(self, cover=None, clustering=None, n_jobs=1):
77+
warn_deprecated(
78+
MapperClustering.__qualname__,
79+
'tdamapper.learn.MapperClustering',
80+
)
81+
super().__init__(
82+
cover=cover,
83+
clustering=clustering,
84+
n_jobs=n_jobs,
85+
)

src/tdamapper/core.py

Lines changed: 35 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,16 @@
3333
from joblib import Parallel, delayed
3434

3535
from tdamapper.utils.unionfind import UnionFind
36-
from tdamapper._common import ParamsMixin, EstimatorMixin, clone
36+
from tdamapper._common import (
37+
clone,
38+
warn_deprecated,
39+
ParamsMixin,
40+
EstimatorMixin,
41+
)
3742

3843

3944
ATTR_IDS = 'ids'
45+
4046
ATTR_SIZE = 'size'
4147

4248

@@ -364,45 +370,7 @@ def apply(self, X):
364370
yield list(range(0, len(X)))
365371

366372

367-
class MapperAlgorithm(EstimatorMixin, ParamsMixin):
368-
"""
369-
A class for creating and analyzing Mapper graphs.
370-
371-
This class provides two methods :func:`fit` and :func:`fit_transform`. Once
372-
fitted, the Mapper graph is stored in the attribute `graph_` as a
373-
:class:`networkx.Graph` object.
374-
375-
This class adopts the same interface as scikit-learn's estimators for ease
376-
and consistency of use. However, it's important to note that this is not a
377-
proper scikit-learn estimator as it does not validate the input in the same
378-
way as a scikit-learn estimator is required to do. This class can work
379-
with datasets whose elements are arbitrary objects when feasible for the
380-
supplied parameters.
381-
382-
:param cover: A cover algorithm. If no cover is specified,
383-
:class:`tdamapper.core.TrivialCover` is used, which produces a single
384-
open cover containing the whole dataset. Defaults to None.
385-
:type cover: A class compatible with :class:`tdamapper.core.Cover`
386-
:param clustering: The clustering algorithm to apply to each subset of the
387-
dataset. If no clustering is specified,
388-
:class:`tdamapper.core.TrivialClustering` is used, which produces a
389-
single cluster for each subset. Defaults to None.
390-
:type clustering: An estimator compatible with scikit-learn's clustering
391-
interface, typically from :mod:`sklearn.cluster`.
392-
:param failsafe: A flag that is used to prevent failures. If True, the
393-
clustering object is wrapped by
394-
:class:`tdamapper.core.FailSafeClustering`. Defaults to True.
395-
:type failsafe: bool, optional
396-
:param verbose: A flag that is used for logging, supplied to
397-
:class:`tdamapper.core.FailSafeClustering`. If True, clustering
398-
failures are logged. Set to False to suppress these messages. Defaults
399-
to True.
400-
:type verbose: bool, optional
401-
:param n_jobs: The maximum number of parallel clustering jobs. This
402-
parameter is passed to the constructor of :class:`joblib.Parallel`.
403-
Defaults to 1.
404-
:type n_jobs: int
405-
"""
373+
class _MapperAlgorithm(EstimatorMixin, ParamsMixin):
406374

407375
def __init__(
408376
self,
@@ -419,18 +387,6 @@ def __init__(
419387
self.n_jobs = n_jobs
420388

421389
def fit(self, X, y=None):
422-
"""
423-
Create the Mapper graph and store it for later use.
424-
425-
This method stores the result of :func:`tdamapper.core.mapper_graph` in
426-
the attribute `graph_` and returns a reference to the calling object.
427-
428-
:param X: A dataset of n points.
429-
:type X: array-like of shape (n, m) or list-like of length n
430-
:param y: Lens values for the n points of the dataset.
431-
:type y: array-like of shape (n, k) or list-like of length n
432-
:return: The object itself.
433-
"""
434390
X, y = self._validate_X_y(X, y)
435391
self.__cover = TrivialCover() if self.cover is None \
436392
else self.cover
@@ -458,23 +414,37 @@ def fit(self, X, y=None):
458414
return self
459415

460416
def fit_transform(self, X, y):
461-
"""
462-
Create the Mapper graph.
463-
464-
This method is equivalent to calling
465-
:func:`tdamapper.core.mapper_graph`.
466-
467-
:param X: A dataset of n points.
468-
:type X: array-like of shape (n, m) or list-like of length n
469-
:param y: Lens values for the n points of the dataset.
470-
:type y: array-like of shape (n, k) or list-like of length n
471-
:return: The Mapper graph.
472-
:rtype: :class:`networkx.Graph`
473-
"""
474417
self.fit(X, y)
475418
return self.graph_
476419

477420

421+
class MapperAlgorithm(_MapperAlgorithm):
422+
"""
423+
**DEPRECATED**: This class is deprecated and will be removed in a future
424+
release. Use :class:`tdamapper.learn.MapperAlgorithm`.
425+
"""
426+
427+
def __init__(
428+
self,
429+
cover=None,
430+
clustering=None,
431+
failsafe=True,
432+
verbose=True,
433+
n_jobs=1,
434+
):
435+
warn_deprecated(
436+
MapperAlgorithm.__qualname__,
437+
'tdamapper.learn.MapperAlgorithm',
438+
)
439+
super().__init__(
440+
cover=cover,
441+
clustering=clustering,
442+
failsafe=failsafe,
443+
verbose=verbose,
444+
n_jobs=n_jobs,
445+
)
446+
447+
478448
class FailSafeClustering(ParamsMixin):
479449
"""
480450
A delegating clustering algorithm that prevents failure.

src/tdamapper/learn.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
"""
2+
This module provides classes based on the Mapper algorithm, a technique from
3+
topological data analysis (TDA) for extracting insights from complex data.
4+
Each class is designed to be compatible with scikit-learn's estimator APIs,
5+
ensuring seamless integration with existing machine learning pipelines.
6+
7+
Users can leverage these classes to explore high-dimensional data, visualize
8+
relationships, and uncover meaningful structures in a manner that aligns with
9+
scikit-learn's conventions for estimators.
10+
"""
11+
12+
from tdamapper.core import _MapperAlgorithm
13+
from tdamapper.clustering import _MapperClustering
14+
15+
16+
class MapperClustering(_MapperClustering):
17+
"""
18+
A clustering algorithm based on the Mapper graph.
19+
20+
The Mapper algorithm constructs a graph from a dataset, where each node
21+
represents a cluster of points and each edge represents an overlap between
22+
clusters. Each point in the dataset belongs to one or more nodes in the
23+
graph. These nodes are therefore all connected and share the same connected
24+
component in the Mapper graph. This class builds clusters of points
25+
according to their connected component in the Mapper graph.
26+
27+
This class does not compute the Mapper graph but calls the function
28+
:func:`tdamapper.core.mapper_connected_components`, which is faster.
29+
30+
:param cover: A cover algorithm.
31+
:type cover: A class compatible with :class:`tdamapper.core.Cover`
32+
:param clustering: The clustering algorithm to apply to each subset of the
33+
dataset.
34+
:type clustering: A class compatible with scikit-learn estimators from
35+
:mod:`sklearn.cluster`
36+
:param n_jobs: The maximum number of parallel clustering jobs. This
37+
parameter is passed to the constructor of :class:`joblib.Parallel`.
38+
Defaults to 1.
39+
:type n_jobs: int
40+
"""
41+
42+
def __init__(
43+
self,
44+
cover=None,
45+
clustering=None,
46+
n_jobs=1,
47+
):
48+
super().__init__(
49+
cover=cover,
50+
clustering=clustering,
51+
n_jobs=n_jobs,
52+
)
53+
54+
def fit(self, X, y=None):
55+
"""
56+
Fit the clustering algorithm to the data.
57+
58+
:param X: A dataset of n points.
59+
:type X: array-like of shape (n, m) or list-like of length n
60+
:param y: Ignored.
61+
:return: self
62+
"""
63+
return super().fit(X, y)
64+
65+
66+
class MapperAlgorithm(_MapperAlgorithm):
67+
"""
68+
A class for creating and analyzing Mapper graphs.
69+
70+
This class provides two methods :func:`fit` and :func:`fit_transform`. Once
71+
fitted, the Mapper graph is stored in the attribute `graph_` as a
72+
:class:`networkx.Graph` object.
73+
74+
This class adopts the same interface as scikit-learn's estimators for ease
75+
and consistency of use. However, it's important to note that this is not a
76+
proper scikit-learn estimator as it does not validate the input in the same
77+
way as a scikit-learn estimator is required to do. This class can work
78+
with datasets whose elements are arbitrary objects when feasible for the
79+
supplied parameters.
80+
81+
:param cover: A cover algorithm. If no cover is specified,
82+
:class:`tdamapper.core.TrivialCover` is used, which produces a single
83+
open cover containing the whole dataset. Defaults to None.
84+
:type cover: A class compatible with :class:`tdamapper.core.Cover`
85+
:param clustering: The clustering algorithm to apply to each subset of the
86+
dataset. If no clustering is specified,
87+
:class:`tdamapper.core.TrivialClustering` is used, which produces a
88+
single cluster for each subset. Defaults to None.
89+
:type clustering: An estimator compatible with scikit-learn's clustering
90+
interface, typically from :mod:`sklearn.cluster`.
91+
:param failsafe: A flag that is used to prevent failures. If True, the
92+
clustering object is wrapped by
93+
:class:`tdamapper.core.FailSafeClustering`. Defaults to True.
94+
:type failsafe: bool, optional
95+
:param verbose: A flag that is used for logging, supplied to
96+
:class:`tdamapper.core.FailSafeClustering`. If True, clustering
97+
failures are logged. Set to False to suppress these messages. Defaults
98+
to True.
99+
:type verbose: bool, optional
100+
:param n_jobs: The maximum number of parallel clustering jobs. This
101+
parameter is passed to the constructor of :class:`joblib.Parallel`.
102+
Defaults to 1.
103+
:type n_jobs: int
104+
"""
105+
106+
def __init__(
107+
self,
108+
cover=None,
109+
clustering=None,
110+
failsafe=True,
111+
verbose=True,
112+
n_jobs=1,
113+
):
114+
super().__init__(
115+
cover=cover,
116+
clustering=clustering,
117+
failsafe=failsafe,
118+
verbose=verbose,
119+
n_jobs=n_jobs
120+
)
121+
122+
def fit(self, X, y=None):
123+
"""
124+
Create the Mapper graph and store it for later use.
125+
126+
This method stores the result of :func:`tdamapper.core.mapper_graph` in
127+
the attribute `graph_` and returns a reference to the calling object.
128+
129+
:param X: A dataset of n points.
130+
:type X: array-like of shape (n, m) or list-like of length n
131+
:param y: Lens values for the n points of the dataset.
132+
:type y: array-like of shape (n, k) or list-like of length n
133+
:return: The object itself.
134+
"""
135+
return super().fit(X, y)
136+
137+
def fit_transform(self, X, y):
138+
"""
139+
Create the Mapper graph.
140+
141+
This method is equivalent to calling
142+
:func:`tdamapper.core.mapper_graph`.
143+
144+
:param X: A dataset of n points.
145+
:type X: array-like of shape (n, m) or list-like of length n
146+
:param y: Lens values for the n points of the dataset.
147+
:type y: array-like of shape (n, k) or list-like of length n
148+
:return: The Mapper graph.
149+
:rtype: :class:`networkx.Graph`
150+
"""
151+
return super().fit_transform(X, y)

0 commit comments

Comments
 (0)