|
| 1 | +""" |
| 2 | +This module provides classes based on the Mapper algorithm, a technique from |
| 3 | +topological data analysis (TDA) for extracting insights from complex data. |
| 4 | +Each class is designed to be compatible with scikit-learn's estimator APIs, |
| 5 | +ensuring seamless integration with existing machine learning pipelines. |
| 6 | +
|
| 7 | +Users can leverage these classes to explore high-dimensional data, visualize |
| 8 | +relationships, and uncover meaningful structures in a manner that aligns with |
| 9 | +scikit-learn's conventions for estimators. |
| 10 | +""" |
| 11 | + |
| 12 | +from tdamapper.core import _MapperAlgorithm |
| 13 | +from tdamapper.clustering import _MapperClustering |
| 14 | + |
| 15 | + |
class MapperClustering(_MapperClustering):
    """
    Cluster a dataset according to the connected components of its Mapper
    graph.

    In a Mapper graph every node is a cluster of points and every edge marks
    an overlap between two clusters. A point may belong to several nodes;
    those nodes are all connected to each other, so they lie in the same
    connected component. The clusters produced by this class are exactly
    these connected components.

    The Mapper graph itself is never built: the work is delegated to
    :func:`tdamapper.core.mapper_connected_components`, which is faster.

    :param cover: A cover algorithm.
    :type cover: A class compatible with :class:`tdamapper.core.Cover`
    :param clustering: The clustering algorithm applied to each subset of
        the dataset.
    :type clustering: A class compatible with scikit-learn estimators from
        :mod:`sklearn.cluster`
    :param n_jobs: The maximum number of parallel clustering jobs, forwarded
        to the constructor of :class:`joblib.Parallel`. Defaults to 1.
    :type n_jobs: int
    """

    def __init__(self, cover=None, clustering=None, n_jobs=1):
        # The explicit keyword signature is kept (rather than **kwargs) so
        # that the constructor parameters remain introspectable.
        super().__init__(cover=cover, clustering=clustering, n_jobs=n_jobs)

    def fit(self, X, y=None):
        """
        Fit the clustering algorithm to the data.

        The call is forwarded unchanged to the parent implementation.

        :param X: A dataset of n points.
        :type X: array-like of shape (n, m) or list-like of length n
        :param y: Ignored.
        :return: self
        """
        fitted = super().fit(X, y)
        return fitted
| 64 | + |
| 65 | + |
class MapperAlgorithm(_MapperAlgorithm):
    """
    Build and analyze Mapper graphs.

    Two methods are offered: :func:`fit` and :func:`fit_transform`. After
    fitting, the Mapper graph is available in the attribute `graph_` as a
    :class:`networkx.Graph` object.

    The interface mirrors that of scikit-learn's estimators for ease and
    consistency of use. Note, however, that this is not a proper scikit-learn
    estimator, because it does not validate the input the way a scikit-learn
    estimator is required to. As a consequence, this class can work with
    datasets whose elements are arbitrary objects, when feasible for the
    supplied parameters.

    :param cover: A cover algorithm. When omitted,
        :class:`tdamapper.core.TrivialCover` is used, which produces a single
        open cover containing the whole dataset. Defaults to None.
    :type cover: A class compatible with :class:`tdamapper.core.Cover`
    :param clustering: The clustering algorithm applied to each subset of
        the dataset. When omitted,
        :class:`tdamapper.core.TrivialClustering` is used, which produces a
        single cluster for each subset. Defaults to None.
    :type clustering: An estimator compatible with scikit-learn's clustering
        interface, typically from :mod:`sklearn.cluster`.
    :param failsafe: A flag used to prevent failures. If True, the
        clustering object is wrapped by
        :class:`tdamapper.core.FailSafeClustering`. Defaults to True.
    :type failsafe: bool, optional
    :param verbose: A logging flag supplied to
        :class:`tdamapper.core.FailSafeClustering`. If True, clustering
        failures are logged; set to False to suppress these messages.
        Defaults to True.
    :type verbose: bool, optional
    :param n_jobs: The maximum number of parallel clustering jobs, forwarded
        to the constructor of :class:`joblib.Parallel`. Defaults to 1.
    :type n_jobs: int
    """

    def __init__(
        self,
        cover=None,
        clustering=None,
        failsafe=True,
        verbose=True,
        n_jobs=1,
    ):
        # Every argument is handed to the parent constructor by keyword,
        # unchanged.
        super().__init__(
            cover=cover,
            clustering=clustering,
            failsafe=failsafe,
            verbose=verbose,
            n_jobs=n_jobs,
        )

    def fit(self, X, y=None):
        """
        Create the Mapper graph and keep it for later use.

        The result of :func:`tdamapper.core.mapper_graph` is stored in the
        attribute `graph_`, and a reference to the calling object is
        returned.

        :param X: A dataset of n points.
        :type X: array-like of shape (n, m) or list-like of length n
        :param y: Lens values for the n points of the dataset.
        :type y: array-like of shape (n, k) or list-like of length n
        :return: The object itself.
        """
        fitted = super().fit(X, y)
        return fitted

    def fit_transform(self, X, y):
        """
        Create the Mapper graph and return it.

        Equivalent to calling :func:`tdamapper.core.mapper_graph`.

        :param X: A dataset of n points.
        :type X: array-like of shape (n, m) or list-like of length n
        :param y: Lens values for the n points of the dataset.
        :type y: array-like of shape (n, k) or list-like of length n
        :return: The Mapper graph.
        :rtype: :class:`networkx.Graph`
        """
        graph = super().fit_transform(X, y)
        return graph
0 commit comments