|
| 1 | +"""Function to load EEG Datasets from Zenodo.""" |
| 2 | + |
| 3 | +import os |
| 4 | +from urllib.request import urlretrieve |
| 5 | + |
| 6 | +from aeon.datasets._single_problem_loaders import _load_saved_dataset |
| 7 | +from aeon.datasets.dataset_collections import get_downloaded_tsc_tsr_datasets |
| 8 | + |
| 9 | +import aeon_neuro |
| 10 | +from aeon_neuro.datasets.classification_datasets import dataset_map |
| 11 | + |
# Name of the sub-directory of MODULE that is searched first for datasets
# (presumably the data shipped with the package -- confirm).
DIRNAME = "data"
# Path of the ``datasets`` directory inside the installed ``aeon_neuro`` package.
MODULE = os.path.join(os.path.dirname(aeon_neuro.__file__), "datasets")
| 14 | + |
| 15 | + |
def load_eeg_classification(
    name,
    split=None,
    extract_path=None,
    return_metadata=False,
):
    """Load an EEG classification dataset.

    This function loads EEG time series classification problems into memory. It
    first looks for the data on disk and, if it is not found, tries to download
    it from https://zenodo.org/. To be downloadable, ``name`` must be a key of
    ``dataset_map`` in ``aeon_neuro.datasets.classification_datasets``.

    The data is assumed to be stored as
    ``<extract_path>/<name>/<name>_TRAIN.ts`` and
    ``<extract_path>/<name>/<name>_TEST.ts``. If you want to load a file directly
    from a full path that is in ``aeon`` ts format, use the function
    ``load_from_ts_file`` in ``aeon`` directly.

    If ``extract_path`` is not given, the function looks in
    ``aeon_neuro/datasets/data`` first and then in
    ``aeon_neuro/datasets/local_data``; downloads are saved to the latter.

    Data is assumed to be in the standard ``aeon`` .ts format: each row is a
    (possibly multivariate) time series. Each channel is separated by a colon,
    each value in a series is comma separated.

    Parameters
    ----------
    name : str
        Name of the dataset. If it is not found on disk and it is listed in
        ``dataset_map``, the train and test files are downloaded from Zenodo
        and saved under the search path before loading.
    split : None or str{"train", "test"}, default=None
        Whether to load the train or test partition of the problem. By default it
        loads both into a single dataset, otherwise it looks only for files of the
        format <name>_TRAIN.ts or <name>_TEST.ts.
    extract_path : str, default=None
        The path to look for the data, and to download it to if it is missing.
        If no path is provided, the package directories described above are
        used. If a path is given, it can be absolute, e.g. C:/Temp/ or relative,
        e.g. Temp/ or ./Temp/. The directory is created if it does not exist.
    return_metadata : boolean, default=False
        If True, returns a tuple (X, y, metadata).

    Returns
    -------
    X: np.ndarray or list of np.ndarray
    y: np.ndarray
        The class labels for each case in X
    metadata: dict, optional
        Only returned if ``return_metadata`` is True. Metadata read from the
        file header by the ``aeon`` loader: 'problemname', timestamps, missing,
        univariate, equallength, class_values. targetlabel should be false, and
        classlabel true.

    Raises
    ------
    ValueError
        If the dataset is not found locally and is not listed in
        ``dataset_map``, if the download from Zenodo fails (the original error
        is attached as ``__cause__``), or if the loaded problem is not a
        classification problem.

    Examples
    --------
    >>> from aeon_neuro.datasets.classification_datasets import (
    ...     dataset_map
    ... )  # doctest: +SKIP
    >>> name = next(iter(dataset_map))  # doctest: +SKIP
    >>> X, y = load_eeg_classification(name)  # doctest: +SKIP
    """
    # local_module / local_dirname are forwarded to the aeon loader; ``path`` is
    # the directory that should contain the ``<name>/`` dataset folder.
    if extract_path is not None:
        local_module = extract_path
        local_dirname = None
        path = extract_path
    else:
        local_module = MODULE
        local_dirname = DIRNAME
        path = os.path.join(local_module, local_dirname)
    os.makedirs(path, exist_ok=True)

    if name not in get_downloaded_tsc_tsr_datasets(path):
        if extract_path is None:
            # Not in the first package directory: fall back to the directory
            # used for downloaded data.
            local_dirname = "local_data"
            path = os.path.join(local_module, local_dirname)
            os.makedirs(path, exist_ok=True)
            on_disk = name in get_downloaded_tsc_tsr_datasets(path)
        else:
            # extract_path was already searched above.
            on_disk = False

        if not on_disk:
            not_listed = (
                f"File name {name} is not in the list of valid files to download, "
                f"see aeon_neuro.datasets.classification_datasets for the current "
                f"list of maintained datasets."
            )
            if name not in dataset_map:
                raise ValueError(not_listed)
            record_id = dataset_map[name]
            # NOTE(review): record id 49 is treated as "not downloadable" --
            # presumably a placeholder value in dataset_map; confirm.
            if record_id == 49:
                raise ValueError(not_listed)

            base_url = f"https://zenodo.org/record/{record_id}/files"
            full_path = os.path.join(path, name)
            os.makedirs(full_path, exist_ok=True)
            try:
                for part in ("TRAIN", "TEST"):
                    file_name = f"{name}_{part}.ts"
                    urlretrieve(
                        f"{base_url}/{file_name}",
                        os.path.join(full_path, file_name),
                    )
            except Exception as err:
                # Still a ValueError for backward compatibility, but with a
                # message that describes the real problem and keeps the cause.
                raise ValueError(
                    f"Unable to download {name} from Zenodo record {record_id}. "
                    f"Check your network connection and that the record provides "
                    f"{name}_TRAIN.ts and {name}_TEST.ts."
                ) from err

    X, y, meta = _load_saved_dataset(
        name=name,
        dir_name=name,
        split=split,
        local_module=local_module,
        local_dirname=local_dirname,
        return_meta=True,
    )
    # Check this is a classification problem
    if not meta.get("classlabel"):
        raise ValueError(
            f"You have tried to load a regression problem called {name} with "
            f"load_eeg_classification. This will cause unintended consequences "
            f"for any classifier you build. If you want to load a regression "
            f"problem, use load_regression in ``aeon``."
        )
    if return_metadata:
        return X, y, meta
    return X, y
0 commit comments