From 7eb22c4b8aafa0bca57914f0f40c7e7902d4fde5 Mon Sep 17 00:00:00 2001 From: "sam.hunt" Date: Wed, 13 Nov 2024 16:40:43 +0000 Subject: [PATCH 1/7] add method to rename dims --- obsarray/test/test_unc_accessor.py | 39 +++++++++++++++++++----------- obsarray/unc_accessor.py | 34 ++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 14 deletions(-) diff --git a/obsarray/test/test_unc_accessor.py b/obsarray/test/test_unc_accessor.py index ad63f5d..3520f33 100644 --- a/obsarray/test/test_unc_accessor.py +++ b/obsarray/test/test_unc_accessor.py @@ -34,7 +34,7 @@ def compare_err_corr_form(self, form, exp_form): self.assertCountEqual(form._unc_var_name, exp_form._unc_var_name) -def create_ds(): +def create_ds(dim_suffix=""): np.random.seed(0) temperature = 15 + 8 * np.random.randn(2, 2, 3) u_r_temperature = temperature * 0.02 @@ -48,41 +48,41 @@ def create_ds(): ds = xr.Dataset( data_vars=dict( - temperature=(["x", "y", "time"], temperature, {"units": "K"}), + temperature=(["x" + dim_suffix, "y" + dim_suffix, "time" + dim_suffix], temperature, {"units": "K"}), ), coords=dict( - lon=(["x", "y"], lon), - lat=(["x", "y"], lat), - time=time, + lon=(["x" + dim_suffix, "y" + dim_suffix], lon), + lat=(["x" + dim_suffix, "y" + dim_suffix], lat), + time=("time" + dim_suffix, time), reference_time=reference_time, ), attrs=dict(description="Weather related data."), ) ds.unc["temperature"]["u_ran_temperature"] = ( - ["x", "y", "time"], + ["x" + dim_suffix, "y" + dim_suffix, "time" + dim_suffix], temperature * 0.05, {"units": "K", "pdf_shape": "gaussian"}, ) ds.unc["temperature"]["u_sys_temperature"] = ( - ["x", "y", "time"], + ["x" + dim_suffix, "y" + dim_suffix, "time" + dim_suffix], temperature * 0.03, { "units": "K", "err_corr": [ { - "dim": "x", + "dim": "x" + dim_suffix, "form": "systematic", "params": [], }, { - "dim": "y", + "dim": "y" + dim_suffix, "form": "systematic", "params": [], }, { - "dim": "time", + "dim": "time" + dim_suffix, "form": "systematic", "params": 
[], }, @@ -92,18 +92,18 @@ def create_ds(): ) ds.unc["temperature"]["u_str_temperature"] = ( - ["x", "y", "time"], + ["x" + dim_suffix, "y" + dim_suffix, "time" + dim_suffix], temperature * 0.1, { "units": "K", "err_corr": [ { - "dim": ["x", "time"], + "dim": ["x" + dim_suffix, "time" + dim_suffix], "form": "err_corr_matrix", "params": ["err_corr_str_temperature"], }, { - "dim": "y", + "dim": "y" + dim_suffix, "form": "systematic", "params": [], }, @@ -113,7 +113,7 @@ def create_ds(): ) ds["err_corr_str_temperature"] = ( - ["x.time", "x.time"], + ["x.time" + dim_suffix, "x.time" + dim_suffix], np.ones( ( temperature.shape[0] * temperature.shape[2], @@ -741,5 +741,16 @@ def test_err_cov_matrix(self, mock_cr2cv, mock_ecrm, mock_value): xr.testing.assert_equal(ecm, exp_ecm) + def test_rename_dims(self): + dim_suffix = "_test" + input_ds = create_ds() + + ds = input_ds.unc.rename_dims({"x": "x" + dim_suffix, "y": "y" + dim_suffix, "time": "time" + dim_suffix, "x.time": "x.time_test"}) + + exp_ds = create_ds(dim_suffix=dim_suffix) + + xr.testing.assert_identical(ds, exp_ds) + + if __name__ == "__main__": unittest.main() diff --git a/obsarray/unc_accessor.py b/obsarray/unc_accessor.py index 1ade721..7dab47d 100644 --- a/obsarray/unc_accessor.py +++ b/obsarray/unc_accessor.py @@ -9,6 +9,7 @@ from obsarray.templater.dataset_util import DatasetUtil from obsarray.err_corr import err_corr_forms, BaseErrCorrForm from obsarray.utils import empty_err_corr_matrix +from xarray.core.types import T_Dataset __author__ = "Sam Hunt " @@ -894,6 +895,39 @@ def _remove_unc_var(self, obs_var: str, unc_var: str) -> None: del self._obj[unc_var] self._obj[obs_var].attrs["unc_comps"].remove(unc_var) + def rename_dims(self, dims_dict: dict[str, str]) -> T_Dataset: + """ + Returns a new dataset with renamed dimensions - safely handling `unc_vars` related metadata + + :params dims_dict : Dictionary whose keys are current dimension names and whose values are the desired names. 
The desired names must not be the name of an existing dimension or Variable in the Dataset. + :returns: Dataset with renamed dimensions + """ + + # update dimension names + obj = self._obj.rename_dims(dims_dict) + + # update uncertainty metadata related to variable names + unc_var_names = [] + for obs_var in obj.unc.obs_vars: + for unc_var in obj.unc[obs_var]: + unc_var_names.append(unc_var._unc_var_name) + + for unc_var_name in unc_var_names: + for attr in obj[unc_var_name].attrs.keys(): + if (attr[:9] == "err_corr_") and (attr[-4:] == "_dim"): + if isinstance(obj[unc_var_name].attrs[attr], str): + if obj[unc_var_name].attrs[attr] in dims_dict: + obj[unc_var_name].attrs[attr] = dims_dict[obj[unc_var_name].attrs[attr]] + + if isinstance(obj[unc_var_name].attrs[attr], list): + for i, attr_i in enumerate(obj[unc_var_name].attrs[attr]): + if attr_i in dims_dict: + obj[unc_var_name].attrs[attr][i] = dims_dict[attr_i] + + return obj + + + if __name__ == "__main__": pass From e5ec9df963b5119c50d13430c63bc72c8ca3f557 Mon Sep 17 00:00:00 2001 From: "sam.hunt" Date: Mon, 18 Nov 2024 13:49:09 +0000 Subject: [PATCH 2/7] add method for renaming variables --- obsarray/test/test_unc_accessor.py | 49 ++++++++++++++++++------------ obsarray/unc_accessor.py | 42 +++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 20 deletions(-) diff --git a/obsarray/test/test_unc_accessor.py b/obsarray/test/test_unc_accessor.py index 3520f33..ed76aa0 100644 --- a/obsarray/test/test_unc_accessor.py +++ b/obsarray/test/test_unc_accessor.py @@ -34,7 +34,7 @@ def compare_err_corr_form(self, form, exp_form): self.assertCountEqual(form._unc_var_name, exp_form._unc_var_name) -def create_ds(dim_suffix=""): +def create_ds(var_suffix="", dim_suffix="", coord_dim_suffix_extra = ""): np.random.seed(0) temperature = 15 + 8 * np.random.randn(2, 2, 3) u_r_temperature = temperature * 0.02 @@ -47,26 +47,26 @@ def create_ds(dim_suffix=""): reference_time = pd.Timestamp("2014-09-05") ds = xr.Dataset( - 
data_vars=dict( - temperature=(["x" + dim_suffix, "y" + dim_suffix, "time" + dim_suffix], temperature, {"units": "K"}), - ), - coords=dict( - lon=(["x" + dim_suffix, "y" + dim_suffix], lon), - lat=(["x" + dim_suffix, "y" + dim_suffix], lat), - time=("time" + dim_suffix, time), - reference_time=reference_time, - ), + data_vars={ + "temperature" + var_suffix: (["x" + dim_suffix, "y" + dim_suffix, "time" + dim_suffix + coord_dim_suffix_extra], temperature, {"units": "K"}), + }, + coords={ + "lon" + var_suffix: (["x" + dim_suffix, "y" + dim_suffix], lon), + "lat" + var_suffix: (["x" + dim_suffix, "y" + dim_suffix], lat), + "time" + var_suffix: ("time" + dim_suffix + coord_dim_suffix_extra, time), + "reference_time": reference_time, + }, attrs=dict(description="Weather related data."), ) - ds.unc["temperature"]["u_ran_temperature"] = ( - ["x" + dim_suffix, "y" + dim_suffix, "time" + dim_suffix], + ds.unc["temperature" + var_suffix]["u_ran_temperature" + var_suffix] = ( + ["x" + dim_suffix, "y" + dim_suffix, "time" + dim_suffix + coord_dim_suffix_extra], temperature * 0.05, {"units": "K", "pdf_shape": "gaussian"}, ) - ds.unc["temperature"]["u_sys_temperature"] = ( - ["x" + dim_suffix, "y" + dim_suffix, "time" + dim_suffix], + ds.unc["temperature" + var_suffix]["u_sys_temperature" + var_suffix] = ( + ["x" + dim_suffix, "y" + dim_suffix, "time" + dim_suffix + coord_dim_suffix_extra], temperature * 0.03, { "units": "K", @@ -82,7 +82,7 @@ def create_ds(dim_suffix=""): "params": [], }, { - "dim": "time" + dim_suffix, + "dim": "time" + dim_suffix + coord_dim_suffix_extra, "form": "systematic", "params": [], }, @@ -91,16 +91,16 @@ def create_ds(dim_suffix=""): }, ) - ds.unc["temperature"]["u_str_temperature"] = ( - ["x" + dim_suffix, "y" + dim_suffix, "time" + dim_suffix], + ds.unc["temperature" + var_suffix]["u_str_temperature" + var_suffix] = ( + ["x" + dim_suffix, "y" + dim_suffix, "time" + dim_suffix + coord_dim_suffix_extra], temperature * 0.1, { "units": "K", "err_corr": 
[ { - "dim": ["x" + dim_suffix, "time" + dim_suffix], + "dim": ["x" + dim_suffix, "time" + dim_suffix + coord_dim_suffix_extra], "form": "err_corr_matrix", - "params": ["err_corr_str_temperature"], + "params": ["err_corr_str_temperature" + var_suffix], }, { "dim": "y" + dim_suffix, @@ -112,7 +112,7 @@ def create_ds(dim_suffix=""): }, ) - ds["err_corr_str_temperature"] = ( + ds["err_corr_str_temperature" + var_suffix] = ( ["x.time" + dim_suffix, "x.time" + dim_suffix], np.ones( ( @@ -740,6 +740,15 @@ def test_err_cov_matrix(self, mock_cr2cv, mock_ecrm, mock_value): exp_ecm = xr.DataArray(np.ones((12, 12)), dims=["x.y.time", "x.y.time"]) xr.testing.assert_equal(ecm, exp_ecm) + def test_rename_vars(self): + var_suffix = "_test" + input_ds = create_ds() + + ds = input_ds.unc.rename({"temperature": "temperature" + var_suffix, "lon": "lon" + var_suffix, "lat": "lat" + var_suffix, "time": "time" + var_suffix, "u_ran_temperature": "u_ran_temperature" + var_suffix, "u_str_temperature": "u_str_temperature" + var_suffix, "u_sys_temperature": "u_sys_temperature" + var_suffix, "err_corr_str_temperature": "err_corr_str_temperature" + var_suffix}) + + exp_ds = create_ds(var_suffix=var_suffix, coord_dim_suffix_extra=var_suffix) + + xr.testing.assert_identical(ds, exp_ds) def test_rename_dims(self): dim_suffix = "_test" diff --git a/obsarray/unc_accessor.py b/obsarray/unc_accessor.py index 7dab47d..fa4288f 100644 --- a/obsarray/unc_accessor.py +++ b/obsarray/unc_accessor.py @@ -895,6 +895,48 @@ def _remove_unc_var(self, obs_var: str, unc_var: str) -> None: del self._obj[unc_var] self._obj[obs_var].attrs["unc_comps"].remove(unc_var) + + def rename(self, vars_dict: dict[str, str]) -> T_Dataset: + """ + Returns a new dataset with renamed variables - safely handling `unc_vars` and related metadata + + :params vars_dict : Dictionary whose keys are current variable names and whose values are the desired names. 
The desired names must not be the name of an existing dimension or Variable in the Dataset. + :returns: Dataset with renamed variables + """ + + # handle case that xarray.Dataset.rename renames the dimension associated with a renamed coordinate dimension + coord_dim_dict = {str(dim): vars_dict[dim] for dim in self._obj.dims if (dim in self._obj.coords) and (dim in vars_dict.keys())} + ds = self.rename_dims(coord_dim_dict) + + # update metadata where unc_var err corr param to be renamed + unc_var_paths = [] + for obs_var in ds.unc.obs_vars: + for unc_var in ds.unc[obs_var]: + unc_var_paths.append((obs_var, unc_var._unc_var_name)) + + for unc_var_path in unc_var_paths: + unc_var_i = unc_var_path[1] + + for attr in ds[unc_var_i].attrs.keys(): + if (attr[:9] == "err_corr_") and (attr[-7:] == "_params"): + for i, param in enumerate(ds[unc_var_i].attrs[attr]): + if param in vars_dict.keys(): + ds[unc_var_i].attrs[attr][i] = vars_dict[param] + + # safely update unc_vars + for unc_var_path in unc_var_paths: + obs_var_i = unc_var_path[0] + unc_var_i = unc_var_path[1] + if unc_var_i in vars_dict: + ds = ds.unc[obs_var_i][unc_var_i].rename(vars_dict[unc_var_i]) + + # update remaining variable names + non_unc_var_names = list(filter(lambda x: x not in self.unc_vars.keys(), self._obj.variables.keys())) + var_dict_no_unc = {n: vars_dict[n] for n in non_unc_var_names if n in vars_dict.keys()} + ds = ds.rename(var_dict_no_unc) + + return ds + def rename_dims(self, dims_dict: dict[str, str]) -> T_Dataset: """ Returns a new dataset with renamed dimensions - safely handling `unc_vars` related metadata From 35832ec4d38caf55ff1a172bd46649e3eb555c12 Mon Sep 17 00:00:00 2001 From: "sam.hunt" Date: Mon, 18 Nov 2024 13:53:14 +0000 Subject: [PATCH 3/7] add function to append suffix onto all dataset names --- obsarray/__init__.py | 1 + obsarray/test/test_utils.py | 188 ++++++++++++++++++++++++++++++++++++ obsarray/utils.py | 39 +++++++- 3 files changed, 226 insertions(+), 2 deletions(-) 
create mode 100644 obsarray/test/test_utils.py diff --git a/obsarray/__init__.py b/obsarray/__init__.py index 4a5f4a6..c085c51 100644 --- a/obsarray/__init__.py +++ b/obsarray/__init__.py @@ -9,6 +9,7 @@ from obsarray.templater.template_util import create_ds from obsarray.templater.dstemplater import DSTemplater from obsarray.templater.dswriter import DSWriter +from obsarray.utils import append_names __version__ = get_versions()["version"] del get_versions diff --git a/obsarray/test/test_utils.py b/obsarray/test/test_utils.py new file mode 100644 index 0000000..1432519 --- /dev/null +++ b/obsarray/test/test_utils.py @@ -0,0 +1,188 @@ +"""test_utils - tests for obsarray.utils""" + +import unittest +import numpy as np +from obsarray import append_names, create_ds +import xarray as xr + +__author__ = "Sam Hunt " +__all__ = [] + +def create_test_ds(suffix): + # define ds variables + template = { + "temperature" + suffix: { + "dtype": np.float32, + "dim": ["x" + suffix, "y" + suffix, "time" + suffix], + "attributes": { + "units": "K", + "unc_comps": ["u_ran_temperature" + suffix, "u_sys_temperature" + suffix] + } + }, + "u_ran_temperature" + suffix: { + "dtype": np.float32, + "dim": ["x" + suffix, "y" + suffix, "time" + suffix], + "attributes": { + "units": "K", + "err_corr": [ + { + "dim": "x" + suffix, + "form": "random", + "params": [], + "units": [] + }, + { + "dim": "y" + suffix, + "form": "random", + "params": [], + "units": [] + }, + { + "dim": "time" + suffix, + "form": "random", + "params": [], + "units": [] + } + ] + }, + }, + "u_sys_temperature" + suffix: { + "dtype": np.float32, + "dim": ["x" + suffix, "y" + suffix, "time" + suffix], + "attributes": { + "units": "K", + "err_corr": [ + { + "dim": "x" + suffix, + "form": "systematic", + "params": [], + "units": [] + }, + { + "dim": "y" + suffix, + "form": "systematic", + "params": [], + "units": [] + }, + { + "dim": "time" + suffix, + "form": "systematic", + "params": [], + "units": [] + } + ] + } + }, + 
"pressure" + suffix: { + "dtype": np.float32, + "dim": ["x" + suffix, "y" + suffix, "time" + suffix], + "attributes": { + "units": "Pa", + "unc_comps": ["u_str_pressure" + suffix] + } + }, + "u_str_pressure" + suffix: { + "dtype": np.float32, + "dim": ["x" + suffix, "y" + suffix, "time" + suffix], + "attributes": { + "units": "Pa", + "err_corr": [ + { + "dim": "x" + suffix, + "form": "random", + "params": [], + "units": [] + }, + { + "dim": "y" + suffix, + "form": "err_corr_matrix", + "params": "err_corr_str_pressure_y", + "units": [] + }, + { + "dim": "time" + suffix, + "form": "systematic", + "params": [], + "units": [] + } + ] + }, + }, + "err_corr_str_pressure_y" + suffix: { + "dtype": np.float32, + "dim": ["y" + suffix, "y" + suffix], + "attributes": {"units": ""}, + }, + "n_moles" + suffix: { + "dtype": np.float32, + "dim": ["x" + suffix, "y" + suffix, "time" + suffix], + "attributes": { + "units": "", + "unc_comps": ["u_ran_n_moles" + suffix] + } + }, + "u_ran_n_moles" + suffix: { + "dtype": np.float32, + "dim": ["x" + suffix, "y" + suffix, "time" + suffix], + "attributes": { + "units": "", + "err_corr": [ + { + "dim": "x" + suffix, + "form": "random", + "params": [], + "units": [] + }, + { + "dim": "y" + suffix, + "form": "random", + "params": [], + "units": [] + }, + { + "dim": "time" + suffix, + "form": "random", + "params": [], + "units": [] + } + ] + }, + }, + } + + # define dim_size_dict to specify size of arrays + dim_sizes = { + "x" + suffix: 20, + "y" + suffix: 30, + "time" + suffix: 6 + } + + # create dataset template + ds = create_ds(template, dim_sizes) + + # populate with example data + ds["temperature" + suffix].values = 293 * np.ones((20, 30, 6)) + ds["u_ran_temperature" + suffix].values = 1 * np.ones((20, 30, 6)) + ds["u_sys_temperature" + suffix].values = 0.4 * np.ones((20, 30, 6)) + ds["pressure" + suffix].values = 10 ** 5 * np.ones((20, 30, 6)) + ds["u_str_pressure" + suffix].values = 10 * np.ones((20, 30, 6)) + 
ds["err_corr_str_pressure_y" + suffix].values = 0.5 * np.ones((30, 30)) + 0.5 * np.eye(30) + ds["n_moles" + suffix].values = 40 * np.ones((20, 30, 6)) + ds["u_ran_n_moles" + suffix].values = 1 * np.ones((20, 30, 6)) + + ds.attrs["attr" + suffix] = "val" + + return ds + +class TestAppendNames(unittest.TestCase): + def test_append_names(self): + + input_ds = create_test_ds(suffix = "") + ds = append_names(input_ds, "_test") + + exp_ds = create_test_ds(suffix="_test") + + xr.testing.assert_identical(ds, exp_ds) + +if __name__ == "__main__": + unittest.main() diff --git a/obsarray/utils.py b/obsarray/utils.py index a531dca..aaf4837 100644 --- a/obsarray/utils.py +++ b/obsarray/utils.py @@ -2,10 +2,10 @@ import numpy as np import xarray as xr - +from xarray.core.types import T_Dataset __author__ = "Sam Hunt " -__all__ = ["empty_err_corr_matrix"] +__all__ = ["empty_err_corr_matrix", "append_names"] def empty_err_corr_matrix(obs_var: xr.DataArray): @@ -27,5 +27,40 @@ def empty_err_corr_matrix(obs_var: xr.DataArray): return err_corr_matrix +def append_names( + ds: T_Dataset, + suffix: str, + skip_vars: bool = False, + skip_dims: bool = False, + skip_attrs: bool = False + ) -> T_Dataset: + """ + Appends a suffix to the names of dataset variables, dimensions, and attributes - safely handling `unc_vars` and associated metadata + + :param ds: xarray dataset + :param suffix: suffix to append to dataset variable, dimension, and attribute names + :param skip_vars: (default: `False`) switch to skip applying suffix to variable names + :param skip_dims: (default: `False`) switch to skip applying suffix to dimension names + :param skip_attrs: (default: `False`) switch to skip applying suffix to attribute names + :returns: ds with suffix appended to names of variables, dimensions, attributes + """ + + # update variable names + if not skip_vars: + var_rename = {var_name: var_name + suffix for var_name in ds.variables.keys()} + ds = ds.unc.rename(var_rename) + + # update dimension names
+ if not skip_dims: + dim_rename = {dim_name: dim_name + suffix for dim_name in ds.dims.keys()} + ds = ds.unc.rename_dims(dim_rename) + + # update attribute names + if not skip_attrs: + ds.attrs = {key + suffix: value for key, value in ds.attrs.items()} + + return ds + + if __name__ == "__main__": pass From f9ca9d4c090708b795ae788f3217362dd262d91f Mon Sep 17 00:00:00 2001 From: "sam.hunt" Date: Mon, 18 Nov 2024 14:01:30 +0000 Subject: [PATCH 4/7] fix doc strings --- obsarray/unc_accessor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/obsarray/unc_accessor.py b/obsarray/unc_accessor.py index fa4288f..ae95116 100644 --- a/obsarray/unc_accessor.py +++ b/obsarray/unc_accessor.py @@ -900,7 +900,7 @@ def rename(self, vars_dict: dict[str, str]) -> T_Dataset: """ Returns a new dataset with renamed variables - safely handling `unc_vars` and related metadata - :params vars_dict : Dictionary whose keys are current variable names and whose values are the desired names. The desired names must not be the name of an existing dimension or Variable in the Dataset. + :param vars_dict: Dictionary whose keys are current variable names and whose values are the desired names. The desired names must not be the name of an existing dimension or Variable in the Dataset. :returns: Dataset with renamed variables """ @@ -941,7 +941,7 @@ def rename_dims(self, dims_dict: dict[str, str]) -> T_Dataset: """ Returns a new dataset with renamed dimensions - safely handling `unc_vars` related metadata - :params dims_dict : Dictionary whose keys are current dimension names and whose values are the desired names. The desired names must not be the name of an existing dimension or Variable in the Dataset. + :param dims_dict: Dictionary whose keys are current dimension names and whose values are the desired names. The desired names must not be the name of an existing dimension or Variable in the Dataset. 
:returns: Dataset with renamed dimensions """ From 61e27002e76cb4db0cc4765c24df4d21fbbaadfe Mon Sep 17 00:00:00 2001 From: "sam.hunt" Date: Mon, 18 Nov 2024 14:01:48 +0000 Subject: [PATCH 5/7] add rename functions to api interface list --- docs/content/user/api.rst | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/content/user/api.rst b/docs/content/user/api.rst index 26b00a8..cd88953 100644 --- a/docs/content/user/api.rst +++ b/docs/content/user/api.rst @@ -30,6 +30,8 @@ Uncertainty functions unc_accessor.UncAccessor.unc_vars unc_accessor.UncAccessor.__getitem__ unc_accessor.UncAccessor.keys + unc_accessor.UncAccessor.rename + unc_accessor.UncAccessor.rename_dims unc_accessor.VariableUncertainty unc_accessor.VariableUncertainty.__getitem__ unc_accessor.VariableUncertainty.__setitem__ @@ -83,4 +85,12 @@ Flag functions flag_accessor.Flag flag_accessor.Flag.__getitem__ flag_accessor.Flag.__setitem__ - flag_accessor.Flag.value \ No newline at end of file + flag_accessor.Flag.value + +Utility functions +================= + +.. 
autosummary:: + :toctree: generated/ + + utils.append_names \ No newline at end of file From 4794db9fd218718efdf8c8d695885f3b582d11d6 Mon Sep 17 00:00:00 2001 From: "sam.hunt" Date: Mon, 18 Nov 2024 14:03:37 +0000 Subject: [PATCH 6/7] run black --- obsarray/test/test_unc_accessor.py | 60 +++++++++++--- obsarray/test/test_utils.py | 125 ++++++++++++----------------- obsarray/unc_accessor.py | 21 +++-- obsarray/utils.py | 12 +-- 4 files changed, 124 insertions(+), 94 deletions(-) diff --git a/obsarray/test/test_unc_accessor.py b/obsarray/test/test_unc_accessor.py index ed76aa0..d162433 100644 --- a/obsarray/test/test_unc_accessor.py +++ b/obsarray/test/test_unc_accessor.py @@ -34,7 +34,7 @@ def compare_err_corr_form(self, form, exp_form): self.assertCountEqual(form._unc_var_name, exp_form._unc_var_name) -def create_ds(var_suffix="", dim_suffix="", coord_dim_suffix_extra = ""): +def create_ds(var_suffix="", dim_suffix="", coord_dim_suffix_extra=""): np.random.seed(0) temperature = 15 + 8 * np.random.randn(2, 2, 3) u_r_temperature = temperature * 0.02 @@ -48,25 +48,42 @@ def create_ds(var_suffix="", dim_suffix="", coord_dim_suffix_extra = ""): ds = xr.Dataset( data_vars={ - "temperature" + var_suffix: (["x" + dim_suffix, "y" + dim_suffix, "time" + dim_suffix + coord_dim_suffix_extra], temperature, {"units": "K"}), + "temperature" + + var_suffix: ( + [ + "x" + dim_suffix, + "y" + dim_suffix, + "time" + dim_suffix + coord_dim_suffix_extra, + ], + temperature, + {"units": "K"}, + ), }, coords={ "lon" + var_suffix: (["x" + dim_suffix, "y" + dim_suffix], lon), "lat" + var_suffix: (["x" + dim_suffix, "y" + dim_suffix], lat), "time" + var_suffix: ("time" + dim_suffix + coord_dim_suffix_extra, time), "reference_time": reference_time, - }, + }, attrs=dict(description="Weather related data."), ) ds.unc["temperature" + var_suffix]["u_ran_temperature" + var_suffix] = ( - ["x" + dim_suffix, "y" + dim_suffix, "time" + dim_suffix + coord_dim_suffix_extra], + [ + "x" + dim_suffix, + 
"y" + dim_suffix, + "time" + dim_suffix + coord_dim_suffix_extra, + ], temperature * 0.05, {"units": "K", "pdf_shape": "gaussian"}, ) ds.unc["temperature" + var_suffix]["u_sys_temperature" + var_suffix] = ( - ["x" + dim_suffix, "y" + dim_suffix, "time" + dim_suffix + coord_dim_suffix_extra], + [ + "x" + dim_suffix, + "y" + dim_suffix, + "time" + dim_suffix + coord_dim_suffix_extra, + ], temperature * 0.03, { "units": "K", @@ -92,13 +109,20 @@ def create_ds(var_suffix="", dim_suffix="", coord_dim_suffix_extra = ""): ) ds.unc["temperature" + var_suffix]["u_str_temperature" + var_suffix] = ( - ["x" + dim_suffix, "y" + dim_suffix, "time" + dim_suffix + coord_dim_suffix_extra], + [ + "x" + dim_suffix, + "y" + dim_suffix, + "time" + dim_suffix + coord_dim_suffix_extra, + ], temperature * 0.1, { "units": "K", "err_corr": [ { - "dim": ["x" + dim_suffix, "time" + dim_suffix + coord_dim_suffix_extra], + "dim": [ + "x" + dim_suffix, + "time" + dim_suffix + coord_dim_suffix_extra, + ], "form": "err_corr_matrix", "params": ["err_corr_str_temperature" + var_suffix], }, @@ -744,7 +768,18 @@ def test_rename_vars(self): var_suffix = "_test" input_ds = create_ds() - ds = input_ds.unc.rename({"temperature": "temperature" + var_suffix, "lon": "lon" + var_suffix, "lat": "lat" + var_suffix, "time": "time" + var_suffix, "u_ran_temperature": "u_ran_temperature" + var_suffix, "u_str_temperature": "u_str_temperature" + var_suffix, "u_sys_temperature": "u_sys_temperature" + var_suffix, "err_corr_str_temperature": "err_corr_str_temperature" + var_suffix}) + ds = input_ds.unc.rename( + { + "temperature": "temperature" + var_suffix, + "lon": "lon" + var_suffix, + "lat": "lat" + var_suffix, + "time": "time" + var_suffix, + "u_ran_temperature": "u_ran_temperature" + var_suffix, + "u_str_temperature": "u_str_temperature" + var_suffix, + "u_sys_temperature": "u_sys_temperature" + var_suffix, + "err_corr_str_temperature": "err_corr_str_temperature" + var_suffix, + } + ) exp_ds = 
create_ds(var_suffix=var_suffix, coord_dim_suffix_extra=var_suffix) @@ -754,7 +789,14 @@ def test_rename_dims(self): dim_suffix = "_test" input_ds = create_ds() - ds = input_ds.unc.rename_dims({"x": "x" + dim_suffix, "y": "y" + dim_suffix, "time": "time" + dim_suffix, "x.time": "x.time_test"}) + ds = input_ds.unc.rename_dims( + { + "x": "x" + dim_suffix, + "y": "y" + dim_suffix, + "time": "time" + dim_suffix, + "x.time": "x.time_test", + } + ) exp_ds = create_ds(dim_suffix=dim_suffix) diff --git a/obsarray/test/test_utils.py b/obsarray/test/test_utils.py index 1432519..fb1537a 100644 --- a/obsarray/test/test_utils.py +++ b/obsarray/test/test_utils.py @@ -8,45 +8,42 @@ __author__ = "Sam Hunt " __all__ = [] + def create_test_ds(suffix): # define ds variables template = { - "temperature" + suffix: { + "temperature" + + suffix: { "dtype": np.float32, "dim": ["x" + suffix, "y" + suffix, "time" + suffix], "attributes": { "units": "K", - "unc_comps": ["u_ran_temperature" + suffix, "u_sys_temperature" + suffix] - } + "unc_comps": [ + "u_ran_temperature" + suffix, + "u_sys_temperature" + suffix, + ], + }, }, - "u_ran_temperature" + suffix: { + "u_ran_temperature" + + suffix: { "dtype": np.float32, "dim": ["x" + suffix, "y" + suffix, "time" + suffix], "attributes": { "units": "K", "err_corr": [ - { - "dim": "x" + suffix, - "form": "random", - "params": [], - "units": [] - }, - { - "dim": "y" + suffix, - "form": "random", - "params": [], - "units": [] - }, + {"dim": "x" + suffix, "form": "random", "params": [], "units": []}, + {"dim": "y" + suffix, "form": "random", "params": [], "units": []}, { "dim": "time" + suffix, "form": "random", "params": [], - "units": [] - } - ] + "units": [], + }, + ], }, }, - "u_sys_temperature" + suffix: { + "u_sys_temperature" + + suffix: { "dtype": np.float32, "dim": ["x" + suffix, "y" + suffix, "time" + suffix], "attributes": { @@ -56,106 +53,86 @@ def create_test_ds(suffix): "dim": "x" + suffix, "form": "systematic", "params": [], - "units": 
[] + "units": [], }, { "dim": "y" + suffix, "form": "systematic", "params": [], - "units": [] + "units": [], }, { "dim": "time" + suffix, "form": "systematic", "params": [], - "units": [] - } - ] - } + "units": [], + }, + ], + }, }, - "pressure" + suffix: { + "pressure" + + suffix: { "dtype": np.float32, "dim": ["x" + suffix, "y" + suffix, "time" + suffix], - "attributes": { - "units": "Pa", - "unc_comps": ["u_str_pressure" + suffix] - } + "attributes": {"units": "Pa", "unc_comps": ["u_str_pressure" + suffix]}, }, - "u_str_pressure" + suffix: { + "u_str_pressure" + + suffix: { "dtype": np.float32, "dim": ["x" + suffix, "y" + suffix, "time" + suffix], "attributes": { "units": "Pa", "err_corr": [ - { - "dim": "x" + suffix, - "form": "random", - "params": [], - "units": [] - }, + {"dim": "x" + suffix, "form": "random", "params": [], "units": []}, { "dim": "y" + suffix, "form": "err_corr_matrix", "params": "err_corr_str_pressure_y", - "units": [] + "units": [], }, { "dim": "time" + suffix, "form": "systematic", "params": [], - "units": [] - } - ] + "units": [], + }, + ], }, }, - "err_corr_str_pressure_y" + suffix: { + "err_corr_str_pressure_y" + + suffix: { "dtype": np.float32, "dim": ["y" + suffix, "y" + suffix], "attributes": {"units": ""}, }, - "n_moles" + suffix: { + "n_moles" + + suffix: { "dtype": np.float32, "dim": ["x" + suffix, "y" + suffix, "time" + suffix], - "attributes": { - "units": "", - "unc_comps": ["u_ran_n_moles" + suffix] - } + "attributes": {"units": "", "unc_comps": ["u_ran_n_moles" + suffix]}, }, - "u_ran_n_moles" + suffix: { + "u_ran_n_moles" + + suffix: { "dtype": np.float32, "dim": ["x" + suffix, "y" + suffix, "time" + suffix], "attributes": { "units": "", "err_corr": [ - { - "dim": "x" + suffix, - "form": "random", - "params": [], - "units": [] - }, - { - "dim": "y" + suffix, - "form": "random", - "params": [], - "units": [] - }, + {"dim": "x" + suffix, "form": "random", "params": [], "units": []}, + {"dim": "y" + suffix, "form": "random", 
"params": [], "units": []}, { "dim": "time" + suffix, "form": "random", "params": [], - "units": [] - } - ] + "units": [], + }, + ], }, }, } # define dim_size_dict to specify size of arrays - dim_sizes = { - "x" + suffix: 20, - "y" + suffix: 30, - "time" + suffix: 6 - } + dim_sizes = {"x" + suffix: 20, "y" + suffix: 30, "time" + suffix: 6} # create dataset template ds = create_ds(template, dim_sizes) @@ -164,9 +141,11 @@ def create_test_ds(suffix): ds["temperature" + suffix].values = 293 * np.ones((20, 30, 6)) ds["u_ran_temperature" + suffix].values = 1 * np.ones((20, 30, 6)) ds["u_sys_temperature" + suffix].values = 0.4 * np.ones((20, 30, 6)) - ds["pressure" + suffix].values = 10 ** 5 * np.ones((20, 30, 6)) + ds["pressure" + suffix].values = 10**5 * np.ones((20, 30, 6)) ds["u_str_pressure" + suffix].values = 10 * np.ones((20, 30, 6)) - ds["err_corr_str_pressure_y" + suffix].values = 0.5 * np.ones((30, 30)) + 0.5 * np.eye(30) + ds["err_corr_str_pressure_y" + suffix].values = 0.5 * np.ones( + (30, 30) + ) + 0.5 * np.eye(30) ds["n_moles" + suffix].values = 40 * np.ones((20, 30, 6)) ds["u_ran_n_moles" + suffix].values = 1 * np.ones((20, 30, 6)) @@ -174,15 +153,17 @@ def create_test_ds(suffix): return ds + class TestAppendNames(unittest.TestCase): def test_append_names(self): - input_ds = create_test_ds(suffix = "") + input_ds = create_test_ds(suffix="") ds = append_names(input_ds, "_test") exp_ds = create_test_ds(suffix="_test") xr.testing.assert_identical(ds, exp_ds) + if __name__ == "__main__": unittest.main() diff --git a/obsarray/unc_accessor.py b/obsarray/unc_accessor.py index ae95116..e65f35c 100644 --- a/obsarray/unc_accessor.py +++ b/obsarray/unc_accessor.py @@ -895,7 +895,6 @@ def _remove_unc_var(self, obs_var: str, unc_var: str) -> None: del self._obj[unc_var] self._obj[obs_var].attrs["unc_comps"].remove(unc_var) - def rename(self, vars_dict: dict[str, str]) -> T_Dataset: """ Returns a new dataset with renamed variables - safely handling `unc_vars` and 
related metadata @@ -905,7 +904,11 @@ def rename(self, vars_dict: dict[str, str]) -> T_Dataset: """ # handle case that xarray.Dataset.rename renames the dimension associated with a renamed coordinate dimension - coord_dim_dict = {str(dim): vars_dict[dim] for dim in self._obj.dims if (dim in self._obj.coords) and (dim in vars_dict.keys())} + coord_dim_dict = { + str(dim): vars_dict[dim] + for dim in self._obj.dims + if (dim in self._obj.coords) and (dim in vars_dict.keys()) + } ds = self.rename_dims(coord_dim_dict) # update metadata where unc_var err corr param to be renamed @@ -931,8 +934,12 @@ def rename(self, vars_dict: dict[str, str]) -> T_Dataset: ds = ds.unc[obs_var_i][unc_var_i].rename(vars_dict[unc_var_i]) # update remaining variable names - non_unc_var_names = list(filter(lambda x: x not in self.unc_vars.keys(), self._obj.variables.keys())) - var_dict_no_unc = {n: vars_dict[n] for n in non_unc_var_names if n in vars_dict.keys()} + non_unc_var_names = list( + filter(lambda x: x not in self.unc_vars.keys(), self._obj.variables.keys()) + ) + var_dict_no_unc = { + n: vars_dict[n] for n in non_unc_var_names if n in vars_dict.keys() + } ds = ds.rename(var_dict_no_unc) return ds @@ -959,7 +966,9 @@ def rename_dims(self, dims_dict: dict[str, str]) -> T_Dataset: if (attr[:9] == "err_corr_") and (attr[-4:] == "_dim"): if isinstance(obj[unc_var_name].attrs[attr], str): if obj[unc_var_name].attrs[attr] in dims_dict: - obj[unc_var_name].attrs[attr] = dims_dict[obj[unc_var_name].attrs[attr]] + obj[unc_var_name].attrs[attr] = dims_dict[ + obj[unc_var_name].attrs[attr] + ] if isinstance(obj[unc_var_name].attrs[attr], list): for i, attr_i in enumerate(obj[unc_var_name].attrs[attr]): @@ -969,7 +978,5 @@ def rename_dims(self, dims_dict: dict[str, str]) -> T_Dataset: return obj - - if __name__ == "__main__": pass diff --git a/obsarray/utils.py b/obsarray/utils.py index aaf4837..17e1fe0 100644 --- a/obsarray/utils.py +++ b/obsarray/utils.py @@ -28,12 +28,12 @@ def 
empty_err_corr_matrix(obs_var: xr.DataArray): def append_names( - ds: T_Dataset, - suffix: str, - skip_vars: bool = False, - skip_dims: bool = False, - skip_attrs: bool = False - ) -> T_Dataset: + ds: T_Dataset, + suffix: str, + skip_vars: bool = False, + skip_dims: bool = False, + skip_attrs: bool = False, +) -> T_Dataset: """ Appends a suffix to the names of dataset variables, dimensions, and attributes - safely handling `unc_vars` and associated metadata From 8ac909635942e86647e09804e4a715ed0601954e Mon Sep 17 00:00:00 2001 From: "sam.hunt" Date: Mon, 18 Nov 2024 14:54:39 +0000 Subject: [PATCH 7/7] update renaming docs --- docs/content/user/unc_accessor.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/content/user/unc_accessor.rst b/docs/content/user/unc_accessor.rst index 922790d..189014d 100644 --- a/docs/content/user/unc_accessor.rst +++ b/docs/content/user/unc_accessor.rst @@ -186,15 +186,16 @@ A component of uncertainty can be simply be deleted as, # Check uncertainties ds.unc["temperature"].keys() -Renaming Variables ------------------- +Renaming Variables and Dimensions +--------------------------------- -The storage of uncertainty information is underpinned by variable attributes, which include referencing other variables (for example, which variables are the uncertainties associated with a particular observation variable). Because of this it is important, if renaming uncertainty variables, to use **obsarray**'s renaming functionality. This renames the uncertainty variable and safely updates attribute variable references. This is done as follows: +The storage of uncertainty information is underpinned by variable attributes, which include referencing other variables/dimensions (for example, which variables are the uncertainties associated with a particular observation variable). Because of this it is important, if renaming uncertainty variables or dimensions, to use **obsarray**'s renaming functionality. 
This renames the uncertainty variable or dimension and safely updates attribute variable references. This is done as follows (mirroring the interface to `xarray renaming `_): .. ipython:: python :okwarning: print(ds.unc["temperature"]) - ds = ds.unc["temperature"]["u_ran_temperature"].rename("u_noise") + ds = ds.unc.rename({"u_ran_temperature": "u_noise"}) + ds = ds.unc.rename_dims({"time": "t"}) print(ds.unc["temperature"]) \ No newline at end of file