diff --git a/docs/content/user/api.rst b/docs/content/user/api.rst index 26b00a8..cd88953 100644 --- a/docs/content/user/api.rst +++ b/docs/content/user/api.rst @@ -30,6 +30,8 @@ Uncertainty functions unc_accessor.UncAccessor.unc_vars unc_accessor.UncAccessor.__getitem__ unc_accessor.UncAccessor.keys + unc_accessor.UncAccessor.rename + unc_accessor.UncAccessor.rename_dims unc_accessor.VariableUncertainty unc_accessor.VariableUncertainty.__getitem__ unc_accessor.VariableUncertainty.__setitem__ @@ -83,4 +85,12 @@ Flag functions flag_accessor.Flag flag_accessor.Flag.__getitem__ flag_accessor.Flag.__setitem__ - flag_accessor.Flag.value \ No newline at end of file + flag_accessor.Flag.value + +Utility functions +================= + +.. autosummary:: + :toctree: generated/ + + utils.append_names \ No newline at end of file diff --git a/docs/content/user/unc_accessor.rst b/docs/content/user/unc_accessor.rst index 922790d..189014d 100644 --- a/docs/content/user/unc_accessor.rst +++ b/docs/content/user/unc_accessor.rst @@ -186,15 +186,16 @@ A component of uncertainty can be simply be deleted as, # Check uncertainties ds.unc["temperature"].keys() -Renaming Variables ------------------- +Renaming Variables and Dimensions +--------------------------------- -The storage of uncertainty information is underpinned by variable attributes, which include referencing other variables (for example, which variables are the uncertainties associated with a particular observation variable). Because of this it is important, if renaming uncertainty variables, to use **obsarray**'s renaming functionality. This renames the uncertainty variable and safely updates attribute variable references. This is done as follows: +The storage of uncertainty information is underpinned by variable attributes, which include referencing other variables/dimensions (for example, which variables are the uncertainties associated with a particular observation variable). 
Because of this it is important, if renaming uncertainty variables or dimensions, to use **obsarray**'s renaming functionality. This renames the uncertainty variable or dimension and safely updates attribute variable references. This is done as follows (mirroring the interface to `xarray renaming <https://docs.xarray.dev/en/stable/generated/xarray.Dataset.rename.html>`_): .. ipython:: python :okwarning: print(ds.unc["temperature"]) - ds = ds.unc["temperature"]["u_ran_temperature"].rename("u_noise") + ds = ds.unc.rename({"u_ran_temperature": "u_noise"}) + ds = ds.unc.rename_dims({"time": "t"}) print(ds.unc["temperature"]) \ No newline at end of file diff --git a/obsarray/__init__.py b/obsarray/__init__.py index 4a5f4a6..c085c51 100644 --- a/obsarray/__init__.py +++ b/obsarray/__init__.py @@ -9,6 +9,7 @@ from obsarray.templater.template_util import create_ds from obsarray.templater.dstemplater import DSTemplater from obsarray.templater.dswriter import DSWriter +from obsarray.utils import append_names __version__ = get_versions()["version"] del get_versions diff --git a/obsarray/test/test_unc_accessor.py b/obsarray/test/test_unc_accessor.py index ad63f5d..d162433 100644 --- a/obsarray/test/test_unc_accessor.py +++ b/obsarray/test/test_unc_accessor.py @@ -34,7 +34,7 @@ def compare_err_corr_form(self, form, exp_form): self.assertCountEqual(form._unc_var_name, exp_form._unc_var_name) -def create_ds(): +def create_ds(var_suffix="", dim_suffix="", coord_dim_suffix_extra=""): np.random.seed(0) temperature = 15 + 8 * np.random.randn(2, 2, 3) u_r_temperature = temperature * 0.02 @@ -47,42 +47,59 @@ def create_ds(): reference_time = pd.Timestamp("2014-09-05") ds = xr.Dataset( - data_vars=dict( - temperature=(["x", "y", "time"], temperature, {"units": "K"}), - ), - coords=dict( - lon=(["x", "y"], lon), - lat=(["x", "y"], lat), - time=time, - reference_time=reference_time, - ), + data_vars={ + "temperature" + + var_suffix: ( + [ + "x" + dim_suffix, + "y" + dim_suffix, + "time" + dim_suffix + coord_dim_suffix_extra, + ], + temperature, + {"units": 
"K"}, + ), + }, + coords={ + "lon" + var_suffix: (["x" + dim_suffix, "y" + dim_suffix], lon), + "lat" + var_suffix: (["x" + dim_suffix, "y" + dim_suffix], lat), + "time" + var_suffix: ("time" + dim_suffix + coord_dim_suffix_extra, time), + "reference_time": reference_time, + }, attrs=dict(description="Weather related data."), ) - ds.unc["temperature"]["u_ran_temperature"] = ( - ["x", "y", "time"], + ds.unc["temperature" + var_suffix]["u_ran_temperature" + var_suffix] = ( + [ + "x" + dim_suffix, + "y" + dim_suffix, + "time" + dim_suffix + coord_dim_suffix_extra, + ], temperature * 0.05, {"units": "K", "pdf_shape": "gaussian"}, ) - ds.unc["temperature"]["u_sys_temperature"] = ( - ["x", "y", "time"], + ds.unc["temperature" + var_suffix]["u_sys_temperature" + var_suffix] = ( + [ + "x" + dim_suffix, + "y" + dim_suffix, + "time" + dim_suffix + coord_dim_suffix_extra, + ], temperature * 0.03, { "units": "K", "err_corr": [ { - "dim": "x", + "dim": "x" + dim_suffix, "form": "systematic", "params": [], }, { - "dim": "y", + "dim": "y" + dim_suffix, "form": "systematic", "params": [], }, { - "dim": "time", + "dim": "time" + dim_suffix + coord_dim_suffix_extra, "form": "systematic", "params": [], }, @@ -91,19 +108,26 @@ def create_ds(): }, ) - ds.unc["temperature"]["u_str_temperature"] = ( - ["x", "y", "time"], + ds.unc["temperature" + var_suffix]["u_str_temperature" + var_suffix] = ( + [ + "x" + dim_suffix, + "y" + dim_suffix, + "time" + dim_suffix + coord_dim_suffix_extra, + ], temperature * 0.1, { "units": "K", "err_corr": [ { - "dim": ["x", "time"], + "dim": [ + "x" + dim_suffix, + "time" + dim_suffix + coord_dim_suffix_extra, + ], "form": "err_corr_matrix", - "params": ["err_corr_str_temperature"], + "params": ["err_corr_str_temperature" + var_suffix], }, { - "dim": "y", + "dim": "y" + dim_suffix, "form": "systematic", "params": [], }, @@ -112,8 +136,8 @@ def create_ds(): }, ) - ds["err_corr_str_temperature"] = ( - ["x.time", "x.time"], + ds["err_corr_str_temperature" + 
var_suffix] = ( + ["x.time" + dim_suffix, "x.time" + dim_suffix], np.ones( ( temperature.shape[0] * temperature.shape[2], @@ -740,6 +764,44 @@ def test_err_cov_matrix(self, mock_cr2cv, mock_ecrm, mock_value): exp_ecm = xr.DataArray(np.ones((12, 12)), dims=["x.y.time", "x.y.time"]) xr.testing.assert_equal(ecm, exp_ecm) + def test_rename_vars(self): + var_suffix = "_test" + input_ds = create_ds() + + ds = input_ds.unc.rename( + { + "temperature": "temperature" + var_suffix, + "lon": "lon" + var_suffix, + "lat": "lat" + var_suffix, + "time": "time" + var_suffix, + "u_ran_temperature": "u_ran_temperature" + var_suffix, + "u_str_temperature": "u_str_temperature" + var_suffix, + "u_sys_temperature": "u_sys_temperature" + var_suffix, + "err_corr_str_temperature": "err_corr_str_temperature" + var_suffix, + } + ) + + exp_ds = create_ds(var_suffix=var_suffix, coord_dim_suffix_extra=var_suffix) + + xr.testing.assert_identical(ds, exp_ds) + + def test_rename_dims(self): + dim_suffix = "_test" + input_ds = create_ds() + + ds = input_ds.unc.rename_dims( + { + "x": "x" + dim_suffix, + "y": "y" + dim_suffix, + "time": "time" + dim_suffix, + "x.time": "x.time_test", + } + ) + + exp_ds = create_ds(dim_suffix=dim_suffix) + + xr.testing.assert_identical(ds, exp_ds) + if __name__ == "__main__": unittest.main() diff --git a/obsarray/test/test_utils.py b/obsarray/test/test_utils.py new file mode 100644 index 0000000..fb1537a --- /dev/null +++ b/obsarray/test/test_utils.py @@ -0,0 +1,169 @@ +"""test_utils - tests for obsarray.utils""" + +import unittest +import numpy as np +from obsarray import append_names, create_ds +import xarray as xr + +__author__ = "Sam Hunt " +__all__ = [] + + +def create_test_ds(suffix): + # define ds variables + template = { + "temperature" + + suffix: { + "dtype": np.float32, + "dim": ["x" + suffix, "y" + suffix, "time" + suffix], + "attributes": { + "units": "K", + "unc_comps": [ + "u_ran_temperature" + suffix, + "u_sys_temperature" + suffix, + ], + }, + }, + 
"u_ran_temperature" + + suffix: { + "dtype": np.float32, + "dim": ["x" + suffix, "y" + suffix, "time" + suffix], + "attributes": { + "units": "K", + "err_corr": [ + {"dim": "x" + suffix, "form": "random", "params": [], "units": []}, + {"dim": "y" + suffix, "form": "random", "params": [], "units": []}, + { + "dim": "time" + suffix, + "form": "random", + "params": [], + "units": [], + }, + ], + }, + }, + "u_sys_temperature" + + suffix: { + "dtype": np.float32, + "dim": ["x" + suffix, "y" + suffix, "time" + suffix], + "attributes": { + "units": "K", + "err_corr": [ + { + "dim": "x" + suffix, + "form": "systematic", + "params": [], + "units": [], + }, + { + "dim": "y" + suffix, + "form": "systematic", + "params": [], + "units": [], + }, + { + "dim": "time" + suffix, + "form": "systematic", + "params": [], + "units": [], + }, + ], + }, + }, + "pressure" + + suffix: { + "dtype": np.float32, + "dim": ["x" + suffix, "y" + suffix, "time" + suffix], + "attributes": {"units": "Pa", "unc_comps": ["u_str_pressure" + suffix]}, + }, + "u_str_pressure" + + suffix: { + "dtype": np.float32, + "dim": ["x" + suffix, "y" + suffix, "time" + suffix], + "attributes": { + "units": "Pa", + "err_corr": [ + {"dim": "x" + suffix, "form": "random", "params": [], "units": []}, + { + "dim": "y" + suffix, + "form": "err_corr_matrix", + "params": "err_corr_str_pressure_y", + "units": [], + }, + { + "dim": "time" + suffix, + "form": "systematic", + "params": [], + "units": [], + }, + ], + }, + }, + "err_corr_str_pressure_y" + + suffix: { + "dtype": np.float32, + "dim": ["y" + suffix, "y" + suffix], + "attributes": {"units": ""}, + }, + "n_moles" + + suffix: { + "dtype": np.float32, + "dim": ["x" + suffix, "y" + suffix, "time" + suffix], + "attributes": {"units": "", "unc_comps": ["u_ran_n_moles" + suffix]}, + }, + "u_ran_n_moles" + + suffix: { + "dtype": np.float32, + "dim": ["x" + suffix, "y" + suffix, "time" + suffix], + "attributes": { + "units": "", + "err_corr": [ + {"dim": "x" + suffix, 
"form": "random", "params": [], "units": []}, + {"dim": "y" + suffix, "form": "random", "params": [], "units": []}, + { + "dim": "time" + suffix, + "form": "random", + "params": [], + "units": [], + }, + ], + }, + }, + } + + # define dim_size_dict to specify size of arrays + dim_sizes = {"x" + suffix: 20, "y" + suffix: 30, "time" + suffix: 6} + + # create dataset template + ds = create_ds(template, dim_sizes) + + # populate with example data + ds["temperature" + suffix].values = 293 * np.ones((20, 30, 6)) + ds["u_ran_temperature" + suffix].values = 1 * np.ones((20, 30, 6)) + ds["u_sys_temperature" + suffix].values = 0.4 * np.ones((20, 30, 6)) + ds["pressure" + suffix].values = 10**5 * np.ones((20, 30, 6)) + ds["u_str_pressure" + suffix].values = 10 * np.ones((20, 30, 6)) + ds["err_corr_str_pressure_y" + suffix].values = 0.5 * np.ones( + (30, 30) + ) + 0.5 * np.eye(30) + ds["n_moles" + suffix].values = 40 * np.ones((20, 30, 6)) + ds["u_ran_n_moles" + suffix].values = 1 * np.ones((20, 30, 6)) + + ds.attrs["attr" + suffix] = "val" + + return ds + + +class TestAppendNames(unittest.TestCase): + def test_append_names(self): + + input_ds = create_test_ds(suffix="") + ds = append_names(input_ds, "_test") + + exp_ds = create_test_ds(suffix="_test") + + xr.testing.assert_identical(ds, exp_ds) + + +if __name__ == "__main__": + unittest.main() diff --git a/obsarray/unc_accessor.py b/obsarray/unc_accessor.py index 1ade721..e65f35c 100644 --- a/obsarray/unc_accessor.py +++ b/obsarray/unc_accessor.py @@ -9,6 +9,7 @@ from obsarray.templater.dataset_util import DatasetUtil from obsarray.err_corr import err_corr_forms, BaseErrCorrForm from obsarray.utils import empty_err_corr_matrix +from xarray.core.types import T_Dataset __author__ = "Sam Hunt " @@ -894,6 +895,88 @@ def _remove_unc_var(self, obs_var: str, unc_var: str) -> None: del self._obj[unc_var] self._obj[obs_var].attrs["unc_comps"].remove(unc_var) + def rename(self, vars_dict: dict[str, str]) -> T_Dataset: + """ + Returns a 
new dataset with renamed variables - safely handling `unc_vars` and related metadata + + :param vars_dict: Dictionary whose keys are current variable names and whose values are the desired names. The desired names must not be the name of an existing dimension or Variable in the Dataset. + :returns: Dataset with renamed variables + """ + + # handle case that xarray.Dataset.rename renames the dimension associated with a renamed coordinate dimension + coord_dim_dict = { + str(dim): vars_dict[dim] + for dim in self._obj.dims + if (dim in self._obj.coords) and (dim in vars_dict.keys()) + } + ds = self.rename_dims(coord_dim_dict) + + # update metadata where unc_var err corr param to be renamed + unc_var_paths = [] + for obs_var in ds.unc.obs_vars: + for unc_var in ds.unc[obs_var]: + unc_var_paths.append((obs_var, unc_var._unc_var_name)) + + for unc_var_path in unc_var_paths: + unc_var_i = unc_var_path[1] + + for attr in ds[unc_var_i].attrs.keys(): + if (attr[:9] == "err_corr_") and (attr[-7:] == "_params"): + for i, param in enumerate(ds[unc_var_i].attrs[attr]): + if param in vars_dict.keys(): + ds[unc_var_i].attrs[attr][i] = vars_dict[param] + + # safely update unc_vars + for unc_var_path in unc_var_paths: + obs_var_i = unc_var_path[0] + unc_var_i = unc_var_path[1] + if unc_var_i in vars_dict: + ds = ds.unc[obs_var_i][unc_var_i].rename(vars_dict[unc_var_i]) + + # update remaining variable names + non_unc_var_names = list( + filter(lambda x: x not in self.unc_vars.keys(), self._obj.variables.keys()) + ) + var_dict_no_unc = { + n: vars_dict[n] for n in non_unc_var_names if n in vars_dict.keys() + } + ds = ds.rename(var_dict_no_unc) + + return ds + + def rename_dims(self, dims_dict: dict[str, str]) -> T_Dataset: + """ + Returns a new dataset with renamed dimensions - safely handling `unc_vars` related metadata + + :param dims_dict: Dictionary whose keys are current dimension names and whose values are the desired names. 
The desired names must not be the name of an existing dimension or Variable in the Dataset. + :returns: Dataset with renamed dimensions + """ + + # update dimension names + obj = self._obj.rename_dims(dims_dict) + + # update uncertainty metadata related to variable names + unc_var_names = [] + for obs_var in obj.unc.obs_vars: + for unc_var in obj.unc[obs_var]: + unc_var_names.append(unc_var._unc_var_name) + + for unc_var_name in unc_var_names: + for attr in obj[unc_var_name].attrs.keys(): + if (attr[:9] == "err_corr_") and (attr[-4:] == "_dim"): + if isinstance(obj[unc_var_name].attrs[attr], str): + if obj[unc_var_name].attrs[attr] in dims_dict: + obj[unc_var_name].attrs[attr] = dims_dict[ + obj[unc_var_name].attrs[attr] + ] + + if isinstance(obj[unc_var_name].attrs[attr], list): + for i, attr_i in enumerate(obj[unc_var_name].attrs[attr]): + if attr_i in dims_dict: + obj[unc_var_name].attrs[attr][i] = dims_dict[attr_i] + + return obj + if __name__ == "__main__": pass diff --git a/obsarray/utils.py b/obsarray/utils.py index a531dca..17e1fe0 100644 --- a/obsarray/utils.py +++ b/obsarray/utils.py @@ -2,10 +2,10 @@ import numpy as np import xarray as xr - +from xarray.core.types import T_Dataset __author__ = "Sam Hunt " -__all__ = ["empty_err_corr_matrix"] +__all__ = ["empty_err_corr_matrix", "append_names"] def empty_err_corr_matrix(obs_var: xr.DataArray): @@ -27,5 +27,40 @@ def empty_err_corr_matrix(obs_var: xr.DataArray): return err_corr_matrix +def append_names( + ds: T_Dataset, + suffix: str, + skip_vars: bool = False, + skip_dims: bool = False, + skip_attrs: bool = False, +) -> T_Dataset: + """ + Appends a suffix to the names of dataset variables, dimensions, and attributes - safely handling `unc_vars` and associated metadata + + :param ds: xarray dataset + :param suffix: suffix to append to dataset variable, dimension, and attribute names + :param skip_vars: (default: `False`) switch to skip applying suffix to variable names + :param skip_dims: (default: 
`False`) switch to skip applying suffix to dimension names + :param skip_attrs: (default: `False`) switch to skip applying suffix to attribute names + :returns: ds with suffix appended to names of variables, dimensions, attributes + """ + + # update variable names + if not skip_vars: + var_rename = {var_name: var_name + suffix for var_name in ds.variables.keys()} + ds = ds.unc.rename(var_rename) + + # update dimension names + if not skip_dims: + dim_rename = {dim_name: dim_name + suffix for dim_name in ds.dims.keys()} + ds = ds.unc.rename_dims(dim_rename) + + # update attribute names + if not skip_attrs: + ds.attrs = {key + suffix: value for key, value in ds.attrs.items()} + + return ds + + if __name__ == "__main__": pass