Source code for xcdat.utils
import importlib
import json
from typing import Hashable, cast
import xarray as xr
from dask.array.core import Array
[docs]
def compare_datasets(ds1: xr.Dataset, ds2: xr.Dataset) -> dict[str, list[Hashable]]:
    """Compare the keys and values of two datasets.

    This utility function is especially useful for debugging tests that
    involve comparing two Dataset objects for being identical or equal.

    Checks include:

    - Unique keys - keys that exist in only one of the two datasets.
    - Non-identical keys - keys present in both datasets whose values fail
      ``identical()`` (i.e., they differ in dimensions, coordinates, values,
      name, attributes, or attributes on any coordinate).
    - Non-equal keys - keys present in both datasets whose values fail
      ``equals()`` (i.e., they differ in dimensions, coordinates, or values;
      attributes are not considered). Keys that are non-equal are always
      reported as non-identical too.

    Parameters
    ----------
    ds1 : xr.Dataset
        The first Dataset.
    ds2 : xr.Dataset
        The second Dataset.

    Returns
    -------
    dict[str, list[Hashable]]
        A dictionary with the entries ``unique_coords``,
        ``unique_data_vars``, ``nonidentical_coords``,
        ``nonidentical_data_vars``, ``nonequal_coords``, and
        ``nonequal_data_vars``, each mapping to a list of keys.
    """
    results: dict[str, list[Hashable]] = {
        # Symmetric difference: keys found in exactly one of the datasets.
        "unique_coords": list(ds1.coords.keys() ^ ds2.coords.keys()),
        "unique_data_vars": list(ds1.data_vars.keys() ^ ds2.data_vars.keys()),
        "nonidentical_coords": [],
        "nonidentical_data_vars": [],
        "nonequal_coords": [],
        "nonequal_data_vars": [],
    }

    # Only keys present in both datasets can be compared value-by-value.
    shared_keys = {
        "coords": ds1.coords.keys() & ds2.coords.keys(),
        "data_vars": ds1.data_vars.keys() & ds2.data_vars.keys(),
    }

    for key_type, keys in shared_keys.items():
        for key in keys:
            var1, var2 = ds1[key], ds2[key]

            # ``identical`` is the stricter check (``equals`` plus name and
            # attributes), so identical variables are necessarily equal and
            # the second full comparison can be skipped.
            if var1.identical(var2):
                continue

            results[f"nonidentical_{key_type}"].append(key)

            if not var1.equals(var2):
                results[f"nonequal_{key_type}"].append(key)

    return results
def str_to_bool(attr: str) -> bool:
    """Convert the string ``"True"`` or ``"False"`` to the matching bool.

    netCDF files can only store attributes with a type of str, Number,
    ndarray, number, list, or tuple.

    xCDAT methods store boolean attributes as strings. This function will
    convert such attributes back to booleans.

    Parameters
    ----------
    attr : str
        The boolean attribute as type str.

    Returns
    -------
    bool
        The boolean attribute as type bool.

    Raises
    ------
    ValueError
        If ``attr`` is not exactly ``"True"`` or ``"False"`` (the check is
        case-sensitive).
    """
    if attr != "True" and attr != "False":
        # The trailing space after "Python" matters: adjacent string literals
        # are concatenated verbatim.
        raise ValueError(
            "The attribute is not a string representation of a Python "
            "bool ('True' or 'False')"
        )

    # Only "True" and "False" reach this point, so a direct comparison is
    # sufficient.
    return attr == "True"
def _has_module(modname: str) -> bool: # pragma: no cover
"""Checks if the specified module is installed in the Python environment.
Parameters
----------
modname : str
The name of the module.
Returns
-------
bool
"""
try:
importlib.import_module(modname)
has = True
except ImportError:
has = False
return has
def _if_multidim_dask_array_then_load(
    obj: xr.DataArray | xr.Dataset,
) -> xr.DataArray | xr.Dataset | None:
    """Load a multidimensional, Dask-backed xarray object into memory.

    If the underlying array of the object is a multidimensional, lazy Dask
    Array, it is loaded via ``obj.load()``.

    This function must be called before manipulating values in a
    multidimensional Dask Array, which xarray does not support directly.
    Otherwise, it raises `NotImplementedError xarray can't set arrays with
    multiple array indices to dask yet`.

    Parameters
    ----------
    obj : xr.DataArray | xr.Dataset
        The xr.DataArray or xr.Dataset. If the xarray object is chunked,
        the underlying array will be a Dask Array.

    Returns
    -------
    xr.DataArray | xr.Dataset | None
        The result of ``obj.load()`` when the underlying array is a Dask
        Array with more than one dimension. Otherwise, None.
    """
    # NOTE(review): the check reads ``obj.data`` and ``obj.ndim``, which look
    # like DataArray-only attributes -- confirm the behavior for xr.Dataset.
    if not isinstance(obj.data, Array) or obj.ndim <= 1:
        return None

    return obj.load()
def _get_masked_weights(dv: xr.DataArray, weights: xr.DataArray) -> xr.DataArray:
    """Get weights with missing data (`np.nan`) receiving no weight (zero).

    Parameters
    ----------
    dv : xr.DataArray
        The variable.
    weights : xr.DataArray
        A DataArray containing either the regional or temporal weights used for
        weighted averaging. ``weights`` must include the same axis dimensions
        and dimensional sizes as the data variable.

    Returns
    -------
    xr.DataArray
        The masked weights: 0.0 where ``dv`` is null, and the corresponding
        value from ``weights`` elsewhere.
    """
    # ``isnull()`` returns a new boolean mask and leaves ``dv`` untouched, so
    # the variable does not need to be copied first.
    return xr.where(dv.isnull(), 0.0, weights)
def _validate_min_weight(min_weight: float | None) -> float:
"""Validate the ``min_weight`` value.
Parameters
----------
min_weight : float | None
Fraction of data coverage (i..e, weight) needed to return a
spatial average value. Value must range from 0 to 1.
Returns
-------
float
The required weight percentage.
Raises
------
ValueError
If the `min_weight` argument is less than 0.
ValueError
If the `min_weight` argument is greater than 1.
"""
if min_weight is None:
return 0.0
elif min_weight < 0.0:
raise ValueError(
"min_weight argument is less than 0. min_weight must be between 0 and 1.",
)
elif min_weight > 1.0:
raise ValueError(
"min_weight argument is greater than 1. min_weight must be between 0 and 1.",
)
return min_weight
def _as_dataarray(x) -> xr.DataArray:
    """Tell static type checkers that `x` is an xarray.DataArray.

    Type checkers like mypy can lose track of the xarray type in some
    expressions. For example, calling NumPy functions directly on Xarray
    objects can cause mypy to think a NumPy ndarray is being used, leading
    to type errors. Wrapping such an expression in this function restores
    the intended type for the checker.

    It relies on xarray's __array_ufunc__ behavior when applying NumPy ufuncs
    to DataArray inputs. No runtime conversion is performed.

    Parameters
    ----------
    x : Any
        The input to be treated as an xarray.DataArray.

    Returns
    -------
    xr.DataArray
        The input `x` cast as an xarray.DataArray.
    """
    # ``typing.cast`` only informs the type checker; the value is passed
    # through as-is.
    as_dataarray = cast(xr.DataArray, x)
    return as_dataarray