# Source code for xcdat.utils
import importlib
import json
from typing import Dict, List, Optional, Union
import xarray as xr
from dask.array.core import Array
# [docs]
def compare_datasets(ds1: xr.Dataset, ds2: xr.Dataset) -> Dict[str, List[str]]:
    """Compares the keys and values of two datasets.

    This utility function is especially useful for debugging tests that
    involve comparing two Dataset objects for being identical or equal.

    Checks include:

    - Unique keys - keys that exist only in one of the two datasets.
    - Non-identical keys - keys present in both datasets whose values do
      *not* have the same dimensions, coordinates, values, name, attributes,
      and attributes on all coordinates.
    - Non-equal keys - keys present in both datasets whose values do *not*
      have the same dimensions, coordinates, and values (attributes are not
      considered). Keys that are non-equal are also non-identical.

    Parameters
    ----------
    ds1 : xr.Dataset
        The first Dataset.
    ds2 : xr.Dataset
        The second Dataset.

    Returns
    -------
    Dict[str, List[str]]
        A dictionary with the entries ``"unique_coords"``,
        ``"unique_data_vars"``, ``"nonidentical_coords"``,
        ``"nonidentical_data_vars"``, ``"nonequal_coords"``, and
        ``"nonequal_data_vars"``, each mapping to a list of keys. The
        ``unique_*`` lists are built from set operations, so their order is
        not guaranteed.
    """
    results: Dict[str, List[str]] = {
        # Symmetric difference: keys found in exactly one of the datasets.
        "unique_coords": list(ds1.coords.keys() ^ ds2.coords.keys()),
        "unique_data_vars": list(ds1.data_vars.keys() ^ ds2.data_vars.keys()),
        "nonidentical_coords": [],
        "nonidentical_data_vars": [],
        "nonequal_coords": [],
        "nonequal_data_vars": [],
    }

    # Only keys present in both datasets can be compared value-by-value.
    shared_keys = {
        "coords": ds1.coords.keys() & ds2.coords.keys(),
        "data_vars": ds1.data_vars.keys() & ds2.data_vars.keys(),
    }

    for key_type, keys in shared_keys.items():
        for key in keys:
            var1, var2 = ds1[key], ds2[key]

            # `identical` is the stricter check (it implies `equals`), so an
            # identical pair needs no second pass over the data.
            if var1.identical(var2):
                continue

            results[f"nonidentical_{key_type}"].append(key)
            if not var1.equals(var2):
                results[f"nonequal_{key_type}"].append(key)

    return results
def str_to_bool(attr: str) -> bool:
    """Converts bool string to bool.

    netCDF files can only store attributes with a type of str, Number,
    ndarray, list, or tuple.

    xCDAT methods store boolean attributes as strings. This function will
    convert such attributes back to booleans.

    Parameters
    ----------
    attr : str
        The boolean attribute as type str. Must be exactly ``"True"`` or
        ``"False"`` (case-sensitive).

    Returns
    -------
    bool
        The boolean attribute as type bool.

    Raises
    ------
    ValueError
        If ``attr`` is neither ``"True"`` nor ``"False"``.
    """
    if attr == "True":
        return True
    if attr == "False":
        return False

    # Note the trailing space after "Python": adjacent string literals are
    # concatenated verbatim, so without it the message reads "Pythonbool".
    raise ValueError(
        "The attribute is not a string representation of a Python "
        "bool ('True' or 'False')"
    )
def _has_module(modname: str) -> bool: # pragma: no cover
"""Checks if the specified module is installed in the Python environment.
Parameters
----------
modname : str
The name of the module.
Returns
-------
bool
"""
try:
importlib.import_module(modname)
has = True
except ImportError:
has = False
return has
def _if_multidim_dask_array_then_load(
    obj: Union[xr.DataArray, xr.Dataset],
) -> Optional[Union[xr.DataArray, xr.Dataset]]:
    """
    Load a multidimensional, lazy Dask-backed xarray object into memory.

    If the underlying array for the xarray object is a multidimensional Dask
    Array, it is loaded into an in-memory NumPy array via ``obj.load()``.

    This function must be called before manipulating values in a
    multidimensional Dask Array, which xarray does not support directly.
    Otherwise, it raises `NotImplementedError xarray can't set arrays with
    multiple array indices to dask yet`.

    Parameters
    ----------
    obj : Union[xr.DataArray, xr.Dataset]
        The xr.DataArray or xr.Dataset. If the xarray object is chunked,
        the underlying array will be a Dask Array.
        NOTE(review): this function reads ``obj.data`` and ``obj.ndim``;
        confirm that both are available when an xr.Dataset is passed.

    Returns
    -------
    Optional[Union[xr.DataArray, xr.Dataset]]
        The result of ``obj.load()`` if ``obj.data`` is a Dask Array with
        more than one dimension, otherwise None.
    """
    backed_by_dask = isinstance(obj.data, Array)

    # Nothing to do for in-memory data or for arrays with at most one
    # dimension.
    if not backed_by_dask or obj.ndim <= 1:
        return None

    return obj.load()