Source code for xcdat.utils

import importlib
import json
from typing import Hashable, cast

import xarray as xr
from dask.array.core import Array


def compare_datasets(ds1: xr.Dataset, ds2: xr.Dataset) -> dict[str, list[Hashable]]:
    """Compares the keys and values of two datasets.

    This utility function is especially useful for debugging tests that
    involve comparing two Dataset objects for being identical or equal.

    Checks include:

    - Unique keys - keys that exist only in one of the two datasets.
    - Non-identical keys - shared keys whose values fail
      ``DataArray.identical`` (xarray documents this as comparing dimensions,
      coordinates, values, name, attributes, and attributes on all
      coordinates).
    - Non-equal keys - shared keys whose values fail ``DataArray.equals``
      (xarray documents this as comparing dimensions, coordinates, and values,
      but not attributes). Keys that are non-equal are also non-identical.

    Keys are reported in dataset order (``ds1`` first, then ``ds2`` for keys
    found only there) so that the output is reproducible between runs.

    Parameters
    ----------
    ds1 : xr.Dataset
        The first Dataset.
    ds2 : xr.Dataset
        The second Dataset.

    Returns
    -------
    dict[str, list[Hashable]]
        A dictionary with the entries ``unique_coords``, ``unique_data_vars``,
        ``nonidentical_coords``, ``nonidentical_data_vars``,
        ``nonequal_coords``, and ``nonequal_data_vars``, each mapping to the
        list of matching keys.
    """
    coords1, coords2 = ds1.coords, ds2.coords
    vars1, vars2 = ds1.data_vars, ds2.data_vars

    # Membership tests against the mappings (rather than set algebra on their
    # key views) keep the keys in a stable, dataset-defined order.
    results: dict[str, list[Hashable]] = {
        "unique_coords": [k for k in coords1 if k not in coords2]
        + [k for k in coords2 if k not in coords1],
        "unique_data_vars": [k for k in vars1 if k not in vars2]
        + [k for k in vars2 if k not in vars1],
        "nonidentical_coords": [],
        "nonidentical_data_vars": [],
        "nonequal_coords": [],
        "nonequal_data_vars": [],
    }

    shared_keys: dict[str, list[Hashable]] = {
        "coords": [k for k in coords1 if k in coords2],
        "data_vars": [k for k in vars1 if k in vars2],
    }

    for key_type, keys in shared_keys.items():
        for key in keys:
            left, right = ds1[key], ds2[key]

            # Identical values are necessarily equal, so the second (possibly
            # expensive) comparison is only needed for non-identical keys.
            if left.identical(right):
                continue

            results[f"nonidentical_{key_type}"].append(key)
            if not left.equals(right):
                results[f"nonequal_{key_type}"].append(key)

    return results
def str_to_bool(attr: str) -> bool:
    """Converts bool string to bool.

    netCDF files can only store attributes with a type of str, Number,
    ndarray, number, list, or tuple. xCDAT methods store boolean attributes as
    strings. This function will convert such attributes back to booleans.

    Parameters
    ----------
    attr : str
        The boolean attribute as type str.

    Returns
    -------
    bool
        The boolean attribute as type bool.

    Raises
    ------
    ValueError
        If ``attr`` is neither ``"True"`` nor ``"False"``.
    """
    if attr not in ("True", "False"):
        # The trailing space after "Python" matters: adjacent string literals
        # are concatenated verbatim.
        raise ValueError(
            "The attribute is not a string representation of a Python "
            "bool ('True' or 'False')"
        )

    # JSON spells its booleans in lowercase, so the lowered string parses
    # directly into a Python bool.
    return json.loads(attr.lower())


def _has_module(modname: str) -> bool:  # pragma: no cover
    """Checks if the specified module is installed in the Python environment.

    The check is performed by actually importing the module.

    Parameters
    ----------
    modname : str
        The name of the module.

    Returns
    -------
    bool
        True if the module can be imported, False if the import raises
        ``ImportError``.
    """
    try:
        importlib.import_module(modname)
    except ImportError:
        return False

    return True


def _if_multidim_dask_array_then_load(
    obj: xr.DataArray | xr.Dataset,
) -> xr.DataArray | xr.Dataset | None:
    """Load a multidimensional, lazy Dask Array into memory.

    If the underlying array for an xr.DataArray or xr.Dataset is a
    multidimensional, lazy Dask Array, load it into an in-memory NumPy array.

    This function must be called before manipulating values in a
    multidimensional Dask Array, which xarray does not support directly.
    Otherwise, it raises `NotImplementedError xarray can't set arrays with
    multiple array indices to dask yet`.

    Parameters
    ----------
    obj : xr.DataArray | xr.Dataset
        The xr.DataArray or xr.Dataset. If the xarray object is chunked, the
        underlying array will be a Dask Array.

    Returns
    -------
    xr.DataArray | xr.Dataset | None
        The result of ``obj.load()`` if the underlying array is a Dask Array
        with more than one dimension. Otherwise, None.
    """
    # NOTE(review): `.data` and `.ndim` look like DataArray attributes; confirm
    # how callers are expected to pass an xr.Dataset through this check.
    if isinstance(obj.data, Array) and obj.ndim > 1:
        return obj.load()

    return None


def _get_masked_weights(dv: xr.DataArray, weights: xr.DataArray) -> xr.DataArray:
    """Get weights with missing data (`np.nan`) receiving no weight (zero).

    Parameters
    ----------
    dv : xr.DataArray
        The variable.
    weights : xr.DataArray
        A DataArray containing either the regional or temporal weights used
        for weighted averaging. ``weights`` must include the same axis
        dimensions and dimensional sizes as the data variable.

    Returns
    -------
    xr.DataArray
        The masked weights.
    """
    # `isnull()` returns a new object, so `dv` does not need to be copied
    # before building the mask.
    masked_weights = xr.where(dv.isnull(), 0.0, weights)

    return masked_weights


def _validate_min_weight(min_weight: float | None) -> float:
    """Validate the ``min_weight`` value.

    Parameters
    ----------
    min_weight : float | None
        Fraction of data coverage (i.e., weight) needed to return a spatial
        average value. Value must range from 0 to 1.

    Returns
    -------
    float
        The required weight percentage. ``None`` is treated as 0.0.

    Raises
    ------
    ValueError
        If the `min_weight` argument is less than 0.
    ValueError
        If the `min_weight` argument is greater than 1.
    """
    if min_weight is None:
        return 0.0

    if min_weight < 0.0:
        raise ValueError(
            "min_weight argument is less than 0. min_weight must be between 0 and 1.",
        )

    if min_weight > 1.0:
        raise ValueError(
            "min_weight argument is greater than 1. min_weight must be between 0 and 1.",
        )

    return min_weight


def _as_dataarray(x) -> xr.DataArray:
    """Assert (for typing purposes) that `x` is an xarray.DataArray.

    This function is useful for type checkers like mypy to understand that the
    input `x` should be treated as an xarray.DataArray. For example, calling
    NumPy functions directly on Xarray objects can cause mypy to think a NumPy
    ndarray is being used, leading to type errors. It relies on xarray's
    __array_ufunc__ behavior when applying NumPy ufuncs to DataArray inputs.

    No runtime conversion is performed.

    Parameters
    ----------
    x : Any
        The input to be treated as an xarray.DataArray.

    Returns
    -------
    xr.DataArray
        The input `x` cast as an xarray.DataArray.
    """
    return cast(xr.DataArray, x)