Source code for xcdat.utils

import importlib
import json
from typing import Dict, List, Optional, Union

import xarray as xr
from dask.array.core import Array



[docs]
def compare_datasets(ds1: xr.Dataset, ds2: xr.Dataset) -> Dict[str, List[str]]:
    """Report the keys on which two datasets differ.

    Intended as a debugging aid for tests that assert two Dataset objects are
    identical or equal: it shows which coordinates and data variables are
    responsible when such an assertion fails.

    The report is split into three categories, each computed separately for
    coordinates and for data variables:

    - Unique keys - keys present in exactly one of the two datasets.
    - Non-identical keys - keys present in both datasets whose values do
      *not* share the same dimensions, coordinates, values, name, attributes,
      and attributes on all coordinates.
    - Non-equal keys - keys present in both datasets whose values do *not*
      share the same dimensions, coordinates, and values (attributes are
      ignored). Every non-equal key is also non-identical.

    Parameters
    ----------
    ds1 : xr.Dataset
        The first Dataset.
    ds2 : xr.Dataset
        The second Dataset.

    Returns
    -------
    Dict[str, List[str]]
        A dictionary with the entries ``"unique_coords"``,
        ``"unique_data_vars"``, ``"nonidentical_coords"``,
        ``"nonidentical_data_vars"``, ``"nonequal_coords"``, and
        ``"nonequal_data_vars"``, each mapping to a list of keys.
    """
    coords_1, coords_2 = ds1.coords.keys(), ds2.coords.keys()
    data_vars_1, data_vars_2 = ds1.data_vars.keys(), ds2.data_vars.keys()

    # Symmetric difference: keys found in one dataset but not the other.
    results: Dict[str, List[str]] = {
        "unique_coords": list(coords_1 ^ coords_2),
        "unique_data_vars": list(data_vars_1 ^ data_vars_2),
        "nonidentical_coords": [],
        "nonidentical_data_vars": [],
        "nonequal_coords": [],
        "nonequal_data_vars": [],
    }

    # Only keys common to both datasets can be compared value-by-value.
    shared_keys = (
        ("coords", coords_1 & coords_2),
        ("data_vars", data_vars_1 & data_vars_2),
    )

    for category, names in shared_keys:
        nonidentical = results[f"nonidentical_{category}"]
        nonequal = results[f"nonequal_{category}"]

        for name in names:
            left, right = ds1[name], ds2[name]
            is_identical = left.identical(right)
            is_equal = left.equals(right)

            if not is_identical:
                nonidentical.append(name)
            if not is_equal:
                nonequal.append(name)

    return results



def str_to_bool(attr: str) -> bool:
    """Converts bool string to bool.

    netCDF files can only store attributes with a type of str, Number, ndarray,
    number, list, or tuple.

    xCDAT methods store boolean attributes as strings. This function will
    convert such attributes back to booleans.

    Parameters
    ----------
    attr : str
        The boolean attribute as type str. Must be exactly ``"True"`` or
        ``"False"`` (case-sensitive).

    Returns
    -------
    bool
        The boolean attribute as type bool.

    Raises
    ------
    ValueError
        If ``attr`` is anything other than ``"True"`` or ``"False"``.
    """
    if attr not in ("True", "False"):
        # The trailing space after "Python" matters: adjacent string literals
        # are concatenated verbatim.
        raise ValueError(
            "The attribute is not a string representation of a Python "
            "bool ('True' or 'False')"
        )

    # After validation only two values remain, so a direct comparison suffices.
    return attr == "True"


def _has_module(modname: str) -> bool:  # pragma: no cover
    """Checks if the specified module is installed in the Python environment.

    Parameters
    ----------
    modname : str
        The name of the module.

    Returns
    -------
    bool
    """
    try:
        importlib.import_module(modname)
        has = True
    except ImportError:
        has = False

    return has


def _if_multidim_dask_array_then_load(
    obj: Union[xr.DataArray, xr.Dataset]
) -> Optional[Union[xr.DataArray, xr.Dataset]]:
    """
    Load an xarray object into memory when it is backed by a multidimensional,
    lazy Dask Array.

    Call this before manipulating values in a multidimensional Dask Array,
    which xarray does not support directly. Otherwise, xarray raises
    `NotImplementedError xarray can't set arrays with multiple array indices
    to dask yet`.

    NOTE(review): the check reads ``obj.data`` and ``obj.ndim``; confirm that
    both are available when ``obj`` is an xr.Dataset.

    Parameters
    ----------
    obj : Union[xr.DataArray, xr.Dataset]
        The xr.DataArray or xr.Dataset. If the xarray object is chunked,
        the underlying array will be a Dask Array.

    Returns
    -------
    Optional[Union[xr.DataArray, xr.Dataset]]
        The result of ``obj.load()`` if the underlying array is a Dask Array
        with more than one dimension, otherwise None.
    """
    # ``and`` short-circuits: the dimensionality is only inspected for
    # Dask-backed objects.
    needs_loading = isinstance(obj.data, Array) and obj.ndim > 1
    if not needs_loading:
        return None

    return obj.load()