# Source code for xcdat.tutorial
"""
This module provides functionality for accessing and working with tutorial
datasets hosted online for use with xCDAT. It includes a function to open
datasets, either from a local cache or by downloading them from a remote
repository.
"""
import os
import pathlib
import sys
import xarray as xr
from xarray.tutorial import _construct_cache_dir, file_formats
import xcdat.bounds # noqa: F401
from xcdat.axis import CFAxisKey
# Directory name used as the default ``cache_dir`` argument of
# ``open_dataset``.
DEFAULT_CACHE_DIR_NAME = "xcdat_tutorial_data"

# Repository hosting the tutorial files and the git ref to fetch them from.
# Both values are interpolated into the download URL built by
# ``open_dataset``.
base_url = "https://github.com/xCDAT/xcdat-data"
version = "main"

# Keys of ``xarray.tutorial.file_formats`` followed by the ERA5 GRIB file
# name.
XARRAY_DATASETS = [*file_formats.keys(), "era5-2mt-2019-03-uk.grib"]

# Maps the short dataset key accepted by ``open_dataset`` to the name of the
# corresponding file in the data repository.
XCDAT_DATASETS: dict[str, str] = {
    # ACCESS-ESM1-5: monthly precipitation.
    "pr_amon_access": "pr_Amon_ACCESS-ESM1-5_historical_r10i1p1f1_gn_185001-201412_subset.nc",
    # CESM2: monthly ocean salinity.
    "so_omon_cesm2": "so_Omon_CESM2_historical_r1i1p1f1_gn_185001-201412_subset.nc",
    # ACCESS-ESM1-5: monthly near-surface air temperature.
    "tas_amon_access": "tas_Amon_ACCESS-ESM1-5_historical_r10i1p1f1_gn_185001-201412_subset.nc",
    # ACCESS-ESM1-5: 3-hourly near-surface air temperature.
    "tas_3hr_access": "tas_3hr_ACCESS-ESM1-5_historical_r10i1p1f1_gn_201001010300-201501010000_subset.nc",
    # CanESM5: monthly near-surface air temperature.
    "tas_amon_canesm5": "tas_Amon_CanESM5_historical_r13i1p1f1_gn_185001-201412_subset.nc",
    # CESM2: monthly ocean potential temperature.
    "thetao_omon_cesm2": "thetao_Omon_CESM2_historical_r1i1p1f1_gn_185001-201412_subset.nc",
    # E3SM-2-0: monthly cloud fraction.
    "cl_amon_e3sm2": "cl_Amon_E3SM-2-0_historical_r1i1p1f1_gr_185001-189912_subset.nc",
    # E3SM-2-0: monthly air temperature.
    "ta_amon_e3sm2": "ta_Amon_E3SM-2-0_historical_r1i1p1f1_gr_185001-189912_subset.nc",
}
# [docs]
def open_dataset(
    name: str,
    cache: bool = True,
    cache_dir: None | str | os.PathLike = DEFAULT_CACHE_DIR_NAME,
    add_bounds: list[CFAxisKey] | tuple[CFAxisKey, ...] | None = ("X", "Y"),
    **kargs,
) -> xr.Dataset:
    """Open a dataset from the online repository (requires internet).

    This function is mostly based on ``xarray.tutorial.open_dataset()`` with
    some modifications, including adding missing bounds to the dataset.

    If a local copy is found then always use that to avoid network traffic.

    Available xCDAT datasets:

    - ``"pr_amon_access"``: Monthly precipitation data from the ACCESS-ESM1-5 model.
    - ``"so_omon_cesm2"``: Monthly ocean salinity data from the CESM2 model.
    - ``"tas_amon_access"``: Monthly near-surface air temperature from the ACCESS-ESM1-5 model.
    - ``"tas_3hr_access"``: 3-hourly near-surface air temperature from the ACCESS-ESM1-5 model.
    - ``"tas_amon_canesm5"``: Monthly near-surface air temperature from the CanESM5 model.
    - ``"thetao_omon_cesm2"``: Monthly ocean potential temperature from the CESM2 model.
    - ``"cl_amon_e3sm2"``: Monthly cloud fraction data from the E3SM-2-0 model.
    - ``"ta_amon_e3sm2"``: Monthly air temperature data from the E3SM-2-0 model.

    Parameters
    ----------
    name : str
        Key of the dataset to open (e.g., "tas_amon_access"). Must be one of
        the keys of ``XCDAT_DATASETS``.
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls. If
        False, the dataset is loaded into memory and the downloaded file is
        deleted before returning.
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    add_bounds : list[CFAxisKey] | tuple[CFAxisKey] | None, optional
        List or tuple of axis keys for which to add bounds, by default
        ("X", "Y").
    **kargs : dict, optional
        Passed to ``xcdat.open_dataset``.

    Returns
    -------
    xr.Dataset
        The opened dataset.

    Raises
    ------
    ImportError
        If ``pooch`` is not installed.
    ValueError
        If ``name`` is not a key of ``XCDAT_DATASETS``.
    """
    try:
        import pooch
    except ImportError as e:
        raise ImportError(
            "tutorial.open_dataset depends on pooch to download and manage datasets."
            " To proceed please install pooch."
        ) from e

    # Imported lazily to avoid a circular import in __init__.py. Aliased so
    # it does not shadow this function's own name.
    from xcdat.dataset import open_dataset as _xc_open_dataset

    # Only surface pooch messages at WARNING level or above.
    logger = pooch.get_logger()
    logger.setLevel("WARNING")

    cache_dir = _construct_cache_dir(cache_dir)

    filename = XCDAT_DATASETS.get(name)
    if filename is None:
        # Render the keys as a plain list; interpolating ``.keys()`` directly
        # would print a ``dict_keys([...])`` wrapper in the message.
        raise ValueError(
            f"Dataset {name} not found. Available xcdat datasets are: {list(XCDAT_DATASETS)}"
        )

    path = pathlib.Path(filename)
    url = f"{base_url}/raw/{version}/{path.name}"

    # Identify the client to the server with the installed xcdat version.
    headers = {"User-Agent": f"xcdat {sys.modules['xcdat'].__version__}"}
    downloader = pooch.HTTPDownloader(headers=headers)

    # NOTE(review): ``known_hash=None`` means the download is not verified
    # against a checksum.
    filepath = pooch.retrieve(
        url=url, known_hash=None, path=cache_dir, downloader=downloader
    )

    ds = _xc_open_dataset(filepath, **kargs, add_bounds=add_bounds)

    if not cache:
        # Pull everything into memory, then release the underlying file so
        # the handle is not leaked and the file can be removed (an open
        # handle blocks deletion on some platforms, e.g. Windows).
        ds = ds.load()
        ds.close()
        pathlib.Path(filepath).unlink()

    return ds