Source code for esmvalcore.cmor.table

"""CMOR information reader for ESMValTool.

Read variable information from CMOR 2 and CMOR 3 tables and make it
easily available for the other components of ESMValTool.
"""

from __future__ import annotations

import copy
import glob
import importlib
import json
import logging
import os
from collections import Counter
from functools import lru_cache, total_ordering
from pathlib import Path
from typing import TYPE_CHECKING, Self

import yaml

from esmvalcore.exceptions import RecipeError

if TYPE_CHECKING:
    from collections.abc import Iterable
    from io import TextIOWrapper

    from esmvalcore.config import Config, Session
    from esmvalcore.typing import Facets

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Registry of CMOR info objects keyed by project name. The lookup helpers in
# this module index it with the project facet.
CMOR_TABLES: dict[str, InfoBase] = {}
"""dict of str, obj: CMOR info objects.

.. deprecated:: 2.14.0
    The global ``CMOR_TABLES`` dictionary is deprecated and will be removed in
    ESMValCore v2.16.0. Please use :func:`~esmvalcore.cmor.table.get_tables`
    to access the CMOR tables instead.

.. note::
    If this dictionary is empty, it can be populated by loading the global
    configuration by importing the :mod:`esmvalcore.config` module.
"""

# Attributes that `_update_cmor_facets` copies from the CMOR table entry into
# the facets when the facets do not already define them.
_CMOR_KEYS = (
    "standard_name",
    "long_name",
    "units",
    "modeling_realm",
    "frequency",
)


def _get_institutes(project: str, dataset: str) -> list[str]:
    """Look up the institutes registered for ``dataset`` in ``project``.

    An empty list is returned when the project has no CMOR table, when the
    table has no ``institutes`` mapping, or when the dataset is not listed.
    """
    try:
        found = CMOR_TABLES[project].institutes[dataset]  # type: ignore[attr-defined]
    except (KeyError, AttributeError):
        found = []
    return found


def _get_activity(
    project: str,
    exp: str | list[str],
) -> str | list[str] | None:
    """Look up the activity for one experiment or for a list of experiments.

    For a list, one activity per experiment is returned. ``None`` is returned
    as soon as any lookup fails (unknown project, table without an
    ``activities`` mapping, or unknown experiment).
    """

    def first_activity(name: str) -> str:
        # Only the first activity listed for an experiment is used.
        return CMOR_TABLES[project].activities[name][0]  # type: ignore[attr-defined]

    try:
        if not isinstance(exp, list):
            return first_activity(exp)
        return [first_activity(name) for name in exp]
    except (KeyError, AttributeError):
        return None


def _update_cmor_facets(facets: Facets) -> None:
    """Fill in missing entries of `facets` from the project's CMOR table.

    The table entry is looked up with the ``project``, ``mip`` and
    ``short_name`` facets. Keys listed in ``_CMOR_KEYS`` are only added when
    they are not already present, and the same holds for ``institute`` and
    ``activity``.

    Raises
    ------
    RecipeError
        If no table entry can be found for the variable.
    """
    project: str = facets["project"]  # type: ignore[assignment]
    mip: str = facets["mip"]  # type: ignore[assignment]
    short_name: str = facets["short_name"]  # type: ignore[assignment]
    derive: bool = facets.get("derive", False)  # type: ignore[assignment]

    table_entry = None
    project_tables = CMOR_TABLES.get(project)
    if project_tables:
        table_entry = project_tables.get_variable(
            mip,
            short_name,
            branding_suffix=facets.get("branding_suffix"),  # type: ignore[arg-type]
            derived=derive,
        )
    if table_entry is None:
        msg = (
            f"Unable to load CMOR table (project) '{project}' for variable "
            f"'{short_name}' with mip '{mip}'"
        )
        raise RecipeError(msg)

    facets["original_short_name"] = table_entry.short_name

    for key in _CMOR_KEYS:
        if key in facets:
            # Values given explicitly always win over the table.
            continue
        value = getattr(table_entry, key, None)
        if value is None:
            logger.debug(
                "Failed to add key %s to variable %s from CMOR table",
                key,
                facets,
            )
            continue
        facets[key] = value

    if "dataset" in facets and "institute" not in facets:
        institute = _get_institutes(project, facets["dataset"])  # type: ignore[arg-type]
        if institute:
            facets["institute"] = institute

    if "exp" in facets and "activity" not in facets:
        activity = _get_activity(project, facets["exp"])  # type: ignore[arg-type]
        if activity:
            facets["activity"] = activity


def _get_mips(project: str, short_name: str) -> list[str]:
    """List the MIP tables of ``project`` that define ``short_name``.

    A table matches when ``short_name`` is one of its entry names or when any
    of its entries has that ``short_name`` attribute.
    """
    matching: list[str] = []
    for mip, table in CMOR_TABLES[project].tables.items():
        if short_name in table:
            matching.append(mip)
            continue
        for vardef in table.values():
            if short_name == vardef.short_name:
                matching.append(mip)
                break
    return matching


def _get_branding_suffixes(
    project: str,
    mip: str,
    short_name: str,
) -> list[str]:
    """List the branding suffixes defined for ``short_name`` in table ``mip``.

    The suffix is whatever follows the first underscore of the entry name;
    entries without an underscore are ignored.
    """
    suffixes: list[str] = []
    for branded_name, vardef in CMOR_TABLES[project].tables[mip].items():
        if short_name != vardef.short_name:
            continue
        _, underscore, suffix = branded_name.partition("_")
        if underscore:
            suffixes.append(suffix)
    return suffixes


def get_var_info(
    project: str,
    mip: str,
    short_name: str,
    branding_suffix: str | None = None,
) -> VariableInfo | None:
    """Get variable information.

    Note
    ----
    If `project=CORDEX` and the `mip` ends with 'hr', it is cropped to 'h'
    since CORDEX X-hourly tables define the `mip` as ending in 'h' instead of
    'hr'.

    Parameters
    ----------
    project:
        Dataset's project.
    mip:
        Variable's CMOR table, i.e., MIP.
    short_name:
        Variable's short name.
    branding_suffix:
        A suffix that will be appended to ``short_name`` when looking up the
        variable in the CMOR table.

    Returns
    -------
    VariableInfo | None
        `VariableInfo` object for the requested variable if found, ``None``
        otherwise.

    Raises
    ------
    KeyError
        No CMOR tables available for `project`.

    """
    if project not in CMOR_TABLES:
        msg = (
            f"No CMOR tables available for project '{project}'. The following "
            f"tables are available: {', '.join(CMOR_TABLES)}."
        )
        raise KeyError(msg)

    # CORDEX X-hourly tables define the mip as ending in 'h' instead of 'hr'.
    # Only the trailing 'hr' is rewritten; any other 'hr' in the name is kept.
    if project == "CORDEX" and mip.endswith("hr"):
        mip = mip.removesuffix("hr") + "h"

    return CMOR_TABLES[project].get_variable(
        mip,
        short_name,
        branding_suffix=branding_suffix,
    )
def read_cmor_tables(cfg_developer: Path | None = None) -> None:
    """Read cmor tables required in the configuration.

    The global ``CMOR_TABLES`` dictionary is emptied and refilled with the
    tables described by the configuration file.

    .. deprecated:: 2.14.0
        The config-developer.yml file based configuration is deprecated and
        will no longer be supported in ESMValCore v2.16.0. Please use
        :func:`~esmvalcore.cmor.table.load_cmor_tables` instead of this
        function.

    Parameters
    ----------
    cfg_developer:
        Path to config-developer.yml file. If ``None``, the
        ``config-developer.yml`` located one directory above this module is
        used.

    Raises
    ------
    TypeError
        If `cfg_developer` is not a Path-like object

    """
    if cfg_developer is None:
        cfg_developer = Path(__file__).parents[1] / "config-developer.yml"
    elif not isinstance(cfg_developer, Path):
        # Put the offending value into the message itself so the exception
        # renders as one readable sentence instead of a tuple of arguments.
        msg = f"cfg_developer is not a Path-like object, got {cfg_developer!r}"
        raise TypeError(msg)
    # The modification time is part of the cache key of `_read_cmor_tables`,
    # so an edited file is read again.
    mtime = cfg_developer.stat().st_mtime
    cmor_tables = _read_cmor_tables(cfg_developer, mtime)
    CMOR_TABLES.clear()
    CMOR_TABLES.update(cmor_tables)
@lru_cache
def _read_cmor_tables(
    cfg_file: Path,
    mtime: float,  # noqa: ARG001
) -> dict[str, InfoBase]:
    """Build the CMOR info objects described by a config-developer.yml file.

    Parameters
    ----------
    cfg_file: pathlib.Path
        Path to config-developer.yml file.
    mtime: float
        Modification time of config-developer.yml file. Only used by the
        `lru_cache` decorator to make sure the file is read again when it is
        changed.

    Returns
    -------
    dict
        The custom tables under the key ``"custom"``, followed by one entry
        per project listed in the file.

    """
    with cfg_file.open("r", encoding="utf-8") as file:
        cfg_developer = yaml.safe_load(file)

    install_dir = os.path.dirname(os.path.realpath(__file__))
    alt_names_file = os.path.join(install_dir, "variable_alt_names.yml")
    with open(alt_names_file, encoding="utf-8") as yfile:
        alt_names = yaml.safe_load(yfile)

    # Try to infer location for custom tables from config-developer.yml file,
    # if not possible, use default location
    custom_path = (
        cfg_developer["custom"].get("cmor_path")
        if "custom" in cfg_developer
        else None
    )
    if custom_path is not None:
        custom_path = os.path.expandvars(os.path.expanduser(custom_path))
    custom = CustomInfo(custom_path)

    cmor_tables: dict[str, InfoBase] = {"custom": custom}
    for table in cfg_developer:
        if table != "custom":
            cmor_tables[table] = _read_table(
                cfg_developer,
                table,
                install_dir,
                custom,
                alt_names,
            )
    return cmor_tables


def _read_table(cfg_developer, table, install_dir, custom, alt_names):
    """Create the info object for one project of the developer configuration.

    The class is selected by the project's ``cmor_type`` (``CMIP5`` when not
    given). A ``ValueError`` is raised for any type other than ``CMIP3``,
    ``CMIP5`` or ``CMIP6``.
    """
    project = cfg_developer[table]
    cmor_type = project.get("cmor_type", "CMIP5")
    default_path = os.path.join(install_dir, "tables", cmor_type.lower())
    table_path = os.path.expandvars(
        os.path.expanduser(project.get("cmor_path", default_path)),
    )
    cmor_strict = project.get("cmor_strict", True)
    default_table_prefix = project.get("cmor_default_table_prefix", "")

    if cmor_type in ("CMIP3", "CMIP5"):
        # Both classes take the same arguments.
        info_cls = CMIP3Info if cmor_type == "CMIP3" else CMIP5Info
        return info_cls(
            table_path,
            default=custom,
            strict=cmor_strict,
            alt_names=alt_names,
        )
    if cmor_type == "CMIP6":
        return CMIP6Info(
            table_path,
            default=custom,
            strict=cmor_strict,
            default_table_prefix=default_table_prefix,
            alt_names=alt_names,
        )
    msg = f"Unsupported CMOR type {cmor_type}"
    raise ValueError(msg)


_TABLE_CACHE: dict[str, InfoBase] = {}
"""The CMOR tables are cached for faster access."""
def clear_table_cache() -> None:
    """Remove every entry from the CMOR table cache."""
    _TABLE_CACHE.clear()
def get_tables(
    session: Session | Config,
    project: str,
) -> InfoBase:
    """Get the CMOR tables for a project.

    The tables are created from the ``cmor_table`` entry of the project's
    configuration and cached, so repeated calls with the same configuration
    return the same object. Projects without a ``cmor_table`` entry use
    ``esmvalcore.cmor.table.NoInfo``.

    Parameters
    ----------
    session:
        The configuration.
    project:
        The project to load a CMOR table for.

    Returns
    -------
    InfoBase
        The CMOR tables configured for ``project``.

    Raises
    ------
    ValueError
        If ``project`` is not configured, if its ``cmor_table`` configuration
        has no ``type``, or if ``type`` is not a dotted
        ``module.ClassName`` path.
    TypeError
        If the configured ``type`` does not create an
        :class:`~esmvalcore.cmor.table.InfoBase` instance.

    """
    if project not in session["projects"]:
        msg = f"Unknown project '{project}', please configure it under 'projects'."
        raise ValueError(msg)
    # Work on a copy: "type" is popped below and the configuration must not
    # be modified.
    kwargs = (
        session["projects"][project]
        .get(
            "cmor_table",
            {
                "type": "esmvalcore.cmor.table.NoInfo",
            },
        )
        .copy()
    )
    if "type" not in kwargs:
        msg = (
            f"Missing CMOR table 'type' in configuration of project {project}. "
            f"Current configuration is:\n{yaml.safe_dump(kwargs)}"
        )
        raise ValueError(msg)
    # The key is computed before "type" is removed from the arguments.
    cache_key = str(kwargs)
    if cache_key not in _TABLE_CACHE:
        type_path = kwargs.pop("type")
        module_name, dot, cls_name = (
            type_path.rpartition(".") if isinstance(type_path, str) else ("", "", "")
        )
        if not (dot and module_name and cls_name):
            # Without this check the failure is a cryptic unpacking error.
            msg = (
                f"Invalid CMOR table 'type' in configuration of project "
                f"'{project}': expected a dotted path of the form "
                f"'module.ClassName', got {type_path!r}."
            )
            raise ValueError(msg)
        module = importlib.import_module(module_name)
        cls = getattr(module, cls_name)
        tables = cls(**kwargs)
        if not isinstance(tables, InfoBase):
            msg = (
                "`type` should be a subclass `esmvalcore.cmor.table.InfoBase`, "
                f"but your configuration for project '{project}' contains "
                f"'{tables}' of type: '{type(tables)}'."
            )
            raise TypeError(msg)
        _TABLE_CACHE[cache_key] = tables
    return _TABLE_CACHE[cache_key]
class InfoBase:
    """Base class for all CMOR table info classes.

    Parameters
    ----------
    default:
        Default table to look variables on if not found.

        .. deprecated:: 2.14.0
            The ``default`` parameter is deprecated and will be removed in
            ESMValCore v2.16.0. Please use the ``paths`` parameter instead to
            aggregate multiple tables.
    alt_names:
        List of known alternative names for variables. If no value is
        provided, the default values from the installed copy of
        `variable_alt_names.yml <https://github.com/ESMValGroup/ESMValCore/blob/main/esmvalcore/cmor/variable_alt_names.yml>`_
        will be used.
    strict:
        If :obj:`False`, the function
        :meth:`~esmvalcore.cmor.table.InfoBase.get_variable` will look for a
        variable in other tables if it can not be found in the table specified
        by ``mip`` in the :ref:`recipe <recipe>` or
        :class:`~esmvalcore.dataset.Dataset`.
    paths:
        A list of paths to CMOR tables. The path can be relative to the
        built-in tables in the
        `esmvalcore/cmor/tables <https://github.com/ESMValGroup/ESMValCore/tree/main/esmvalcore/cmor/tables>`_
        directory, or any other path. The built-in tables will be used if the
        path is relative and exists in the built-in tables directory.
    """

    def __init__(
        self,
        default: CustomInfo | None = None,
        alt_names: list[list[str]] | None = None,
        strict: bool = True,
        paths: Iterable[Path] = (),
    ) -> None:
        module_dir = Path(__file__).parent

        # Configure the paths to the CMOR tables: a path that exists below
        # the built-in tables directory takes precedence.
        builtin_tables_path = module_dir / "tables"
        resolved = []
        for raw_path in paths:
            candidate = Path(os.path.expandvars(raw_path)).expanduser()
            builtin_candidate = builtin_tables_path / candidate
            resolved.append(
                builtin_candidate if builtin_candidate.is_dir() else candidate,
            )
        self.paths = tuple(resolved)
        """A list of paths to CMOR tables."""
        for path in self.paths:
            if not path.is_dir():
                raise NotADirectoryError(path)

        # Configure the alternative names.
        if alt_names is None:
            alt_names_path = module_dir / "variable_alt_names.yml"
            alt_names = yaml.safe_load(
                alt_names_path.read_text(encoding="utf-8"),
            )
        self.alt_names = alt_names
        """List of known alternative names for variables."""

        self.coords: dict[str, CoordinateInfo] = {}
        """The coordinates defined in these tables."""

        self.default = default
        """
        Default table to look variables on if not found.

        .. deprecated:: 2.14.0
            The ``default`` attribute is deprecated and will be removed in
            ESMValCore v2.16.0.
        """

        self.strict = strict
        """If False, will look for a variable in other tables if it can not be
        found in the requested one.
        """

        self.tables: dict[str, TableInfo] = {}
        """A mapping from table names to :class:`TableInfo` objects."""

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return f"{cls_name}(paths={list(self.paths)}, strict={self.strict}, alt_names={self.alt_names})"

    def get_table(self, table: str) -> TableInfo | None:
        """Search and return the table info.

        Parameters
        ----------
        table: str
            Table name

        Returns
        -------
        TableInfo
            Return the TableInfo object for the requested table if
            found, returns None if not

        """
        return self.tables.get(table)

    def get_variable(
        self,
        table_name: str,
        short_name: str,
        *,
        branding_suffix: str | None = None,
        derived: bool = False,
    ) -> VariableInfo | None:
        """Search and return the variable information.

        Parameters
        ----------
        table_name:
            Table name, i.e., the ``mip`` in the :ref:`recipe <recipe>` or
            :class:`~esmvalcore.dataset.Dataset`.
        short_name:
            Variable's short name.
        branding_suffix:
            A suffix that will be appended to ``short_name`` when looking up
            the variable in the CMOR table.
        derived:
            Variable is derived. Information retrieval for derived variables
            always looks in the default tables (usually, the custom tables) if
            variable is not found in the requested table.

        Returns
        -------
        VariableInfo | None
            `VariableInfo` object for the requested variable if found,
            ``None`` otherwise.

        """
        candidates = self._get_alt_names_list(short_name)

        if branding_suffix:
            # The branding suffix was introduced for CMIP7. The branded
            # variable name used in the CMOR tables is the short_name followed
            # by an underscore and the branding suffix.
            #
            # For projects prior to CMIP7 the name used in the CMOR table may
            # also contain a suffix, but without an underscore. For example
            # ch4Clim in the CMIP6 Amon table, where ch4 is the short_name and
            # Clim is the suffix. This is not a branding suffix, but we can
            # use it to select the correct variable in the CMOR table anyway.
            with_underscore = [f"{name}_{branding_suffix}" for name in candidates]
            without_underscore = [f"{name}{branding_suffix}" for name in candidates]
            candidates = with_underscore + without_underscore

        # First, look in requested table. A hit is returned as stored in the
        # table, without copying.
        table = self.get_table(table_name)
        if table:
            for candidate in candidates:
                try:
                    return table[candidate]
                except KeyError:
                    continue

        # If that didn't work, look in all tables (i.e., other MIPs) if
        # cmor_strict=False or derived=True
        var_info = self._look_in_all_tables(derived, candidates)

        # If that didn't work either, look in default table if
        # cmor_strict=False or derived=True
        if not var_info and self.default is not None:
            var_info = self._look_in_default(derived, candidates, table_name)

        if not var_info:
            return var_info

        # If necessary, adapt frequency of variable (set it to the one from
        # the requested MIP). E.g., if the user asked for table `Amon`, but
        # the variable has been found in `day`, use frequency `mon`. This is
        # done on a copy so the entry stored in the other table is untouched.
        return self._update_frequency_from_mip(table_name, var_info.copy())

    def _look_in_default(self, derived, alt_names_list, table_name):
        """Look for variable in default table."""
        # TODO: remove in v2.16.0
        var_info = None
        if self.strict and not derived:
            return var_info
        for name in alt_names_list:
            var_info = self.default.get_variable(table_name, name)
            if var_info:
                break
        return var_info

    def _look_in_all_tables(self, derived, alt_names_list):
        """Look for variable in all tables."""
        var_info = None
        if self.strict and not derived:
            return var_info
        for name in alt_names_list:
            var_info = self._look_all_tables(name)
            if var_info:
                break
        return var_info

    def _get_alt_names_list(self, short_name):
        """Get list of alternative variable names.

        The requested name always comes first, followed by the other members
        of every group of alternative names that contains it.
        """
        names = [short_name]
        for group in self.alt_names:
            if short_name not in group:
                continue
            names += [name for name in group if name not in names]
        return names

    def _update_frequency_from_mip(self, table_name, var_info):
        """Update frequency information of var_info from table."""
        mip_info = self.get_table(table_name)
        if mip_info:
            var_info.frequency = mip_info.frequency
        return var_info

    def _look_all_tables(self, alt_names):
        """Look for variable in all tables."""
        return next(
            (
                table_vars[alt_names]
                for table_vars in sorted(self.tables.values())
                if alt_names in table_vars
            ),
            None,
        )
class CMIP6Info(InfoBase):
    """Class to read CMIP6-like CMOR tables.

    This class reads CMOR 3 json format tables.

    Parameters
    ----------
    cmor_tables_path:
        The path to a directory with subdirectory "Tables" where the CMOR
        tables are located.

        .. deprecated:: 2.14.0
            The ``cmor_tables_path`` parameter is deprecated and will be
            removed in ESMValCore v2.16.0. Please use the ``paths`` parameter
            instead.
    default:
        Default table to look variables on if not found.

        .. deprecated:: 2.14.0
            The ``default`` parameter is deprecated and will be removed in
            ESMValCore v2.16.0. Please use the ``paths`` parameter instead to
            aggregate multiple tables.
    alt_names:
        List of known alternative names for variables. If no value is
        provided, the default values from the installed copy of
        `variable_alt_names.yml`_ will be used.
    strict:
        If :obj:`False`, the function
        :meth:`~esmvalcore.cmor.table.InfoBase.get_variable` will look for a
        variable in other tables if it can not be found in the table specified
        by ``mip`` in the :ref:`recipe <recipe>` or
        :class:`~esmvalcore.dataset.Dataset`.
    default_table_prefix:
        If the table_id contains a prefix, it can be specified here.

        .. deprecated:: 2.14.0
            The ``default_table_prefix`` parameter is deprecated and will be
            removed in ESMValCore v2.16.0.
    paths:
        A list of paths to CMOR tables. The path can be relative to the
        built-in tables in the
        `esmvalcore/cmor/tables <https://github.com/ESMValGroup/ESMValCore/tree/main/esmvalcore/cmor/tables>`_
        directory, or any other path. The built-in tables will be used if the
        path is relative and exists in the built-in tables directory.

    Raises
    ------
    ValueError
        If one of the paths does not contain any ``*.json`` file, or if
        ``cmor_tables_path`` cannot be found.
    """

    def __init__(
        self,
        cmor_tables_path: str | None = None,
        default: CustomInfo | None = None,
        alt_names: list[list[str]] | None = None,
        strict: bool = True,
        default_table_prefix: str = "",
        paths: Iterable[Path] = (),
    ) -> None:
        if cmor_tables_path is not None:
            # Support cmor_tables_path for backward compatibility.
            # TODO: remove in v2.16.0
            tables_path = Path(self._get_cmor_path(cmor_tables_path))
            if (tables_path / "tables").exists():
                # Support CMIP7 which uses a lowercase "tables" subdirectory.
                cmor_folder = tables_path / "tables"
            else:
                cmor_folder = tables_path / "Tables"
            paths = (*tuple(paths), cmor_folder)
        super().__init__(default, alt_names, strict, paths=paths)

        self.default_table_prefix = default_table_prefix
        """
        If the table_id contains a prefix, it can be specified here.

        .. deprecated:: 2.14.0
            The ``default_table_prefix`` attribute is deprecated and will be
            removed in ESMValCore v2.16.0.
        """

        self.var_to_freq: dict[str, dict[str, str]] = {}
        self.activities: dict[str, list[str]] = {}
        """A mapping from ``exp`` to ``activity`` from the controlled vocabulary."""
        self.institutes: dict[str, list[str]] = {}
        """A mapping from ``dataset`` to ``institute`` from the controlled vocabulary."""

        for path in self.paths:
            if not any(path.glob("*.json")):
                msg = f"No CMOR tables found in {path}"
                raise ValueError(msg)
            # Coordinates are loaded before the tables because the variables
            # are linked to them while the tables are read.
            self._load_controlled_vocabulary(path)
            self._load_coordinates(path)
            # Sorted so the load (and therefore merge) order does not depend
            # on the file system.
            for json_file in sorted(glob.glob(os.path.join(path, "*.json"))):
                # Filter on the file name only: a directory whose name
                # happens to contain "grids" must not hide all its tables.
                file_name = os.path.basename(json_file)
                if "CV_test" in file_name or "grids" in file_name:
                    continue
                try:
                    self._load_table(json_file)
                except Exception:
                    msg = f"Exception raised when loading {json_file}"
                    # Logger may not be ready at this stage
                    if logger.handlers:
                        logger.error(msg)
                    else:
                        print(msg)  # noqa: T201
                    raise

    @staticmethod
    def _get_cmor_path(cmor_tables_path: str) -> str:
        """Resolve ``cmor_tables_path``, falling back to the built-in tables."""
        if os.path.isdir(cmor_tables_path):
            return cmor_tables_path
        cwd = os.path.dirname(os.path.realpath(__file__))
        cmor_tables_path = os.path.join(cwd, "tables", cmor_tables_path)
        if os.path.isdir(cmor_tables_path):
            return cmor_tables_path
        msg = f"CMOR tables not found in {cmor_tables_path}"
        raise ValueError(msg)

    def _load_table(self, json_file):
        """Read one json file and merge its variables into ``self.tables``.

        Files without a ``Header`` or ``variable_entry`` section are ignored.
        """
        with open(json_file, encoding="utf-8") as inf:
            raw_data = json.load(inf)
        if not self._is_table(raw_data):
            return

        header = raw_data["Header"]
        table_name = header["table_id"].split(" ")[-1]
        if table_name not in self.tables:
            table = TableInfo()
            table.name = table_name
            self.tables[table_name] = table
        table = self.tables[table_name]

        generic_levels = header["generic_levels"].split()
        # Variables of a table that is spread over several files are merged,
        # so the frequencies recorded for earlier files must be kept as well.
        table_freqs = self.var_to_freq.setdefault(table.name, {})

        for var_name, var_data in raw_data["variable_entry"].items():
            var = VariableInfo("CMIP6")
            var.read_json(var_data, table.frequency)
            self._assign_dimensions(var, generic_levels)
            table[var_name] = var
            table_freqs[var_name] = var.frequency

        # Use the most common variable frequency as the table frequency. A
        # table without variables has no frequency to infer.
        if not table.frequency and table:
            var_freqs = (var.frequency for var in table.values())
            table_freq, _ = Counter(var_freqs).most_common(1)[0]
            table.frequency = table_freq

    def _assign_dimensions(self, var, generic_levels):
        """Link every dimension of ``var`` to its coordinate information."""
        for dimension in var.dimensions:
            if dimension in generic_levels:
                # Generic levels get a fresh coordinate that collects all
                # coordinates declaring this generic level name.
                coord = CoordinateInfo(dimension)
                coord.generic_level = True
                for name, known_coord in self.coords.items():
                    if known_coord.generic_lev_name == dimension:
                        coord.generic_lev_coords[name] = known_coord
            else:
                try:
                    coord = self.coords[dimension]
                except KeyError:
                    logger.exception(
                        "Can not find dimension %s for variable %s",
                        dimension,
                        var,
                    )
                    raise

            var.coordinates[dimension] = coord

    def _load_coordinates(self, path: Path) -> None:
        """Read all ``*coordinate*.json`` files found in ``path``."""
        for json_file in sorted(
            glob.glob(os.path.join(path, "*coordinate*.json")),
        ):
            with open(json_file, encoding="utf-8") as inf:
                table_data = json.load(inf)
            for coord_name, coord_data in table_data["axis_entry"].items():
                coord = CoordinateInfo(coord_name)
                coord.read_json(coord_data)
                self.coords[coord_name] = coord

    def _load_controlled_vocabulary(self, path: Path) -> None:
        """Read activities and institutes from the ``*_CV.json`` files in ``path``."""
        for json_file in sorted(glob.glob(os.path.join(path, "*_CV.json"))):
            with open(json_file, encoding="utf-8") as inf:
                table_data = json.load(inf)
            # Both sections are optional: a controlled vocabulary that lacks
            # one of them (or has an unexpected layout) is skipped on purpose.
            try:
                exps = table_data["CV"]["experiment_id"]
                for exp_id in exps:
                    activity = exps[exp_id]["activity_id"][0].split(" ")
                    self.activities[exp_id] = activity
            except (KeyError, AttributeError):
                pass

            try:
                sources = table_data["CV"]["source_id"]
                for source_id in sources:
                    institution = sources[source_id]["institution_id"]
                    self.institutes[source_id] = institution
            except (KeyError, AttributeError):
                pass

    def get_table(self, table: str) -> TableInfo | None:
        """Search and return the table info.

        If no table called ``table`` exists, the name prefixed with
        ``default_table_prefix`` is tried.

        Parameters
        ----------
        table:
            Table name

        Returns
        -------
        :
            Return the TableInfo object for the requested table if
            found, returns None if not

        """
        try:
            return self.tables[table]
        except KeyError:
            return self.tables.get(f"{self.default_table_prefix}{table}")

    @staticmethod
    def _is_table(table_data):
        """Return True if the data has both variable entries and a header."""
        if "variable_entry" not in table_data:
            return False
        return "Header" in table_data
class Obs4MIPsInfo(CMIP6Info):
    """Class to read obs4MIPs-like CMOR tables.

    Parameters
    ----------
    alt_names:
        List of known alternative names for variables. If no value is
        provided, the default values from the installed copy of
        `variable_alt_names.yml`_ will be used.
    strict:
        If :obj:`False`, the function
        :meth:`~esmvalcore.cmor.table.InfoBase.get_variable` will look for a
        variable in other tables if it can not be found in the table specified
        by ``mip`` in the :ref:`recipe <recipe>` or
        :class:`~esmvalcore.dataset.Dataset`.
    paths:
        A list of paths to CMOR tables. The path can be relative to the
        built-in tables in the
        `esmvalcore/cmor/tables <https://github.com/ESMValGroup/ESMValCore/tree/main/esmvalcore/cmor/tables>`_
        directory, or any other path. The built-in tables will be used if the
        path is relative and exists in the built-in tables directory.
    """

    def __init__(
        self,
        alt_names: list[list[str]] | None = None,
        strict: bool = True,
        paths: Iterable[Path] = (),
    ) -> None:
        super().__init__(
            alt_names=alt_names,
            strict=strict,
            paths=paths,
        )
        # Remove the prefix from the table_id. The names are collected first
        # because the mapping is modified while renaming.
        table_id_prefix = "obs4MIPs_"
        prefixed_names = [
            name for name in self.tables if name.startswith(table_id_prefix)
        ]
        for name in prefixed_names:
            self.tables[name.removeprefix(table_id_prefix)] = self.tables.pop(name)
@total_ordering
class TableInfo(dict):
    """Container class for storing a CMOR table.

    The mapping holds the entries of the table. Tables are compared and
    ordered by their ``name``, ``frequency`` and ``realm``; the entries they
    contain do not take part in the comparison.
    """

    def __init__(self, *args, **kwargs):
        """Create a new TableInfo object for storing VariableInfo objects."""
        super().__init__(*args, **kwargs)
        self.name = ""
        """Table name."""
        self.frequency = ""
        """Table frequency (if defined)."""
        self.realm = ""
        """Table realm (if defined)."""

    def _sort_key(self):
        """Return the tuple used for comparing and ordering tables."""
        return (self.name, self.frequency, self.realm)

    # All rich comparisons are spelled out. `dict` already provides every
    # comparison slot, so `total_ordering` considers them defined and does not
    # generate the missing ones; without explicit methods `<=` and `>=` fail
    # with a TypeError and `!=` compares the dict contents. Operands that are
    # not tables yield NotImplemented so Python applies its normal fallback.

    def __eq__(self, other):
        if not isinstance(other, TableInfo):
            return NotImplemented
        return self._sort_key() == other._sort_key()

    def __ne__(self, other):
        if not isinstance(other, TableInfo):
            return NotImplemented
        return self._sort_key() != other._sort_key()

    def __lt__(self, other):
        if not isinstance(other, TableInfo):
            return NotImplemented
        return self._sort_key() < other._sort_key()

    def __le__(self, other):
        if not isinstance(other, TableInfo):
            return NotImplemented
        return self._sort_key() <= other._sort_key()

    def __gt__(self, other):
        if not isinstance(other, TableInfo):
            return NotImplemented
        return self._sort_key() > other._sort_key()

    def __ge__(self, other):
        if not isinstance(other, TableInfo):
            return NotImplemented
        return self._sort_key() >= other._sort_key()
class JsonInfo:
    """Base class for the info classes.

    Provides common utility methods to read json variables
    """

    def __init__(self):
        self._json_data = {}

    def _read_json_variable(self, parameter, default=""):
        """Read a json parameter in json_data.

        Parameters
        ----------
        parameter: str
            parameter to read
        default:
            Value returned if the parameter is not present.

        Returns
        -------
        str
            Option's value converted to a string, or ``default`` if the
            parameter is not present

        """
        if parameter in self._json_data:
            return str(self._json_data[parameter])
        return default

    def _read_json_list_variable(self, parameter):
        """Read a json list parameter in json_data.

        A string value is split on whitespace; any other value is returned
        as it is stored.

        Parameters
        ----------
        parameter: str
            parameter to read

        Returns
        -------
        list
            Option's value or empty list if parameter is not present

        """
        if parameter not in self._json_data:
            return []
        stored = self._json_data[parameter]
        return stored.split() if isinstance(stored, str) else stored
class VariableInfo(JsonInfo):
    """Class to read and store variable information."""

    def __init__(
        self,
        table_type: str = "",
        short_name: str = "",
    ) -> None:
        """Class to read and store variable information.

        Parameters
        ----------
        table_type:
            Type of table (e.g., CMIP5, CMIP6).

            .. deprecated:: 2.14.0
                The ``table_type`` parameter is deprecated and will be removed
                in ESMValCore v2.16.0.
        short_name:
            Variable's short name.

            .. deprecated:: 2.14.0
                The ``short_name`` parameter is deprecated and will be removed
                in ESMValCore v2.16.0.

        """
        super().__init__()
        self.table_type = table_type
        self.modeling_realm: list[str] = []
        """Modeling realm"""
        self.short_name = short_name
        """Short name"""
        self.standard_name = ""
        """Standard name"""
        self.long_name = ""
        """Long name"""
        self.units = ""
        """Data units"""
        self.valid_min = ""
        """Minimum admitted value"""
        self.valid_max = ""
        """Maximum admitted value"""
        self.frequency = ""
        """Data frequency"""
        self.positive = ""
        """Increasing direction"""

        self.dimensions: list[str] = []
        """List of dimensions"""
        self.coordinates: dict[str, CoordinateInfo] = {}
        """Coordinates

        This is a dict with the names of the dimensions as keys and
        CoordinateInfo objects as values.
        """

        # No json data is attached until `read_json` is called.
        self._json_data = None

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return f"<{cls_name} defining variable '{self.short_name}'>"

    def copy(self) -> Self:
        """Return a shallow copy of VariableInfo.

        Returns
        -------
        VariableInfo
           Shallow copy of this object.

        """
        return copy.copy(self)

    def read_json(self, json_data: dict, default_freq: str) -> None:
        """Read variable information from json.

        Non-present options will be set to empty

        Parameters
        ----------
        json_data:
            Dictionary created by the json reader containing variable
            information.
        default_freq:
            Default frequency to use if it is not defined at variable level.

        """
        self._json_data = json_data

        # Plain string attributes, as (attribute name, json key).
        string_attributes = (
            ("short_name", "out_name"),
            ("standard_name", "standard_name"),
            ("long_name", "long_name"),
            ("units", "units"),
            ("valid_min", "valid_min"),
            ("valid_max", "valid_max"),
            ("positive", "positive"),
        )
        for attribute, json_key in string_attributes:
            setattr(self, attribute, self._read_json_variable(json_key))

        realms = self._read_json_variable("modeling_realm")
        self.modeling_realm = realms.split()
        self.frequency = self._read_json_variable("frequency", default_freq)
        # "dimensions" is a list of str in CMIP7 and a space separated str in
        # CMIP6 CMOR tables.
        self.dimensions = self._read_json_list_variable("dimensions")

    def has_coord_with_standard_name(self, standard_name: str) -> bool:
        """Check if a coordinate with a given `standard_name` exists.

        For some coordinates, multiple (slightly different) versions with
        different dimension names but identical `standard_name` exist. For
        example, the CMIP6 tables provide 4 different `standard_name=time`
        dimensions: `time`, `time1`, `time2`, and `time3`. Other examples would
        be the CMIP6 pressure levels (`plev19`, `plev23`, `plev27`, etc.  with
        standard name `air_pressure`) and the altitudes (`alt16`, `alt40` with
        standard name `altitude`).

        This function can be used to check for the existence of a specific
        coordinate defined by its `standard_name`, not its dimension name.

        Parameters
        ----------
        standard_name:
            Standard name to be checked.

        Returns
        -------
        :
            `True` if there is at least one coordinate with the given
            `standard_name`, `False` if not.

        """
        return any(
            coord.standard_name == standard_name
            for coord in self.coordinates.values()
        )
class CoordinateInfo(JsonInfo):
    """Class to read and store coordinate information."""

    def __init__(self, name: str) -> None:
        """Class to read and store coordinate information.

        Parameters
        ----------
        name:
            coordinate's name

        """
        super().__init__()
        self.name = name
        """Name of the coordinate entry in the CMOR table."""
        self.generic_level = False
        self.generic_lev_coords: dict[str, CoordinateInfo] = {}

        self.axis = ""
        """Axis"""
        self.value = ""
        """Coordinate value"""
        self.standard_name = ""
        """Standard name"""
        self.long_name = ""
        """Long name"""
        self.out_name = ""
        """
        Out name

        This is the name of the variable in the file
        """
        self.var_name = ""
        """Short name"""
        self.units = ""
        """Units"""
        self.stored_direction = ""
        """Direction in which the coordinate increases"""
        self.requested: list[str] = []
        """Values requested"""
        self.valid_min = ""
        """Minimum allowed value"""
        self.valid_max = ""
        """Maximum allowed value"""
        self.must_have_bounds = ""
        """Whether bounds are required on this dimension"""
        self.generic_lev_name = ""
        """Generic level name"""

    def read_json(self, json_data):
        """Read coordinate information from json.

        Non-present options will be set to empty

        Parameters
        ----------
        json_data: dict
            dictionary created by the json reader containing
            coordinate information

        """
        self._json_data = json_data

        # Plain string attributes, as (attribute name, json key). Both
        # `out_name` and `var_name` are read from the "out_name" key.
        string_attributes = (
            ("axis", "axis"),
            ("value", "value"),
            ("out_name", "out_name"),
            ("var_name", "out_name"),
            ("standard_name", "standard_name"),
            ("long_name", "long_name"),
            ("units", "units"),
            ("stored_direction", "stored_direction"),
            ("valid_min", "valid_min"),
            ("valid_max", "valid_max"),
            ("must_have_bounds", "must_have_bounds"),
            ("generic_lev_name", "generic_level_name"),
        )
        for attribute, json_key in string_attributes:
            setattr(self, attribute, self._read_json_variable(json_key))

        self.requested = self._read_json_list_variable("requested")
class CMIP5Info(InfoBase):
    """Class to read CMIP5-like CMOR tables.

    This class reads CMOR 2 format tables.

    Parameters
    ----------
    cmor_tables_path:
        The path to a directory with subdirectory "Tables" where the CMOR
        tables are located.

        .. deprecated:: 2.14.0
            The ``cmor_tables_path`` parameter is deprecated and will be
            removed in ESMValCore v2.16.0. Please use the ``paths`` parameter
            instead.
    default:
        Default table to look variables on if not found.

        .. deprecated:: 2.14.0
            The ``default`` parameter is deprecated and will be removed in
            ESMValCore v2.16.0. Please use the ``paths`` parameter instead
            to aggregate multiple tables.
    alt_names:
        List of known alternative names for variables. If no value is
        provided, the default values from the installed copy of
        `variable_alt_names.yml`_ will be used.
    strict:
        If :obj:`False`, the function
        :meth:`~esmvalcore.cmor.table.InfoBase.get_variable` will look for a
        variable in other tables if it can not be found in the table
        specified by ``mip`` in the :ref:`recipe <recipe>` or
        :class:`~esmvalcore.dataset.Dataset`.
    paths:
        A list of paths to CMOR tables. The path can be relative to the
        built-in tables in the
        `esmvalcore/cmor/tables <https://github.com/ESMValGroup/ESMValCore/tree/main/esmvalcore/cmor/tables>`_
        directory, or any other path. The built-in tables will be used if the
        path is relative and exists in the built-in tables directory.
    """

    def __init__(
        self,
        cmor_tables_path: str | None = None,
        default: CustomInfo | None = None,
        alt_names: list[list[str]] | None = None,
        strict: bool = True,
        paths: Iterable[Path] = (),
    ) -> None:
        if cmor_tables_path is not None:
            # Support cmor_tables_path for backward compatibility.
            # TODO: remove in v2.16.0
            cmor_tables_path = self._get_cmor_path(cmor_tables_path)
            cmor_folder = Path(cmor_tables_path) / "Tables"
            paths = (*tuple(paths), cmor_folder)
        super().__init__(default, alt_names, strict, paths=paths)

        # Parser state shared by the ``_read_*`` helpers below.
        self._current_table: TextIOWrapper | None = None
        self._last_line_read = ("", "")

        for path in self.paths:
            table_files = sorted(
                glob.glob(os.path.join(path, "*")),
                # Read coordinate files before variable files so we can link
                # the variables with the coordinates. The file name is used
                # as a tie-breaker because glob returns matches in arbitrary
                # order, which would make the load order (and therefore
                # which duplicate definition wins) filesystem-dependent.
                key=lambda filename: ("coordinate" not in filename, filename),
            )
            for table_file in table_files:
                if "_grids" in table_file:
                    continue
                try:
                    self._load_table(table_file)
                except Exception:
                    msg = f"Exception raised when loading {table_file}"
                    # Logger may not be ready at this stage
                    if logger.handlers:
                        logger.error(msg)
                    else:
                        print(msg)  # noqa: T201
                    raise

    @staticmethod
    def _get_cmor_path(cmor_tables_path):
        """Resolve a CMOR tables location.

        An existing directory is returned unchanged; anything else is
        interpreted as a name inside the ``tables`` directory located next
        to this module.
        """
        if os.path.isdir(cmor_tables_path):
            return cmor_tables_path
        cwd = os.path.dirname(os.path.realpath(__file__))
        return os.path.join(cwd, "tables", cmor_tables_path)

    def _load_table(self, table_file: str) -> None:
        """Read ``table_file`` and merge it into ``self.tables`` by name."""
        table = self._read_table_file(table_file)
        if table.name in self.tables:
            self.tables[table.name].update(table)
        else:
            self.tables[table.name] = table

    def _read_table_file(self, table_file: str) -> TableInfo:
        """Parse a single CMOR 2 table file.

        Coordinates found in the file are stored in ``self.coords``; the
        variables are stored in the returned table.
        """
        table = TableInfo()
        with open(table_file, encoding="utf-8") as self._current_table:
            self._read_line()
            while True:
                key, value = self._last_line_read
                if key == "table_id":
                    table.name = value[len("Table ") :]
                elif key == "frequency":
                    table.frequency = value
                elif key == "modeling_realm":
                    table.realm = value
                elif key == "generic_levels":
                    for dim in value.split(" "):
                        coord = CoordinateInfo(dim)
                        coord.generic_level = True
                        coord.axis = "Z"
                        self.coords[dim] = coord
                elif key == "axis_entry":
                    self.coords[value] = self._read_coordinate(value)
                    # The entry reader already consumed the next header
                    # line (or reached the end of the file), so process
                    # ``_last_line_read`` again without reading a new line.
                    continue
                elif key == "variable_entry":
                    table[value] = self._read_variable(value, table.frequency)
                    continue
                if not self._read_line():
                    break
        return table

    def _read_line(self):
        """Read the next non-comment line into ``self._last_line_read``.

        Returns
        -------
        bool
            ``False`` when the end of the file has been reached, ``True``
            otherwise.

        Raises
        ------
        ValueError
            If a non-empty line does not contain a ``:`` separator.
        """
        # Skip full-line comments iteratively: recursing once per comment
        # line could exhaust the recursion limit on long comment blocks.
        while True:
            line = self._current_table.readline()
            if line == "":
                # End of file. Clear the last pair so that callers which
                # re-inspect ``_last_line_read`` after an entry reader hit
                # the end of the file do not process a stale line again
                # (an entry header on the last line would otherwise be
                # re-read forever).
                self._last_line_read = ("", "")
                return False
            if not line.startswith("!"):
                break

        line = line.replace("\n", "")
        if "!" in line:
            # Drop trailing comments.
            line = line[: line.index("!")]
        line = line.strip()
        if not line:
            self._last_line_read = ("", "")
            return True

        key, separator, value = line.partition(":")
        if not separator:
            msg = f"Expected 'key: value' in CMOR table line, got {line!r}"
            raise ValueError(msg)
        self._last_line_read = (key.strip(), value.strip())
        return True

    def _read_coordinate(self, value):
        """Read the ``axis_entry`` called ``value`` into a CoordinateInfo.

        Reading stops at the next entry header or at the end of the file.
        """
        coord = CoordinateInfo(value)
        while self._read_line():
            key, value = self._last_line_read
            if key in ("variable_entry", "axis_entry"):
                return coord
            if key == "requested":
                coord.requested.extend(val for val in value.split(" ") if val)
                continue
            if hasattr(coord, key):
                setattr(coord, key, value)
        return coord

    def _read_variable(self, entry_name, frequency):
        """Read the ``variable_entry`` called ``entry_name``.

        Reading stops at the next entry header or at the end of the file.
        Every dimension of the variable must already be present in
        ``self.coords``.
        """
        var = VariableInfo(table_type="CMIP5")
        var.frequency = frequency
        while self._read_line():
            key, value = self._last_line_read
            if key in ("variable_entry", "axis_entry"):
                break
            if key in ("dimensions", "modeling_realm"):
                setattr(var, key, value.split())
            elif hasattr(var, key):
                setattr(var, key, value)
            elif key == "out_name":
                var.short_name = value
        if not var.short_name:
            # Some of our custom CMIP5 table entries are missing the
            # `out_name` field. In that case, we assume the entry name is
            # the same as short_name.
            var.short_name = entry_name
        for dim in var.dimensions:
            var.coordinates[dim] = self.coords[dim]
        return var

    def get_table(self, table: str) -> TableInfo | None:
        """Search and return the table info.

        Parameters
        ----------
        table:
            Table name

        Returns
        -------
        TableInfo | None
            The TableInfo object for the requested table if found,
            ``None`` if not.
        """
        return self.tables.get(table)
class CMIP3Info(CMIP5Info):
    """Class to read CMIP3-like CMOR tables.

    Parameters
    ----------
    cmor_tables_path:
        The path to a directory with subdirectory "Tables" where the CMOR
        tables are located.

        .. deprecated:: 2.14.0
            The ``cmor_tables_path`` parameter is deprecated and will be
            removed in ESMValCore v2.16.0. Please use the ``paths`` parameter
            instead.
    default:
        Default table to look variables on if not found.

        .. deprecated:: 2.14.0
            The ``default`` parameter is deprecated and will be removed in
            ESMValCore v2.16.0. Please use the ``paths`` parameter instead
            to aggregate multiple tables.
    alt_names:
        List of known alternative names for variables. If no value is
        provided, the default values from the installed copy of
        `variable_alt_names.yml`_ will be used.
    strict:
        If :obj:`False`, the function
        :meth:`~esmvalcore.cmor.table.InfoBase.get_variable` will look for a
        variable in other tables if it can not be found in the table
        specified by ``mip`` in the :ref:`recipe <recipe>` or
        :class:`~esmvalcore.dataset.Dataset`.
    paths:
        A list of paths to CMOR tables. The path can be relative to the
        built-in tables in the
        `esmvalcore/cmor/tables <https://github.com/ESMValGroup/ESMValCore/tree/main/esmvalcore/cmor/tables>`_
        directory, or any other path. The built-in tables will be used if the
        path is relative and exists in the built-in tables directory.
    """

    def _read_table_file(self, table_file: str) -> TableInfo:
        """Register the ``zlevel`` generic level, then parse the file."""
        # A fresh "zlevel" coordinate on the Z axis is stored in
        # ``self.coords`` before every file is parsed.
        zlevel = CoordinateInfo("zlevel")
        zlevel.generic_level = True
        zlevel.axis = "Z"
        self.coords["zlevel"] = zlevel
        return super()._read_table_file(table_file)

    def _read_coordinate(self, value):
        """Read a coordinate, falling back to its name for output names."""
        coordinate = super()._read_coordinate(value)
        if coordinate.out_name:
            return coordinate
        # No ``out_name`` was read: use the coordinate name for both the
        # output name and the variable name.
        coordinate.out_name = coordinate.name
        coordinate.var_name = coordinate.name
        return coordinate

    def _read_variable(self, entry_name, frequency):
        """Read a variable and blank its frequency and modeling realm."""
        variable = super()._read_variable(entry_name, frequency)
        variable.frequency, variable.modeling_realm = "", []
        return variable
class CustomInfo(CMIP5Info):
    """Class to read custom var info for ESMVal.

    .. deprecated:: 2.14.0
        This class is deprecated and will be removed in ESMValCore v2.16.0.
        Please use :class:`~esmvalcore.cmor.table.CMIP5Info` instead.

    Parameters
    ----------
    cmor_tables_path:
        Full path to the table or name for the table if it is present in
        ESMValTool repository. If ``None``, only the default tables from
        `esmvalcore/cmor/tables/cmip5-custom` are used.
    """

    def __init__(self, cmor_tables_path: str | Path | None = None) -> None:
        """Initialize class member.

        Raises
        ------
        ValueError
            If ``cmor_tables_path`` is given but does not resolve to a
            directory.
        """
        # NOTE: the parent ``__init__`` is not called here, so every
        # attribute used by the inherited parser must be created explicitly.
        self.coords = {}
        self.tables = {}
        self.var_to_freq: dict[str, dict[str, str]] = {}
        # Parser state read by ``_read_table_file``. Without this
        # initialisation an empty (or comment-only) first table file raises
        # ``AttributeError`` because no line has been stored yet.
        self._current_table = None
        self._last_line_read = ("", "")

        table = TableInfo()
        table.name = "custom"
        self.tables[table.name] = table

        # First, read default custom tables from repository
        self.paths = (Path(self._get_cmor_path("cmip5-custom")),)

        # Second, if given, update default tables with user-defined custom
        # tables
        if cmor_tables_path is not None:
            user_table_folder = Path(self._get_cmor_path(cmor_tables_path))
            if not user_table_folder.is_dir():
                msg = (
                    f"Custom CMOR tables path {user_table_folder} is "
                    f"not a directory"
                )
                raise ValueError(msg)
            self.paths += (user_table_folder,)

        for path in self.paths:
            self._read_table_dir(str(path))

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(paths={list(self.paths)})"

    def _read_table_dir(self, table_dir: str) -> None:
        """Read CMOR tables from directory."""
        # If present, read coordinates. Only the coordinates stored in
        # ``self.coords`` are kept; the returned table is discarded.
        coordinates_file = os.path.join(table_dir, "CMOR_coordinates.dat")
        if os.path.isfile(coordinates_file):
            self._read_table_file(coordinates_file)

        # Read other variables. glob returns matches in arbitrary order, so
        # sort them to make the load order (and therefore which duplicate
        # definition wins) independent of the filesystem.
        for dat_file in sorted(glob.glob(os.path.join(table_dir, "*.dat"))):
            if dat_file == coordinates_file:
                continue
            try:
                self._load_table(dat_file)
            except Exception:
                msg = f"Exception raised when loading {dat_file}"
                # Logger may not be ready at this stage
                if logger.handlers:
                    logger.error(msg)
                else:
                    print(msg)  # noqa: T201
                raise

    def get_variable(
        self,
        table_name: str,  # noqa: ARG002
        short_name: str,
        *,
        branding_suffix: str | None = None,  # noqa: ARG002
        derived: bool = False,  # noqa: ARG002
    ) -> VariableInfo | None:
        """Search and return the variable info.

        Parameters
        ----------
        table_name:
            Table name. Ignored for custom tables.
        short_name:
            Variable's short name.
        branding_suffix:
            A suffix that will be appended to ``short_name`` when looking up
            the variable in the CMOR table. Ignored for custom tables.
        derived:
            Variable is derived. Info retrieval for derived variables always
            looks on the default tables if variable is not found in the
            requested table. Ignored for custom tables.

        Returns
        -------
        VariableInfo | None
            `VariableInfo` object for the requested variable if found,
            returns None if not.
        """
        return self.tables["custom"].get(short_name, None)

    def _read_table_file(self, table_file: str) -> TableInfo:
        """Read a single table file.

        The returned table is always named ``custom``; coordinates found in
        the file are stored in ``self.coords``.
        """
        table = TableInfo()
        table.name = "custom"

        with open(table_file, encoding="utf-8") as self._current_table:
            self._read_line()
            while True:
                key, value = self._last_line_read
                if key == "generic_levels":
                    for dim in value.split(" "):
                        coord = CoordinateInfo(dim)
                        coord.generic_level = True
                        coord.axis = "Z"
                        self.coords[dim] = coord
                elif key == "axis_entry":
                    self.coords[value] = self._read_coordinate(value)
                    # The entry reader already consumed the following line,
                    # so process ``_last_line_read`` again.
                    continue
                elif key == "variable_entry":
                    # Custom variables are read with an empty frequency.
                    table[value] = self._read_variable(value, "")
                    continue
                if not self._read_line():
                    return table
class NoInfo(InfoBase):
    """Table that can be used for projects that do not provide a CMOR table."""

    def __init__(self) -> None:
        super().__init__()

    def __repr__(self) -> str:
        return f"{type(self).__name__}()"

    def get_variable(
        self,
        table_name: str,  # noqa: ARG002
        short_name: str,
        *,
        branding_suffix: str | None = None,  # noqa: ARG002
        derived: bool = False,  # noqa: ARG002
    ) -> VariableInfo | None:
        """Return variable information for ``short_name``.

        No table is consulted: a new `VariableInfo` is created on every
        call and only its ``short_name`` is filled in.

        Parameters
        ----------
        table_name:
            Table name, i.e., the ``mip`` in the :ref:`recipe <recipe>` or
            :class:`~esmvalcore.dataset.Dataset`. Not used by this class.
        short_name:
            Variable's short name.
        branding_suffix:
            Branding suffix of the variable. Not used by this class.
        derived:
            Variable is derived. Not used by this class.

        Returns
        -------
        VariableInfo | None
            A new `VariableInfo` object whose ``short_name`` is set to the
            requested ``short_name``.
        """
        info = VariableInfo()
        info.short_name = short_name
        return info