Source code for aeolus.io

"""Input and output functionality."""

from collections.abc import Generator, Sequence
from pathlib import Path
import re
from typing import Any, Optional, Union

import iris
from iris.coord_systems import GeogCS
from iris.coords import AuxCoord
from iris.cube import Cube, CubeList
from iris.fileformats.pp import EARTH_RADIUS
from iris.fileformats.um import structured_um_loading
import iris.pandas
import numpy as np
import pandas as pd

__all__ = (
    "create_dummy_cube",
    "get_filename_list",
    "load_conservation_diag",
    "load_data",
    "load_multidir",
    "load_vert_lev",
    "save_cubelist",
)

GLM_RUNID = r"umglaa"  # file prefix
GLM_FILE_REGEX = (
    GLM_RUNID + r".p[b,c,d,e]{1}[0]{6}(?P<timestamp>[0-9]{2,6})_00"
)
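
# For illustration, a hypothetical file name this regex is meant to match;
# the named group extracts the timestamp as an integer:
# >>> m = re.match(GLM_FILE_REGEX, "umglaa.pb000000000120_00")
# >>> int(m["timestamp"])
# 120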


def create_dummy_cube(
    nlat: Optional[int] = None,
    nlon: Optional[int] = None,
    n_res: Optional[int] = None,
    endgame: bool = True,
    grid_type: str = "a",
    pm180: bool = False,
) -> Cube:
    """
    Create a dummy 2D cube with given resolution compatible with the UM grid.

    Parameters
    ----------
    nlat: int, optional
        Number of points in latitude. Not needed if `n_res` is given.
    nlon: int, optional
        Number of points in longitude. Not needed if `n_res` is given.
    n_res: int, optional
        N-notation resolution.
        If given, `nlat` and `nlon` are calculated automatically.
    endgame: bool, optional
        Use the ENDGame grid.
        If True, the "A" grid starts at lat=-90+dlat/2, lon=dlon/2.
        If False, the "A" grid starts at lat=-90, lon=0.
        ENDGame's "A" grid is NewDyn's "B" grid and vice versa.
    grid_type: str, optional
        Type of the UM grid:
        - "a" is the main UM grid;
        - "b" is the staggered Lorenz wind grid;
        - "cu" is the staggered U wind grid;
        - "cv" is the staggered V wind grid.
    pm180: bool, optional
        Use a -180 to 180 longitude span instead of 0 to 360.

    Returns
    -------
    cube: iris.cube.Cube
        A dummy 2D cube of zeros.

    Examples
    --------
    >>> create_dummy_cube(nlat=90, nlon=144)
    >>> create_dummy_cube(n_res=96)
    """
    assert grid_type.lower() in [
        "a",
        "b",
        "cu",
        "cv",
    ], f"Grid {grid_type} not valid."
    # Calculate the number of points given the N-notation resolution.
    if n_res is not None:
        nlon = 2 * n_res
        nlat = 3 * (n_res // 2)
    # The default set-up is the ENDGame "A" grid, equivalent to the NewDyn "B" grid.
    if endgame:
        lat_shift = grid_type.lower() in ("b", "cv")
        lon_shift = grid_type.lower() in ("b", "cu")
    else:
        lat_shift = grid_type.lower() in ("a", "cu")
        lon_shift = grid_type.lower() in ("a", "cv")

    # Set the coordinate system to match PP data.
    geog_cs = GeogCS(EARTH_RADIUS)

    # Latitude
    spacing = 180.0 / nlat
    if lat_shift:
        lower_bound = -90.0
        upper_bound = 90.0
        act_nlat = nlat + 1
    else:
        lower_bound = -90.0 + spacing / 2.0
        upper_bound = 90.0 - spacing / 2.0
        act_nlat = nlat
    lats = np.linspace(lower_bound, upper_bound, act_nlat)
    lat_coord = iris.coords.DimCoord(
        lats, standard_name="latitude", units="degrees", coord_system=geog_cs
    )
    lat_coord.guess_bounds()

    # Longitude
    spacing = 360.0 / nlon
    if lon_shift:
        lower_bound = 0.0
        upper_bound = 360.0 - spacing
    else:
        lower_bound = 0.0 + spacing / 2.0
        upper_bound = 360.0 - spacing / 2.0
    lons = np.linspace(lower_bound, upper_bound, nlon)
    if pm180:
        lons -= 180.0
    lon_coord = iris.coords.DimCoord(
        lons,
        standard_name="longitude",
        units="degrees",
        circular=True,
        coord_system=geog_cs,
    )
    lon_coord.guess_bounds()

    # Create a numpy array of zeros.
    data = np.zeros((len(lats), len(lons)), dtype=np.int32)
    # Assemble the cube.
    cube = Cube(
        data,
        dim_coords_and_dims=((lat_coord, 0), (lon_coord, 1)),
        var_name="dummy_cube",
        units="1",
    )
    return cube
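
# Usage sketch (illustrative): an N96 grid, and a staggered U-wind grid
# with longitudes spanning -180 to 180.
# >>> glm = create_dummy_cube(n_res=96)
# >>> cu = create_dummy_cube(nlat=90, nlon=144, grid_type="cu", pm180=True)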
def full_path_glob(path: Path) -> Generator:
    """Recursively glob a path whose directory components may contain wildcards."""
    # Keep the literal leading components as the base directory,
    # then recursively glob the remainder of the path.
    resolved = Path(path.absolute().parts[0])
    glob_parts = []
    to_path = True
    for part in path.parts[1:]:
        if to_path and ("*" in part):
            to_path = False
        if to_path:
            resolved /= part
        else:
            glob_parts.append(part)
    gen = resolved.rglob(path.anchor.join(glob_parts))
    return gen
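
# For example (hypothetical paths), a wildcard in a directory component
# switches the remainder of the path to recursive globbing:
# >>> full_path_glob(Path("/data/runs/*/output/umglaa*"))
# would yield e.g. /data/runs/job001/output/umglaa.pb000000000120_00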
def get_filename_list(
    path_to_dir: Path,
    glob_pattern: str = f"{GLM_RUNID}*",
    ts_start: int = 0,
    ts_end: int = -1,
    every: int = 1,
    regex: str = GLM_FILE_REGEX,
    regex_key: str = "timestamp",
    sort: bool = True,
) -> Sequence[Path]:
    """Get a list of files in a directory with timestamps within the given range."""
    glob_gen = full_path_glob(path_to_dir / glob_pattern)
    fnames = []
    tstamps = {}
    for fpath in glob_gen:
        match = re.match(regex, fpath.name)
        if match:
            ts_num = int(match[regex_key])
            if (ts_num >= ts_start) and (ts_num % every == 0):
                if (ts_end == -1) or (ts_num <= ts_end):
                    fnames.append(fpath)
                    tstamps[fpath] = ts_num
    if sort:
        fnames = sorted(fnames, key=lambda x: tstamps[x])
    return fnames
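
# Usage sketch (hypothetical directory): select every 2nd output file
# with timestamps between 120 and 960, sorted by timestamp.
# >>> fnames = get_filename_list(
# ...     Path("/data/runs/job001"), ts_start=120, ts_end=960, every=2
# ... )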
def load_conservation_diag(
    fnames: Sequence[Union[Path, str]],
    convert_to_iris: bool = True,
    drop_duplicates: bool = True,
) -> Union[pd.DataFrame, CubeList]:
    """Load UM conservation diagnostics from a series of text files."""
    dset = pd.concat(
        pd.read_csv(fpath, header=None, sep=r"\s+") for fpath in fnames
    )
    if drop_duplicates:
        dset = dset.drop_duplicates()
    dset = (
        dset.rename(
            {
                0: "timestep",
                1: "total_atmosphere_mass",
                2: "total_axial_angular_momentum",
                3: "total_kinetic_energy",
            },
            axis="columns",
        )
        .sort_values(by="timestep")
        .set_index("timestep")
    )
    if convert_to_iris:
        dset = iris.pandas.as_cubes(dset)
        # Assign physical units to each of the diagnostics.
        cube = dset.extract_cube("total_atmosphere_mass")
        cube.units = "kg"
        cube = dset.extract_cube("total_axial_angular_momentum")
        cube.units = "kg m**2 s**-1"
        cube = dset.extract_cube("total_kinetic_energy")
        cube.units = "kg m**2 s**-2"
    return dset
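
# Usage sketch (hypothetical file names): combine the conservation series
# from two resubmitted run segments, dropping overlapping rows.
# >>> cl = load_conservation_diag(
# ...     [Path("cons_diag_01.dat"), Path("cons_diag_02.dat")]
# ... )
# >>> mass = cl.extract_cube("total_atmosphere_mass")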
def load_data(files: Sequence, structured: bool = False) -> CubeList:
    """Use `iris.load` with optional structured loading for PP files."""
    if structured:
        # Faster loading of structured UM files.
        with structured_um_loading():
            cubes = iris.load(files)
    else:
        with iris.FUTURE.context(datum_support=True):
            cubes = iris.load(files)
    return cubes
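
# Usage sketch (hypothetical directory): chain with get_filename_list()
# and enable structured loading for a large set of UM PP files.
# >>> cubes = load_data(
# ...     get_filename_list(Path("/data/runs/job001")), structured=True
# ... )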
def load_multidir(
    path_mask: str, labels: Sequence[str], label_name: str = "run"
) -> CubeList:
    """Load cube lists from multiple directories and merge them."""
    joint_cl = CubeList()
    for label in labels:
        cl = iris.load(str(path_mask).format(label))
        for cube in cl:
            cube.attributes["um_version"] = ""  # FIXME
            # Label each cube with its run name so the cubes can be merged.
            cube.add_aux_coord(AuxCoord([label], long_name=label_name))
            joint_cl.append(cube)
    return joint_cl.merge()
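
# Usage sketch (hypothetical path mask): "{}" is replaced by each label,
# and the loaded cubes are merged along the new "run" coordinate.
# >>> cl = load_multidir(
# ...     "/data/runs/{}/output/*.pp",
# ...     labels=["control", "warm"],
# ...     label_name="run",
# ... )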
def load_vert_lev(
    path_to_file: Path, lev_type: str = "theta"
) -> np.ndarray:
    """
    Read data from the UM vertical levels file.

    Parameters
    ----------
    path_to_file: pathlib.Path
        Full path to the vertical levels file.
    lev_type: str, optional
        What levels to return: "theta" or "rho".

    Returns
    -------
    levs: numpy.ndarray
        Array of height levels.
    """
    import f90nml  # noqa

    with path_to_file.open("r") as nml_file:
        nml = f90nml.read(nml_file)
        # Scale the non-dimensional eta levels by the model top height.
        levs = (
            np.array(nml["vertlevs"][f"eta_{lev_type}"])
            * nml["vertlevs"]["z_top_of_model"]
        )
    return levs
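
# Usage sketch (hypothetical file name): read theta-level heights from a
# UM vertical levels namelist.
# >>> theta_levs = load_vert_lev(
# ...     Path("vertlevs_L38_29t_9s_40km"), lev_type="theta"
# ... )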
def save_cubelist(
    cubelist: CubeList, path: Path, **aux_attrs: Any
) -> None:
    """
    Save a cube list without the `planet_conf` container to a file.

    Parameters
    ----------
    cubelist: iris.cube.CubeList
        Cube list to write to disk.
    path: str or pathlib.Path
        File path.
    aux_attrs: dict, optional
        Dictionary of additional attributes to save with the cubes.
    """
    # Remove the `planet_conf` attribute before saving.
    out = CubeList()
    old_attrs = []
    for cube in cubelist:
        old_attrs.append(cube.attributes.copy())
        new_attrs = {**cube.attributes, **aux_attrs}
        try:
            new_attrs.pop("planet_conf")
        except KeyError:
            pass
        cube.attributes = new_attrs
        out.append(cube)
    with iris.FUTURE.context(save_split_attrs=True):
        iris.save(out, str(path))
    # Restore the original attributes.
    for cube, attrs in zip(cubelist, old_attrs):
        cube.attributes = attrs
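
# Usage sketch (hypothetical path and attributes): extra keyword arguments
# are attached as attributes to every cube before saving.
# >>> save_cubelist(cubes, Path("processed.nc"), source="aeolus", planet="earth")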