Coverage for src / CSET / operators / read.py: 90%

365 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-30 15:47 +0000

1# © Crown copyright, Met Office (2022-2025) and CSET contributors. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Operators for reading various types of files from disk.""" 

16 

17import ast 

18import datetime 

19import functools 

20import glob 

21import itertools 

22import logging 

23from pathlib import Path 

24from typing import Literal 

25 

26import iris 

27import iris.coord_systems 

28import iris.coords 

29import iris.cube 

30import iris.exceptions 

31import iris.util 

32import numpy as np 

33from iris.analysis.cartography import rotate_pole, rotate_winds 

34 

35from CSET._common import iter_maybe 

36from CSET.operators._stash_to_lfric import STASH_TO_LFRIC 

37from CSET.operators._utils import ( 

38 get_cube_coordindex, 

39 get_cube_yxcoordname, 

40 is_spatialdim, 

41) 

42 

43 

class NoDataError(FileNotFoundError):
    """Raised when loading completes without producing any data."""

46 

47 

def read_cube(
    file_paths: list[str] | str,
    constraint: iris.Constraint = None,
    model_names: list[str] | str | None = None,
    subarea_type: str | None = None,
    subarea_extent: list[float] | None = None,
    **kwargs,
) -> iris.cube.Cube:
    """Read a single cube from files.

    Read operator that takes a path string (can include shell-style glob
    patterns), and loads the cube matching the constraint. If any paths point to
    directory, all the files contained within are loaded.

    Ensemble data can also be loaded. If it has a realization coordinate
    already, it will be directly used. If not, it will have its member number
    guessed from the filename, based on one of several common patterns. For
    example the pattern *emXX*, where XX is the realization.

    Deterministic data will be loaded with a realization of 0, allowing it to be
    processed in the same way as ensemble data.

    Arguments
    ---------
    file_paths: str | list[str]
        Path or paths to where .pp/.nc files are located
    constraint: iris.Constraint | iris.ConstraintCombination, optional
        Constraints to filter data by. Defaults to unconstrained.
    model_names: str | list[str], optional
        Names of the models that correspond to respective paths in file_paths.
    subarea_type: "gridcells" | "modelrelative" | "realworld", optional
        Whether to constrain data by model relative coordinates or real world
        coordinates.
    subarea_extent: list, optional
        List of coordinates to constraint data by, in order lower latitude,
        upper latitude, lower longitude, upper longitude.

    Returns
    -------
    cubes: iris.cube.Cube
        Cube loaded

    Raises
    ------
    FileNotFoundError
        If the provided path does not exist
    ValueError
        If the constraint doesn't produce a single cube.
    """
    # Delegate all loading to read_cubes; this wrapper only enforces that
    # exactly one cube results from the constraint.
    cubes = read_cubes(
        file_paths=file_paths,
        constraint=constraint,
        model_names=model_names,
        subarea_type=subarea_type,
        subarea_extent=subarea_extent,
    )
    # Check filtered cubes is a CubeList containing one cube.
    if len(cubes) == 1:
        return cubes[0]
    else:
        raise ValueError(
            f"Constraint doesn't produce single cube: {constraint}\n{cubes}"
        )

111 

112 

def read_cubes(
    file_paths: list[str] | str,
    constraint: iris.Constraint | None = None,
    model_names: str | list[str] | None = None,
    subarea_type: str | None = None,
    subarea_extent: list[float] | None = None,
    **kwargs,
) -> iris.cube.CubeList:
    """Read cubes from files.

    Read operator that takes a path string (can include shell-style glob
    patterns), and loads the cubes matching the constraint. If any paths point
    to directory, all the files contained within are loaded.

    Ensemble data can also be loaded. If it has a realization coordinate
    already, it will be directly used. If not, it will have its member number
    guessed from the filename, based on one of several common patterns. For
    example the pattern *emXX*, where XX is the realization.

    Deterministic data will be loaded with a realization of 0, allowing it to be
    processed in the same way as ensemble data.

    Data output by XIOS (such as LFRic) has its per-file metadata removed so
    that the cubes merge across files.

    Arguments
    ---------
    file_paths: str | list[str]
        Path or paths to where .pp/.nc files are located. Can include globs.
    constraint: iris.Constraint | iris.ConstraintCombination, optional
        Constraints to filter data by. Defaults to unconstrained.
    model_names: str | list[str], optional
        Names of the models that correspond to respective paths in file_paths.
    subarea_type: str, optional
        Whether to constrain data by model relative coordinates or real world
        coordinates.
    subarea_extent: list[float], optional
        List of coordinates to constraint data by, in order lower latitude,
        upper latitude, lower longitude, upper longitude.

    Returns
    -------
    cubes: iris.cube.CubeList
        Cubes loaded after being merged and concatenated.

    Raises
    ------
    FileNotFoundError
        If the provided path does not exist
    """
    # Get iterable of paths. Each path corresponds to 1 model.
    paths = iter_maybe(file_paths)
    model_names = iter_maybe(model_names)

    # Check we have appropriate number of model names.
    if model_names != (None,) and len(model_names) != len(paths):
        raise ValueError(
            f"The number of model names ({len(model_names)}) should equal "
            f"the number of paths given ({len(paths)})."
        )

    # Load the data for each model into a CubeList per model.
    model_cubes = (
        _load_model(path, name, constraint)
        for path, name in itertools.zip_longest(paths, model_names, fillvalue=None)
    )

    # Split out first model's cubes and mark it as the base for comparisons.
    cubes = next(model_cubes)
    for cube in cubes:
        # Use 1 to indicate True, as booleans can't be saved in NetCDF attributes.
        cube.attributes["cset_comparison_base"] = 1

    # Load the rest of the models.
    cubes.extend(itertools.chain.from_iterable(model_cubes))

    # Unify time units so different case studies can merge.
    iris.util.unify_time_units(cubes)

    # Select sub region.
    cubes = _cutout_cubes(cubes, subarea_type, subarea_extent)

    # Merge and concatenate cubes now metadata has been fixed.
    cubes = cubes.merge()
    cubes = cubes.concatenate()

    # Squeeze single valued coordinates into scalar coordinates.
    cubes = iris.cube.CubeList(iris.util.squeeze(cube) for cube in cubes)

    # Ensure dimension coordinates are bounded.
    for cube in cubes:
        for dim_coord in cube.coords(dim_coords=True):
            # Iris can't guess the bounds of a scalar coordinate.
            if not dim_coord.has_bounds() and dim_coord.shape[0] > 1:
                dim_coord.guess_bounds()

    logging.info("Loaded cubes: %s", cubes)
    if len(cubes) == 0:
        raise NoDataError("No cubes loaded, check your constraints!")
    return cubes

213 

214 

def _load_model(
    paths: str | list[str],
    model_name: str | None,
    constraint: iris.Constraint | None,
) -> iris.cube.CubeList:
    """Load one model's files into a CubeList, tagging cubes with its name."""
    input_files = _check_input_files(paths)
    # A constraint of None loads everything.
    logging.debug("Constraint: %s", constraint)
    cubes = iris.load(input_files, constraint, callback=_loading_callback)
    # Make the UM's winds consistent with LFRic.
    _fix_um_winds(cubes)

    # Record the model name on every cube so later steps can retrieve it
    # without it being threaded through as a parameter.
    if model_name is not None:
        for cube in cubes:
            cube.attributes["model_name"] = model_name
    return cubes

234 

235 

def _check_input_files(input_paths: str | list[str]) -> list[Path]:
    """Resolve the given paths into a sorted list of existing files.

    Arguments
    ---------
    input_paths: list[str]
        List of paths to input files or directories. The path may itself contain
        glob patterns, but unlike in shells it will match directly first.

    Returns
    -------
    list[Path]
        A list of files to load.

    Raises
    ------
    FileNotFoundError:
        If the provided arguments don't resolve to at least one existing file.
    """
    files = []
    for raw in iter_maybe(input_paths):
        # A literal filename wins over glob expansion.
        direct = Path(raw)
        if direct.is_file():
            files.append(direct)
            continue
        for match in map(Path, glob.glob(raw)):
            if match.is_dir():
                # Directories contribute every file directly inside them.
                logging.debug("Checking directory '%s' for files", match)
                files.extend(entry for entry in match.iterdir() if entry.is_file())
            else:
                files.append(match)

    files.sort()
    logging.info("Loading files:\n%s", "\n".join(str(path) for path in files))
    if len(files) == 0:
        raise FileNotFoundError(f"No files found for {input_paths}")
    return files

277 

278 

def _cutout_cubes(
    cubes: iris.cube.CubeList,
    subarea_type: Literal["gridcells", "realworld", "modelrelative"] | None,
    subarea_extent: list[float] | None,
) -> iris.cube.CubeList:
    """Cut out a subarea from a CubeList.

    Arguments
    ---------
    cubes: iris.cube.CubeList
        Cubes to cut a subarea from.
    subarea_type: "gridcells" | "realworld" | "modelrelative", optional
        How subarea_extent is interpreted. None disables the cutout.
    subarea_extent: list[float], optional
        For "gridcells": cells to trim from lower, left, upper, right edges.
        Otherwise: lower latitude, upper latitude, lower longitude, upper
        longitude of the requested region.

    Returns
    -------
    iris.cube.CubeList
        The cut out cubes, or the input cubes when disabled.

    Raises
    ------
    ValueError
        If subarea_type is unknown, or the cutout falls outside the data.
    """
    if subarea_type is None:
        logging.debug("Subarea selection is disabled.")
        return cubes

    cutout_cubes = iris.cube.CubeList()
    for cube in cubes:
        # Find the spatial dimension coordinates.
        lat_name, lon_name = get_cube_yxcoordname(cube)

        # Compute cutout based on number of cells to trim from edges.
        if subarea_type == "gridcells":
            logging.debug(
                "User requested LowerTrim: %s LeftTrim: %s UpperTrim: %s RightTrim: %s",
                subarea_extent[0],
                subarea_extent[1],
                subarea_extent[2],
                subarea_extent[3],
            )
            lat_points = np.sort(cube.coord(lat_name).points)
            lon_points = np.sort(cube.coord(lon_name).points)
            # Define cutout region using user provided cell points.
            lats = [lat_points[subarea_extent[0]], lat_points[-subarea_extent[2] - 1]]
            lons = [lon_points[subarea_extent[1]], lon_points[-subarea_extent[3] - 1]]

        # Compute cutout based on specified coordinate values.
        elif subarea_type == "realworld" or subarea_type == "modelrelative":
            logging.debug(
                "User requested LLat: %s ULat: %s LLon: %s ULon: %s",
                subarea_extent[0],
                subarea_extent[1],
                subarea_extent[2],
                subarea_extent[3],
            )
            # Define cutout region using user provided coordinates.
            lats = np.array(subarea_extent[0:2])
            lons = np.array(subarea_extent[2:4])
            # Ensure cutout longitudes are within +/- 180.0 bounds.
            while lons[0] < -180.0:
                lons += 360.0
            while lons[1] > 180.0:
                lons -= 360.0
            # If the coordinate system is rotated we convert coordinates into
            # model-relative coordinates to extract the appropriate cutout.
            coord_system = cube.coord(lat_name).coord_system
            if subarea_type == "realworld" and isinstance(
                coord_system, iris.coord_systems.RotatedGeogCS
            ):
                lons, lats = rotate_pole(
                    lons,
                    lats,
                    pole_lon=coord_system.grid_north_pole_longitude,
                    pole_lat=coord_system.grid_north_pole_latitude,
                )
        else:
            raise ValueError("Unknown subarea_type:", subarea_type)

        # Do cutout and add to cutout_cubes.
        intersection_args = {lat_name: lats, lon_name: lons}
        logging.debug("Cutting out coords: %s", intersection_args)
        try:
            cutout_cubes.append(cube.intersection(**intersection_args))
        except IndexError as err:
            # Fixed missing space between the first two sentences of this message.
            raise ValueError(
                "Region cutout error. Check and update SUBAREA_EXTENT. "
                "Cutout region requested should be contained within data area. "
                "Also check if cutout region requested is smaller than input grid spacing."
            ) from err

    return cutout_cubes

357 

358 

def _loading_callback(cube: iris.cube.Cube, field, filename: str) -> iris.cube.Cube:
    """Run every normalisation callback over a freshly loaded cube.

    Most callbacks mutate the cube in place; the ones that return a
    replacement cube have their result captured and passed onwards. The
    call order is significant and must be preserved.
    """
    _realization_callback(cube, field, filename)
    _um_normalise_callback(cube, field, filename)
    _lfric_normalise_callback(cube, field, filename)
    cube = _lfric_time_coord_fix_callback(cube, field, filename)
    _normalise_var0_varname(cube)
    _fix_spatial_coords_callback(cube)
    _fix_pressure_coord_callback(cube)
    _fix_um_radtime(cube)
    _fix_cell_methods(cube)
    cube = _convert_cube_units_callback(cube)
    cube = _grid_longitude_fix_callback(cube)
    _fix_lfric_cloud_base_altitude(cube)
    _proleptic_gregorian_fix(cube)
    _lfric_time_callback(cube)
    _lfric_forecast_period_standard_name_callback(cube)
    _normalise_ML_varname(cube)
    return cube

379 

380 

def _realization_callback(cube, field, filename):
    """Attach a zero realization coordinate to deterministic cubes.

    With every cube carrying a realization coordinate, deterministic and
    ensemble data flow through the rest of the code identically.
    """
    # Skip cubes that already carry a realization coordinate.
    if cube.coords("realization"):
        return
    cube.add_aux_coord(
        iris.coords.DimCoord(0, standard_name="realization", units="1")
    )

392 

393 

394@functools.lru_cache(None) 

395def _warn_once(msg): 

396 """Print a warning message, skipping recent duplicates.""" 

397 logging.warning(msg) 

398 

399 

def _um_normalise_callback(cube: iris.cube.Cube, field, filename):
    """Normalise UM STASH variable long names to LFRic variable names.

    Note standard names will remain associated with cubes where different.
    Long name will be used consistently in output filename and titles.
    """
    # Only UM cubes carry a STASH attribute.
    if "STASH" not in cube.attributes:
        return
    stash = cube.attributes["STASH"]
    try:
        lfric_name, _grid = STASH_TO_LFRIC[str(stash)]
    except KeyError:
        # Leave cubes with unknown stash codes untouched.
        _warn_once(
            f"Unknown STASH code: {stash}. Please check file stash_to_lfric.py to update."
        )
        return
    cube.long_name = lfric_name

417 

418 

def _lfric_normalise_callback(cube: iris.cube.Cube, field, filename):
    """Normalise attributes that prevents LFRic cube from merging.

    The uuid and timeStamp relate to the output file, as saved by XIOS, and has
    no relation to the data contained. These attributes are removed.

    The um_stash_source is a list of STASH codes for when an LFRic field maps to
    multiple UM fields, however it can be encoded in any order. This attribute
    is sorted to prevent this. This attribute is only present in LFRic data that
    has been converted to look like UM data.
    """
    # Strip per-file metadata that would block merging.
    for attribute in ("timeStamp", "uuid", "name", "source", "analysis_source", "history"):
        cube.attributes.pop(attribute, None)

    # Canonicalise the STASH code list ordering.
    stash_list = cube.attributes.get("um_stash_source")
    if stash_list:
        # Parse the string as a list, sort, then re-encode as a string.
        cube.attributes["um_stash_source"] = str(sorted(ast.literal_eval(stash_list)))

443 

444 

def _lfric_time_coord_fix_callback(
    cube: iris.cube.Cube, field, filename
) -> iris.cube.Cube:
    """Ensure the time coordinate is a DimCoord rather than an AuxCoord.

    The coordinate is converted and replaced if not. SLAMed LFRic data has this
    issue, though the coordinate satisfies all the properties for a DimCoord.
    Scalar time values are left as AuxCoords.
    """
    # Iris's NetCDF handling can leave time as an AuxCoord.
    if not cube.coords("time"):
        return cube
    tcoord = cube.coord("time")
    if isinstance(tcoord, iris.coords.DimCoord) or len(cube.coord_dims(tcoord)) != 1:
        return cube
    # Fudge the bounds to foil checking for strict monotonicity.
    if tcoord.has_bounds():
        if (tcoord.bounds[-1][0] - tcoord.bounds[0][0]) < 1.0e-8:
            tcoord.bounds = [
                [
                    tcoord.bounds[idx][0] + 1.0e-8 * float(idx),
                    tcoord.bounds[idx][1],
                ]
                for idx in range(len(tcoord.bounds))
            ]
    iris.util.promote_aux_coord_to_dim_coord(cube, tcoord)
    return cube

474 

475 

def _grid_longitude_fix_callback(cube: iris.cube.Cube) -> iris.cube.Cube:
    """Check grid_longitude coordinates are in the range -180 deg to 180 deg.

    This is necessary if comparing two models with different conventions --
    for example, models where the prime meridian is defined as 0 deg or
    360 deg. If not in the range -180 deg to 180 deg, we wrap the grid_longitude
    so that it falls in this range. Checks are for near-180 bounds given
    model data bounds may not extend exactly to 0. or 360.
    Input cubes on non-rotated grid coordinates are not impacted.
    """
    try:
        y, x = get_cube_yxcoordname(cube)
    except ValueError:
        # Non-spatial cubes pass through untouched.
        return cube

    lon_coord = cube.coord(x)
    coord_system = lon_coord.coord_system
    # Only rotated-pole grid_longitude coordinates get wrapped.
    if x != "grid_longitude" or not isinstance(
        coord_system, iris.coord_systems.RotatedGeogCS
    ):
        return cube

    wrapped = lon_coord.points.copy()
    centre = np.median(wrapped)
    while centre < -175.0:
        centre += 360.0
        wrapped += 360.0
    while centre >= 175.0:
        centre -= 360.0
        wrapped -= 360.0
    lon_coord.points = wrapped

    # Regenerate bounds so they stay consistent with the wrapped points.
    if lon_coord.has_bounds():
        lon_coord.bounds = None
        lon_coord.guess_bounds()

    return cube

514 

515 

def _fix_spatial_coords_callback(cube: iris.cube.Cube):
    """Check latitude and longitude coordinates name.

    This is necessary as some models define their grid as on rotated
    'grid_latitude' and 'grid_longitude' coordinates while others define
    the grid on non-rotated 'latitude' and 'longitude'.
    Cube dimensions need to be made consistent to avoid recipe failures,
    particularly where comparing multiple input models with differing spatial
    coordinates.
    """
    # Non-spatial cubes pass through untouched.
    if not is_spatialdim(cube):
        return

    # Spatial coordinate names and their dimension indices.
    y_name, x_name = get_cube_yxcoordname(cube)
    ny = get_cube_coordindex(cube, y_name)
    nx = get_cube_coordindex(cube, x_name)

    # Drop spatial bounds when values look corrupt; anything beyond an
    # absolute magnitude of 10000.0 cannot be a valid degree value.
    if cube.coord(x_name).has_bounds() and cube.coord(y_name).has_bounds():
        worst_x = np.max(np.abs(cube.coord(x_name).bounds))
        worst_y = np.max(np.abs(cube.coord(y_name).bounds))
        if worst_x > 10000.0 or worst_y > 10000.0:
            cube.coord(x_name).bounds = None
            cube.coord(y_name).bounds = None

    def make_geog_dimcoord(points, name):
        """Build an unrotated 1-d DimCoord on a standard sphere."""
        return iris.coords.DimCoord(
            points,
            standard_name=name,
            var_name=name,
            units="degrees",
            coord_system=iris.coord_systems.GeogCS(6371229.0),
            circular=True,
        )

    dim_coord_names = [coord.name() for coord in cube.coords(dim_coords=True)]

    # A rotated grid whose pole sits at 90N is not really rotated; replace
    # [grid_latitude, grid_longitude] with plain [latitude, longitude].
    if "grid_latitude" in dim_coord_names:
        coord_system = cube.coord("grid_latitude").coord_system
        pole_lat = getattr(coord_system, "grid_north_pole_latitude", None)
        if pole_lat == 90.0:
            lats = cube.coord("grid_latitude").points
            lons = cube.coord("grid_longitude").points

            cube.remove_coord("grid_latitude")
            cube.add_dim_coord(make_geog_dimcoord(lats, "latitude"), ny)
            y_name = "latitude"

            cube.remove_coord("grid_longitude")
            cube.add_dim_coord(make_geog_dimcoord(lons, "longitude"), nx)
            x_name = "longitude"

    # For [latitude, longitude] inputs, mirror the points into
    # [grid_latitude, grid_longitude] AuxCoords so rotated and unrotated
    # models present the same coordinate set.
    if y_name in ["latitude"] and cube.coord(y_name).units in [
        "degrees",
        "degrees_north",
        "degrees_south",
    ]:
        aux_names = [coord.name() for coord in cube.coords(dim_coords=False)]
        if "grid_latitude" not in aux_names:
            cube.add_aux_coord(
                iris.coords.AuxCoord(
                    cube.coord(y_name).points,
                    var_name="grid_latitude",
                    units="degrees",
                ),
                ny,
            )
        # Restore a CoordSystem lost on iris.save.
        if not cube.coord(y_name).coord_system:
            cube.coord(y_name).coord_system = iris.coord_systems.GeogCS(6371229.0)

    if x_name in ["longitude"] and cube.coord(x_name).units in [
        "degrees",
        "degrees_west",
        "degrees_east",
    ]:
        aux_names = [coord.name() for coord in cube.coords(dim_coords=False)]
        if "grid_longitude" not in aux_names:
            cube.add_aux_coord(
                iris.coords.AuxCoord(
                    cube.coord(x_name).points,
                    var_name="grid_longitude",
                    units="degrees",
                ),
                nx,
            )
        # Restore a CoordSystem lost on iris.save.
        if not cube.coord(x_name).coord_system:
            cube.coord(x_name).coord_system = iris.coord_systems.GeogCS(6371229.0)

628 

629 

def _fix_pressure_coord_callback(cube: iris.cube.Cube):
    """Rename pressure coordinate to "pressure" if it exists and ensure hPa units.

    This problem was raised because the AIFS model data from ECMWF
    defines the pressure coordinate with the name "pressure_level" rather
    than compliant CF coordinate names.

    Additionally, set the units of pressure to be hPa to be consistent with the UM,
    and approach the coordinates in a unified way.
    """
    for coord in cube.dim_coords:
        if coord.name() in ("pressure_level", "pressure_levels"):
            coord.rename("pressure")
        if coord.name() == "pressure" and str(cube.coord("pressure").units) != "hPa":
            cube.coord("pressure").convert_units("hPa")

647 

648 

def _fix_um_radtime(cube: iris.cube.Cube):
    """Move radiation diagnostics from timestamps which are output N minutes or seconds past every hour.

    This callback does not have any effect for output diagnostics with
    timestamps exactly 00 or 30 minutes past the hour. Only radiation
    diagnostics are checked.
    Note this callback does not interpolate the data in time, only adjust
    timestamps to sit on the hour to enable time-to-time difference plotting
    with models which may output radiation data on the hour.
    """
    try:
        # Radiation diagnostics by STASH code (duplicate m01s01i207 removed).
        if cube.attributes["STASH"] in [
            "m01s01i207",
            "m01s01i208",
            "m01s02i205",
            "m01s02i201",
            "m01s02i207",
            "m01s01i235",
        ]:
            time_coord = cube.coord("time")

            # Convert time points to datetime objects
            time_unit = time_coord.units
            time_points = time_unit.num2date(time_coord.points)
            # Skip if times don't need fixing.
            if time_points[0].minute == 0 and time_points[0].second == 0:
                return
            if time_points[0].minute == 30 and time_points[0].second == 0:
                return

            # Subtract time difference from the hour from each time point
            n_minute = time_points[0].minute
            n_second = time_points[0].second
            # If times closer to next hour, compute difference to add on to following hour
            if n_minute > 30:
                n_minute = n_minute - 60
            # Compute new diagnostic time stamp
            new_time_points = (
                time_points
                - datetime.timedelta(minutes=n_minute)
                - datetime.timedelta(seconds=n_second)
            )

            # Convert back to numeric values using the original time unit.
            new_time_values = time_unit.date2num(new_time_points)

            # Replace the time coordinate with updated values.
            time_coord.points = new_time_values

            # Recompute forecast_period with corrected values. Use coords()
            # rather than coord() so a missing forecast_period is skipped
            # instead of raising CoordinateNotFoundError (which the
            # surrounding except KeyError would not catch).
            if cube.coords("forecast_period"):
                fcst_prd_points = cube.coord("forecast_period").points
                new_fcst_points = (
                    time_unit.num2date(fcst_prd_points)
                    - datetime.timedelta(minutes=n_minute)
                    - datetime.timedelta(seconds=n_second)
                )
                cube.coord("forecast_period").points = time_unit.date2num(
                    new_fcst_points
                )
    except KeyError:
        # No STASH attribute: not UM data, nothing to fix.
        pass

712 

713 

def _fix_cell_methods(cube: iris.cube.Cube):
    """To fix the assumed cell_methods in accumulation STASH from UM.

    Lightning (m01s21i104), rainfall amount (m01s04i201, m01s05i201) and snowfall amount
    (m01s04i202, m01s05i202) in UM is being output as a time accumulation,
    over each hour (TAcc1hr), but input cubes show cell_methods as "mean".
    For UM and LFRic inputs to be compatible, we assume accumulated cell_methods are
    "sum". This callback changes "mean" cube attribute cell_method to "sum",
    enabling the cell_method constraint on reading to select correct input.
    """
    # Only the known accumulation STASH codes are touched.
    if cube.attributes.get("STASH") not in (
        "m01s21i104",
        "m01s04i201",
        "m01s04i202",
        "m01s05i201",
        "m01s05i202",
    ):
        return
    # Only act when every time-processing method is "mean".
    if {cm.method for cm in cube.cell_methods} != {"mean"}:
        return

    # Keep the interval and comment metadata from the (last) input method.
    for cell_method in cube.cell_methods:
        interval_str = cell_method.intervals
        comment_str = cell_method.comments

    # Swap the "mean" aggregation for "sum" over time.
    cube.cell_methods = ()
    cube.add_cell_method(
        iris.coords.CellMethod(
            method="sum",
            coords="time",
            intervals=interval_str,
            comments=comment_str,
        )
    )

751 

752 

def _convert_cube_units_callback(cube: iris.cube.Cube):
    """Adjust diagnostic units for specific variables.

    Some precipitation diagnostics are output with unit kg m-2 s-1 and are
    converted here to mm hr-1.

    Visibility diagnostics are converted here from m to km to improve output
    formatting.
    """
    names = [n for n in (cube.long_name, cube.standard_name, cube.var_name) if n]

    # Precipitation diagnostics.
    if any("surface_microphysical" in name for name in names):
        if cube.units == "kg m-2 s-1":
            logging.debug(
                "Converting precipitation rate units from kg m-2 s-1 to mm hr-1"
            )
            # 1 kg water = 1 l water = 1 dm^3 water, so kg m-2 s-1 equals
            # mm s-1 and only the unit label needs changing.
            cube.units = "mm s-1"
            # Then rescale to per hour.
            cube.convert_units("mm hr-1")
        elif cube.units == "kg m-2":
            logging.debug("Converting precipitation amount units from kg m-2 to mm")
            # Same 1:1 equivalence; relabel only.
            cube.units = "mm"

    # Visibility diagnostics.
    if any("visibility" in name for name in names) and cube.units == "m":
        logging.debug("Converting visibility units m to km.")
        cube.convert_units("km")

    return cube

789 

790 

def _fix_lfric_cloud_base_altitude(cube: iris.cube.Cube):
    """Mask cloud_base_altitude diagnostic in regions with no cloud."""
    names = [n for n in (cube.long_name, cube.standard_name, cube.var_name) if n]
    if any("cloud_base_altitude" in name for name in names):
        # Values above 144 kft are the no-cloud fill value
        # (144.35695538058164); mask them out.
        cube.data = np.ma.masked_array(cube.data)
        cube.data[cube.data > 144.0] = np.ma.masked

798 

799 

def _fix_um_winds(cubes: iris.cube.CubeList):
    """To make winds from the UM consistent with those from LFRic.

    Wind diagnostics are not always consistent between the UM and LFRic;
    UM winds are adjusted here to match LFRic.
    """
    # Components identified by STASH only exist for UM data. If both
    # components are present but the speed is not, derive it — note the
    # result is biased low in general because the components are usually
    # time averages. Only done for exactly one cube per component; a fuller
    # treatment would inspect cell methods, but may not be warranted.
    u_constr = iris.AttributeConstraint(STASH="m01s03i225")
    v_constr = iris.AttributeConstraint(STASH="m01s03i226")
    speed_constr = iris.AttributeConstraint(STASH="m01s03i227")
    try:
        u_cubes = cubes.extract(u_constr)
        if u_cubes and cubes.extract(v_constr):
            if len(u_cubes) == 1 and not cubes.extract(speed_constr):
                _add_wind_speed_um(cubes)
            # Re-express UM winds relative to true east and true north.
            _convert_wind_true_dirn_um(cubes)
    except (KeyError, AttributeError):
        pass

825 

826 

def _add_wind_speed_um(cubes: iris.cube.CubeList):
    """Add windspeeds to cubes from the UM."""
    # First slice of each component cube is used, matching the existing
    # single-component assumption made by the caller.
    u_10m = cubes.extract_cube(iris.AttributeConstraint(STASH="m01s03i225"))[0]
    v_10m = cubes.extract_cube(iris.AttributeConstraint(STASH="m01s03i226"))[0]
    # Speed is the magnitude of the horizontal wind vector.
    wspd10 = (u_10m**2 + v_10m**2) ** 0.5
    wspd10.attributes["STASH"] = "m01s03i227"
    wspd10.standard_name = "wind_speed"
    wspd10.long_name = "wind_speed_at_10m"
    cubes.append(wspd10)

837 

838 

def _convert_wind_true_dirn_um(cubes: iris.cube.CubeList):
    """To convert winds to true directions.

    Convert from the components relative to the grid to true directions.
    This functionality only handles the simplest case.
    """
    # Target coordinate system: spherical earth of UM radius.
    true_cs = iris.coord_systems.GeogCS(6371229.0)
    grid_u = cubes.extract(iris.AttributeConstraint(STASH="m01s03i225"))
    grid_v = cubes.extract(iris.AttributeConstraint(STASH="m01s03i226"))
    for u_cube, v_cube in zip(grid_u, grid_v, strict=True):
        # Rotate onto true east/north, then overwrite the data in place so
        # the original cube metadata is retained.
        rotated_u, rotated_v = rotate_winds(u_cube, v_cube, true_cs)
        u_cube.data = rotated_u.data
        v_cube.data = rotated_v.data

851 

852 

853def _normalise_var0_varname(cube: iris.cube.Cube): 

854 """Fix varnames for consistency to allow merging. 

855 

856 Some model data netCDF sometimes have a coordinate name end in 

857 "_0" etc, where duplicate coordinates of same name are defined but 

858 with different attributes. This can be inconsistently managed in 

859 different model inputs and can cause cubes to fail to merge. 

860 """ 

861 for coord in cube.coords(): 

862 if coord.var_name and coord.var_name.endswith("_0"): 

863 coord.var_name = coord.var_name.removesuffix("_0") 

864 if coord.var_name and coord.var_name.endswith("_1"): 

865 coord.var_name = coord.var_name.removesuffix("_1") 

866 if coord.var_name and coord.var_name.endswith("_2"): 866 ↛ 867line 866 didn't jump to line 867 because the condition on line 866 was never true

867 coord.var_name = coord.var_name.removesuffix("_2") 

868 if coord.var_name and coord.var_name.endswith("_3"): 868 ↛ 869line 868 didn't jump to line 869 because the condition on line 868 was never true

869 coord.var_name = coord.var_name.removesuffix("_3") 

870 

871 if cube.var_name and cube.var_name.endswith("_0"): 

872 cube.var_name = cube.var_name.removesuffix("_0") 

873 

874 

875def _proleptic_gregorian_fix(cube: iris.cube.Cube): 

876 """Convert the calendars of time units to use a standard calendar.""" 

877 try: 

878 time_coord = cube.coord("time") 

879 if time_coord.units.calendar == "proleptic_gregorian": 

880 logging.debug( 

881 "Changing proleptic Gregorian calendar to standard calendar for %s", 

882 repr(time_coord.units), 

883 ) 

884 time_coord.units = time_coord.units.change_calendar("standard") 

885 except iris.exceptions.CoordinateNotFoundError: 

886 pass 

887 

888 

def _lfric_time_callback(cube: iris.cube.Cube):
    """Fix time coordinate metadata if missing dimensions.

    Some model data does not contain forecast_reference_time or forecast_period as
    expected coordinates, and so we cannot aggregate over case studies without this
    metadata. This callback fixes these issues.

    This callback also ensures all time coordinates are referenced as hours since
    1970-01-01 00:00:00 for consistency across different model inputs.

    Notes
    -----
    Some parts of the code have been adapted from Paul Earnshaw's scripts.
    """
    # Construct forecast_reference time if it doesn't exist.
    try:
        tcoord = cube.coord("time")
        # Set time coordinate to common basis "hours since 1970"
        try:
            tcoord.convert_units("hours since 1970-01-01 00:00:00")
        except ValueError:
            # Conversion failed: the coordinate keeps its original units and
            # later arithmetic uses them unchanged.
            logging.warning("Unrecognised base time unit: %s", tcoord.units)

        if not cube.coords("forecast_reference_time"):
            try:
                # Build the reference time from the coordinate's
                # `time_origin` attribute (ISO 8601 string).
                init_time = datetime.datetime.fromisoformat(
                    tcoord.attributes["time_origin"]
                )
                frt_point = tcoord.units.date2num(init_time)
                frt_coord = iris.coords.AuxCoord(
                    frt_point,
                    units=tcoord.units,
                    standard_name="forecast_reference_time",
                    long_name="forecast_reference_time",
                )
                cube.add_aux_coord(frt_coord)
            except KeyError:
                # No `time_origin` attribute either; leave the cube as-is.
                logging.warning(
                    "Cannot find forecast_reference_time, but no `time_origin` attribute to construct it from."
                )

        # Remove time_origin to allow multiple case studies to merge.
        tcoord.attributes.pop("time_origin", None)

        # Construct forecast_period axis (forecast lead time) if it doesn't exist.
        if not cube.coords("forecast_period"):
            try:
                # Create array of forecast lead times.
                # Express the reference time in the time coordinate's units
                # before subtracting, in case the two units differ.
                init_coord = cube.coord("forecast_reference_time")
                init_time_points_in_tcoord_units = tcoord.units.date2num(
                    init_coord.units.num2date(init_coord.points)
                )
                lead_times = tcoord.points - init_time_points_in_tcoord_units

                # Get unit for lead time from time coordinate's unit.
                # Convert all lead time to hours for consistency between models.
                if "seconds" in str(tcoord.units):
                    lead_times = lead_times / 3600.0
                    units = "hours"
                elif "hours" in str(tcoord.units):
                    units = "hours"
                else:
                    # NOTE(review): this ValueError is not caught by the
                    # surrounding handlers, so it propagates to the caller.
                    raise ValueError(f"Unrecognised base time unit: {tcoord.units}")

                # Create lead time coordinate.
                lead_time_coord = iris.coords.AuxCoord(
                    lead_times,
                    standard_name="forecast_period",
                    long_name="forecast_period",
                    units=units,
                )

                # Associate lead time coordinate with time dimension.
                cube.add_aux_coord(lead_time_coord, cube.coord_dims("time"))
            except iris.exceptions.CoordinateNotFoundError:
                logging.warning(
                    "Cube does not have both time and forecast_reference_time coordinate, so cannot construct forecast_period"
                )
    except iris.exceptions.CoordinateNotFoundError:
        logging.warning("No time coordinate on cube.")

969 

970 

971def _lfric_forecast_period_standard_name_callback(cube: iris.cube.Cube): 

972 """Add forecast_period standard name if missing.""" 

973 try: 

974 coord = cube.coord("forecast_period") 

975 if coord.units != "hours": 975 ↛ 976line 975 didn't jump to line 976 because the condition on line 975 was never true

976 cube.coord("forecast_period").convert_units("hours") 

977 if not coord.standard_name: 

978 coord.standard_name = "forecast_period" 

979 except iris.exceptions.CoordinateNotFoundError: 

980 pass 

981 

982 

983def _normalise_ML_varname(cube: iris.cube.Cube): 

984 """Fix variable names in ML models to standard names.""" 

985 if cube.coords("pressure"): 

986 if cube.name() == "x_wind": 

987 cube.long_name = "zonal_wind_at_pressure_levels" 

988 if cube.name() == "y_wind": 

989 cube.long_name = "meridional_wind_at_pressure_levels" 

990 if cube.name() == "air_temperature": 

991 cube.long_name = "temperature_at_pressure_levels"