Coverage for src / CSET / operators / read.py: 90%

351 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-11 10:48 +0000

1# © Crown copyright, Met Office (2022-2025) and CSET contributors. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Operators for reading various types of files from disk.""" 

16 

17import ast 

18import datetime 

19import functools 

20import glob 

21import itertools 

22import logging 

23from pathlib import Path 

24from typing import Literal 

25 

26import iris 

27import iris.coord_systems 

28import iris.coords 

29import iris.cube 

30import iris.exceptions 

31import iris.util 

32import numpy as np 

33from iris.analysis.cartography import rotate_pole, rotate_winds 

34 

35from CSET._common import iter_maybe 

36from CSET.operators._stash_to_lfric import STASH_TO_LFRIC 

37from CSET.operators._utils import ( 

38 get_cube_coordindex, 

39 get_cube_yxcoordname, 

40 is_spatialdim, 

41) 

42 

43 

class NoDataError(FileNotFoundError):
    """Raised when a read completes but no data was loaded.

    Subclasses FileNotFoundError, so handlers for missing files also catch it.
    """

46 

47 

def read_cube(
    file_paths: list[str] | str,
    constraint: iris.Constraint = None,
    model_names: list[str] | str | None = None,
    subarea_type: str = None,
    subarea_extent: list[float] = None,
    **kwargs,
) -> iris.cube.Cube:
    """Load exactly one cube from files.

    Thin wrapper around :func:`read_cubes` that insists on the result
    holding a single cube. Paths may contain shell-style glob patterns, and
    directories are expanded to the files they contain.

    Cubes without a realization coordinate are given a realization of 0 on
    load, so deterministic data can be processed like ensemble data.

    Arguments
    ---------
    file_paths: str | list[str]
        Path or paths to where .pp/.nc files are located.
    constraint: iris.Constraint | iris.ConstraintCombination, optional
        Constraints to filter data by. Defaults to unconstrained.
    model_names: str | list[str], optional
        Names of the models that correspond to respective paths in file_paths.
    subarea_type: "gridcells" | "modelrelative" | "realworld", optional
        How subarea_extent is interpreted. No cutout is made when unset.
    subarea_extent: list, optional
        For "realworld" and "modelrelative": lower latitude, upper latitude,
        lower longitude, upper longitude. For "gridcells": number of cells
        to trim from the lower, left, upper and right edges.
    **kwargs
        Accepted and ignored; not forwarded to read_cubes.

    Returns
    -------
    cubes: iris.cube.Cube
        The single cube loaded.

    Raises
    ------
    FileNotFoundError
        If the provided path does not resolve to any file, or no cubes were
        loaded at all.
    ValueError
        If the constraint doesn't produce a single cube.
    """
    loaded = read_cubes(
        file_paths=file_paths,
        constraint=constraint,
        model_names=model_names,
        subarea_type=subarea_type,
        subarea_extent=subarea_extent,
    )
    # Anything other than exactly one cube means the constraint is ambiguous.
    if len(loaded) != 1:
        raise ValueError(
            f"Constraint doesn't produce single cube: {constraint}\n{loaded}"
        )
    return loaded[0]

111 

112 

def read_cubes(
    file_paths: list[str] | str,
    constraint: iris.Constraint | None = None,
    model_names: str | list[str] | None = None,
    subarea_type: str = None,
    subarea_extent: list = None,
    **kwargs,
) -> iris.cube.CubeList:
    """Load cubes from files, one entry of file_paths per model.

    Paths may contain shell-style glob patterns, and directories are
    expanded to the files they contain. Each model is loaded separately and
    the first model's cubes are tagged as the base for comparisons. All
    cubes then have time units unified, are optionally cut out to a
    subarea, merged, concatenated and squeezed.

    Cubes without a realization coordinate are given a realization of 0 on
    load, so deterministic data can be processed like ensemble data.
    Per-file attributes written by XIOS (such as for LFRic output) are
    removed on load so that cubes merge across files.

    Arguments
    ---------
    file_paths: str | list[str]
        Path or paths to where .pp/.nc files are located. Can include globs.
    constraint: iris.Constraint | iris.ConstraintCombination, optional
        Constraints to filter data by. Defaults to unconstrained.
    model_names: str | list[str], optional
        Names of the models that correspond to respective paths in file_paths.
    subarea_type: str, optional
        One of "gridcells", "modelrelative" or "realworld". No cutout is
        made when unset.
    subarea_extent: list[float], optional
        For "realworld" and "modelrelative": lower latitude, upper latitude,
        lower longitude, upper longitude. For "gridcells": number of cells
        to trim from the lower, left, upper and right edges.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    cubes: iris.cube.CubeList
        Cubes loaded after being merged and concatenated.

    Raises
    ------
    FileNotFoundError
        If the provided path does not resolve to any file.
    NoDataError
        If no cubes remain after loading.
    ValueError
        If model names are given but their number differs from the number
        of paths.
    """
    # Normalise both arguments to iterables; each path belongs to one model.
    paths = iter_maybe(file_paths)
    model_names = iter_maybe(model_names)

    # Names are optional, but when given there must be one per path.
    names_given = model_names != (None,)
    if names_given and len(model_names) != len(paths):
        raise ValueError(
            f"The number of model names ({len(model_names)}) should equal "
            f"the number of paths given ({len(paths)})."
        )

    # Lazily load one CubeList per model.
    per_model = (
        _load_model(path, name, constraint)
        for path, name in itertools.zip_longest(paths, model_names, fillvalue=None)
    )

    # The first model is the comparison base. Use 1 to indicate True, as
    # booleans can't be saved in NetCDF attributes.
    cubes = next(per_model)
    for base_cube in cubes:
        base_cube.attributes["cset_comparison_base"] = 1

    # Append every remaining model's cubes.
    for other_model_cubes in per_model:
        cubes.extend(other_model_cubes)

    # Unify time units so different case studies can merge.
    iris.util.unify_time_units(cubes)

    # Select sub region.
    cubes = _cutout_cubes(cubes, subarea_type, subarea_extent)

    # Merge and concatenate cubes now metadata has been fixed.
    cubes = cubes.merge().concatenate()

    # Squeeze single valued coordinates into scalar coordinates.
    squeezed = iris.cube.CubeList()
    for cube in cubes:
        squeezed.append(iris.util.squeeze(cube))
    cubes = squeezed

    # Ensure dimension coordinates are bounded.
    for cube in cubes:
        for dim_coord in cube.coords(dim_coords=True):
            # Iris can't guess the bounds of a single-point coordinate.
            if dim_coord.has_bounds() or dim_coord.shape[0] <= 1:
                continue
            dim_coord.guess_bounds()

    logging.info("Loaded cubes: %s", cubes)
    if not cubes:
        raise NoDataError("No cubes loaded, check your constraints!")
    return cubes

213 

214 

def _load_model(
    paths: str | list[str],
    model_name: str | None,
    constraint: iris.Constraint | None,
) -> iris.cube.CubeList:
    """Load one model's files into a CubeList.

    The files are resolved and checked, loaded through the shared loading
    callback, and UM winds are adjusted. When a model name is supplied it is
    stored on every cube as the "model_name" attribute, so later steps can
    read it without it being passed around.
    """
    input_files = _check_input_files(paths)
    # A constraint of None lets everything be loaded.
    logging.debug("Constraint: %s", constraint)
    cubes = iris.load(input_files, constraint, callback=_loading_callback)
    # Make the UM's winds consistent with LFRic.
    _fix_um_winds(cubes)

    if model_name is None:
        return cubes
    for cube in cubes:
        cube.attributes["model_name"] = model_name
    return cubes

234 

235 

def _check_input_files(input_paths: str | list[str]) -> list[Path]:
    """Resolve input paths to a sorted list of files to load.

    Arguments
    ---------
    input_paths: list[str]
        List of paths to input files or directories. A path that names an
        existing file is used literally, even if it contains glob
        characters; otherwise it is expanded as a glob pattern. Matched
        directories contribute the files directly inside them.

    Returns
    -------
    list[Path]
        A sorted list of files to load.

    Raises
    ------
    FileNotFoundError:
        If the provided arguments don't resolve to at least one file.
    """
    found = []
    for raw_filename in iter_maybe(input_paths):
        # A literal file match takes priority over glob expansion.
        literal = Path(raw_filename)
        if literal.is_file():
            found.append(literal)
            continue

        for match in map(Path, glob.glob(raw_filename)):
            if match.is_dir():
                # Only the directory's immediate files are used.
                logging.debug("Checking directory '%s' for files", match)
                found.extend(entry for entry in match.iterdir() if entry.is_file())
            else:
                found.append(match)

    found.sort()
    logging.info("Loading files:\n%s", "\n".join(map(str, found)))
    if not found:
        raise FileNotFoundError(f"No files found for {input_paths}")
    return found

277 

278 

def _cutout_cubes(
    cubes: iris.cube.CubeList,
    subarea_type: Literal["gridcells", "realworld", "modelrelative"] | None,
    subarea_extent: list[float, float, float, float],
) -> iris.cube.CubeList:
    """Cut out a subarea from every cube in a CubeList.

    Arguments
    ---------
    cubes: iris.cube.CubeList
        Cubes to cut out from.
    subarea_type: "gridcells" | "realworld" | "modelrelative" | None
        How subarea_extent is interpreted. None disables the cutout and the
        input cubes are returned unchanged.
    subarea_extent: list
        For "gridcells": the number of cells to trim from the lower, left,
        upper and right edges, in that order. For "realworld" and
        "modelrelative": lower latitude, upper latitude, lower longitude,
        upper longitude.

    Returns
    -------
    iris.cube.CubeList
        The cut out cubes, or the input cubes when subarea_type is None.

    Raises
    ------
    ValueError
        If subarea_type is not recognised, or the intersection fails with an
        IndexError.
    """
    if subarea_type is None:
        logging.debug("Subarea selection is disabled.")
        return cubes

    cutout_cubes = iris.cube.CubeList()
    for cube in cubes:
        # Find the names of the horizontal coordinates.
        lat_name, lon_name = get_cube_yxcoordname(cube)

        # Compute cutout based on number of cells to trim from edges.
        if subarea_type == "gridcells":
            logging.debug(
                "User requested LowerTrim: %s LeftTrim: %s UpperTrim: %s RightTrim: %s",
                subarea_extent[0],
                subarea_extent[1],
                subarea_extent[2],
                subarea_extent[3],
            )
            lat_points = np.sort(cube.coord(lat_name).points)
            lon_points = np.sort(cube.coord(lon_name).points)
            # Pick the coordinate values at the trimmed edges.
            lats = [lat_points[subarea_extent[0]], lat_points[-subarea_extent[2] - 1]]
            lons = [lon_points[subarea_extent[1]], lon_points[-subarea_extent[3] - 1]]

        # Compute cutout based on specified coordinate values.
        elif subarea_type in ("realworld", "modelrelative"):
            logging.debug(
                "User requested LLat: %s ULat: %s LLon: %s ULon: %s",
                subarea_extent[0],
                subarea_extent[1],
                subarea_extent[2],
                subarea_extent[3],
            )
            # Force float arrays: with integer input the in-place +/- 360.0
            # wrap below would fail to cast back to an integer dtype.
            lats = np.array(subarea_extent[0:2], dtype=float)
            lons = np.array(subarea_extent[2:4], dtype=float)
            # Shift both longitudes together until within +/- 180.0 bounds.
            while lons[0] < -180.0:
                lons += 360.0
            while lons[1] > 180.0:
                lons -= 360.0
            # If the coordinate system is rotated we convert real world
            # coordinates into model-relative ones before cutting out.
            coord_system = cube.coord(lat_name).coord_system
            if subarea_type == "realworld" and isinstance(
                coord_system, iris.coord_systems.RotatedGeogCS
            ):
                lons, lats = rotate_pole(
                    lons,
                    lats,
                    pole_lon=coord_system.grid_north_pole_longitude,
                    pole_lat=coord_system.grid_north_pole_latitude,
                )
        else:
            raise ValueError(f"Unknown subarea_type: {subarea_type}")

        # Do cutout and add to cutout_cubes.
        intersection_args = {lat_name: lats, lon_name: lons}
        logging.debug("Cutting out coords: %s", intersection_args)
        try:
            cutout_cubes.append(cube.intersection(**intersection_args))
        except IndexError as err:
            raise ValueError(
                "Region cutout error. Check and update SUBAREA_EXTENT. "
                "Cutout region requested should be contained within data area. "
                "Also check if cutout region requested is smaller than input grid spacing."
            ) from err

    return cutout_cubes

357 

358 

def _loading_callback(cube: iris.cube.Cube, field, filename: str) -> iris.cube.Cube:
    """Run every per-cube load fix, in order, as a single iris callback.

    Most fixes modify the cube in place; the few that return a cube have
    their result carried forward. The order of the calls is significant and
    matches the sequence the fixes were written to run in.
    """
    # Fixes that take the full (cube, field, filename) callback signature.
    for file_aware_fix in (
        _realization_callback,
        _um_normalise_callback,
        _lfric_normalise_callback,
    ):
        file_aware_fix(cube, field, filename)
    cube = _lfric_time_coord_fix_callback(cube, field, filename)

    # In-place fixes that only need the cube.
    for in_place_fix in (
        _normalise_var0_varname,
        _fix_spatial_coords_callback,
        _fix_pressure_coord_callback,
        _fix_um_radtime,
        _fix_cell_methods,
    ):
        in_place_fix(cube)

    # These two return the cube, so keep what they hand back.
    cube = _convert_cube_units_callback(cube)
    cube = _grid_longitude_fix_callback(cube)

    for in_place_fix in (
        _fix_lfric_cloud_base_altitude,
        _proleptic_gregorian_fix,
        _lfric_time_callback,
        _lfric_forecast_period_standard_name_callback,
    ):
        in_place_fix(cube)
    return cube

378 

379 

def _realization_callback(cube, field, filename):
    """Add a realization coordinate of 0 to cubes that lack one.

    With this, deterministic cubes can be handled in the same way as
    ensemble members through the rest of the code. Cubes that already have
    a realization coordinate are left untouched.
    """
    if cube.coords("realization"):
        return
    member_zero = iris.coords.DimCoord(0, standard_name="realization", units="1")
    cube.add_aux_coord(member_zero)

391 

392 

@functools.cache
def _warn_once(msg):
    """Log a warning, suppressing any message already logged.

    The unbounded cache remembers every distinct message for the life of the
    process, so each message is emitted at most once.
    """
    logging.warning(msg)

397 

398 

def _um_normalise_callback(cube: iris.cube.Cube, field, filename):
    """Set the long name of UM cubes to the matching LFRic variable name.

    Only cubes carrying a STASH attribute are considered. The standard name
    is not touched, so it stays on the cube even where it differs. Unknown
    STASH codes leave the cube unchanged and trigger a one-off warning.
    """
    if "STASH" not in cube.attributes:
        return

    stash = cube.attributes["STASH"]
    try:
        # The table maps a STASH string to a (name, grid) pair.
        (lfric_name, _grid) = STASH_TO_LFRIC[str(stash)]
        cube.long_name = lfric_name
    except KeyError:
        # Don't change cubes with unknown stash codes.
        _warn_once(
            f"Unknown STASH code: {stash}. Please check file stash_to_lfric.py to update."
        )

416 

417 

def _lfric_normalise_callback(cube: iris.cube.Cube, field, filename):
    """Normalise attributes that prevent LFRic cubes from merging.

    The "timeStamp", "uuid" and "name" attributes are dropped when present.
    The first two describe the output file as saved by XIOS rather than the
    data it contains.

    The "um_stash_source" attribute is a string-encoded list of STASH codes
    which can be written in any order. It is parsed, sorted and re-encoded
    so that equal lists compare equal.
    """
    # Remove unwanted attributes.
    for per_file_key in ("timeStamp", "uuid", "name"):
        cube.attributes.pop(per_file_key, None)

    # Sort STASH code list, if there is one.
    encoded = cube.attributes.get("um_stash_source")
    if not encoded:
        return
    decoded = ast.literal_eval(encoded)
    cube.attributes["um_stash_source"] = str(sorted(decoded))

439 

440 

def _lfric_time_coord_fix_callback(
    cube: iris.cube.Cube, field, filename
) -> iris.cube.Cube:
    """Promote a one-dimensional time AuxCoord to a DimCoord.

    Nothing is done when the cube has no time coordinate, when time is
    already a DimCoord, or when time does not span exactly one cube
    dimension (so scalar time values stay as AuxCoords). The cube is
    modified in place and returned.
    """
    if not cube.coords("time"):
        return cube

    time_coord = cube.coord("time")
    already_dim = isinstance(time_coord, iris.coords.DimCoord)
    if already_dim or len(cube.coord_dims(time_coord)) != 1:
        return cube

    # Fudge the bounds to foil checking for strict monotonicity: when the
    # lower bounds are (nearly) all equal, nudge each by a tiny increasing
    # offset.
    if time_coord.has_bounds():
        lower_span = time_coord.bounds[-1][0] - time_coord.bounds[0][0]
        if lower_span < 1.0e-8:
            time_coord.bounds = [
                [row[0] + 1.0e-8 * float(index), row[1]]
                for index, row in enumerate(time_coord.bounds)
            ]
    iris.util.promote_aux_coord_to_dim_coord(cube, time_coord)
    return cube

470 

471 

def _grid_longitude_fix_callback(cube: iris.cube.Cube) -> iris.cube.Cube:
    """Wrap rotated-pole grid_longitude points towards the +/- 180 deg range.

    Models may place the prime meridian at 0 deg or 360 deg, which hampers
    comparison between them. When the x coordinate is "grid_longitude" on a
    rotated pole coordinate system, all points are shifted by whole
    multiples of 360 deg until their median lies in [-175, 175). The
    thresholds sit short of 180 because model data may not extend exactly
    to the wrap point. Existing bounds are regenerated after the shift.

    Cubes without spatial coordinates, and cubes on non-rotated grids, are
    returned unchanged.
    """
    try:
        _, x_name = get_cube_yxcoordname(cube)
    except ValueError:
        # Don't modify non-spatial cubes.
        return cube

    lon_coord = cube.coord(x_name)
    on_rotated_pole = isinstance(
        lon_coord.coord_system, iris.coord_systems.RotatedGeogCS
    )
    if x_name != "grid_longitude" or not on_rotated_pole:
        return cube

    # Move the median, and every point with it, into range.
    shifted = lon_coord.points.copy()
    centre = np.median(shifted)
    while centre < -175.0:
        centre += 360.0
        shifted += 360.0
    while centre >= 175.0:
        centre -= 360.0
        shifted -= 360.0
    lon_coord.points = shifted

    # Update coord bounds to be consistent with wrapping.
    if lon_coord.has_bounds():
        lon_coord.bounds = None
        lon_coord.guess_bounds()

    return cube

510 

511 

def _fix_spatial_coords_callback(cube: iris.cube.Cube):
    """Make horizontal coordinate naming consistent across models.

    Some models define their grid on rotated 'grid_latitude' and
    'grid_longitude' coordinates, others on non-rotated 'latitude' and
    'longitude'. To avoid failures when comparing such models, this
    callback, in order:

    1. Drops x and y bounds when both exist and either holds an absolute
       value above 10000.0, which is treated as erroneous.
    2. Replaces grid_latitude/grid_longitude dimension coordinates by
       latitude/longitude ones when the north pole latitude is 90.0.
    3. For latitude/longitude coordinates in degrees, adds
       grid_latitude/grid_longitude auxiliary coordinates with the same
       points (unless present) and ensures a coordinate system is set.

    Non-spatial cubes are left untouched. The cube is modified in place.
    """
    # Don't modify non-spatial cubes.
    if not is_spatialdim(cube):
        return

    # Get spatial coords and dimension index.
    y_name, x_name = get_cube_yxcoordname(cube)
    ny = get_cube_coordindex(cube, y_name)
    nx = get_cube_coordindex(cube, x_name)

    # Remove spatial coords bounds if erroneous values detected, using an
    # invalid threshold of 10000.0.
    x_coord = cube.coord(x_name)
    y_coord = cube.coord(y_name)
    if x_coord.has_bounds() and y_coord.has_bounds():
        largest_x = np.max(np.abs(x_coord.bounds))
        largest_y = np.max(np.abs(y_coord.bounds))
        if largest_x > 10000.0 or largest_y > 10000.0:
            x_coord.bounds = None
            y_coord.bounds = None

    # Translate [grid_latitude, grid_longitude] to an unrotated 1-d DimCoord
    # [latitude, longitude] for instances where rotated_pole=90.0
    dim_coord_names = [coord.name() for coord in cube.coords(dim_coords=True)]
    if "grid_latitude" in dim_coord_names:
        rotated_cs = cube.coord("grid_latitude").coord_system
        if getattr(rotated_cs, "grid_north_pole_latitude", None) == 90.0:
            lats = cube.coord("grid_latitude").points
            lons = cube.coord("grid_longitude").points

            # Swap latitude first, then longitude.
            for old_name, new_name, points, dim in (
                ("grid_latitude", "latitude", lats, ny),
                ("grid_longitude", "longitude", lons, nx),
            ):
                cube.remove_coord(old_name)
                cube.add_dim_coord(
                    iris.coords.DimCoord(
                        points,
                        standard_name=new_name,
                        var_name=new_name,
                        units="degrees",
                        coord_system=iris.coord_systems.GeogCS(6371229.0),
                        circular=True,
                    ),
                    dim,
                )
            y_name = "latitude"
            x_name = "longitude"

    # Create additional AuxCoord [grid_latitude, grid_longitude] for cases
    # with [lat, lon] inputs, handling latitude first and then longitude.
    for name, plain_name, grid_name, dim, degree_units in (
        (
            y_name,
            "latitude",
            "grid_latitude",
            ny,
            ["degrees", "degrees_north", "degrees_south"],
        ),
        (
            x_name,
            "longitude",
            "grid_longitude",
            nx,
            ["degrees", "degrees_west", "degrees_east"],
        ),
    ):
        if name != plain_name:
            continue
        coord = cube.coord(name)
        if coord.units not in degree_units:
            continue

        # Add the grid_* AuxCoord unless one is already there.
        aux_names = [aux.name() for aux in cube.coords(dim_coords=False)]
        if grid_name not in aux_names:
            cube.add_aux_coord(
                iris.coords.AuxCoord(
                    coord.points,
                    var_name=grid_name,
                    units="degrees",
                ),
                dim,
            )

        # Ensure the input DimCoord has a CoordSystem.
        # This attribute is sometimes lost on iris.save
        if not coord.coord_system:
            coord.coord_system = iris.coord_systems.GeogCS(6371229.0)

624 

625 

def _fix_pressure_coord_callback(cube: iris.cube.Cube):
    """Name the pressure dimension coordinate "pressure" and use hPa.

    Dimension coordinates called "pressure_level" or "pressure_levels" are
    renamed to "pressure". This was needed for AIFS model data from ECMWF,
    which does not use the CF compliant coordinate name.

    The pressure coordinate is then converted to hPa when its units differ,
    to be consistent with the UM. The cube is modified in place.
    """
    aliases = ("pressure_level", "pressure_levels")
    for dim_coord in cube.dim_coords:
        if dim_coord.name() in aliases:
            dim_coord.rename("pressure")

        if dim_coord.name() != "pressure":
            continue
        pressure = cube.coord("pressure")
        if str(pressure.units) != "hPa":
            pressure.convert_units("hPa")

643 

644 

def _fix_um_radtime(cube: iris.cube.Cube):
    """Shift radiation diagnostics output N minutes or seconds off the hour.

    Only cubes whose STASH attribute is one of the listed radiation codes
    are considered. If the first time point is exactly on the hour or the
    half hour, nothing is changed. Otherwise the offset of the first time
    point from the nearest hour is removed from every time point, and from
    the forecast_period points when that coordinate exists.

    The data are not interpolated in time; only the timestamps move, which
    enables time-to-time differences with models which output radiation
    data on the hour. The cube is modified in place.
    """
    radiation_stash = (
        "m01s01i207",
        "m01s01i208",
        "m01s02i205",
        "m01s02i201",
        "m01s02i207",
        "m01s01i235",
    )
    if cube.attributes.get("STASH") not in radiation_stash:
        return

    try:
        time_coord = cube.coord("time")
    except iris.exceptions.CoordinateNotFoundError:
        # Nothing to fix without a (unique) time coordinate.
        return

    # Convert time points to datetime objects.
    time_unit = time_coord.units
    time_points = time_unit.num2date(time_coord.points)

    # Skip if times don't need fixing; only the first point is inspected.
    first = time_points[0]
    if first.second == 0 and first.minute in (0, 30):
        return

    # Offset of the first time point from the hour. A negative minute count
    # moves times closer to the next hour forwards instead of backwards.
    n_minute = first.minute
    n_second = first.second
    if n_minute > 30:
        n_minute -= 60
    offset = datetime.timedelta(minutes=n_minute, seconds=n_second)

    # Replace the time points, converted back with the original time unit.
    time_coord.points = time_unit.date2num(time_points - offset)

    # Recompute forecast_period with the same correction.
    try:
        fcst_coord = cube.coord("forecast_period")
    except iris.exceptions.CoordinateNotFoundError:
        return
    # NOTE(review): forecast_period points are round-tripped through the
    # time coordinate's unit, which presumably assumes both coordinates
    # share the same base time unit -- confirm.
    fcst_coord.points = time_unit.date2num(
        time_unit.num2date(fcst_coord.points) - offset
    )

708 

709 

def _fix_cell_methods(cube: iris.cube.Cube):
    """Relabel "mean" cell methods as "sum" for UM accumulation STASH codes.

    Lightning (m01s21i104), rainfall amount (m01s04i201, m01s05i201) and
    snowfall amount (m01s04i202, m01s05i202) from the UM are output as time
    accumulations, but input cubes show their cell_methods as "mean". For UM
    and LFRic inputs to be compatible, accumulations are assumed to be
    "sum".

    When every cell method of such a cube is "mean", all cell methods are
    replaced by a single "sum" over time which carries the intervals and
    comments of the last original cell method. This lets a cell_method
    constraint on reading select the correct input.
    """
    accumulation_stash = [
        "m01s21i104",
        "m01s04i201",
        "m01s04i202",
        "m01s05i201",
        "m01s05i202",
    ]
    if cube.attributes.get("STASH") not in accumulation_stash:
        return

    # Only act when "mean" is the sole method in use.
    methods_in_use = {cm.method for cm in cube.cell_methods}
    if methods_in_use != {"mean"}:
        return

    # Keep the interval and comment information of the last cell method.
    last_method = cube.cell_methods[-1]
    interval_str = last_method.intervals
    comment_str = last_method.comments

    # Remove input aggregation method.
    cube.cell_methods = ()

    # Replace "mean" with "sum" cell_method to indicate aggregation.
    summed = iris.coords.CellMethod(
        method="sum",
        coords="time",
        intervals=interval_str,
        comments=comment_str,
    )
    cube.add_cell_method(summed)

747 

748 

def _convert_cube_units_callback(cube: iris.cube.Cube):
    """Adjust diagnostic units for specific variables.

    Cubes with "surface_microphysical" in any of their names are treated as
    precipitation: rates in kg m-2 s-1 become mm hr-1 and amounts in kg m-2
    become mm.

    Cubes with "visibility" in any of their names are converted from m to
    km to improve output formatting.

    The cube is modified in place and returned.
    """
    # Gather whichever of the cube's names are set.
    names = [
        name for name in (cube.long_name, cube.standard_name, cube.var_name) if name
    ]

    # Convert precipitation diagnostic units if required.
    if any("surface_microphysical" in name for name in names):
        if cube.units == "kg m-2 s-1":
            logging.debug(
                "Converting precipitation rate units from kg m-2 s-1 to mm hr-1"
            )
            # Convert from kg m-2 s-1 to mm s-1 assuming 1kg water = 1l water = 1dm^3 water.
            # This is a 1:1 conversion, so we just change the units.
            cube.units = "mm s-1"
            # Convert the units to per hour.
            cube.convert_units("mm hr-1")
        elif cube.units == "kg m-2":
            logging.debug("Converting precipitation amount units from kg m-2 to mm")
            # Convert from kg m-2 to mm assuming 1kg water = 1l water = 1dm^3 water.
            # This is a 1:1 conversion, so we just change the units.
            cube.units = "mm"

    # Convert visibility diagnostic units if required.
    if any("visibility" in name for name in names) and cube.units == "m":
        logging.debug("Converting visibility units m to km.")
        cube.convert_units("km")

    return cube

785 

786 

def _fix_lfric_cloud_base_altitude(cube: iris.cube.Cube):
    """Mask cloud_base_altitude values that mark regions with no cloud.

    Applies to cubes with "cloud_base_altitude" in any of their names.
    Values above 144.0 are masked, which catches the default value of
    144.35695538058164 (kft). The cube data are replaced by a masked array.
    """
    names = [
        name for name in (cube.long_name, cube.standard_name, cube.var_name) if name
    ]
    if not any("cloud_base_altitude" in name for name in names):
        return

    # Mask everything above the 144 kft threshold.
    altitude = np.ma.masked_array(cube.data)
    altitude[altitude > 144.0] = np.ma.masked
    cube.data = altitude

795 

def _fix_um_winds(cubes: iris.cube.CubeList):
    """Make winds from the UM consistent with those from LFRic.

    Wind components are identified by STASH code, so only UM cubes are
    affected. Nothing is done unless both the u (m01s03i225) and v
    (m01s03i226) components are present. When they are:

    * a wind speed cube is added if there is exactly one u cube and no wind
      speed (m01s03i227) cube yet; and
    * the components are converted to be relative to true east and north.

    The derived wind speed will be biased low in general, because the
    components will mostly be time averages. KeyError and AttributeError
    raised along the way are suppressed.
    """
    u_constr = iris.AttributeConstraint(STASH="m01s03i225")
    v_constr = iris.AttributeConstraint(STASH="m01s03i226")
    speed_constr = iris.AttributeConstraint(STASH="m01s03i227")
    try:
        u_cubes = cubes.extract(u_constr)
        if not u_cubes or not cubes.extract(v_constr):
            return
        # For simplicity, derive the speed only from a single component
        # cube. Considering the cell methods would be more thorough, but may
        # not be warranted.
        if len(u_cubes) == 1 and not cubes.extract(speed_constr):
            _add_wind_speed_um(cubes)
        # Convert winds in the UM to be relative to true east and true north.
        _convert_wind_true_dirn_um(cubes)
    except (KeyError, AttributeError):
        pass

821 

822 

def _add_wind_speed_um(cubes: iris.cube.CubeList):
    """Add wind speed derived from the UM wind components.

    The speed is computed as ``(u**2 + v**2) ** 0.5`` from the cubes whose
    STASH attribute is ``m01s03i225`` and ``m01s03i226``. The result is tagged
    with STASH ``m01s03i227``, named ``wind_speed`` / ``wind_speed_at_10m``
    and appended to ``cubes`` in place.

    Parameters
    ----------
    cubes: iris.cube.CubeList
        Cubes containing the wind components. Modified in place.

    Notes
    -----
    ``extract_cube`` is expected to fail unless exactly one cube matches each
    component constraint, so callers should check this beforehand.
    """
    # extract_cube already returns a single Cube, so the result must not be
    # indexed: indexing a Cube slices its leading dimension, which would
    # derive the wind speed from only the first element along that dimension.
    u_wind = cubes.extract_cube(iris.AttributeConstraint(STASH="m01s03i225"))
    v_wind = cubes.extract_cube(iris.AttributeConstraint(STASH="m01s03i226"))
    wspd10 = (u_wind**2 + v_wind**2) ** 0.5
    wspd10.attributes["STASH"] = "m01s03i227"
    wspd10.standard_name = "wind_speed"
    wspd10.long_name = "wind_speed_at_10m"
    cubes.append(wspd10)

833 

834 

def _convert_wind_true_dirn_um(cubes: iris.cube.CubeList):
    """Convert grid-relative wind components to true directions.

    The wind component cubes (selected by STASH) are paired up in order and
    rotated onto a geographic coordinate system; the rotated values overwrite
    the data of the original cubes in place. This functionality only handles
    the simplest case.
    """
    true_cs = iris.coord_systems.GeogCS(6371229.0)
    x_winds = cubes.extract(iris.AttributeConstraint(STASH="m01s03i225"))
    y_winds = cubes.extract(iris.AttributeConstraint(STASH="m01s03i226"))
    # strict=True: a differing number of component cubes is an error.
    for x_wind, y_wind in zip(x_winds, y_winds, strict=True):
        rotated_x, rotated_y = rotate_winds(x_wind, y_wind, true_cs)
        x_wind.data = rotated_x.data
        y_wind.data = rotated_y.data

847 

848 

def _normalise_var0_varname(cube: iris.cube.Cube):
    """Strip numeric suffixes from variable names so cubes can merge.

    Some model data netCDF files have coordinate names ending in "_0" etc,
    where duplicate coordinates of the same name are defined but with
    different attributes. This can be inconsistently managed in different
    model inputs and can cause cubes to fail to merge.

    Coordinate variable names have a trailing "_0", "_1", "_2" and "_3"
    removed, checked in that order. The cube's own variable name only has a
    trailing "_0" removed. The cube is modified in place.
    """
    for coord in cube.coords():
        # Each suffix is tested against the name left by the previous one.
        for suffix in ("_0", "_1", "_2", "_3"):
            current = coord.var_name
            if current and current.endswith(suffix):
                coord.var_name = current.removesuffix(suffix)

    cube_name = cube.var_name
    if cube_name and cube_name.endswith("_0"):
        cube.var_name = cube_name.removesuffix("_0")

869 

870 

def _proleptic_gregorian_fix(cube: iris.cube.Cube):
    """Switch a proleptic Gregorian time coordinate to the standard calendar.

    Cubes without a time coordinate, or whose time coordinate uses any other
    calendar, are left untouched. The cube is modified in place.
    """
    try:
        time_axis = cube.coord("time")
    except iris.exceptions.CoordinateNotFoundError:
        return

    current_units = time_axis.units
    if current_units.calendar != "proleptic_gregorian":
        return

    logging.debug(
        "Changing proleptic Gregorian calendar to standard calendar for %s",
        repr(current_units),
    )
    time_axis.units = current_units.change_calendar("standard")

883 

884 

def _lfric_time_callback(cube: iris.cube.Cube):
    """Fix time coordinate metadata if missing dimensions.

    Some model data does not contain forecast_reference_time or
    forecast_period as expected coordinates, and so we cannot aggregate over
    case studies without this metadata. This callback adds them when they are
    missing: forecast_reference_time is built from the time coordinate's
    ``time_origin`` attribute, and forecast_period from the difference between
    the time and forecast_reference_time coordinates.

    This callback also ensures all time coordinates are referenced as hours
    since 1970-01-01 00:00:00 for consistency across different model inputs.
    The cube is modified in place.

    Raises
    ------
    ValueError
        If a forecast_period has to be constructed but the time units are
        neither seconds nor hours.

    Notes
    -----
    Some parts of the code have been adapted from Paul Earnshaw's scripts.
    """
    try:
        time_coord = cube.coord("time")
    except iris.exceptions.CoordinateNotFoundError:
        logging.warning("No time coordinate on cube.")
        return

    # Set time coordinate to common basis "hours since 1970"
    try:
        time_coord.convert_units("hours since 1970-01-01 00:00:00")
    except ValueError:
        logging.warning("Unrecognised base time unit: %s", time_coord.units)

    # Construct forecast_reference_time if it doesn't exist.
    if not cube.coords("forecast_reference_time"):
        try:
            origin = datetime.datetime.fromisoformat(
                time_coord.attributes["time_origin"]
            )
            reference_coord = iris.coords.AuxCoord(
                time_coord.units.date2num(origin),
                units=time_coord.units,
                standard_name="forecast_reference_time",
                long_name="forecast_reference_time",
            )
            cube.add_aux_coord(reference_coord)
        except KeyError:
            logging.warning(
                "Cannot find forecast_reference_time, but no `time_origin` attribute to construct it from."
            )

    # Remove time_origin to allow multiple case studies to merge.
    time_coord.attributes.pop("time_origin", None)

    # Construct forecast_period axis (forecast lead time) if it doesn't exist.
    if cube.coords("forecast_period"):
        return

    try:
        # Express the reference time in the time coordinate's units, then
        # subtract it to get the forecast lead times.
        reference = cube.coord("forecast_reference_time")
        reference_points = time_coord.units.date2num(
            reference.units.num2date(reference.points)
        )
        lead_times = time_coord.points - reference_points

        # Convert all lead times to hours for consistency between models.
        unit_text = str(time_coord.units)
        if "seconds" in unit_text:
            lead_times = lead_times / 3600.0
        elif "hours" not in unit_text:
            raise ValueError(f"Unrecognised base time unit: {time_coord.units}")

        # Create lead time coordinate.
        period_coord = iris.coords.AuxCoord(
            lead_times,
            standard_name="forecast_period",
            long_name="forecast_period",
            units="hours",
        )

        # Associate lead time coordinate with time dimension.
        cube.add_aux_coord(period_coord, cube.coord_dims("time"))
    except iris.exceptions.CoordinateNotFoundError:
        logging.warning(
            "Cube does not have both time and forecast_reference_time coordinate, so cannot construct forecast_period"
        )

965 

966 

def _lfric_forecast_period_standard_name_callback(cube: iris.cube.Cube):
    """Give the forecast_period coordinate its standard name if it lacks one.

    Cubes without a forecast_period coordinate are left untouched. The cube
    is modified in place.
    """
    try:
        period_coord = cube.coord("forecast_period")
    except iris.exceptions.CoordinateNotFoundError:
        return
    else:
        if not period_coord.standard_name:
            period_coord.standard_name = "forecast_period"