Coverage for src/CSET/operators/read.py: 90% (357 statements)

# © Crown copyright, Met Office (2022-2025) and CSET contributors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Operators for reading various types of files from disk."""

16 

17import ast 

18import datetime 

19import functools 

20import glob 

21import itertools 

22import logging 

23from pathlib import Path 

24from typing import Literal 

25 

26import iris 

27import iris.coord_systems 

28import iris.coords 

29import iris.cube 

30import iris.exceptions 

31import iris.util 

32import numpy as np 

33from iris.analysis.cartography import rotate_pole, rotate_winds 

34 

35from CSET._common import iter_maybe 

36from CSET.operators._stash_to_lfric import STASH_TO_LFRIC 

37from CSET.operators._utils import get_cube_yxcoordname 

38 

39 

40class NoDataError(FileNotFoundError): 

41 """Error that no data has been loaded.""" 

42 

43 

def read_cube(
    file_paths: list[str] | str,
    constraint: iris.Constraint | None = None,
    model_names: list[str] | str | None = None,
    subarea_type: str | None = None,
    subarea_extent: list[float] | None = None,
    **kwargs,
) -> iris.cube.Cube:
    """Read a single cube from files.

    Read operator that takes a path string (can include shell-style glob
    patterns), and loads the cube matching the constraint. If any paths point
    to a directory, all the files contained within are loaded.

    Ensemble data can also be loaded. If it has a realization coordinate
    already, it will be used directly. If not, it will have its member number
    guessed from the filename, based on one of several common patterns. For
    example, the pattern *emXX*, where XX is the realization.

    Deterministic data will be loaded with a realization of 0, allowing it to
    be processed in the same way as ensemble data.

    Arguments
    ---------
    file_paths: str | list[str]
        Path or paths to where .pp/.nc files are located.
    constraint: iris.Constraint | iris.ConstraintCombination, optional
        Constraints to filter data by. Defaults to unconstrained.
    model_names: str | list[str], optional
        Names of the models that correspond to respective paths in file_paths.
    subarea_type: "gridcells" | "modelrelative" | "realworld", optional
        Whether to constrain data by model relative coordinates or real world
        coordinates.
    subarea_extent: list[float], optional
        List of coordinates to constrain data by, in the order: lower
        latitude, upper latitude, lower longitude, upper longitude.

    Returns
    -------
    cube: iris.cube.Cube
        Cube loaded.

    Raises
    ------
    FileNotFoundError
        If the provided path does not exist.
    ValueError
        If the constraint doesn't produce a single cube.
    """
    cubes = read_cubes(
        file_paths=file_paths,
        constraint=constraint,
        model_names=model_names,
        subarea_type=subarea_type,
        subarea_extent=subarea_extent,
    )
    # Check the filtered cubes is a CubeList containing exactly one cube.
    if len(cubes) == 1:
        return cubes[0]
    else:
        raise ValueError(
            f"Constraint doesn't produce single cube: {constraint}\n{cubes}"
        )

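# Example usage of read_cube (an illustrative sketch; the path and variable
# name below are hypothetical, not part of CSET):
#
#     temperature = read_cube(
#         "/data/case01/*.pp",
#         constraint=iris.Constraint(name="air_temperature"),
#     )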

def read_cubes(
    file_paths: list[str] | str,
    constraint: iris.Constraint | None = None,
    model_names: str | list[str] | None = None,
    subarea_type: str | None = None,
    subarea_extent: list[float] | None = None,
    **kwargs,
) -> iris.cube.CubeList:
    """Read cubes from files.

    Read operator that takes a path string (can include shell-style glob
    patterns), and loads the cubes matching the constraint. If any paths point
    to a directory, all the files contained within are loaded.

    Ensemble data can also be loaded. If it has a realization coordinate
    already, it will be used directly. If not, it will have its member number
    guessed from the filename, based on one of several common patterns. For
    example, the pattern *emXX*, where XX is the realization.

    Deterministic data will be loaded with a realization of 0, allowing it to
    be processed in the same way as ensemble data.

    Data output by XIOS (such as LFRic) has its per-file metadata removed so
    that the cubes merge across files.

    Arguments
    ---------
    file_paths: str | list[str]
        Path or paths to where .pp/.nc files are located. Can include globs.
    constraint: iris.Constraint | iris.ConstraintCombination, optional
        Constraints to filter data by. Defaults to unconstrained.
    model_names: str | list[str], optional
        Names of the models that correspond to respective paths in file_paths.
    subarea_type: "gridcells" | "modelrelative" | "realworld", optional
        Whether to constrain data by model relative coordinates or real world
        coordinates.
    subarea_extent: list[float], optional
        List of coordinates to constrain data by, in the order: lower
        latitude, upper latitude, lower longitude, upper longitude.

    Returns
    -------
    cubes: iris.cube.CubeList
        Cubes loaded after being merged and concatenated.

    Raises
    ------
    FileNotFoundError
        If the provided path does not exist.
    """
    # Get iterable of paths. Each path corresponds to 1 model.
    paths = iter_maybe(file_paths)
    model_names = iter_maybe(model_names)

    # Check we have an appropriate number of model names.
    if model_names != (None,) and len(model_names) != len(paths):
        raise ValueError(
            f"The number of model names ({len(model_names)}) should equal "
            f"the number of paths given ({len(paths)})."
        )

    # Load the data for each model into a CubeList per model.
    model_cubes = (
        _load_model(path, name, constraint)
        for path, name in itertools.zip_longest(paths, model_names, fillvalue=None)
    )

    # Split out the first model's cubes and mark them as the base for
    # comparisons.
    cubes = next(model_cubes)
    for cube in cubes:
        # Use 1 to indicate True, as booleans can't be saved in NetCDF
        # attributes.
        cube.attributes["cset_comparison_base"] = 1

    # Load the rest of the models.
    cubes.extend(itertools.chain.from_iterable(model_cubes))

    # Unify time units so different case studies can merge.
    iris.util.unify_time_units(cubes)

    # Select sub region.
    cubes = _cutout_cubes(cubes, subarea_type, subarea_extent)
    # Merge and concatenate cubes now metadata has been fixed.
    cubes = cubes.merge()
    cubes = cubes.concatenate()

    # Ensure dimension coordinates are bounded.
    for cube in cubes:
        for dim_coord in cube.coords(dim_coords=True):
            # Iris can't guess the bounds of a scalar coordinate.
            if not dim_coord.has_bounds() and dim_coord.shape[0] > 1:
                dim_coord.guess_bounds()

    logging.info("Loaded cubes: %s", cubes)
    if len(cubes) == 0:
        raise NoDataError("No cubes loaded, check your constraints!")
    return cubes

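# Example usage of read_cubes for a two-model comparison (an illustrative
# sketch; the paths and model names are hypothetical):
#
#     cubes = read_cubes(
#         file_paths=["/data/um/*.pp", "/data/lfric/*.nc"],
#         model_names=["UM", "LFRic"],
#     )
#     # The first model's cubes carry cset_comparison_base == 1.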

def _load_model(
    paths: str | list[str],
    model_name: str | None,
    constraint: iris.Constraint | None,
) -> iris.cube.CubeList:
    """Load a single model's data into a CubeList."""
    input_files = _check_input_files(paths)
    # If unset, a constraint of None lets everything be loaded.
    logging.debug("Constraint: %s", constraint)
    cubes = iris.load(
        input_files, constraint, callback=_create_callback(is_ensemble=False)
    )
    # Make the UM's winds consistent with LFRic.
    _fix_um_winds(cubes)

    # Reload with ensemble handling if needed.
    if _is_ensemble(cubes):
        cubes = iris.load(
            input_files, constraint, callback=_create_callback(is_ensemble=True)
        )

    # Add a model_name attribute to each cube to make it available at any
    # further step without needing to pass it as a function parameter.
    if model_name is not None:
        for cube in cubes:
            cube.attributes["model_name"] = model_name
    return cubes


def _check_input_files(input_paths: str | list[str]) -> list[Path]:
    """Get an iterable of files to load, and check that they all exist.

    Arguments
    ---------
    input_paths: str | list[str]
        Path or list of paths to input files or directories. The path may
        itself contain glob patterns, but unlike in shells it will match
        directly first.

    Returns
    -------
    list[Path]
        A list of files to load.

    Raises
    ------
    FileNotFoundError:
        If the provided arguments don't resolve to at least one existing file.
    """
    files = []
    for raw_filename in iter_maybe(input_paths):
        # Match glob-like files directly first, if they exist.
        raw_path = Path(raw_filename)
        if raw_path.is_file():
            files.append(raw_path)
        else:
            for input_path in glob.glob(raw_filename):
                # Convert string paths into Path objects.
                input_path = Path(input_path)
                # Get the list of files in the directory, or use it directly.
                if input_path.is_dir():
                    logging.debug("Checking directory '%s' for files", input_path)
                    files.extend(p for p in input_path.iterdir() if p.is_file())
                else:
                    files.append(input_path)

    files.sort()
    logging.info("Loading files:\n%s", "\n".join(str(path) for path in files))
    if len(files) == 0:
        raise FileNotFoundError(f"No files found for {input_paths}")
    return files

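# Illustration of the direct-match-first behaviour of _check_input_files
# (hypothetical paths): a literal file whose name contains glob characters
# is used as-is, while non-existent literal paths fall back to globbing.
#
#     _check_input_files("/data/data[1].nc")  # direct match, no globbing
#     _check_input_files("/data/*.nc")        # expanded with glob.glob
#     _check_input_files("/data/")            # directory: all files within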

def _cutout_cubes(
    cubes: iris.cube.CubeList,
    subarea_type: Literal["gridcells", "realworld", "modelrelative"] | None,
    subarea_extent: list[float] | None,
):
    """Cut out a subarea from a CubeList."""
    if subarea_type is None:
        logging.debug("Subarea selection is disabled.")
        return cubes

    cutout_cubes = iris.cube.CubeList()
    for cube in cubes:
        # Find the spatial dimension coordinates.
        lat_name, lon_name = get_cube_yxcoordname(cube)

        # Compute cutout based on the number of cells to trim from each edge.
        if subarea_type == "gridcells":
            logging.debug(
                "User requested LowerTrim: %s LeftTrim: %s UpperTrim: %s RightTrim: %s",
                subarea_extent[0],
                subarea_extent[1],
                subarea_extent[2],
                subarea_extent[3],
            )
            lat_points = np.sort(cube.coord(lat_name).points)
            lon_points = np.sort(cube.coord(lon_name).points)
            # Define cutout region using user provided cell points.
            lats = [lat_points[subarea_extent[0]], lat_points[-subarea_extent[2] - 1]]
            lons = [lon_points[subarea_extent[1]], lon_points[-subarea_extent[3] - 1]]

        # Compute cutout based on specified coordinate values.
        elif subarea_type in ("realworld", "modelrelative"):
            # If not gridcells, cutout by the requested geographic area.
            logging.debug(
                "User requested LLat: %s ULat: %s LLon: %s ULon: %s",
                subarea_extent[0],
                subarea_extent[1],
                subarea_extent[2],
                subarea_extent[3],
            )
            # Define cutout region using user provided coordinates.
            lats = np.array(subarea_extent[0:2])
            lons = np.array(subarea_extent[2:4])
            # Ensure cutout longitudes are within +/- 180.0 bounds.
            while lons[0] < -180.0:
                lons += 360.0
            while lons[1] > 180.0:
                lons -= 360.0
            # If the coordinate system is rotated we convert coordinates into
            # model-relative coordinates to extract the appropriate cutout.
            coord_system = cube.coord(lat_name).coord_system
            if subarea_type == "realworld" and isinstance(
                coord_system, iris.coord_systems.RotatedGeogCS
            ):
                lons, lats = rotate_pole(
                    lons,
                    lats,
                    pole_lon=coord_system.grid_north_pole_longitude,
                    pole_lat=coord_system.grid_north_pole_latitude,
                )
        else:
            raise ValueError(f"Unknown subarea_type: {subarea_type}")

        # Do cutout and add to cutout_cubes.
        intersection_args = {lat_name: lats, lon_name: lons}
        logging.debug("Cutting out coords: %s", intersection_args)
        try:
            cutout_cubes.append(cube.intersection(**intersection_args))
        except IndexError as err:
            raise ValueError(
                "Region cutout error. Check and update SUBAREA_EXTENT. "
                "The cutout region requested should be contained within the "
                "data area. Also check if the cutout region requested is "
                "smaller than the input grid spacing."
            ) from err

    return cutout_cubes

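# Example of the two subarea conventions handled above (hypothetical values).
# For "gridcells" the extent is a number of cells to trim from each edge;
# for "realworld"/"modelrelative" it is coordinate bounds in the order
# lower lat, upper lat, lower lon, upper lon.
#
#     _cutout_cubes(cubes, "gridcells", [5, 5, 5, 5])
#     _cutout_cubes(cubes, "realworld", [50.0, 60.0, -10.0, 5.0])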

def _is_ensemble(cubelist: iris.cube.CubeList) -> bool:
    """Test if a CubeList is likely to be ensemble data.

    If cubes either have a realization dimension, or there are multiple files
    for the same time-step, we can assume it is ensemble data.
    """
    unique_cubes = set()
    for cube in cubelist:
        # Ignore a realization of 0, as that is given to deterministic data.
        if cube.coords("realization") and any(cube.coord("realization").points != 0):
            return True
        # Compare XML representations of the cube structure to check for
        # duplicates.
        cube_content = cube.xml()
        if cube_content in unique_cubes:
            logging.info("Ensemble data loaded.")
            return True
        else:
            unique_cubes.add(cube_content)
    logging.info("Deterministic data loaded.")
    return False


def _create_callback(is_ensemble: bool) -> callable:
    """Compose together the needed callbacks into a single function."""

    def callback(cube: iris.cube.Cube, field, filename: str):
        if is_ensemble:
            _ensemble_callback(cube, field, filename)
        else:
            _deterministic_callback(cube, field, filename)

        _um_normalise_callback(cube, field, filename)
        _lfric_normalise_callback(cube, field, filename)
        _lfric_time_coord_fix_callback(cube, field, filename)
        _normalise_var0_varname(cube)
        _fix_spatial_coords_callback(cube)
        _fix_pressure_coord_callback(cube)
        _fix_um_radtime(cube)
        _fix_cell_methods(cube)
        _convert_cube_units_callback(cube)
        _grid_longitude_fix_callback(cube)
        _fix_lfric_cloud_base_altitude(cube)
        _lfric_time_callback(cube)
        _lfric_forecast_period_standard_name_callback(cube)

    return callback


def _ensemble_callback(cube, field, filename):
    """Add a realization coordinate to a cube.

    Uses the filename to add an ensemble member ('realization') to each cube.
    Assumes data is formatted enuk_um_0XX/enukaa_pd0HH.pp where XX is the
    ensemble member.

    Arguments
    ---------
    cube: Cube
        Ensemble member cube.
    field
        Raw data variable, unused.
    filename: str
        Filename of the ensemble member data.
    """
    if not cube.coords("realization"):
        if "em" in filename:
            # Assuming format is *emXX*.
            loc = filename.find("em") + 2
            member = np.int32(filename[loc : loc + 2])
        else:
            # Assuming raw fields files format is enuk_um_0XX/enukaa_pd0HH.
            member = np.int32(filename[-15:-13])

        cube.add_aux_coord(iris.coords.AuxCoord(member, standard_name="realization"))

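# Worked example of the member-number guessing above for a hypothetical
# filename matching the *emXX* pattern:
#
#     filename = "/case/forecast_em07_t24.nc"
#     loc = filename.find("em") + 2      # index just past "em"
#     member = filename[loc : loc + 2]   # -> "07"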

def _deterministic_callback(cube, field, filename):
    """Give deterministic cubes a realization of 0.

    This means they can be handled in the same way as ensembles through the
    rest of the code.
    """
    # Only add if the realization coordinate does not exist.
    if not cube.coords("realization"):
        cube.add_aux_coord(
            iris.coords.AuxCoord(np.int32(0), standard_name="realization", units="1")
        )


@functools.lru_cache(None)
def _warn_once(msg):
    """Print a warning message, skipping duplicates."""
    logging.warning(msg)


def _um_normalise_callback(cube: iris.cube.Cube, field, filename):
    """Normalise UM STASH variable long names to LFRic variable names.

    Note that standard names will remain associated with cubes where they
    differ. The long name is used consistently in output filenames and titles.
    """
    # Convert STASH to the LFRic variable name.
    if "STASH" in cube.attributes:
        stash = cube.attributes["STASH"]
        try:
            (name, grid) = STASH_TO_LFRIC[str(stash)]
            cube.long_name = name
        except KeyError:
            # Don't change cubes with unknown STASH codes.
            _warn_once(
                f"Unknown STASH code: {stash}. Please check file _stash_to_lfric.py to update."
            )


def _lfric_normalise_callback(cube: iris.cube.Cube, field, filename):
    """Normalise attributes that prevent LFRic cubes from merging.

    The uuid and timeStamp attributes relate to the output file, as saved by
    XIOS, and have no relation to the data contained. These attributes are
    removed.

    The um_stash_source attribute is a list of STASH codes for when an LFRic
    field maps to multiple UM fields; however, it can be encoded in any order,
    so it is sorted here. This attribute is only present in LFRic data that
    has been converted to look like UM data.
    """
    # Remove unwanted attributes.
    cube.attributes.pop("timeStamp", None)
    cube.attributes.pop("uuid", None)
    cube.attributes.pop("name", None)

    # Sort the STASH code list.
    stash_list = cube.attributes.get("um_stash_source")
    if stash_list:
        # Parse the string as a list, sort, then re-encode as a string.
        cube.attributes["um_stash_source"] = str(sorted(ast.literal_eval(stash_list)))

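# Example of the um_stash_source normalisation above (hypothetical attribute
# value): differently ordered encodings become identical after sorting.
#
#     str(sorted(ast.literal_eval("['m01s03i236', 'm01s03i209']")))
#     # -> "['m01s03i209', 'm01s03i236']"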

def _lfric_time_coord_fix_callback(cube: iris.cube.Cube, field, filename):
    """Ensure the time coordinate is a DimCoord rather than an AuxCoord.

    The coordinate is converted and replaced if not. SLAMed LFRic data has
    this issue, though the coordinate satisfies all the properties for a
    DimCoord. Scalar time values are left as AuxCoords.
    """
    # This issue seems to come from iris's handling of NetCDF files, where
    # time always ends up as an AuxCoord.
    if cube.coords("time"):
        time_coord = cube.coord("time")
        if (
            not isinstance(time_coord, iris.coords.DimCoord)
            and len(cube.coord_dims(time_coord)) == 1
        ):
            iris.util.promote_aux_coord_to_dim_coord(cube, time_coord)

    # Force single-valued coordinates to be scalar coordinates.
    return iris.util.squeeze(cube)


def _grid_longitude_fix_callback(cube: iris.cube.Cube):
    """Check grid_longitude coordinates are in the range -180 deg to 180 deg.

    This is necessary if comparing two models with different conventions --
    for example, models where the prime meridian is defined as 0 deg or
    360 deg. If not in the range -180 deg to 180 deg, we wrap the
    grid_longitude so that it falls in this range. Checks are against
    near-180 bounds, given model data bounds may not extend exactly to 0 or
    360. Input cubes on non-rotated grid coordinates are not impacted.
    """
    import CSET.operators._utils as utils

    try:
        y, x = utils.get_cube_yxcoordname(cube)
    except ValueError:
        # Don't modify non-spatial cubes.
        return cube

    long_coord = cube.coord(x)
    # Wrap longitudes if on rotated pole coordinates.
    coord_system = long_coord.coord_system
    if x == "grid_longitude" and isinstance(
        coord_system, iris.coord_systems.RotatedGeogCS
    ):
        long_points = long_coord.points.copy()
        long_centre = np.median(long_points)
        while long_centre < -175.0:
            long_centre += 360.0
            long_points += 360.0
        while long_centre >= 175.0:
            long_centre -= 360.0
            long_points -= 360.0
        long_coord.points = long_points

        # Update coord bounds to be consistent with wrapping.
        if long_coord.has_bounds() and np.size(long_coord) > 1:
            long_coord.bounds = None
            long_coord.guess_bounds()

    return cube

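# Worked example of the wrapping above: a grid_longitude axis centred near
# 360 deg (say points 355..365, median 360) trips the "long_centre >= 175"
# check, so 360 is subtracted once, re-centring the axis on 0 deg with
# points -5..5, after which the bounds are re-guessed.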

def _fix_spatial_coords_callback(cube: iris.cube.Cube):
    """Check latitude and longitude coordinate names.

    This is necessary as some models define their grid on rotated
    'grid_latitude' and 'grid_longitude' coordinates, while others define
    the grid on non-rotated 'latitude' and 'longitude'.
    Cube dimensions need to be made consistent to avoid recipe failures,
    particularly where comparing multiple input models with differing spatial
    coordinates.
    """
    import CSET.operators._utils as utils

    # Check if the cube is spatial.
    if not utils.is_spatialdim(cube):
        # Don't modify non-spatial cubes.
        return cube

    # Get spatial coords and dimension indices.
    y_name, x_name = utils.get_cube_yxcoordname(cube)
    ny = utils.get_cube_coordindex(cube, y_name)
    nx = utils.get_cube_coordindex(cube, x_name)

    # Translate [grid_latitude, grid_longitude] to unrotated 1-d DimCoords
    # [latitude, longitude] for instances where rotated_pole=90.0.
    if "grid_latitude" in [coord.name() for coord in cube.coords(dim_coords=True)]:
        coord_system = cube.coord("grid_latitude").coord_system
        pole_lat = coord_system.grid_north_pole_latitude
        if pole_lat == 90.0:
            lats = cube.coord("grid_latitude").points
            lons = cube.coord("grid_longitude").points

            cube.remove_coord("grid_latitude")
            cube.add_dim_coord(
                iris.coords.DimCoord(
                    lats,
                    standard_name="latitude",
                    var_name="latitude",
                    units="degrees",
                    coord_system=iris.coord_systems.GeogCS(6371229.0),
                    circular=True,
                ),
                ny,
            )
            y_name = "latitude"
            cube.remove_coord("grid_longitude")
            cube.add_dim_coord(
                iris.coords.DimCoord(
                    lons,
                    standard_name="longitude",
                    var_name="longitude",
                    units="degrees",
                    coord_system=iris.coord_systems.GeogCS(6371229.0),
                    circular=True,
                ),
                nx,
            )
            x_name = "longitude"

    # Create additional AuxCoords [grid_latitude, grid_longitude] with
    # rotated pole attributes for cases with [lat, lon] inputs.
    if y_name in ["latitude"] and cube.coord(y_name).units in [
        "degrees",
        "degrees_north",
        "degrees_south",
    ]:
        # Add grid_latitude AuxCoord.
        if "grid_latitude" not in [
            coord.name() for coord in cube.coords(dim_coords=False)
        ]:
            cube.add_aux_coord(
                iris.coords.AuxCoord(
                    cube.coord(y_name).points,
                    var_name="grid_latitude",
                    units="degrees",
                ),
                ny,
            )
        # Ensure the input latitude DimCoord has a CoordSystem.
        # This attribute is sometimes lost on iris.save.
        if not cube.coord(y_name).coord_system:
            cube.coord(y_name).coord_system = iris.coord_systems.GeogCS(6371229.0)

    if x_name in ["longitude"] and cube.coord(x_name).units in [
        "degrees",
        "degrees_west",
        "degrees_east",
    ]:
        # Add grid_longitude AuxCoord.
        if "grid_longitude" not in [
            coord.name() for coord in cube.coords(dim_coords=False)
        ]:
            cube.add_aux_coord(
                iris.coords.AuxCoord(
                    cube.coord(x_name).points,
                    var_name="grid_longitude",
                    units="degrees",
                ),
                nx,
            )

        # Ensure the input longitude DimCoord has a CoordSystem.
        # This attribute is sometimes lost on iris.save.
        if not cube.coord(x_name).coord_system:
            cube.coord(x_name).coord_system = iris.coord_systems.GeogCS(6371229.0)


def _fix_pressure_coord_callback(cube: iris.cube.Cube):
    """Rename the pressure coordinate to "pressure" if it exists and ensure hPa units.

    This problem was raised because the AIFS model data from ECMWF
    defines the pressure coordinate with the name "pressure_level" rather
    than compliant CF coordinate names.

    Additionally, set the units of pressure to hPa, consistent with the UM,
    so the coordinates can be approached in a unified way.
    """
    for coord in cube.dim_coords:
        if coord.name() in ["pressure_level", "pressure_levels"]:
            coord.rename("pressure")

        if coord.name() == "pressure":
            if str(cube.coord("pressure").units) != "hPa":
                cube.coord("pressure").convert_units("hPa")


def _fix_um_radtime(cube: iris.cube.Cube):
    """Shift radiation diagnostics output N minutes or seconds past the hour onto the hour.

    This callback does not have any effect for diagnostics with timestamps
    exactly 00 or 30 minutes past the hour. Only radiation diagnostics are
    checked.
    Note this callback does not interpolate the data in time; it only adjusts
    timestamps to sit on the hour, to enable time-to-time difference plotting
    against models which may output radiation data on the hour.
    """
    try:
        if cube.attributes["STASH"] in [
            "m01s01i207",
            "m01s01i208",
            "m01s02i205",
            "m01s02i201",
            "m01s02i207",
            "m01s01i235",
        ]:
            time_coord = cube.coord("time")

            # Convert time points to datetime objects.
            time_unit = time_coord.units
            time_points = time_unit.num2date(time_coord.points)
            # Skip if times don't need fixing.
            if time_points[0].minute == 0 and time_points[0].second == 0:
                return
            if time_points[0].minute == 30 and time_points[0].second == 0:
                return

            # Subtract the time difference from the hour from each time point.
            n_minute = time_points[0].minute
            n_second = time_points[0].second
            # If times are closer to the next hour, compute the difference to
            # add on to the following hour.
            if n_minute > 30:
                n_minute = n_minute - 60
            # Compute the new diagnostic time stamps.
            new_time_points = (
                time_points
                - datetime.timedelta(minutes=n_minute)
                - datetime.timedelta(seconds=n_second)
            )

            # Convert back to numeric values using the original time unit.
            new_time_values = time_unit.date2num(new_time_points)

            # Replace the time coordinate with updated values.
            time_coord.points = new_time_values

            # Recompute forecast_period with corrected values.
            if cube.coords("forecast_period"):
                fcst_prd_points = cube.coord("forecast_period").points
                new_fcst_points = (
                    time_unit.num2date(fcst_prd_points)
                    - datetime.timedelta(minutes=n_minute)
                    - datetime.timedelta(seconds=n_second)
                )
                cube.coord("forecast_period").points = time_unit.date2num(
                    new_fcst_points
                )
    except KeyError:
        pass

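# Worked example of the timestamp shift above (hypothetical times): output
# at 01:15:00 has n_minute=15, so 15 minutes are subtracted, giving
# 01:00:00; output at 01:45:00 has n_minute = 45 - 60 = -15, so 15 minutes
# are added, giving 02:00:00.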

def _fix_cell_methods(cube: iris.cube.Cube):
    """Fix the assumed cell_methods on accumulation STASH from the UM.

    Lightning (m01s21i104), rainfall amount (m01s04i201, m01s05i201) and
    snowfall amount (m01s04i202, m01s05i202) are output from the UM as time
    accumulations over each hour (TAcc1hr), but the input cubes show a
    cell_method of "mean". For UM and LFRic inputs to be compatible, we
    assume accumulated cell_methods are "sum". This callback changes the
    "mean" cell_method to "sum", enabling the cell_method constraint on
    reading to select the correct input.
    """
    # Shift "mean" cell_method to "sum" for selected UM inputs.
    if cube.attributes.get("STASH") in [
        "m01s21i104",
        "m01s04i201",
        "m01s04i202",
        "m01s05i201",
        "m01s05i202",
    ]:
        # Check if the input cell_methods contain only "mean" time-processing.
        if set(cm.method for cm in cube.cell_methods) == {"mean"}:
            # Retrieve interval and any comment information.
            for cell_method in cube.cell_methods:
                interval_str = cell_method.intervals
                comment_str = cell_method.comments

            # Remove the input aggregation method.
            cube.cell_methods = ()

            # Replace "mean" with a "sum" cell_method to indicate aggregation.
            cube.add_cell_method(
                iris.coords.CellMethod(
                    method="sum",
                    coords="time",
                    intervals=interval_str,
                    comments=comment_str,
                )
            )

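# Example of the cell_method rewrite above (hypothetical cube): an input
# CellMethod("mean", coords="time", intervals="1 hour") is replaced by
# CellMethod("sum", coords="time", intervals="1 hour"), preserving the
# interval and comment metadata.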

def _convert_cube_units_callback(cube: iris.cube.Cube):
    """Adjust diagnostic units for specific variables.

    Some precipitation diagnostics are output with unit kg m-2 s-1 and are
    converted here to mm hr-1.

    Visibility diagnostics are converted here from m to km to improve output
    formatting.
    """
    # Convert precipitation diagnostic units if required.
    varnames = filter(None, [cube.long_name, cube.standard_name, cube.var_name])
    if any("surface_microphysical" in name for name in varnames):
        if cube.units == "kg m-2 s-1":
            logging.debug(
                "Converting precipitation rate units from kg m-2 s-1 to mm hr-1"
            )
            # Convert from kg m-2 s-1 to mm s-1, assuming
            # 1 kg water = 1 l water = 1 dm^3 water.
            # This is a 1:1 conversion, so we just change the units.
            cube.units = "mm s-1"
            # Convert the units to per hour.
            cube.convert_units("mm hr-1")
        elif cube.units == "kg m-2":
            logging.debug("Converting precipitation amount units from kg m-2 to mm")
            # Convert from kg m-2 to mm, assuming
            # 1 kg water = 1 l water = 1 dm^3 water.
            # This is a 1:1 conversion, so we just change the units.
            cube.units = "mm"

    # Convert visibility diagnostic units if required.
    varnames = filter(None, [cube.long_name, cube.standard_name, cube.var_name])
    if any("visibility" in name for name in varnames):
        if cube.units == "m":
            logging.debug("Converting visibility units m to km.")
            # Convert the units to km.
            cube.convert_units("km")

    return cube

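# Worked example of the precipitation conversion above: 1 kg m-2 s-1 of rain
# is 1 mm s-1 of water depth (1 kg of water over 1 m^2 forms a 1 mm layer),
# so relabelling the units is exact, and convert_units then scales mm s-1
# to mm hr-1 by a factor of 3600.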

def _fix_lfric_cloud_base_altitude(cube: iris.cube.Cube):
    """Mask the cloud_base_altitude diagnostic in regions with no cloud."""
    varnames = filter(None, [cube.long_name, cube.standard_name, cube.var_name])
    if any("cloud_base_altitude" in name for name in varnames):
        # Mask cube where > 144 kft to catch the default 144.35695538058164.
        cube.data = np.ma.masked_array(cube.data)
        cube.data[cube.data > 144.0] = np.ma.masked


def _fix_um_winds(cubes: iris.cube.CubeList):
    """Make winds from the UM consistent with those from LFRic.

    Diagnostics of wind are not always consistent between the UM and LFRic.
    Here, winds from the UM are adjusted to make them consistent with LFRic.
    """
    # Check whether we have components of the wind identified by STASH (so
    # this will apply only to cubes from the UM) but not the wind speed, and
    # calculate the speed if it is missing. Note that it will be biased low
    # in general, because the components will mostly be time averages. For
    # simplicity, we do this only if there is just one cube of a component.
    # A more complicated approach would be to consider the cell methods, but
    # it may not be warranted.
    u_constr = iris.AttributeConstraint(STASH="m01s03i225")
    v_constr = iris.AttributeConstraint(STASH="m01s03i226")
    speed_constr = iris.AttributeConstraint(STASH="m01s03i227")
    try:
        if cubes.extract(u_constr) and cubes.extract(v_constr):
            if len(cubes.extract(u_constr)) == 1 and not cubes.extract(speed_constr):
                _add_wind_speed_um(cubes)
            # Convert winds in the UM to be relative to true east and true north.
            _convert_wind_true_dirn_um(cubes)
    except (KeyError, AttributeError):
        pass


def _add_wind_speed_um(cubes: iris.cube.CubeList):
    """Add windspeeds to cubes from the UM."""
    wspd10 = (
        cubes.extract_cube(iris.AttributeConstraint(STASH="m01s03i225"))[0] ** 2
        + cubes.extract_cube(iris.AttributeConstraint(STASH="m01s03i226"))[0] ** 2
    ) ** 0.5
    wspd10.attributes["STASH"] = "m01s03i227"
    wspd10.standard_name = "wind_speed"
    wspd10.long_name = "wind_speed_at_10m"
    cubes.append(wspd10)

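# The wind speed added above is the magnitude of the horizontal wind,
# sqrt(u**2 + v**2), computed with cube arithmetic so that coordinates and
# metadata are carried through from the component cubes.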

def _convert_wind_true_dirn_um(cubes: iris.cube.CubeList):
    """Convert winds to true directions.

    Convert from components relative to the grid to true directions.
    This functionality only handles the simplest case.
    """
    u_grid = cubes.extract_cube(iris.AttributeConstraint(STASH="m01s03i225"))
    v_grid = cubes.extract_cube(iris.AttributeConstraint(STASH="m01s03i226"))
    true_u, true_v = rotate_winds(u_grid, v_grid, iris.coord_systems.GeogCS(6371229.0))
    u_grid.data = true_u.data
    v_grid.data = true_v.data


def _normalise_var0_varname(cube: iris.cube.Cube):
    """Fix var_names for consistency to allow merging.

    NetCDF model data sometimes has a coordinate var_name ending in "_0"
    etc., where duplicate coordinates of the same name are defined with
    different attributes. This can be inconsistently managed in different
    model inputs and can cause cubes to fail to merge.
    """
    for coord in cube.coords():
        for suffix in ("_0", "_1", "_2", "_3"):
            if coord.var_name and coord.var_name.endswith(suffix):
                coord.var_name = coord.var_name.removesuffix(suffix)

    if cube.var_name and cube.var_name.endswith("_0"):
        cube.var_name = cube.var_name.removesuffix("_0")

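# Example of the var_name normalisation above (hypothetical names): a
# coordinate with var_name "time_0" becomes "time", so it can merge with a
# cube whose coordinate is plainly named "time".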

def _lfric_time_callback(cube: iris.cube.Cube):
    """Fix time coordinate metadata if missing dimensions.

    Some model data does not contain forecast_reference_time or
    forecast_period as expected coordinates, and so we cannot aggregate over
    case studies without this metadata. This callback fixes these issues.

    This callback also ensures all time coordinates are referenced as hours
    since 1970-01-01 00:00:00 for consistency across different model inputs.

    Notes
    -----
    Some parts of the code have been adapted from Paul Earnshaw's scripts.
    """
    # Construct forecast_reference_time if it doesn't exist.
    try:
        tcoord = cube.coord("time")
        # Set the time coordinate to the common basis "hours since 1970".
        try:
            tcoord.convert_units("hours since 1970-01-01 00:00:00")
        except ValueError:
            logging.error("Unrecognised base time unit: %s", tcoord.units)

        if not cube.coords("forecast_reference_time"):
            try:
                init_time = datetime.datetime.fromisoformat(
                    tcoord.attributes["time_origin"]
                )
                frt_point = tcoord.units.date2num(init_time)
                frt_coord = iris.coords.AuxCoord(
                    frt_point,
                    units=tcoord.units,
                    standard_name="forecast_reference_time",
                    long_name="forecast_reference_time",
                )
                cube.add_aux_coord(frt_coord)
            except KeyError:
                logging.warning(
                    "Cannot find forecast_reference_time, but no `time_origin` "
                    "attribute to construct it from."
                )

        # Remove time_origin to allow multiple case studies to merge.
        tcoord.attributes.pop("time_origin", None)

        # Construct the forecast_period axis (forecast lead time) if it
        # doesn't exist.
        if not cube.coords("forecast_period"):
            try:
                # Create an array of forecast lead times.
                init_coord = cube.coord("forecast_reference_time")
                init_time_points_in_tcoord_units = tcoord.units.date2num(
                    init_coord.units.num2date(init_coord.points)
                )
                lead_times = tcoord.points - init_time_points_in_tcoord_units

                # Get the unit for lead time from the time coordinate's unit.
                # Convert all lead times to hours for consistency between
                # models.
                if "seconds" in str(tcoord.units):
                    lead_times = lead_times / 3600.0
                    units = "hours"
                elif "hours" in str(tcoord.units):
                    units = "hours"
                else:
                    raise ValueError(f"Unrecognised base time unit: {tcoord.units}")

                # Create the lead time coordinate.
                lead_time_coord = iris.coords.AuxCoord(
                    lead_times,
                    standard_name="forecast_period",
                    long_name="forecast_period",
                    units=units,
                )

                # Associate the lead time coordinate with the time dimension.
                cube.add_aux_coord(lead_time_coord, cube.coord_dims("time"))
            except iris.exceptions.CoordinateNotFoundError:
                logging.warning(
                    "Cube does not have both time and forecast_reference_time "
                    "coordinates, so cannot construct forecast_period."
                )
    except iris.exceptions.CoordinateNotFoundError:
        logging.warning("No time coordinate on cube.")

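# Worked example of the forecast_period construction above (hypothetical
# times): with time = 2023-01-01 12:00 and forecast_reference_time =
# 2023-01-01 00:00, both expressed as "hours since 1970-01-01 00:00:00",
# the lead time is the difference of the two numeric points: 12 hours.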

def _lfric_forecast_period_standard_name_callback(cube: iris.cube.Cube):
    """Add the forecast_period standard name if missing."""
    try:
        coord = cube.coord("forecast_period")
        if not coord.standard_name:
            coord.standard_name = "forecast_period"
    except iris.exceptions.CoordinateNotFoundError:
        pass