Coverage for src / CSET / _common.py: 100%

149 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-11 10:48 +0000

1# © Crown copyright, Met Office (2022-2024) and CSET contributors. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Common functionality used across CSET.""" 

16 

17import ast 

18import io 

19import json 

20import logging 

21import re 

22from collections.abc import Iterable 

23from pathlib import Path 

24from textwrap import dedent 

25from typing import Any 

26 

27import ruamel.yaml 

28 

29 

class ArgumentError(ValueError):
    """Raised when a supplied argument cannot be understood."""

32 

33 

def parse_recipe(recipe_yaml: Path | str, variables: dict | None = None) -> dict:
    """Load a recipe from YAML and return it as a python dictionary.

    Parameters
    ----------
    recipe_yaml: Path | str
        Either a Path to a file holding the recipe's YAML, or a string
        containing the YAML itself. A string is wrapped in a file-like object;
        a Path is handed to the YAML loader unchanged.
    variables: dict
        Dictionary of recipe variables. Templating is skipped when this is
        None.

    Returns
    -------
    recipe: dict
        The recipe as a python dictionary.

    Raises
    ------
    ValueError
        If the recipe is invalid. E.g. invalid YAML, missing any steps, etc.
    TypeError
        If recipe_yaml isn't a Path or string.
    KeyError
        If needed recipe variables are not supplied.

    Examples
    --------
    >>> CSET._common.parse_recipe(Path("myrecipe.yaml"))
    {'steps': [{'operator': 'misc.noop'}]}
    """
    # Reject anything the YAML loader is not expected to read.
    if not isinstance(recipe_yaml, (str, Path)):
        raise TypeError("recipe_yaml must be a str or Path.")
    yaml_source = (
        io.StringIO(recipe_yaml) if isinstance(recipe_yaml, str) else recipe_yaml
    )

    # Load with the safe, pure-python loader.
    with ruamel.yaml.YAML(typ="safe", pure=True) as loader:
        try:
            parsed = loader.load(yaml_source)
        except ruamel.yaml.parser.ParserError as err:
            raise ValueError("ParserError: Invalid YAML") from err

    logging.debug("Recipe before templating:\n%s", parsed)
    check_recipe_has_steps(parsed)

    # Only substitute variables when some were actually provided.
    if variables is not None:
        logging.debug("Recipe variables: %s", variables)
        parsed = template_variables(parsed, variables)

    logging.debug("Recipe after templating:\n%s", parsed)
    return parsed

87 

88 

def check_recipe_has_steps(recipe: dict):
    """Check a recipe has the minimum required steps.

    Checks that the recipe actually has some steps, raising an exception with
    a helpful message otherwise. A recipe must have a non-empty 'steps' key.

    Parameters
    ----------
    recipe: dict
        The recipe as a python dictionary.

    Raises
    ------
    TypeError
        If recipe isn't a dict.
    ValueError
        If the 'steps' key is missing, does not hold a sequence of steps, or
        holds an empty sequence.
    """
    if not isinstance(recipe, dict):
        raise TypeError("Recipe must contain a mapping.")
    if "steps" not in recipe:
        raise ValueError("Recipe must contain a 'steps' key.")
    steps = recipe["steps"]
    # Strings and mappings have a length, so they would slip through a bare
    # len() check, but neither is a sequence of steps.
    if isinstance(steps, (str, bytes, dict)):
        raise ValueError("'steps' key must contain a sequence of steps.")
    try:
        step_count = len(steps)
    except TypeError as err:
        raise ValueError("'steps' key must contain a sequence of steps.") from err
    if step_count < 1:
        raise ValueError("Recipe must have at least 1 step.")

119 

120 

def slugify(s: str) -> str:
    """Convert a string into a slug that is safe to use everywhere.

    The string is case-folded, every run of characters other than a-z, 0-9,
    dots, dashes and underscores becomes a single underscore, and underscores
    at either end are removed.
    """
    lowered = s.casefold()
    collapsed = re.sub(r"[^a-z0-9\._-]+", "_", lowered)
    return collapsed.strip("_")

128 

129 

def filename_slugify(s: str) -> str:
    """Turn a string into a version that can be used in filenames.

    The string is case-folded and every character other than a-z, 0-9 and
    dots is removed, so the result consists only of those characters.
    """
    # Underscores are removed by the substitution itself, so no stripping of
    # leading or trailing underscores is needed afterwards.
    return re.sub(r"[^a-z0-9\.]+", "", s.casefold())

136 

137 

def get_recipe_metadata() -> dict:
    """Return the running recipe's metadata, read from ``meta.json``.

    The file is looked up relative to the current working directory. When it
    does not exist, a ``meta.json`` holding an empty JSON object is written and
    an empty dictionary is returned.
    """
    try:
        existing = open("meta.json", "rt", encoding="UTF-8")
    except FileNotFoundError:
        # No metadata yet: create the file so later reads find it.
        with open("meta.json", "wt", encoding="UTF-8") as created:
            json.dump({}, created, indent=2)
        return {}
    with existing:
        return json.load(existing)

148 

149 

150def parse_variable_options( 

151 arguments: list[str], input_dir: str | list[str] | None = None 

152) -> dict: 

153 """Parse a list of arguments into a dictionary of variables. 

154 

155 The variable name arguments start with two hyphen-minus (`--`), consisting 

156 of only capital letters (`A`-`Z`) and underscores (`_`). While the variable 

157 name is restricted, the value of the variable can be any string. 

158 

159 Parameters 

160 ---------- 

161 arguments: list[str] 

162 List of arguments, e.g: `["--LEVEL", "2", "--STASH=m01s01i001"]` 

163 input_dir: str | list[str], optional 

164 List of input directories to add into the returned variables. 

165 

166 Returns 

167 ------- 

168 recipe_variables: dict 

169 Dictionary keyed with the variable names. 

170 

171 Raises 

172 ------ 

173 ValueError 

174 If any arguments cannot be parsed. 

175 """ 

176 # Convert --input_dir=... to INPUT_PATHS recipe variable. 

177 if input_dir is not None: 

178 abs_paths = [str(Path(p).absolute()) for p in iter_maybe(input_dir)] 

179 arguments.append(f"--INPUT_PATHS={abs_paths}") 

180 recipe_variables = {} 

181 i = 0 

182 while i < len(arguments): 

183 if re.fullmatch(r"--[A-Z_]+=.*", arguments[i]): 

184 key, value = arguments[i].split("=", 1) 

185 elif re.fullmatch(r"--[A-Z_]+", arguments[i]): 

186 try: 

187 key = arguments[i].strip("-") 

188 value = arguments[i + 1] 

189 except IndexError as err: 

190 raise ArgumentError(f"No value for variable {arguments[i]}") from err 

191 i += 1 

192 else: 

193 raise ArgumentError(f"Unknown argument: {arguments[i]}") 

194 try: 

195 # Remove quotes from arguments, in case left in CSET_ADDOPTS. 

196 if re.fullmatch(r"""["'].+["']""", value): 

197 value = value[1:-1] 

198 recipe_variables[key.strip("-")] = ast.literal_eval(value) 

199 # Capture the many possible exceptions from ast.literal_eval 

200 except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError): 

201 recipe_variables[key.strip("-")] = value 

202 i += 1 

203 return recipe_variables 

204 

205 

206def template_variables(recipe: dict | list, variables: dict) -> dict: 

207 """Insert variables into recipe. 

208 

209 Parameters 

210 ---------- 

211 recipe: dict | list 

212 The recipe as a python dictionary. It is updated in-place. 

213 variables: dict 

214 Dictionary of variables for the recipe. 

215 

216 Returns 

217 ------- 

218 recipe: dict 

219 Filled recipe as a python dictionary. 

220 

221 Raises 

222 ------ 

223 KeyError 

224 If needed recipe variables are not supplied. 

225 """ 

226 if isinstance(recipe, dict): 

227 index = recipe.keys() 

228 elif isinstance(recipe, list): 

229 # We have to handle lists for when we have one inside a recipe. 

230 index = range(len(recipe)) 

231 else: 

232 raise TypeError("recipe must be a dict or list.", recipe) 

233 

234 for i in index: 

235 if isinstance(recipe[i], (dict, list)): 

236 recipe[i] = template_variables(recipe[i], variables) 

237 elif isinstance(recipe[i], str): 

238 recipe[i] = replace_template_variable(recipe[i], variables) 

239 return recipe 

240 

241 

def replace_template_variable(s: str, variables: dict[str, Any]):
    """Fill all variable placeholders in the string.

    A placeholder is a dollar sign followed by the variable name, e.g.
    ``$LEVEL``.

    Parameters
    ----------
    s: str
        String that may contain placeholders.
    variables: dict[str, Any]
        Values to insert, keyed by variable name.

    Returns
    -------
    Any
        If the string consists of exactly one placeholder, the variable's
        value is returned with its type kept (a Path, or a list whose first
        item is a Path, is converted to strings). Otherwise the string is
        returned with each placeholder replaced by ``str()`` of its value.

    Raises
    ------
    KeyError
        If a placeholder without a value is left in the resulting string.
    """
    # Substitute longer names first, so that a variable whose name is a prefix
    # of another (VAR and VARIABLE) cannot clobber the start of the longer
    # placeholder. Names of equal length keep their dictionary order.
    ordered_variables = sorted(
        variables.items(), key=lambda item: len(str(item[0])), reverse=True
    )
    for var_name, var_value in ordered_variables:
        placeholder = f"${var_name}"
        # If the value is just the placeholder we directly overwrite it
        # to keep the value type.
        if s == placeholder:
            # Specially handle Paths and lists of Paths.
            if isinstance(var_value, Path):
                var_value = str(var_value)
            if (
                isinstance(var_value, list)
                and var_value
                and isinstance(var_value[0], Path)
            ):
                var_value = [str(p) for p in var_value]
            s = var_value
            # We have replaced the whole string, so stop here to avoid
            # interpreting the new value.
            break
        else:
            s = s.replace(placeholder, str(var_value))
    # Anything still looking like a placeholder has no value to fill it.
    if isinstance(s, str) and re.match(r"^.*\$[A-Z_].*", s):
        raise KeyError("Variable without a value.", s)
    return s

267 

268 

269################################################################################ 

270# Templating code taken from the simple_template package under the 0BSD licence. 

271# Original at https://github.com/Fraetor/simple_template 

272################################################################################ 

273 

274 

class TemplateError(KeyError):
    """Raised when a template cannot be rendered due to a placeholder lacking a value."""

277 

278 

def render(template: str, /, **variables) -> str:
    """Render the template with the provided variables.

    The template should contain placeholders that will be replaced. These
    placeholders consist of the placeholder name within double curly braces. The
    name of the placeholder should be a valid python identifier. Whitespace
    between the braces and the name is ignored. E.g.: `{{ placeholder_name }}`

    An exception will be raised if there are placeholders without corresponding
    values. It is acceptable to provide unused values; they will be ignored.
    Text in double curly braces that is not a valid python identifier is left
    untouched.

    Values are inserted literally, in a single pass over the template:
    backslashes in a value are not treated as escapes, and placeholders that
    appear inside an inserted value are not themselves filled.

    Parameters
    ----------
    template: str
        Template to fill with variables.

    **variables: Any
        Keyword arguments for the placeholder values. The argument name should
        be the same as the placeholder's name. You can unpack a dictionary of
        value with `render(template, **my_dict)`.

    Returns
    -------
    rendered_template: str
        Filled template.

    Raises
    ------
    TemplateError
        Value not given for a placeholder in the template.
    TypeError
        If the template is not a string, or a variable cannot be casted to a
        string.

    Examples
    --------
    >>> template = "<p>Hello {{myplaceholder}}!</p>"
    >>> simple_template.render(template, myplaceholder="World")
    "<p>Hello World!</p>"
    """

    def fill_placeholder(match: re.Match) -> str:
        # The captured group includes any whitespace before the closing
        # braces, so it must be stripped before being treated as a name.
        name = match.group(1).strip()
        if not name.isidentifier():
            # Not a placeholder; keep the original text.
            return match.group(0)
        try:
            return str(variables[name])
        except KeyError as err:
            raise TemplateError("Placeholder missing value", name) from err

    # Passing a function as the replacement makes re.sub insert its return
    # value verbatim, rather than processing backslash escapes in it.
    return re.sub(r"{{\s*([^}]+)\s*}}", fill_placeholder, template)

339 

340 

def render_file(template_path: str, /, **variables) -> str:
    """Render a template that is stored in a file.

    The file is read as UTF-8 text and its content is passed to `render()`
    together with the given variables, so it otherwise behaves the same as
    `simple_template.render()`.

    Examples
    --------
    >>> simple_template.render_file("/path/to/template.html", myplaceholder="World")
    "<p>Hello World!</p>"
    """
    with open(template_path, mode="rt", encoding="UTF-8") as template_file:
        content = template_file.read()
    return render(content, **variables)

354 

355 

def iter_maybe(thing) -> Iterable:
    """Return thing if it is a non-string Iterable, else wrap it in a tuple.

    Strings are treated as single values rather than as iterables of
    characters.
    """
    treat_as_atom = isinstance(thing, str) or not isinstance(thing, Iterable)
    return (thing,) if treat_as_atom else thing

361 

362 

def human_sorted(iterable: Iterable, reverse: bool = False) -> list:
    """Sort so that numbers embedded in strings are ordered by their value.

    For example "a2" sorts before "a10". Items that cannot be split as strings
    are compared using the item itself.
    """
    # Adapted from https://nedbatchelder.com/blog/200712/human_sorting.html

    def natural_key(item):
        """Split a string into a list of text and integer chunks.

        >>> natural_key("z23a")
        ["z", 23, "a"]
        """
        try:
            chunks = re.split(r"(\d+)", item)
        except TypeError:
            # Not a string, so fall back to the item as its own key.
            return item
        return [int(chunk) if chunk.isdecimal() else chunk for chunk in chunks]

    return sorted(iterable, key=natural_key, reverse=reverse)

379 

380 

def combine_dicts(d1: dict, d2: dict) -> dict:
    """Recursively combines two dictionaries.

    Duplicate atoms favour the second dictionary. Where a key holds a
    dictionary in both inputs those dictionaries are combined recursively; in
    every other case the value from the second dictionary is used.

    The first dictionary is updated in-place and returned.

    Parameters
    ----------
    d1: dict
        Dictionary to merge into.
    d2: dict
        Dictionary whose values take precedence.

    Returns
    -------
    dict
        The updated first dictionary.
    """
    # Update existing keys.
    for key in d1.keys() & d2.keys():
        # Only recurse when both sides are dictionaries; a dictionary in d1
        # paired with an atom in d2 is simply replaced by the atom.
        if isinstance(d1[key], dict) and isinstance(d2[key], dict):
            d1[key] = combine_dicts(d1[key], d2[key])
        else:
            d1[key] = d2[key]
    # Add any new keys.
    for key in d2.keys() - d1.keys():
        d1[key] = d2[key]
    return d1

396 

397 

def sort_dict(d: dict) -> dict:
    """Return a new dictionary with the items sorted, recursively.

    The items are ordered with ``human_sorted``, and any value that is itself
    a dictionary is sorted in the same way.
    """
    # Thank you to https://stackoverflow.com/a/47882384
    ordered = {}
    for key, value in human_sorted(d.items()):
        ordered[key] = sort_dict(value) if isinstance(value, dict) else value
    return ordered

405 

406 

def sstrip(text):
    """Remove common indentation, then strip surrounding whitespace.

    Parameters
    ----------
    text: str
        The string to dedent and strip.

    Examples
    --------
    >>> print(sstrip('''
    ...     foo
    ...     bar
    ...     baz
    ... '''))
    foo
    bar
    baz
    """
    dedented = dedent(text)
    return dedented.strip()

427 

428 

def is_increasing(sequence: list) -> bool:
    """Report whether an ordered sequence runs in increasing order.

    Only the first two values are compared, so the sequence should already be
    monotonic, with no duplicate values. An iris DimCoord's points fulfils
    this criteria.
    """
    first = sequence[0]
    second = sequence[1]
    return first < second