Coverage for src / CSET / _common.py: 100%
149 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-11 10:48 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-11 10:48 +0000
1# © Crown copyright, Met Office (2022-2024) and CSET contributors.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
15"""Common functionality used across CSET."""
17import ast
18import io
19import json
20import logging
21import re
22from collections.abc import Iterable
23from pathlib import Path
24from textwrap import dedent
25from typing import Any
27import ruamel.yaml
class ArgumentError(ValueError):
    """Raised when a supplied argument cannot be interpreted."""
def parse_recipe(recipe_yaml: Path | str, variables: dict | None = None) -> dict:
    """Load a recipe from YAML and return it as a python dictionary.

    Parameters
    ----------
    recipe_yaml: Path | str
        Either the YAML text of a recipe, or a Path to a file containing it.
        A string is wrapped in an in-memory text stream before parsing; a
        Path is handed to the YAML loader unchanged.
    variables: dict, optional
        Recipe variables to substitute into the parsed recipe. When None no
        templating is attempted.

    Returns
    -------
    recipe: dict
        The recipe as a python dictionary.

    Raises
    ------
    ValueError
        If the YAML parser reports a ParserError, or the recipe has no
        usable 'steps'.
    TypeError
        If recipe_yaml isn't a Path or string, or the parsed recipe is not a
        mapping.
    KeyError
        If needed recipe variables are not supplied.

    Examples
    --------
    >>> CSET._common.parse_recipe(Path("myrecipe.yaml"))
    {'steps': [{'operator': 'misc.noop'}]}
    """
    # Reject unsupported inputs before touching the parser.
    if not isinstance(recipe_yaml, (str, Path)):
        raise TypeError("recipe_yaml must be a str or Path.")
    # A string holds the YAML itself, so present it to the parser as a stream.
    source = io.StringIO(recipe_yaml) if isinstance(recipe_yaml, str) else recipe_yaml

    with ruamel.yaml.YAML(typ="safe", pure=True) as yaml:
        try:
            parsed = yaml.load(source)
        except ruamel.yaml.parser.ParserError as err:
            raise ValueError("ParserError: Invalid YAML") from err

    logging.debug("Recipe before templating:\n%s", parsed)
    check_recipe_has_steps(parsed)

    if variables is None:
        filled = parsed
    else:
        logging.debug("Recipe variables: %s", variables)
        filled = template_variables(parsed, variables)

    logging.debug("Recipe after templating:\n%s", filled)
    return filled
def check_recipe_has_steps(recipe: dict):
    """Validate that a parsed recipe declares at least one step.

    Each failure mode gets its own error message so that a broken recipe is
    easy to diagnose.

    Parameters
    ----------
    recipe: dict
        The recipe as a python dictionary.

    Raises
    ------
    TypeError
        If recipe isn't a dict.
    ValueError
        If the recipe has no 'steps' key, if the value under 'steps' has no
        length, or if it is empty.
    """
    if not isinstance(recipe, dict):
        raise TypeError("Recipe must contain a mapping.")
    if "steps" not in recipe:
        raise ValueError("Recipe must contain a 'steps' key.")

    # Anything without a length (e.g. a bare number) cannot hold steps.
    try:
        step_count = len(recipe["steps"])
    except TypeError as err:
        raise ValueError("'steps' key must contain a sequence of steps.") from err

    if step_count < 1:
        raise ValueError("Recipe must have at least 1 step.")
def slugify(s: str) -> str:
    """Reduce a string to a form that is safe to use everywhere.

    The string is case-folded, then every run of characters other than a-z,
    0-9, dots, dashes, and underscores is replaced by a single underscore.
    Leading and trailing underscores are removed from the result.
    """
    folded = s.casefold()
    collapsed = re.sub(r"[^a-z0-9\._-]+", "_", folded)
    return collapsed.strip("_")
def filename_slugify(s: str) -> str:
    """Turn a string into a version that can be used in filenames.

    The string is case-folded and every character other than a-z, 0-9, and
    dots is removed (not replaced), so the result consists only of those
    characters.

    Parameters
    ----------
    s: str
        The string to convert.

    Returns
    -------
    slug: str
        The reduced string; empty if no permitted characters remain.
    """
    # Underscores are removed by the substitution itself, so there is nothing
    # left to strip from the ends afterwards.
    return re.sub(r"[^a-z0-9\.]+", "", s.casefold())
def get_recipe_metadata() -> dict:
    """Get the metadata of the running recipe.

    The metadata is read from ``meta.json`` in the current working directory.
    If that file does not exist it is created holding an empty JSON object,
    and an empty dictionary is returned.
    """
    try:
        with open("meta.json", "rt", encoding="UTF-8") as existing:
            metadata = json.load(existing)
    except FileNotFoundError:
        # Nothing recorded yet: create the file so later reads succeed.
        with open("meta.json", "wt", encoding="UTF-8") as created:
            json.dump({}, created, indent=2)
        metadata = {}
    return metadata
150def parse_variable_options(
151 arguments: list[str], input_dir: str | list[str] | None = None
152) -> dict:
153 """Parse a list of arguments into a dictionary of variables.
155 The variable name arguments start with two hyphen-minus (`--`), consisting
156 of only capital letters (`A`-`Z`) and underscores (`_`). While the variable
157 name is restricted, the value of the variable can be any string.
159 Parameters
160 ----------
161 arguments: list[str]
162 List of arguments, e.g: `["--LEVEL", "2", "--STASH=m01s01i001"]`
163 input_dir: str | list[str], optional
164 List of input directories to add into the returned variables.
166 Returns
167 -------
168 recipe_variables: dict
169 Dictionary keyed with the variable names.
171 Raises
172 ------
173 ValueError
174 If any arguments cannot be parsed.
175 """
176 # Convert --input_dir=... to INPUT_PATHS recipe variable.
177 if input_dir is not None:
178 abs_paths = [str(Path(p).absolute()) for p in iter_maybe(input_dir)]
179 arguments.append(f"--INPUT_PATHS={abs_paths}")
180 recipe_variables = {}
181 i = 0
182 while i < len(arguments):
183 if re.fullmatch(r"--[A-Z_]+=.*", arguments[i]):
184 key, value = arguments[i].split("=", 1)
185 elif re.fullmatch(r"--[A-Z_]+", arguments[i]):
186 try:
187 key = arguments[i].strip("-")
188 value = arguments[i + 1]
189 except IndexError as err:
190 raise ArgumentError(f"No value for variable {arguments[i]}") from err
191 i += 1
192 else:
193 raise ArgumentError(f"Unknown argument: {arguments[i]}")
194 try:
195 # Remove quotes from arguments, in case left in CSET_ADDOPTS.
196 if re.fullmatch(r"""["'].+["']""", value):
197 value = value[1:-1]
198 recipe_variables[key.strip("-")] = ast.literal_eval(value)
199 # Capture the many possible exceptions from ast.literal_eval
200 except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
201 recipe_variables[key.strip("-")] = value
202 i += 1
203 return recipe_variables
206def template_variables(recipe: dict | list, variables: dict) -> dict:
207 """Insert variables into recipe.
209 Parameters
210 ----------
211 recipe: dict | list
212 The recipe as a python dictionary. It is updated in-place.
213 variables: dict
214 Dictionary of variables for the recipe.
216 Returns
217 -------
218 recipe: dict
219 Filled recipe as a python dictionary.
221 Raises
222 ------
223 KeyError
224 If needed recipe variables are not supplied.
225 """
226 if isinstance(recipe, dict):
227 index = recipe.keys()
228 elif isinstance(recipe, list):
229 # We have to handle lists for when we have one inside a recipe.
230 index = range(len(recipe))
231 else:
232 raise TypeError("recipe must be a dict or list.", recipe)
234 for i in index:
235 if isinstance(recipe[i], (dict, list)):
236 recipe[i] = template_variables(recipe[i], variables)
237 elif isinstance(recipe[i], str):
238 recipe[i] = replace_template_variable(recipe[i], variables)
239 return recipe
def replace_template_variable(s: str, variables: dict[str, Any]):
    """Fill all variable placeholders in the string.

    A placeholder is a variable name prefixed with a dollar sign, e.g.
    `$LEVEL`. Variables are applied longest name first, so a variable whose
    name is the start of another (`$VAR` and `$VAR_NAME`) cannot corrupt the
    longer placeholder.

    Parameters
    ----------
    s: str
        The string to fill.
    variables: dict[str, Any]
        Mapping of variable names to their values.

    Returns
    -------
    filled: Any
        If the string is exactly one placeholder the variable's value is
        returned with its type kept, except that a Path, or a list whose
        first item is a Path, is converted to str or a list of str.
        Otherwise the string with each placeholder replaced by ``str(value)``.

    Raises
    ------
    KeyError
        If the resulting string still contains a `$` followed by a capital
        letter or underscore, i.e. a placeholder without a value.
    """
    # sorted() is stable, so names of equal length keep their mapping order.
    longest_first = sorted(
        variables.items(), key=lambda item: len(str(item[0])), reverse=True
    )
    for var_name, var_value in longest_first:
        placeholder = f"${var_name}"
        # If the value is just the placeholder we directly overwrite it
        # to keep the value type.
        if s == placeholder:
            # Specially handle Paths and lists of Paths.
            if isinstance(var_value, Path):
                var_value = str(var_value)
            if (
                isinstance(var_value, list)
                and var_value
                and isinstance(var_value[0], Path)
            ):
                var_value = [str(p) for p in var_value]
            s = var_value
            # We have replaced the whole string, so stop here to avoid
            # interpreting the new value.
            break
        else:
            s = s.replace(placeholder, str(var_value))
    # Anything still looking like a placeholder has no matching variable.
    if isinstance(s, str) and re.match(r"^.*\$[A-Z_].*", s):
        raise KeyError("Variable without a value.", s)
    return s
269################################################################################
270# Templating code taken from the simple_template package under the 0BSD licence.
271# Original at https://github.com/Fraetor/simple_template
272################################################################################
class TemplateError(KeyError):
    """Raised when a template placeholder has no value to fill it."""
def render(template: str, /, **variables) -> str:
    """Render the template with the provided variables.

    The template should contain placeholders that will be replaced. These
    placeholders consist of the placeholder name within double curly braces. The
    name of the placeholder should be a valid python identifier. Whitespace
    between the braces and the name is ignored. E.g.: `{{ placeholder_name }}`

    Anything in double curly braces that is not a valid identifier is left
    in the output unchanged. Values are inserted literally: backslashes in a
    value are not treated as escapes, and placeholders appearing inside a
    value are not expanded.

    An exception will be raised if there are placeholders without corresponding
    values. It is acceptable to provide unused values; they will be ignored.

    Parameters
    ----------
    template: str
        Template to fill with variables.

    **variables: Any
        Keyword arguments for the placeholder values. The argument name should
        be the same as the placeholder's name. You can unpack a dictionary of
        value with `render(template, **my_dict)`.

    Returns
    -------
    rendered_template: str
        Filled template.

    Raises
    ------
    TemplateError
        Value not given for a placeholder in the template.
    TypeError
        If the template is not a string, or a variable cannot be casted to a
        string.

    Examples
    --------
    >>> template = "<p>Hello {{myplaceholder}}!</p>"
    >>> simple_template.render(template, myplaceholder="World")
    "<p>Hello World!</p>"
    """

    def fill(match: re.Match) -> str:
        name = match.group(1)
        if not name.isidentifier():
            # Not a placeholder; keep the original text untouched.
            return match.group(0)
        try:
            return str(variables[name])
        except KeyError as err:
            raise TemplateError("Placeholder missing value", name) from err

    # The lazy capture leaves whitespace before the closing braces to the
    # trailing \s*, so "{{ name }}" yields "name" rather than "name ".
    # A callable replacement makes re.sub insert the value verbatim, and a
    # single pass means already substituted text is never rescanned.
    return re.sub(r"{{\s*([^}]+?)\s*}}", fill, template)
def render_file(template_path: str, /, **variables) -> str:
    """Render a template read from a file.

    The file is read as UTF-8 text and its content is passed to
    `simple_template.render()`, which defines the placeholder syntax and the
    errors raised.

    Examples
    --------
    >>> simple_template.render_file("/path/to/template.html", myplaceholder="World")
    "<p>Hello World!</p>"
    """
    with open(template_path, mode="rt", encoding="UTF-8") as template_file:
        content = template_file.read()
    rendered = render(content, **variables)
    return rendered
def iter_maybe(thing) -> Iterable:
    """Return thing if it is a non-string Iterable, else wrap it in a tuple.

    Strings count as atoms, so a string is wrapped rather than iterated
    character by character.
    """
    is_atom = isinstance(thing, str) or not isinstance(thing, Iterable)
    return (thing,) if is_atom else thing
def human_sorted(iterable: Iterable, reverse: bool = False) -> list:
    """Sort such that numbers within strings are ordered by value.

    For example "a2" sorts before "a10". Items that are not strings are
    compared as they are.
    """
    # Adapted from https://nedbatchelder.com/blog/200712/human_sorting.html

    def alphanum_key(s):
        """Turn a string into a list of string and number chunks.

        >>> alphanum_key("z23a")
        ["z", 23, "a"]
        """
        try:
            chunks = re.split(r"(\d+)", s)
        except TypeError:
            # Not a string, so use the item itself as its sort key.
            return s
        return [int(chunk) if chunk.isdecimal() else chunk for chunk in chunks]

    return sorted(iterable, key=alphanum_key, reverse=reverse)
def combine_dicts(d1: dict, d2: dict) -> dict:
    """Recursively combines two dictionaries.

    Where both dictionaries hold a dictionary under the same key, the two
    are combined recursively. In every other case the value from the second
    dictionary is used, including when only one side holds a dictionary.

    Parameters
    ----------
    d1: dict
        Dictionary to update. It is modified in-place.
    d2: dict
        Dictionary whose values take priority. Values copied from it are
        shared with the result, not duplicated.

    Returns
    -------
    combined: dict
        The updated ``d1``. Keys new to it are added in ``d2``'s order.
    """
    for key, incoming in d2.items():
        # Only recurse when both sides are dictionaries; otherwise there is
        # nothing to merge and the second dictionary wins outright.
        if key in d1 and isinstance(d1[key], dict) and isinstance(incoming, dict):
            d1[key] = combine_dicts(d1[key], incoming)
        else:
            d1[key] = incoming
    return d1
def sort_dict(d: dict) -> dict:
    """Return a copy of a dictionary with its items sorted, recursively.

    Items are ordered with ``human_sorted``, and any value that is itself a
    dictionary is sorted in the same way.
    """
    # Thank you to https://stackoverflow.com/a/47882384
    ordered = {}
    for key, value in human_sorted(d.items()):
        ordered[key] = sort_dict(value) if isinstance(value, dict) else value
    return ordered
def sstrip(text):
    """Remove common leading indentation, then surrounding whitespace.

    Parameters
    ----------
    text: str
        The string to strip.

    Examples
    --------
    >>> print(sstrip('''
    ...     foo
    ...       bar
    ...     baz
    ... '''))
    foo
      bar
    baz
    """
    dedented = dedent(text)
    return dedented.strip()
def is_increasing(sequence: list) -> bool:
    """Report whether an ordered sequence runs upwards.

    Only the first two values are compared, so the sequence should already
    be monotonic with no duplicate values, and needs at least two items. An
    iris DimCoord's points fulfils this criteria.
    """
    first, second = sequence[0], sequence[1]
    return first < second