diff --git a/.cirrus.yml b/.cirrus.yml
index 5ad53a18..52b4e6aa 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -24,7 +24,7 @@ env:
   # Increment the build number to force new conda cache upload.
   CONDA_CACHE_BUILD: "1"
   # Increment the build number to force new nox cache upload.
-  NOX_CACHE_BUILD: "1"
+  NOX_CACHE_BUILD: "3"
   # Increment the build number to force new pip cache upload.
   PIP_CACHE_BUILD: "0"
   # Pip package to be installed.
@@ -153,7 +153,7 @@ benchmark_task:
     fi
   << : *LINUX_CONDA_TEMPLATE
   asv_cache:
-    folder: ${CIRRUS_WORKING_DIR}/benchmarks/.asv-env
+    folder: ${CIRRUS_WORKING_DIR}/benchmarks/.asv/env
    reupload_on_changes: true
    fingerprint_script:
      - echo "${CIRRUS_TASK_NAME}"
@@ -169,4 +169,4 @@ benchmark_task:
    - export CONDA_OVERRIDE_LINUX="$(uname -r | cut -d'+' -f1)"
    - nox --session=tests --install-only
    - export DATA_GEN_PYTHON=$(realpath $(find .nox -path "*tests*bin/python"))
-    - nox --session="benchmarks(ci compare)"
+    - nox --no-reuse-existing-virtualenvs --session="benchmarks(branch)" -- "${CIRRUS_BASE_SHA}"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4b4a018f..8b7d89e9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 - [PR#217](https://github.com/SciTools-incubator/iris-esmf-regrid/pull/217)
   Changed the behaviour of coordinate fetching to allow Cubes with both
   1D DimCoords and 2D AuxCoords. In this case the DimCoords are prioritised.
+- [PR#220](https://github.com/SciTools-incubator/iris-esmf-regrid/pull/220)
+  Matured the benchmarking architecture in line with the latest setup in
+  SciTools/iris.
 
 ## [0.5] - 2022-10-14
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 00000000..09936090
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,116 @@
+# iris-esmf-regrid Performance Benchmarking
+
+iris-esmf-regrid uses an
+[Airspeed Velocity](https://github.com/airspeed-velocity/asv)
+(ASV) setup to benchmark performance. This is primarily designed to check for
+performance shifts between commits using statistical analysis, but can also
+be easily repurposed for manual comparative and scalability analyses.
+
+The benchmarks are run as part of the CI (the `benchmark_task` in
+[`.cirrus.yml`](../.cirrus.yml)), with any notable shifts in performance
+raising a ❌ failure.
+
+## Running benchmarks
+
+`asv ...` commands must be run from this directory. You will need to have ASV
+installed, as well as Nox (see
+[Benchmark environments](#benchmark-environments)).
+
+[iris-esmf-regrid's noxfile](../noxfile.py) includes a `benchmarks` session
+that provides conveniences for setting up before benchmarking, and can also
+replicate the CI run locally. See the session docstring for details.
+
+### Environment variables
+
+* `DATA_GEN_PYTHON` - required - path to a Python executable that can be
+used to generate benchmark test objects/files; see
+[Data generation](#data-generation). The Nox session sets this automatically,
+but will defer to any value already set in the shell.
+* `BENCHMARK_DATA` - optional - path to a directory for benchmark synthetic
+test data, which the benchmark scripts will create if it doesn't already
+exist. Defaults to `benchmarks/.data/` (relative to the project root) if not
+set. Note that some of the generated files, especially in the 'SPerf' suite,
+are many GB in size, so plan accordingly.
+* `ON_DEMAND_BENCHMARKS` - optional - when set (to any value): benchmarks
+decorated with `@on_demand_benchmark` are included in the ASV run. Usually
+coupled with the ASV `--bench` argument to only run the benchmark(s) of
+interest. Is set during the Nox `sperf` session. Setting all three variables
+is sketched just below.
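+
+A minimal sketch of providing these from Python rather than the shell,
+mirroring what the Nox session does automatically (the paths here are
+hypothetical):
+
+```python
+from os import environ
+
+environ["DATA_GEN_PYTHON"] = "/path/to/.nox/tests/bin/python3.10"
+environ["BENCHMARK_DATA"] = "/big/disk/benchmark_data"
+environ["ON_DEMAND_BENCHMARKS"] = "1"  # any value counts as "set"
+```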
+
+### Reducing run time
+
+Before benchmarks are run on a commit, the benchmark environment is
+automatically aligned with the lock-file for that commit. You can
+significantly speed up any environment updates by co-locating the benchmark
+environment and your
+[Conda package cache](https://conda.io/projects/conda/en/latest/user-guide/configuration/use-condarc.html#specify-package-directories-pkgs-dirs)
+on the same [file system](https://en.wikipedia.org/wiki/File_system). This can
+be done in several ways:
+
+* Move your iris-esmf-regrid checkout, this being the default location for the
+  benchmark environment.
+* Move your package cache by editing
+  [`pkgs_dirs` in Conda config](https://conda.io/projects/conda/en/latest/user-guide/configuration/use-condarc.html#specify-package-directories-pkgs-dirs).
+* Move the benchmark environment by **locally** editing the environment path of
+  `delegated_env_commands` and `delegated_env_parent` in
+  [asv.conf.json](asv.conf.json).
+
+## Writing benchmarks
+
+[See the ASV docs](https://asv.readthedocs.io/) for full detail.
+
+### Data generation
+
+**Important:** be sure not to use the benchmarking environment to generate any
+test objects/files, as this environment changes with each commit being
+benchmarked, creating inconsistent benchmark 'conditions'. The
+[generate_data](./benchmarks/generate_data.py) module offers a
+solution; read more detail there.
+
+### ASV re-run behaviour
+
+Note that ASV re-runs a benchmark multiple times between calls to its
+`setup()` routine. This is a problem for benchmarking certain Iris operations
+such as data realisation, since the data will no longer be lazy after the
+first run. Consider writing extra steps to restore objects' original state
+_within_ the benchmark itself.
+
+If adding steps to the benchmark will skew the result too much then re-running
+can be disabled by setting an attribute on the benchmark: `number = 1`. To
+maintain result accuracy this should be accompanied by increasing the number of
+repeats _between_ `setup()` calls using the `repeat` attribute.
+`warmup_time = 0` is also advisable since ASV performs independent re-runs to
+estimate run-time, and these will still be subject to the original problem. A
+decorator is available for this - `@disable_repeat_between_setup` in
+[benchmarks init](./benchmarks/__init__.py). The attributes are combined in
+the sketch below.
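+
+A minimal sketch of the attributes working together (the helper name is
+hypothetical):
+
+```python
+class TimeRealisation:
+    number = 1  # don't re-run between setup() calls ...
+    repeat = 20  # ... instead repeat the whole setup()+benchmark cycle
+    warmup_time = 0  # skip ASV's independent timing re-runs
+
+    def setup(self):
+        self.cube = make_lazy_cube()  # hypothetical data-gen helper
+
+    def time_realisation(self):
+        self.cube.data  # only valid once per setup() - the result is cached
+```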
+
+### Scaling / non-Scaling Performance Differences
+
+When comparing performance between commits, file types, or anything else, it
+can be helpful to know whether the differences exist in scaling or non-scaling
+parts of the Iris functionality in question. This can be done using a size
+parameter, setting one value to be as small as possible (e.g. a scalar
+`Cube`), and the other to be significantly larger (e.g. a 1000x1000 `Cube`).
+Performance differences might only be seen for the larger value, or the
+smaller, or both, getting you closer to the root cause. A sketch of such a
+parameter follows.
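+
+For example (all names here are hypothetical):
+
+```python
+class TimeOperation:
+    params = [1, 1000]  # as-small-as-possible vs. significantly larger
+    param_names = ["grid width"]
+
+    def setup(self, n):
+        self.cube = make_cube(n, n)  # hypothetical data-gen helper
+
+    def time_operation(self, n):
+        operation_under_test(self.cube)  # hypothetical target
+```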
+
+### On-demand benchmarks
+
+Some benchmarks provide useful insight but are inappropriate to be included in
+a benchmark run by default, e.g. those with long run-times or requiring a local
+file. These benchmarks should be decorated with `@on_demand_benchmark`
+(see [benchmarks init](./benchmarks/__init__.py)), which
+sets the benchmark to only be included in a run when the `ON_DEMAND_BENCHMARKS`
+environment variable is set. Examples include the SPerf benchmark
+suite for the UK Met Office NG-VAT project. Usage is sketched below.
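+
+A minimal sketch (the class body is hypothetical; the import depth depends on
+where the benchmark module lives):
+
+```python
+from .. import on_demand_benchmark
+
+
+@on_demand_benchmark
+class TimeLocalFileLoad:
+    def time_load(self):
+        load_my_local_file()  # hypothetical - needs a file not in the repo
+```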
+
+## Benchmark environments
+
+We have disabled ASV's standard environment management, instead using an
+environment built using the same Nox scripts as iris-esmf-regrid's test
+environments. This is done using ASV's plugin architecture - see
+[asv_delegated_conda.py](asv_delegated_conda.py) and the extra config items in
+[asv.conf.json](asv.conf.json).
+
+(ASV is written to control the environment(s) that benchmarks are run in -
+minimising external factors and also allowing it to compare between a matrix
+of dependencies (each in a separate environment). We have chosen to sacrifice
+these features in favour of testing each commit with its intended
+dependencies, controlled by Nox + lock-files.)
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
index 2b261b52..46023fac 100644
--- a/benchmarks/asv.conf.json
+++ b/benchmarks/asv.conf.json
@@ -1,15 +1,27 @@
 {
     "version": 1,
     "project": "esmf_regrid",
-    "repo": "..",
-    "environment_type": "nox-conda",
-    "pythons": [],
-    "branches": ["main"],
-    "benchmark_dir": "benchmarks",
-    "env_dir": ".asv-env",
-    "results_dir": ".asv-results",
-    "html_dir": ".asv-html",
     "project_url": "https://github.com/SciTools-incubator/iris-esmf-regrid",
+    "repo": "..",
+    "environment_type": "conda-delegated",
     "show_commit_url": "https://github.com/SciTools-incubator/iris-esmf-regrid/commit/",
-    "plugins": [".nox_asv_plugin"],
+    "branches": ["upstream/main"],
+
+    "benchmark_dir": "./benchmarks",
+    "env_dir": ".asv/env",
+    "results_dir": ".asv/results",
+    "html_dir": ".asv/html",
+    "plugins": [".asv_delegated_conda"],
+
+    // The command(s) that create/update an environment correctly for the
+    // checked-out commit.
+    // Interpreted the same as build_command, with the following exceptions:
+    // * No build-time environment variables.
+    // * Is run in the same environment as the ASV install itself.
+    "delegated_env_commands": [
+        "PY_VER=3.10 nox --envdir={conf_dir}/.asv/env/nox01 --session=tests --install-only --no-error-on-external-run --verbose"
+    ],
+    // The parent directory of the above environment.
+    // The most recently modified environment in the directory will be used.
+    "delegated_env_parent": "{conf_dir}/.asv/env/nox01"
 }
diff --git a/benchmarks/asv_delegated_conda.py b/benchmarks/asv_delegated_conda.py
new file mode 100644
index 00000000..d274fad5
--- /dev/null
+++ b/benchmarks/asv_delegated_conda.py
@@ -0,0 +1,193 @@
+"""
+ASV plug-in providing an alternative :class:`asv.plugins.conda.Conda`
+subclass that manages the Conda environment via custom user scripts.
+
+"""
+
+from os import environ
+from os.path import getmtime
+from pathlib import Path
+from shutil import copy2, copytree, rmtree
+from tempfile import TemporaryDirectory
+
+from asv import util as asv_util
+from asv.config import Config
+from asv.console import log
+from asv.plugins.conda import Conda
+from asv.repo import Repo
+
+
+class CondaDelegated(Conda):
+    """
+    Manage a Conda environment using custom user scripts, run at each commit.
+
+    Ignores user input variations - ``matrix`` / ``pythons`` /
+    ``conda_environment_file``, since the environment is being managed
+    outside ASV.
+
+    Original environment creation behaviour is inherited, but upon checking out
+    a commit the custom script(s) are run and the original environment is
+    replaced with a symlink to the custom environment. This arrangement is then
+    re-used in subsequent runs.
+
+    """
+
+    tool_name = "conda-delegated"
+
+    def __init__(
+        self,
+        conf: Config,
+        python: str,
+        requirements: dict,
+        tagged_env_vars: dict,
+    ) -> None:
+        """
+        Parameters
+        ----------
+        conf : Config instance
+
+        python : str
+            Version of Python. Must be of the form "MAJOR.MINOR".
+
+        requirements : dict
+            Dictionary mapping a PyPI package name to a version
+            identifier string.
+
+        tagged_env_vars : dict
+            Environment variables, tagged for build vs. non-build.
+
+        """
+        ignored = ["`python`"]
+        if requirements:
+            ignored.append("`requirements`")
+        if tagged_env_vars:
+            ignored.append("`tagged_env_vars`")
+        if conf.conda_environment_file:
+            ignored.append("`conda_environment_file`")
+        message = (
+            f"Ignoring ASV setting(s): {', '.join(ignored)}. Benchmark "
+            "environment management is delegated to third party script(s)."
+        )
+        log.warning(message)
+        requirements = {}
+        tagged_env_vars = {}
+        conf.conda_environment_file = None
+
+        super().__init__(conf, python, requirements, tagged_env_vars)
+        self._update_info()
+
+        self._env_commands = self._interpolate_commands(conf.delegated_env_commands)
+        # Again using _interpolate_commands to get env parent path - allows use
+        # of the same ASV env variables.
+        env_parent_interpolated = self._interpolate_commands(conf.delegated_env_parent)
+        # Returns list of tuples, we just want the first.
+        env_parent_first = env_parent_interpolated[0]
+        # The 'command' is the first item in the returned tuple.
+        env_parent_string = " ".join(env_parent_first[0])
+        self._delegated_env_parent = Path(env_parent_string).resolve()
+
+    @property
+    def name(self):
+        """Get a name to uniquely identify this environment."""
+        return asv_util.sanitize_filename(self.tool_name)
+
+    def _update_info(self) -> None:
+        """Make sure class properties reflect the actual environment being used."""
+        # Follow symlink if it has been created.
+        actual_path = Path(self._path).resolve()
+        self._path = str(actual_path)
+
+        # Get custom environment's Python version if it exists yet.
+        try:
+            get_version = (
+                "from sys import version_info; "
+                "print(f'{version_info.major}.{version_info.minor}')"
+            )
+            actual_python = self.run(["-c", get_version])
+            self._python = actual_python
+        except OSError:
+            pass
+
+    def _prep_env(self) -> None:
+        """Run the custom environment script(s) and switch to using that environment."""
+        message = f"Running delegated environment management for: {self.name}"
+        log.info(message)
+        env_path = Path(self._path)
+
+        def copy_asv_files(src_parent: Path, dst_parent: Path) -> None:
+            """For copying between self._path and a temporary cache."""
+            asv_files = list(src_parent.glob("asv*"))
+            # build_root_path.name usually == "project" .
+            asv_files += [src_parent / Path(self._build_root).name]
+            for src_path in asv_files:
+                dst_path = dst_parent / src_path.name
+                if not dst_path.exists():
+                    # Only caching in case the environment has been rebuilt.
+                    #  If the dst_path already exists: rebuilding hasn't
+                    #  happened. Also a non-issue when copying in the reverse
+                    #  direction because the cache dir is temporary.
+                    if src_path.is_dir():
+                        func = copytree
+                    else:
+                        func = copy2
+                    func(src_path, dst_path)
+
+        with TemporaryDirectory(prefix="delegated_asv_cache_") as asv_cache:
+            asv_cache_path = Path(asv_cache)
+            # Cache all of ASV's files as the delegated command may remove and
+            #  re-build the environment.
+            copy_asv_files(env_path.resolve(), asv_cache_path)
+
+            # Adapt the build_dir to the cache location.
+            build_root_path = Path(self._build_root)
+            build_dir_original = build_root_path / self._repo_subdir
+            build_dir_subpath = build_dir_original.relative_to(build_root_path.parent)
+            build_dir = asv_cache_path / build_dir_subpath
+
+            # Run the script(s) for delegated environment creation/updating.
+            # (An adaptation of self._interpolate_and_run_commands).
+            for command, env, return_codes, cwd in self._env_commands:
+                local_envs = dict(environ)
+                local_envs.update(env)
+                if cwd is None:
+                    cwd = str(build_dir)
+                _ = asv_util.check_output(
+                    command,
+                    timeout=self._install_timeout,
+                    cwd=cwd,
+                    env=local_envs,
+                    valid_return_codes=return_codes,
+                )
+
+            # Replace the env that ASV created with a symlink to the env
+            #  created/updated by the custom script.
+            delegated_env_path = sorted(
+                self._delegated_env_parent.glob("*"),
+                key=getmtime,
+                reverse=True,
+            )[0]
+            if env_path.resolve() != delegated_env_path:
+                try:
+                    env_path.unlink(missing_ok=True)
+                except IsADirectoryError:
+                    rmtree(env_path)
+                env_path.symlink_to(delegated_env_path, target_is_directory=True)
+
+            # Check that the environment exists.
+            try:
+                env_path.resolve(strict=True)
+            except FileNotFoundError:
+                message = f"Path does not resolve to environment: {env_path}"
+                log.error(message)
+                raise RuntimeError(message)
+
+            # Restore ASV's files from the cache (if necessary).
+            copy_asv_files(asv_cache_path, env_path.resolve())
+
+        # Record new environment information in properties.
+        self._update_info()
+
+    def checkout_project(self, repo: Repo, commit_hash: str) -> None:
+        """Check out the working tree of the project at given commit hash."""
+        super().checkout_project(repo, commit_hash)
+        self._prep_env()
+        log.info(f"Environment {self.name} updated to spec at {commit_hash[:8]}")
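For orientation: ASV selects an environment class by matching the
`environment_type` value in `asv.conf.json` against each plugin class's
`tool_name`, which is how the `"conda-delegated"` setting above reaches this
subclass. The whole plugin/config contract, reduced to a sketch:

```python
from asv.plugins.conda import Conda


class CondaDelegated(Conda):
    tool_name = "conda-delegated"  # == "environment_type" in asv.conf.json
```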
diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py
index c99c91de..1804f935 100644
--- a/benchmarks/benchmarks/__init__.py
+++ b/benchmarks/benchmarks/__init__.py
@@ -1,6 +1,9 @@
 """Benchmark tests for iris-esmf-regrid"""
 
+from os import environ
+
+
 def disable_repeat_between_setup(benchmark_object):
     """
     Decorator for benchmarks where object persistence would be inappropriate.
@@ -30,15 +33,38 @@ def disable_repeat_between_setup(benchmark_object):
 def skip_benchmark(benchmark_object):
     """
     Decorator for benchmarks skipping benchmarks.
+
+    Simply doesn't return the object.
+
+    Warnings
+    --------
+    ASV's architecture means decorated classes cannot be sub-classed. Code for
+    inheritance should be in a mixin class that doesn't include any methods
+    which ASV will recognise as benchmarks
+    (e.g. ``def time_something(self):`` ).
+
+    """
+    pass
+
+
+def on_demand_benchmark(benchmark_object):
     """
+    Decorator. Disables these benchmark(s) unless the ON_DEMAND_BENCHMARKS env var is set.
 
-    def setup_cache(self):
-        pass
+    For benchmarks that, for whatever reason, should not be run by default.
+    E.g.:
+    * Require a local file
+    * Used for scalability analysis instead of commit monitoring.
 
-    def setup(*args):
-        raise NotImplementedError
+    Can be applied to benchmark classes/methods/functions.
 
-    benchmark_object.setup_cache = setup_cache
-    benchmark_object.setup = setup
+    Warnings
+    --------
+    ASV's architecture means decorated classes cannot be sub-classed. Code for
+    inheritance should be in a mixin class that doesn't include any methods
+    which ASV will recognise as benchmarks
+    (e.g. ``def time_something(self):`` ).
 
-    return benchmark_object
+    """
+    if "ON_DEMAND_BENCHMARKS" in environ:
+        return benchmark_object
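The 'Warnings' sections above are why this diff later rewrites the scalability
benchmarks around mixins: the decorators return either the class or `None`, so
a decorated class cannot usefully be subclassed. The pattern (used by
`PrepareScalabilityMixin` and friends further down), reduced to a sketch:

```python
class PrepareScalabilityMixin:
    # Shared logic lives here, under names ASV will not treat as benchmarks.
    def _time_prepare(self, n):
        ...


@on_demand_benchmark
class PrepareScalabilityGridToGrid(PrepareScalabilityMixin):
    # The ASV-visible method lives on the decorated leaf class.
    def time_prepare(self, n):
        super()._time_prepare(n)
```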
diff --git a/benchmarks/benchmarks/ci/__init__.py b/benchmarks/benchmarks/ci/__init__.py
deleted file mode 100644
index d9a29402..00000000
--- a/benchmarks/benchmarks/ci/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Quick running benchmarks to be included in iris-esmf-regrid's CI."""
diff --git a/benchmarks/benchmarks/ci/esmf_regridder.py b/benchmarks/benchmarks/esmf_regridder/__init__.py
similarity index 96%
rename from benchmarks/benchmarks/ci/esmf_regridder.py
rename to benchmarks/benchmarks/esmf_regridder/__init__.py
index 3a0efc01..743507da 100644
--- a/benchmarks/benchmarks/ci/esmf_regridder.py
+++ b/benchmarks/benchmarks/esmf_regridder/__init__.py
@@ -1,4 +1,4 @@
-"""Quick running benchmarks for :mod:`esmf_regrid.esmf_regridder`."""
+"""Benchmarks for :mod:`esmf_regrid.esmf_regridder`."""
 
 import os
 from pathlib import Path
@@ -9,7 +9,6 @@
 from iris.cube import Cube
 from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD
 
-from benchmarks import disable_repeat_between_setup
 from esmf_regrid.esmf_regridder import GridInfo
 from esmf_regrid.experimental.unstructured_scheme import (
     GridToMeshESMFRegridder,
@@ -124,7 +123,6 @@ def time_perform_regridding(self, tp):
         _ = self.regridder(self.src)
 
 
-@disable_repeat_between_setup
 class TimeLazyRegridding:
     def setup_cache(self):
         SYNTH_DATA_DIR = Path().cwd() / "tmp_data"
@@ -172,8 +170,10 @@ def time_lazy_regridding(self, cache):
         _ = regridder(self.src)
 
     def time_regridding_realisation(self, cache):
+        # Don't touch result.data - permanent realisation plays badly with
+        # ASV's re-run strategy.
         assert self.result.has_lazy_data()
-        _ = self.result.data
+        self.result.core_data().compute()
 
 
 class TimeMeshToGridRegridding(TimeRegridding):
@@ -208,7 +208,6 @@ def setup(self, tp):
         self.tgt = tgt
 
 
-@disable_repeat_between_setup
 class TimeLazyMeshToGridRegridding:
     def setup_cache(self):
         SYNTH_DATA_DIR = Path().cwd() / "tmp_data"
@@ -252,8 +251,10 @@ def time_lazy_regridding(self, cache):
         _ = regridder(self.src)
 
     def time_regridding_realisation(self, cache):
+        # Don't touch result.data - permanent realisation plays badly with
+        # ASV's re-run strategy.
         assert self.result.has_lazy_data()
-        _ = self.result.data
+        self.result.core_data().compute()
 
 
 class TimeGridToMeshRegridding(TimeRegridding):
@@ -288,7 +289,6 @@ def setup(self, tp):
         self.tgt = tgt
 
 
-@disable_repeat_between_setup
 class TimeLazyGridToMeshRegridding:
     def setup_cache(self):
         SYNTH_DATA_DIR = Path().cwd() / "tmp_data"
@@ -328,8 +328,10 @@ def time_lazy_regridding(self, cache):
         _ = regridder(self.src)
 
     def time_regridding_realisation(self, cache):
+        # Don't touch result.data - permanent realisation plays badly with
+        # ASV's re-run strategy.
         assert self.result.has_lazy_data()
-        _ = self.result.data
+        self.result.core_data().compute()
 
 
 class TimeRegridderIO(MultiGridCompare):
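The repeated `self.result.data` to `self.result.core_data().compute()` change
above is the behavioural fix here: `.data` caches the realised values on the
cube, so every ASV re-run after the first would time an effectively empty
operation. A minimal sketch of the distinction:

```python
import dask.array as da
from iris.cube import Cube

cube = Cube(da.zeros((1000, 1000), chunks=(100, 100)))

cube.core_data().compute()  # computes, but discards the result ...
assert cube.has_lazy_data()  # ... leaving the cube lazy for the next repeat

_ = cube.data  # realises AND caches the values on the cube
assert not cube.has_lazy_data()
```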
diff --git a/benchmarks/benchmarks/long/esmf_regridder.py b/benchmarks/benchmarks/esmf_regridder/scalability.py
similarity index 80%
rename from benchmarks/benchmarks/long/esmf_regridder.py
rename to benchmarks/benchmarks/esmf_regridder/scalability.py
index 1a655587..7500cd59 100644
--- a/benchmarks/benchmarks/long/esmf_regridder.py
+++ b/benchmarks/benchmarks/esmf_regridder/scalability.py
@@ -1,4 +1,4 @@
-"""Slower benchmarks for :mod:`esmf_regrid.esmf_regridder`."""
+"""Scalability benchmarks for :mod:`esmf_regrid.esmf_regridder`."""
 
 import os
 from pathlib import Path
@@ -8,17 +8,18 @@
 import iris
 from iris.cube import Cube
 
-from benchmarks import disable_repeat_between_setup, skip_benchmark
 from esmf_regrid.experimental.io import load_regridder, save_regridder
 from esmf_regrid.experimental.unstructured_scheme import (
     GridToMeshESMFRegridder,
     MeshToGridESMFRegridder,
 )
 from esmf_regrid.schemes import ESMFAreaWeightedRegridder
+
+from .. import on_demand_benchmark, skip_benchmark
 from ..generate_data import _grid_cube, _gridlike_mesh_cube
 
 
-class PrepareScalabilityGridToGrid:
+class PrepareScalabilityMixin:
     timeout = 180
     params = [50, 100, 200, 400, 600, 800]
     param_names = ["grid width"]
@@ -41,11 +42,18 @@ def setup(self, n):
         self.src = self.src_cube(n)
         self.tgt = self.tgt_cube(n)
 
-    def time_prepare(self, n):
+    def _time_prepare(self, n):
         _ = self.regridder(self.src, self.tgt)
 
 
-class PrepareScalabilityMeshToGrid(PrepareScalabilityGridToGrid):
+@on_demand_benchmark
+class PrepareScalabilityGridToGrid(PrepareScalabilityMixin):
+    def time_prepare(self, n):
+        super()._time_prepare(n)
+
+
+@on_demand_benchmark
+class PrepareScalabilityMeshToGrid(PrepareScalabilityMixin):
     regridder = MeshToGridESMFRegridder
 
     def src_cube(self, n):
@@ -82,10 +90,11 @@ def time_save(self, _, n):
         save_regridder(self.rg, self.destination_file)
 
     def time_prepare(self, _, n):
-        super().time_prepare(n)
+        super()._time_prepare(n)
 
 
-class PrepareScalabilityGridToMesh(PrepareScalabilityGridToGrid):
+@on_demand_benchmark
+class PrepareScalabilityGridToMesh(PrepareScalabilityMixin):
    regridder = GridToMeshESMFRegridder
 
    def tgt_cube(self, n):
@@ -122,11 +131,10 @@ def time_save(self, _, n):
         save_regridder(self.rg, self.destination_file)
 
     def time_prepare(self, _, n):
-        super().time_prepare(n)
+        super()._time_prepare(n)
 
 
-@disable_repeat_between_setup
-class PerformScalabilityGridToGrid:
+class PerformScalabilityMixin:
     params = [100, 200, 400, 600, 800, 1000]
     param_names = ["height"]
     grid_size = 400
@@ -185,19 +193,31 @@ def setup(self, cache, height):
         cube = self.add_src_metadata(cube)
         self.result = regridder(cube)
 
-    def time_perform(self, cache, height):
+    def _time_perform(self, cache, height):
         assert not self.src.has_lazy_data()
         rg, _ = cache
         _ = rg(self.src)
 
-    def time_lazy_perform(self, cache, height):
+    def _time_lazy_perform(self, cache, height):
+        # Don't touch result.data - permanent realisation plays badly with
+        # ASV's re-run strategy.
         assert self.result.has_lazy_data()
-        _ = self.result.data
+        self.result.core_data().compute()
 
 
-class PerformScalabilityMeshToGrid(PerformScalabilityGridToGrid):
+@on_demand_benchmark
+class PerformScalabilityGridToGrid(PerformScalabilityMixin):
+    def time_perform(self, cache, height):
+        super()._time_perform(cache, height)
+
+    def time_lazy_perform(self, cache, height):
+        super()._time_lazy_perform(cache, height)
+
+
+@on_demand_benchmark
+class PerformScalabilityMeshToGrid(PerformScalabilityMixin):
     regridder = MeshToGridESMFRegridder
-    chunk_size = [PerformScalabilityGridToGrid.grid_size ** 2, 10]
+    chunk_size = [PerformScalabilityMixin.grid_size ** 2, 10]
     file_name = "chunked_cube_1d.nc"
 
     def setup_cache(self):
@@ -221,8 +241,15 @@ def add_src_metadata(self, cube):
         cube.add_aux_coord(mesh_coord_y, 0)
         return cube
 
+    def time_perform(self, cache, height):
+        super()._time_perform(cache, height)
+
+    def time_lazy_perform(self, cache, height):
+        super()._time_lazy_perform(cache, height)
+
 
-class PerformScalabilityGridToMesh(PerformScalabilityGridToGrid):
+@on_demand_benchmark
+class PerformScalabilityGridToMesh(PerformScalabilityMixin):
     regridder = GridToMeshESMFRegridder
 
     def setup_cache(self):
@@ -240,11 +267,17 @@ def tgt_cube(self):
         tgt.add_aux_coord(mesh_coord_y, 0)
         return tgt
 
+    def time_perform(self, cache, height):
+        super()._time_perform(cache, height)
+
+    def time_lazy_perform(self, cache, height):
+        super()._time_lazy_perform(cache, height)
+
 
 # These benchmarks are unusually long and resource intensive so are skipped.
 # They can be run by manually removing the skip.
 @skip_benchmark
-class PerformScalability1kGridToGrid(PerformScalabilityGridToGrid):
+class PerformScalability1kGridToGrid(PerformScalabilityMixin):
     timeout = 600
     grid_size = 1100
     chunk_size = [grid_size, grid_size, 10]
@@ -257,11 +290,17 @@ class PerformScalability1kGridToGrid(PerformScalabilityGridToGrid):
     def setup_cache(self):
         return super().setup_cache()
 
+    def time_perform(self, cache, height):
+        super()._time_perform(cache, height)
+
+    def time_lazy_perform(self, cache, height):
+        super()._time_lazy_perform(cache, height)
+
 
 # These benchmarks are unusually long and resource intensive so are skipped.
 # They can be run by manually removing the skip.
 @skip_benchmark
-class PerformScalability2kGridToGrid(PerformScalabilityGridToGrid):
+class PerformScalability2kGridToGrid(PerformScalabilityMixin):
     timeout = 600
     grid_size = 2200
     chunk_size = [grid_size, grid_size, 10]
@@ -273,3 +312,9 @@ class PerformScalability2kGridToGrid(PerformScalabilityMixin):
     def setup_cache(self):
         return super().setup_cache()
+
+    def time_perform(self, cache, height):
+        super()._time_perform(cache, height)
+
+    def time_lazy_perform(self, cache, height):
+        super()._time_lazy_perform(cache, height)
diff --git a/benchmarks/benchmarks/generate_data.py b/benchmarks/benchmarks/generate_data.py
index ac1029ea..467399bf 100644
--- a/benchmarks/benchmarks/generate_data.py
+++ b/benchmarks/benchmarks/generate_data.py
@@ -17,6 +17,7 @@
 from pathlib import Path
 import re
 from textwrap import dedent
+from warnings import warn
 
 from iris import load_cube
 from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD
@@ -35,15 +36,23 @@
     error = "Env variable DATA_GEN_PYTHON not a runnable python executable path."
     raise ValueError(error)
 
+# The default location of data files used in benchmarks. Used by CI.
 default_data_dir = (Path(__file__).parent.parent / ".data").resolve()
+# Optionally override the default data location with an environment variable.
 BENCHMARK_DATA = Path(environ.get("BENCHMARK_DATA", default_data_dir))
 if BENCHMARK_DATA == default_data_dir:
     BENCHMARK_DATA.mkdir(exist_ok=True)
+    message = (
+        f"No BENCHMARK_DATA env var, defaulting to {BENCHMARK_DATA}. "
+        "Note that some benchmark files are GB in size."
+    )
+    warn(message)
 elif not BENCHMARK_DATA.is_dir():
     message = f"Not a directory: {BENCHMARK_DATA} ."
     raise ValueError(message)
 
-# Flag to allow the rebuilding of synthetic data.
+# Manual flag to allow the rebuilding of synthetic data.
+# False forces a benchmark run to re-make all the data files.
 REUSE_DATA = True
@@ -74,6 +83,7 @@ def run_function_elsewhere(func_to_run, *args, **kwargs):
     """
     func_string = dedent(getsource(func_to_run))
+    func_string = func_string.replace("@staticmethod\n", "")
     func_call_term_strings = [repr(arg) for arg in args]
     func_call_term_strings += [f"{name}={repr(val)}" for name, val in kwargs.items()]
     func_call_string = (
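For context on the module being edited here: `run_function_elsewhere` lifts a
function's source and executes it in the `DATA_GEN_PYTHON` interpreter, so the
function must be fully self-contained. A hypothetical usage:

```python
def _make_file(path, n):
    # Runs in the DATA_GEN_PYTHON interpreter - imports must live inside.
    import numpy as np
    from iris import save
    from iris.cube import Cube

    save(Cube(np.zeros((n, n))), path)


run_function_elsewhere(_make_file, "/tmp/benchmark_input.nc", 100)
```

The new `replace("@staticmethod\n", "")` line above strips that decorator so
such functions can also be lifted from inside benchmark classes.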
diff --git a/benchmarks/benchmarks/long/__init__.py b/benchmarks/benchmarks/long/__init__.py
deleted file mode 100644
index 49b2c37a..00000000
--- a/benchmarks/benchmarks/long/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Slower benchmarks for iris-esmf-regrid, to only be included on a 'full' run."""
diff --git a/benchmarks/nox_asv.conf.yaml b/benchmarks/nox_asv.conf.yaml
deleted file mode 100644
index 157bdb5c..00000000
--- a/benchmarks/nox_asv.conf.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-# The commit to checkout to first run Nox to set up the environment.
-setup_commit: "HEAD"
-# The path of the noxfile's location relative to the project root.
-noxfile_rel_path: "noxfile.py"
-# The --session arg to be used with --install-only to prep an environment.
-session_name: "tests"
diff --git a/benchmarks/nox_asv_plugin.py b/benchmarks/nox_asv_plugin.py
deleted file mode 100644
index 16062327..00000000
--- a/benchmarks/nox_asv_plugin.py
+++ /dev/null
@@ -1,257 +0,0 @@
-"""
-ASV plug-in providing an alternative ``Environment`` subclass, which uses Nox
-for environment management.
-
-"""
-from importlib.util import find_spec
-from pathlib import Path
-from shutil import copy2, copytree
-from tempfile import TemporaryDirectory
-
-from nox.sessions import _normalize_path
-import yaml
-
-from asv.config import Config
-from asv.console import log
-from asv.plugins.conda import Conda, _find_conda
-from asv.repo import get_repo, Repo
-from asv import util as asv_util
-
-
-# Fetch config variables.
-with Path("nox_asv.conf.yaml").open("r") as file:
-    config = yaml.load(file, Loader=yaml.Loader)
-#: The commit to checkout to first run Nox to set up the environment.
-#: See ``nox_asv.conf.yaml``.
-SETUP_COMMIT: str = config["setup_commit"]
-#: The path of the noxfile's location relative to the project root.
-#: See ``nox_asv.conf.yaml``.
-NOXFILE_REL_PATH: str = config["noxfile_rel_path"]
-#: The ``--session`` arg to be used with ``--install-only`` to prep an environment.
-#: See ``nox_asv.conf.yaml``.
-SESSION_NAME: str = config["session_name"]
-
-
-class NoxConda(Conda):
-    """
-    Manage a Conda environment using Nox, updating environment at each commit.
-
-    Defers environment management to the project's noxfile, which must be able
-    to create/update the benchmarking environment using ``nox --install-only``,
-    with the ``--session`` specified in :const:`SESSION_NAME` (from
-    ``nox_asv_conf.yaml``).
-
-    Notes
-    -----
-    If not all benchmarked commits support this use of Nox: the plugin will
-    need to be modified to prep the environment in other ways.
-
-    """
-
-    tool_name = "nox-conda"
-
-    @classmethod
-    def matches(cls, python: str) -> bool:
-        """Used by ASV to work out if this type of environment can be used."""
-        result = find_spec("nox") is not None
-        if result:
-            result = super().matches(python)
-
-        if result:
-            message = (
-                f"NOTE: ASV env match check incomplete. Not possible to know "
-                f"if ``nox --session={SESSION_NAME}`` is compatible with "
-                f"``--python={python}`` until project is checked out."
-            )
-            log.warning(message)
-
-        return result
-
-    def __init__(self, conf: Config, python: str, requirements: dict) -> None:
-        """
-        Parameters
-        ----------
-        conf : Config instance
-
-        python : str
-            Version of Python. Must be of the form "MAJOR.MINOR".
-
-        requirements : dict
-            Dictionary mapping a PyPI package name to a version
-            identifier string.
-
-        """
-        # Need to checkout the project BEFORE the benchmark run - to access a noxfile.
-        self.project_temp_checkout = TemporaryDirectory(prefix="nox_asv_checkout_")
-        repo = get_repo(conf)
-        repo.checkout(self.project_temp_checkout.name, SETUP_COMMIT)
-        self.setup_noxfile = Path(self.project_temp_checkout.name) / NOXFILE_REL_PATH
-
-        # Some duplication of parent code - need these attributes BEFORE
-        # running inherited code.
-        self._python = python
-        self._requirements = requirements
-        self._env_dir = conf.env_dir
-
-        # Prepare the actual environment path, to override self._path.
-        nox_envdir = str(Path(self._env_dir).absolute() / self.hashname)
-        nox_friendly_name = self._get_nox_session_name(python)
-        self._nox_path = Path(_normalize_path(nox_envdir, nox_friendly_name))
-
-        # For storing any extra conda requirements from asv.conf.json.
-        self._extra_reqs_path = self._nox_path / "asv-extra-reqs.yaml"
-
-        super().__init__(conf, python, requirements)
-
-    @property
-    def _path(self) -> str:
-        """
-        Using a property to override getting and setting in parent classes -
-        unable to modify parent classes as this is a plugin.
-
-        """
-        return str(self._nox_path)
-
-    @_path.setter
-    def _path(self, value) -> None:
-        """Enforce overriding of this variable by disabling modification."""
-        pass
-
-    def _get_nox_session_name(self, python: str) -> str:
-        nox_cmd_substring = (
-            f"--noxfile={self.setup_noxfile} "
-            f"--session={SESSION_NAME} "
-            f"--python={python}"
-        )
-
-        list_output = asv_util.check_output(
-            ["nox", "--list", *nox_cmd_substring.split(" ")],
-            display_error=False,
-            dots=False,
-        )
-        list_output = list_output.split("\n")
-        list_matches = list(filter(lambda s: s.startswith("*"), list_output))
-        matches_count = len(list_matches)
-
-        if matches_count == 0:
-            message = f"No Nox sessions found for: {nox_cmd_substring} ."
-            log.error(message)
-        elif matches_count > 1:
-            message = f"Ambiguous - >1 Nox session found for: {nox_cmd_substring} ."
-            log.error(message)
-        else:
-            line = list_matches[0]
-            session_name = line.split(" ")[1]
-            return session_name
-
-    def _nox_prep_env(self, setup: bool = False) -> None:
-        message = f"Running Nox environment update for: {self.name}"
-        log.info(message)
-
-        build_root_path = Path(self._build_root)
-        env_path = Path(self._path)
-
-        def copy_asv_files(src_parent: Path, dst_parent: Path) -> None:
-            """For copying between self._path and a temporary cache."""
-            asv_files = list(src_parent.glob("asv*"))
-            # build_root_path.name usually == "project" .
-            asv_files += [src_parent / build_root_path.name]
-            for src_path in asv_files:
-                dst_path = dst_parent / src_path.name
-                if not dst_path.exists():
-                    # Only cache-ing in case Nox has rebuilt the env @
-                    # self._path. If the dst_path already exists: rebuilding
-                    # hasn't happened. Also a non-issue when copying in the
-                    # reverse direction because the cache dir is temporary.
-                    if src_path.is_dir():
-                        func = copytree
-                    else:
-                        func = copy2
-                    func(src_path, dst_path)
-
-        with TemporaryDirectory(prefix="nox_asv_cache_") as asv_cache:
-            asv_cache_path = Path(asv_cache)
-            if setup:
-                noxfile_path = self.setup_noxfile
-            else:
-                # Cache all of ASV's files as Nox may remove and re-build the environment.
-                copy_asv_files(env_path, asv_cache_path)
-                # Get location of noxfile in cache.
-                noxfile_path_build = (
-                    build_root_path / self._repo_subdir / NOXFILE_REL_PATH
-                )
-                noxfile_path = asv_cache_path / noxfile_path_build.relative_to(
-                    build_root_path.parent
-                )
-
-            nox_cmd = [
-                "nox",
-                f"--noxfile={noxfile_path}",
-                f"--envdir={env_path.parent}",
-                f"--session={SESSION_NAME}",
-                f"--python={self._python}",
-                "--install-only",
-                "--no-error-on-external-run",
-                "--verbose",
-            ]
-
-            _ = asv_util.check_output(nox_cmd)
-            if not env_path.is_dir():
-                message = f"Expected Nox environment not found: {env_path}"
-                log.error(message)
-
-            if not setup:
-                # Restore ASV's files from the cache (if necessary).
-                copy_asv_files(asv_cache_path, env_path)
-
-        if (not setup) and self._extra_reqs_path.is_file():
-            # No need during initial ASV setup - this will be run again before
-            # any benchmarks are run.
-            cmd = f"{self.conda} env update -f {self._extra_reqs_path} -p {env_path}"
-            asv_util.check_output(cmd.split(" "))
-
-    def _setup(self) -> None:
-        """Used for initial environment creation - mimics parent method where possible."""
-        try:
-            self.conda = _find_conda()
-        except IOError as e:
-            raise asv_util.UserError(str(e))
-        if find_spec("nox") is None:
-            raise asv_util.UserError("Module not found: nox")
-
-        message = f"Creating Nox-Conda environment for {self.name} ."
-        log.info(message)
-
-        try:
-            self._nox_prep_env(setup=True)
-        except Exception:
-            raise
-        finally:
-            # No longer need the setup checkout now that the environment has been built.
-            self.project_temp_checkout.cleanup()
-
-        # Create an environment.yml file from the requirements in asv.conf.json.
-        # No default dependencies to specify - unlike parent - because Nox
-        # includes these by default.
-        conda_args, pip_args = self._get_requirements(self.conda)
-        if conda_args or pip_args:
-            with self._extra_reqs_path.open("w") as req_file:
-                req_file.write(f"name: {self.name}\n")
-                req_file.write("channels:\n")
-                req_file.writelines(
-                    [f"  - {channel}\n" for channel in self._conda_channels]
-                )
-                req_file.write("dependencies:\n")
-
-                # Categorise and write dependencies based on pip vs. conda.
-                req_file.writelines([f"  - {package}\n" for package in conda_args])
-                if pip_args:
-                    # And now specify the packages that are to be installed in the
-                    # pip subsection.
-                    req_file.write("  - pip:\n")
-                    req_file.writelines([f"    - {package}\n" for package in pip_args])
-
-    def checkout_project(self, repo: Repo, commit_hash: str) -> None:
-        """Check out the working tree of the project at given commit hash."""
-        super().checkout_project(repo, commit_hash)
-        self._nox_prep_env()
diff --git a/noxfile.py b/noxfile.py
index ca2bee6d..440e3ddd 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -5,9 +5,11 @@
 
 """
 
+from datetime import datetime
 import os
 from pathlib import Path
 import shutil
+from typing import Literal
 from urllib.request import urlopen
 
 import nox
@@ -285,7 +287,7 @@ def flake8(session: nox.sessions.Session):
     """
     # Pip install the session requirements.
-    session.install("flake8", "flake8-docstrings", "flake8-import-order")
+    session.install("flake8<6", "flake8-docstrings", "flake8-import-order")
     # Execute the flake8 linter on the package.
     session.run("flake8", PACKAGE)
     # Execute the flake8 linter on this file.
@@ -335,80 +337,171 @@ def tests(session: nox.sessions.Session):
         session.run("pytest")
 
 
-@nox.session(python=PY_VER, venv_backend="conda")
+@nox.session
 @nox.parametrize(
-    ["ci_mode", "long_mode", "gh_pages"],
-    [
-        (True, False, False),
-        (False, False, False),
-        (False, False, True),
-        (False, True, False),
-    ],
-    ids=["ci compare", "full", "full then publish", "long snapshot"],
+    "run_type",
+    ["branch", "sperf", "custom"],
+    ids=["branch", "sperf", "custom"],
 )
 def benchmarks(
-    session: nox.sessions.Session, ci_mode: bool, long_mode: bool, gh_pages: bool
+    session: nox.sessions.Session,
+    run_type: Literal["branch", "sperf", "custom"],
 ):
     """
-    Perform esmf-regrid performance benchmarks (using Airspeed Velocity).
+    Perform iris-esmf-regrid performance benchmarks (using Airspeed Velocity).
+
+    All run types require a single Nox positional argument (e.g.
+    ``nox --session="foo" -- my_pos_arg``) - detailed in the parameters
+    section - and can optionally accept a series of further arguments that
+    will be added to the session's ASV command.
 
     Parameters
     ----------
     session: object
         A `nox.sessions.Session` object.
-    ci_mode: bool
-        Run a cut-down selection of benchmarks, comparing the current commit to
-        the last commit for performance regressions.
-    long_mode: bool
-        Run the long running benchmarks at the current head of the repo.
-    gh_pages: bool
-        Run ``asv gh-pages --rewrite`` once finished.
-
-    Notes
-    -----
-    ASV is set up to use ``nox --session=tests --install-only`` to prepare
-    the benchmarking environment.
+    run_type: {"branch", "sperf", "custom"}
+        * ``branch``: compares ``HEAD`` and ``HEAD``'s merge-base with the
+          input **base branch**. Fails if a performance regression is
+          detected. This is the session used by iris-esmf-regrid's CI.
+        * ``sperf``: run the on-demand SPerf suite of benchmarks (part of the
+          UK Met Office NG-VAT project) for the ``HEAD`` of ``upstream/main``
+          only, and publish the results to the input **publish directory**,
+          within a unique subdirectory for this run.
+        * ``custom``: run ASV with the input **ASV sub-command**, without any
+          preset arguments - these must all be supplied by the user. So just
+          like running ASV manually, with the convenience of re-using the
+          session's scripted setup steps.
+
+    Examples
+    --------
+    * ``nox --session="benchmarks(branch)" -- upstream/main``
+    * ``nox --session="benchmarks(branch)" -- upstream/mesh-data-model``
+    * ``nox --session="benchmarks(branch)" -- upstream/main --bench=esmf_regridder``
+    * ``nox --session="benchmarks(sperf)" -- my_publish_dir``
+    * ``nox --session="benchmarks(custom)" -- continuous a1b23d4 HEAD --quick``
 
     """
+    # Make sure we're not working with a list of Python versions.
+    if not isinstance(PY_VER, str):
+        message = (
+            "benchmarks session requires PY_VER to be a string - representing "
+            f"a single Python version - instead got: {type(PY_VER)} ."
+        )
+        raise ValueError(message)
+
+    # The threshold beyond which shifts are 'notable'. See ``asv compare``
+    # docs for more.
+    COMPARE_FACTOR = 2.0
+
     session.install("asv", "nox", "pyyaml")
-    if "DATA_GEN_PYTHON" in os.environ:
+
+    data_gen_var = "DATA_GEN_PYTHON"
+    if data_gen_var in os.environ:
         print("Using existing data generation environment.")
     else:
         print("Setting up the data generation environment...")
-        session.run(
-            "nox", "--session=tests", "--install-only", f"--python={session.python}"
+        # Get Nox to build an environment for the `tests` session, but don't
+        # run the session. Will re-use a cached environment if appropriate.
+        session.run_always(
+            "nox",
+            "--session=tests",
+            "--install-only",
+            f"--python={PY_VER}",
         )
+        # Find the environment built above, set it to be the data generation
+        # environment.
         data_gen_python = next(
-            Path(".nox").rglob(f"tests*/bin/python{session.python}")
+            Path(".nox").rglob(f"tests*/bin/python{PY_VER}")
         ).resolve()
-        session.env["DATA_GEN_PYTHON"] = data_gen_python
+        session.env[data_gen_var] = data_gen_python
 
     print("Running ASV...")
     session.cd("benchmarks")
     # Skip over setup questions for a new machine.
     session.run("asv", "machine", "--yes")
 
-    def asv_exec(*sub_args: str) -> None:
-        run_args = ["asv", *sub_args]
-        help_output = session.run(*run_args, "--help", silent=True)
-        if "--python" in help_output:
-            # Not all asv commands accept the --python kwarg.
-            run_args.append(f"--python={session.python}")
-        session.run(*run_args)
-
-    if ci_mode:
-        # If on a PR: compare to the base (target) branch.
-        # Else: compare to previous commit.
-        previous_commit = os.environ.get("CIRRUS_BASE_SHA", "HEAD^1")
+    # All run types require one Nox posarg.
+    run_type_arg = {
+        "branch": "base branch",
+        "sperf": "publish directory",
+        "custom": "ASV sub-command",
+    }
+    if run_type not in run_type_arg.keys():
+        message = f"Unsupported run-type: {run_type}"
+        raise NotImplementedError(message)
+    if not session.posargs:
+        message = (
+            f"Missing mandatory first Nox session posarg: {run_type_arg[run_type]}"
+        )
+        raise ValueError(message)
+    first_arg = session.posargs[0]
+    # Optional extra arguments to be passed down to ASV.
+    asv_args = session.posargs[1:]
+
+    if run_type == "branch":
+        base_branch = first_arg
+        git_command = f"git merge-base HEAD {base_branch}"
+        merge_base = session.run(*git_command.split(" "), silent=True, external=True)[
+            :8
+        ]
+
         try:
-            asv_exec("continuous", previous_commit, "HEAD", "--bench=ci", "--factor=2")
+            asv_command = [
+                "asv",
+                "continuous",
+                merge_base,
+                "HEAD",
+                f"--factor={COMPARE_FACTOR}",
+                "--strict",
+            ]
+            session.run(*asv_command, *asv_args)
         finally:
-            asv_exec("compare", previous_commit, "HEAD", "--factor=2")
-    elif long_mode:
-        asv_exec("run", "HEAD^!", "--bench=long")
-    else:
-        # f32f23a5 = first supporting commit for nox_asv_plugin.py .
-        asv_exec("run", "f32f23a5..HEAD")
+            asv_command = [
+                "asv",
+                "compare",
+                merge_base,
+                "HEAD",
+                f"--factor={COMPARE_FACTOR}",
+                "--split",
+            ]
+            session.run(*asv_command)
+
+    elif run_type == "sperf":
+        publish_dir = Path(first_arg)
+        if not publish_dir.is_dir():
+            message = f"Input 'publish directory' is not a directory: {publish_dir}"
+            raise NotADirectoryError(message)
+        publish_subdir = (
+            publish_dir / f"{run_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+        )
+        publish_subdir.mkdir()
+
+        # Activate on-demand benchmarks (SPerf is deactivated for 'standard' runs).
+        session.env["ON_DEMAND_BENCHMARKS"] = "True"
+        commit_range = "upstream/main^!"
+
+        asv_command = [
+            "asv",
+            "run",
+            commit_range,
+            "--bench=.*Scalability.*",
+            "--attribute",
+            "rounds=1",
+        ]
+        session.run(*asv_command, *asv_args)
 
-    if gh_pages:
-        asv_exec("gh-pages", "--rewrite")
+        asv_command = ["asv", "publish", commit_range, f"--html-dir={publish_subdir}"]
+        session.run(*asv_command)
+
+        # Print completion message.
+        location = Path().cwd() / ".asv"
+        print(
+            f'New ASV results for "{run_type}".\n'
+            f'See "{publish_subdir}",'
+            f'\n or JSON files under "{location / "results"}".'
+        )
+
+    else:
+        asv_subcommand = first_arg
+        assert run_type == "custom"
+        session.run("asv", asv_subcommand, *asv_args)
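For reference, the `branch` run-type above reduces to roughly the following
(a sketch; assumes `git` and `asv` are available on `PATH`):

```python
import subprocess

# First 8 characters of the merge-base commit, as in the session code.
merge_base = subprocess.check_output(
    ["git", "merge-base", "HEAD", "upstream/main"], text=True
)[:8]

subprocess.run(
    ["asv", "continuous", merge_base, "HEAD", "--factor=2.0", "--strict"],
    check=True,
)
subprocess.run(
    ["asv", "compare", merge_base, "HEAD", "--factor=2.0", "--split"],
    check=True,
)
```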
diff --git a/requirements/py310.yml b/requirements/py310.yml
index 782db31f..2dee0b52 100644
--- a/requirements/py310.yml
+++ b/requirements/py310.yml
@@ -21,7 +21,7 @@ dependencies:
   - asv
   - black=22.3.0
   - codecov
-  - flake8
+  - flake8<6
   - flake8-docstrings
   - flake8-import-order
   - nox
diff --git a/requirements/py38.yml b/requirements/py38.yml
index d56f9264..fc429998 100644
--- a/requirements/py38.yml
+++ b/requirements/py38.yml
@@ -21,7 +21,7 @@ dependencies:
   - asv
   - black=22.3.0
   - codecov
-  - flake8
+  - flake8<6
   - flake8-docstrings
   - flake8-import-order
   - nox
diff --git a/requirements/py39.yml b/requirements/py39.yml
index 00eb99e1..c7844d3e 100644
--- a/requirements/py39.yml
+++ b/requirements/py39.yml
@@ -21,7 +21,7 @@ dependencies:
   - asv
   - black=22.3.0
   - codecov
-  - flake8
+  - flake8<6
   - flake8-docstrings
   - flake8-import-order
   - nox