diff --git a/.github/renovate.json b/.github/renovate.json new file mode 100644 index 000000000..9209cc62e --- /dev/null +++ b/.github/renovate.json @@ -0,0 +1,18 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": ["config:recommended"], + "baseBranchPatterns":["main-with-renovate-comments"], + "packageRules": [ + { + "matchManagers": ["pip_requirements", "conda"], + "groupName": "Python dependencies", + "separateMajorMinor": false + }, + { + "matchManagers": ["github-actions"], + "groupName": "GitHub actions", + "separateMajorMinor": false + } + ], + "dependencyDashboard": true +} diff --git a/.gitignore b/.gitignore index 53b739e6d..6fb2b5617 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,6 @@ doc/build doc/source/*.rst doc/source/*.md -doc/source/tutorials/1-monte_carlo_uq.rst doc/source/tutorials/*.ipynb # No results or logs diff --git a/doc/README.md b/doc/README.md index 4130fd466..989ee8ad2 100644 --- a/doc/README.md +++ b/doc/README.md @@ -6,7 +6,13 @@ We believe that documentation is essential and therefore welcome any improvement ## :woman_teacher: Build the documentation -To build the documentation, you first need a development QUEENS installation as mentioned in the [CONTRIBUTING.md](../CONTRIBUTING.md). This adds additional packages needed for the html build. Once this is done, follow the next steps in the activated Python environment with the development installation. +To build the documentation, you first need to set up a QUEENS environment as described in the [README.md](../README.md). 
+In this Python environment, you also need to install packages for QUEENS development and tutorials and register the environment as a Jupyter kernel: + +```bash +pip install -e .[safe_develop,tutorial] +python -m ipykernel install --user --name queens --display-name "Python (queens)" +``` When building the documentation on your machine for the first time or after adding new modules or classes to QUEENS, one needs to first rebuild the `autodoc index` by running: diff --git a/doc/source/_ext/create_documentation_files.py b/doc/source/_ext/create_documentation_files.py index 3dde29703..4e87bd97a 100644 --- a/doc/source/_ext/create_documentation_files.py +++ b/doc/source/_ext/create_documentation_files.py @@ -157,10 +157,10 @@ def create_development(): inject({"md_paths": md_paths}, development_template, development_path) -def create_intro(): +def create_introduction(): """Generate landing page.""" - intro_template = get_template_path_by_name("intro.md.j2") - into_path = relative_to_doc_source("intro.md") + intro_template = get_template_path_by_name("introduction.md.j2") + into_path = relative_to_doc_source("introduction.md") def extract_from_markdown_by_marker(marker_name, md_path): return clean_markdown(extract_from_markdown_file_by_marker(marker_name, md_path)) @@ -176,10 +176,10 @@ def extract_from_markdown_by_marker(marker_name, md_path): ) -def create_overview(): +def create_packages(): """Create overview of the QUEENS package.""" - overview_template = get_template_path_by_name("overview.rst.j2") - overview_path = relative_to_doc_source("overview.rst") + overview_template = get_template_path_by_name("packages.rst.j2") + overview_path = relative_to_doc_source("packages.rst") def get_module_description(python_file): """Get module description. 
@@ -298,7 +298,7 @@ def copy_tutorials(): def main(): """Create all the rst files.""" - create_intro() + create_introduction() copy_tutorials() create_development() - create_overview() + create_packages() diff --git a/doc/source/_ext/templates/1-monte_carlo_uq.rst.j2 b/doc/source/_ext/templates/1-monte_carlo_uq.rst.j2 deleted file mode 100644 index 2271d762c..000000000 --- a/doc/source/_ext/templates/1-monte_carlo_uq.rst.j2 +++ /dev/null @@ -1,15 +0,0 @@ -1. Monte Carlo UQ -================= - -A simple example from our `README.md `_: - -.. code-block:: python - -{{ example_text }} - -Resulting in the histogram: - -.. image:: images/monte_carlo_uq.png - :width: 500 - :align: center - :alt: Monte Carlo Histogram diff --git a/doc/source/_ext/templates/intro.md.j2 b/doc/source/_ext/templates/introduction.md.j2 similarity index 100% rename from doc/source/_ext/templates/intro.md.j2 rename to doc/source/_ext/templates/introduction.md.j2 diff --git a/doc/source/_ext/templates/overview.rst.j2 b/doc/source/_ext/templates/packages.rst.j2 similarity index 100% rename from doc/source/_ext/templates/overview.rst.j2 rename to doc/source/_ext/templates/packages.rst.j2 diff --git a/doc/source/conf.py b/doc/source/conf.py index 80e71e45d..dc4c00c5b 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -52,6 +52,17 @@ "myst_parser", ] +# Control how typehints are displayed in the documentation. +# 'description' shows typehints as content of the function (next to the parameters), not in the +# function signature. +# This mirrors the behavior of types provided in the docstrings. +autodoc_typehints = "description" + +# Control how class signatures will be displayed. +# 'separated' will display the signature as a method, meaning the __init__ method is explicitly +# shown in the documentation. +autodoc_class_signature = "separated" + # Custom command to make the returns in the docstring behave like the parameter/argument section. 
# This will basically make the documentation a bit prettier as different returns are directly # rendered as bullet points. diff --git a/doc/source/index.rst b/doc/source/index.rst index a307b7e24..997cd4ffd 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -15,11 +15,10 @@ Contents .. toctree:: :maxdepth: 1 - intro - overview + introduction + packages tutorials development - queens faq diff --git a/doc/source/tutorials/0-onboarding.rst b/doc/source/tutorials/0-onboarding.rst index cd5bedc30..bdf2bc4f4 100644 --- a/doc/source/tutorials/0-onboarding.rst +++ b/doc/source/tutorials/0-onboarding.rst @@ -12,7 +12,7 @@ Steps to get started with QUEENS .. code-block:: bash - git clone git@github.com:queens-py/queens.git + git clone git@github.com:queens-py/queens.git #. Read through our `introduction `_ and follow the instructions in the "Installation" section to set up your local environment. @@ -34,11 +34,10 @@ Steps to get started with QUEENS .. code-block:: json - "python.pythonPath": "/bin/python", - "python.defaultInterpreterPath": "/bin/python", - "editor.formatOnSave": True, - "python.formatting.provider": "black", - "python.formatting.blackPath": "/bin/black", + "python.defaultInterpreterPath": "/bin/python", + "editor.formatOnSave": true, + "editor.defaultFormatter": "ms-python.black-formatter", + "black-formatter.args": ["--line-length", "100"], #. Browse through our `documentation `_ and our other tutorials to get a first impression. diff --git a/src/queens/data_processors/csv_file.py b/src/queens/data_processors/csv_file.py index 454a81652..1dba3ca4b 100644 --- a/src/queens/data_processors/csv_file.py +++ b/src/queens/data_processors/csv_file.py @@ -71,33 +71,44 @@ def __init__( """Instantiate data processor class for csv data. Args: - file_name_identifier (str): Identifier of file name - The file prefix can contain regex expression - and subdirectories. + file_name_identifier (str): Identifier of file name. 
The file prefix can contain regex + expression and subdirectories. file_options_dict (dict): Dictionary with read-in options for the file: - - header_row (int): Integer that determines which csv-row contains labels/headers of - the columns. Default is 'None', meaning no header used. - - use_cols_lst (lst): (optional) list with column numbers that should be read-in. - - skip_rows (int): Number of rows that should be skipped to be read-in in csv file. - - index_column (int, str): Column to use as the row labels of the DataFrame, either - given as string name or column index. - Note: index_column=False can be used to force pandas to - not use the first column as the index. Index_column is - used for filtering the remaining columns. - - returned_filter_format (str): Returned data format after filtering - - filter (dict): Dictionary with filter options: - -- type (str): Filter type to use - -- rows (lst): In case this options is used, the list contains the indices of - rows in the csv file that should be used as data - -- range (lst): After data is selected by `use_cols_lst` and a filter column - is specified by `index_column`, this option selects which data - range shall be filtered by providing a minimum and maximum - value pair in list format - -- target_values (list): target values to filter - -- tolerance (float): Tolerance for the filter range + + - header_row (int): + Integer that determines which csv-row contains labels/headers of + the columns. Default is 'None', meaning no header is used. + - use_cols_lst (lst): + (optional) list with column numbers that should be read-in. + - skip_rows (int): + Number of rows that should be skipped when reading the csv file. + - index_column (int, str): + Column to use as the row labels of the DataFrame, either + given as string name or column index. + Note: ``index_column=False`` can be used to force pandas to not use the first + column as the index. ``index_column`` is used for filtering the remaining + columns.
+ - returned_filter_format (str): + Returned data format after filtering + - filter (dict): + Dictionary with filter options: + + -- type (str): + Filter type to use + -- rows (lst): + In case this option is used, the list contains the indices of + rows in the csv file that should be used as data + -- range (lst): + After data is selected by `use_cols_lst` and a filter column is specified + by `index_column`, this option selects which data range shall be filtered + by providing a minimum and maximum value pair in list format + -- target_values (list): + target values to filter + -- tolerance (float): + Tolerance for the filter range files_to_be_deleted_regex_lst (lst): List with paths to files that should be deleted. - The paths can contain regex expressions. + The paths can contain regex expressions. Returns: Instance of CsvFile class diff --git a/src/queens/global_settings.py b/src/queens/global_settings.py index 0e62884a8..413051ffc 100644 --- a/src/queens/global_settings.py +++ b/src/queens/global_settings.py @@ -21,7 +21,7 @@ import logging from pathlib import Path -from queens.schedulers._dask import SHUTDOWN_CLIENTS +from queens.schedulers._scheduler import CLEANUP_SCHEDULERS from queens.utils.ascii_art import print_banner_and_description from queens.utils.logger_settings import reset_logging, setup_basic_logging from queens.utils.path import PATH_TO_ROOT, create_folder_if_not_existent @@ -168,8 +168,8 @@ def __exit__(self, exception_type, exception_value, traceback): exception_value: indicates exception instance traceback: traceback object """ - for shutdown_client in SHUTDOWN_CLIENTS.copy(): - SHUTDOWN_CLIENTS.remove(shutdown_client) - shutdown_client() + for cleanup_scheduler in CLEANUP_SCHEDULERS.copy(): + CLEANUP_SCHEDULERS.remove(cleanup_scheduler) + cleanup_scheduler() reset_logging() diff --git a/src/queens/iterators/elementary_effects.py b/src/queens/iterators/elementary_effects.py index 1416e5cba..33cde8912 100644 ---
a/src/queens/iterators/elementary_effects.py +++ b/src/queens/iterators/elementary_effects.py @@ -174,13 +174,15 @@ def print_results(self, results): Args: results (dict): Dictionary with the results of the sensitivity analysis, including: - - 'parameter_names': List of parameter names. - - 'sensitivity_indices': Contains indices like: - - 'names': Parameter names. - - 'mu_star': Mean absolute effect. - - 'mu': Mean effect. - - 'mu_star_conf': Confidence interval for 'mu_star'. - - 'sigma': Standard deviation of the effect. + + - `parameter_names`: List of parameter names. + - `sensitivity_indices`: Contains indices like: + + - `names`: Parameter names. + - `mu_star`: Mean absolute effect. + - `mu`: Mean effect. + - `mu_star_conf`: Confidence interval for `mu_star`. + - `sigma`: Standard deviation of the effect. """ _logger.info( "%-20s %10s %10s %15s %10s", "Parameter", "Mu_Star", "Mu", "Mu_Star_Conf", "Sigma" diff --git a/src/queens/iterators/hamiltonian_monte_carlo.py b/src/queens/iterators/hamiltonian_monte_carlo.py index cb8334060..93caac55d 100644 --- a/src/queens/iterators/hamiltonian_monte_carlo.py +++ b/src/queens/iterators/hamiltonian_monte_carlo.py @@ -14,7 +14,7 @@ # """HMC algorithm. -"The Hamiltonian Monte Carlo sampler is a gradient based MCMC algortihm. +The Hamiltonian Monte Carlo sampler is a gradient based MCMC algorithm. It is used to sample from arbitrary probability distributions. 
""" @@ -35,16 +35,13 @@ class HamiltonianMonteCarlo(PyMC): Attributes: max_steps (int): Maximum of leapfrog steps to take in one iteration - target_accept (float): Target accpetance rate which should be conistent after burn-in + target_accept (float): Target acceptance rate which should be consistent after burn-in path_length (float): Maximum length of particle trajectory - step_size (float): Step size, scaled by 1/(parameter dimension **0.25) + step_size (float): Step size, scaled by 1/(parameter dimension^0.25) scaling (np.array): The inverse mass, or precision matrix is_cov (boolean): Setting if the scaling is a mass or covariance matrix init_strategy (str): Strategy to tune mass damping matrix advi_iterations (int): Number of iteration steps of ADVI based init strategies - - Returns: - hmc_iterator (obj): Instance of HMC Iterator """ @log_init_args @@ -94,10 +91,10 @@ def __init__( functions progressbar (boolean, opt): Setting for printing progress bar while sampling max_steps (int, opt): Maximum of leapfrog steps to take in one iteration - target_accept (float, opt): Target accpetance rate which should be conistent after + target_accept (float, opt): Target acceptance rate which should be consistent after burn-in path_length (float, opt): Maximum length of particle trajectory - step_size (float, opt): Step size, scaled by 1/(parameter dimension **0.25) + step_size (float, opt): Step size, scaled by 1/(parameter dimension^0.25) scaling (np.array, opt): The inverse mass, or precision matrix is_cov (boolean, opt): Setting if the scaling is a mass or covariance matrix init_strategy (str, opt): Strategy to tune mass damping matrix diff --git a/src/queens/iterators/least_squares.py b/src/queens/iterators/least_squares.py index 984637132..0e4fcfe63 100644 --- a/src/queens/iterators/least_squares.py +++ b/src/queens/iterators/least_squares.py @@ -36,15 +36,16 @@ class LeastSquares(Optimization): Attributes: algorithm (str): Algorithm to perform minimization: - - trf : 
Trust Region Reflective algorithm, particularly suitable for large - sparse problems with bounds. Generally robust method. - - - dogbox : dogleg algorithm with rectangular trust regions, typical use - case is small problems with bounds. Not recommended for problems - with rank-deficient Jacobian. - - lm : Levenberg-Marquardt algorithm as implemented in MINPACK. Doesn’t - handle bounds and sparse Jacobians. Usually the most efficient - method for small unconstrained problems. + - trf: + Trust Region Reflective algorithm, particularly suitable for large sparse problems + with bounds. Generally robust method. + - dogbox: + dogleg algorithm with rectangular trust regions, typical use case is small problems + with bounds. Not recommended for problems with rank-deficient Jacobian. + - lm: + Levenberg-Marquardt algorithm as implemented in MINPACK. Doesn’t handle bounds and + sparse Jacobians. Usually the most efficient method for small unconstrained + problems. """ @log_init_args @@ -75,31 +76,31 @@ def __init__( verbose_output (int): Integer encoding which kind of verbose information should be printed by the optimizers. bounds (sequence, Bounds): Bounds on variables for Nelder-Mead, L-BFGS-B, TNC, SLSQP, - Powell, and trust-constr methods. - There are two ways to specify the bounds: - - 1. Instance of `Bounds` class. - 2. A sequence with 2 elements. The first element corresponds - to a sequence of lower bounds and the second element to - sequence of upper bounds. The length of each of the two - subsequences must be equal to the number of variables. + Powell, and trust-constr methods. There are two ways to specify the bounds: + + 1. Instance of `Bounds` class. + 2. A sequence with 2 elements. The first element corresponds to a sequence of lower + bounds and the second element to a sequence of upper bounds. The length of each + of the two subsequences must be equal to the number of variables. + max_feval (int): Maximum number of function evaluations. 
algorithm (str): Algorithm to perform minimization: - - trf : Trust Region Reflective algorithm, particularly suitable for - large sparse problems with bounds. Generally robust method. + - 'trf': Trust Region Reflective algorithm, particularly suitable for large sparse + problems with bounds. Generally robust method. + - 'dogbox': dogleg algorithm with rectangular trust regions, typical use case is + small problems with bounds. Not recommended for problems with rank-deficient + Jacobian. + - 'lm': Levenberg-Marquardt algorithm as implemented in MINPACK. Doesn’t handle + bounds and sparse Jacobians. Usually the most efficient method for small + unconstrained problems. - - dogbox : dogleg algorithm with rectangular trust regions, typical use - case is small problems with bounds. Not recommended for - problems with rank-deficient Jacobian. - - lm : Levenberg-Marquardt algorithm as implemented in MINPACK. - Doesn’t handle bounds and sparse Jacobians. Usually the most - efficient method for small unconstrained problems. jac_method (str): Method to calculate a finite difference based approximation of the Jacobian matrix: - '2-point': a one-sided scheme by definition - '3-point': more exact but needs twice as many function evaluations + jac_rel_step (array_like): Relative step size to use for finite difference approximation of Jacobian matrix. If None (default) then it is selected automatically. (see SciPy documentation for details) diff --git a/src/queens/iterators/metropolis_hastings.py b/src/queens/iterators/metropolis_hastings.py index dd0006b96..7e1b87149 100644 --- a/src/queens/iterators/metropolis_hastings.py +++ b/src/queens/iterators/metropolis_hastings.py @@ -14,7 +14,7 @@ # """Metropolis-Hastings algorithm. 
-"The Metropolis-Hastings algorithm is a Markov Chain Monte Carlo (MCMC) +The Metropolis-Hastings algorithm is a Markov Chain Monte Carlo (MCMC) method for obtaining a sequence of random samples from a probability distribution from which direct sampling is difficult." [1] diff --git a/src/queens/iterators/metropolis_hastings_pymc.py b/src/queens/iterators/metropolis_hastings_pymc.py index 526a595ac..4fc6fd6a4 100644 --- a/src/queens/iterators/metropolis_hastings_pymc.py +++ b/src/queens/iterators/metropolis_hastings_pymc.py @@ -14,8 +14,8 @@ # """Metropolis Hastings algorithm. -"The Metropolis Hastings algorithm is a not-gradient based MCMC -algortihm. It implements a random walk. +The Metropolis Hastings algorithm is a not-gradient based MCMC +algorithm. It implements a random walk. """ import logging diff --git a/src/queens/iterators/nuts.py b/src/queens/iterators/nuts.py index 90fed27fe..3e45ec0da 100644 --- a/src/queens/iterators/nuts.py +++ b/src/queens/iterators/nuts.py @@ -14,7 +14,7 @@ # """No-U-Turn algorithm. -"The No-U-Turn sampler is a gradient based MCMC algortihm. It builds on +The No-U-Turn sampler is a gradient based MCMC algorithm. It builds on the Hamiltonian Monte Carlo sampler to sample from (high dimensional) arbitrary probability distributions. 
""" @@ -44,8 +44,8 @@ class NUTS(PyMC): Attributes: max_treedepth (int): Maximum depth for the tree-search early_max_treedepth (int): Max tree depth of first 200 tuning samples - step_size (float): Step size, scaled by 1/(parameter dimension **0.25) - target_accept (float): Target accpetance rate which should be conistent after burn-in + step_size (float): Step size, scaled by 1/(parameter dimension ^ 0.25) + target_accept (float): Target acceptance rate which should be consistent after burn-in scaling (np.array): The inverse mass, or precision matrix is_cov (boolean): Setting if the scaling is a mass or covariance matrix init_strategy (str): Strategy to tune mass damping matrix @@ -103,8 +103,8 @@ def __init__( progressbar (boolean, opt): Setting for printing progress bar while sampling max_treedepth (int): Maximum depth for the tree-search early_max_treedepth (int): Max tree depth of first 200 tuning samples - step_size (float): Step size, scaled by 1/(parameter dimension **0.25) - target_accept (float): Target accpetance rate which should be conistent after burn-in + step_size (float): Step size, scaled by 1/(parameter dimension ^ 0.25) + target_accept (float): Target acceptance rate which should be consistent after burn-in scaling (np.array): The inverse mass, or precision matrix is_cov (boolean): Setting if the scaling is a mass or covariance matrix init_strategy (str): Strategy to tune mass damping matrix diff --git a/src/queens/iterators/optimization.py b/src/queens/iterators/optimization.py index 5d2d65ece..564a0a46c 100644 --- a/src/queens/iterators/optimization.py +++ b/src/queens/iterators/optimization.py @@ -40,44 +40,47 @@ class Optimization(Iterator): Attributes: algorithm (str): String that defines the optimization algorithm to be used: - - CG: Conjugate gradient optimization (unconstrained), using Jacobian - - BFGS: Broyden–Fletcher–Goldfarb–Shanno algorithm (quasi-Newton) for - optimization (iterative method for unconstrained - nonlinear 
optimization), using Jacobian - - L-BFGS-B: Limited memory Broyden–Fletcher–Goldfarb–Shanno algorithm - with box constraints (for large number of variables) - - TNC: Truncated Newton method (Hessian free) for nonlinear - optimization with bounds involving a large number of variables. - Jacobian necessary - - SLSQP: Sequential Least Squares Programming minimization with bounds - and constraints using Jacobian - - COBYLA: Constrained Optimization BY Linear Approximation - (no Jacobian) - - NELDER-MEAD: Downhill-simplex search method - (unconstrained, unbounded) - without the need for a Jacobian - - POWELL: Powell's conjugate direction method (unconstrained) without - the need for a Jacobian. Minimizes the function by a - bidirectional search along each search vector + - CG: + Conjugate gradient optimization (unconstrained), using Jacobian + - BFGS: + Broyden–Fletcher–Goldfarb–Shanno algorithm (quasi-Newton) for optimization + (iterative method for unconstrained nonlinear optimization), using Jacobian + - L-BFGS-B: + Limited memory Broyden–Fletcher–Goldfarb–Shanno algorithm with box constraints (for + large number of variables) + - TNC: + Truncated Newton method (Hessian free) for nonlinear optimization with bounds + involving a large number of variables. Jacobian necessary + - SLSQP: + Sequential Least Squares Programming minimization with bounds and constraints using + Jacobian + - COBYLA: + Constrained Optimization BY Linear Approximation (no Jacobian) + - NELDER-MEAD: + Downhill-simplex search method (unconstrained, unbounded) without the need for a + Jacobian + - POWELL: + Powell's conjugate direction method (unconstrained) without the need for a + Jacobian. Minimizes the function by a bidirectional search along each search vector + bounds (sequence, Bounds): Bounds on variables for Nelder-Mead, L-BFGS-B, TNC, SLSQP, - Powell, and trust-constr methods. - There are two ways to specify the bounds: - - 1. Instance of `Bounds` class. - 2. A sequence with 2 elements. 
The first element corresponds - to a sequence of lower bounds and the second element to - sequence of upper bounds. The length of each of the two - subsequences must be equal to the number of variables. + Powell, and trust-constr methods. There are two ways to specify the bounds: + + 1. Instance of `Bounds` class. + 2. A sequence with 2 elements. The first element corresponds to a sequence of lower + bounds and the second element to a sequence of upper bounds. The length of each of + the two subsequences must be equal to the number of variables. + cons (np.array): Nonlinear constraints for the optimization. Only for COBYLA, SLSQP and trust-constr (see SciPy documentation for details) - initial_guess (np.array): Initial guess, i.e. start point of - optimization. + initial_guess (np.array): Initial guess, i.e. start point of optimization. jac_method (str): Method to calculate a finite difference based approximation of the Jacobian matrix: - '2-point': a one-sided scheme by definition - '3-point': more exact but needs twice as many function evaluations + jac_rel_step (array_like): Relative step size to use for finite difference approximation of Jacobian matrix. If None (default) then it is selected automatically. (see SciPy documentation for details) @@ -124,45 +127,42 @@ def __init__( verbose_output (int): Integer encoding which kind of verbose information should be printed by the optimizers. bounds (sequence, Bounds): Bounds on variables for Nelder-Mead, L-BFGS-B, TNC, SLSQP, - Powell, and trust-constr methods. - There are two ways to specify the bounds: - - 1. Instance of `Bounds` class. - 2. A sequence with 2 elements. The first element corresponds - to a sequence of lower bounds and the second element to - sequence of upper bounds. The length of each of the two - subsequences must be equal to the number of variables. + Powell, and trust-constr methods. There are two ways to specify the bounds: + + 1. Instance of `Bounds` class. + 2. A sequence with 2 elements. 
The first element corresponds to a sequence of lower + bounds and the second element to a sequence of upper bounds. The length of each + of the two subsequences must be equal to the number of variables. + constraints (np.array): Nonlinear constraints for the optimization. Only for COBYLA, SLSQP and trust-constr (see SciPy documentation for details) max_feval (int): Maximum number of function evaluations. algorithm (str): String that defines the optimization algorithm to be used: - - CG: Conjugate gradient optimization (unconstrained), using Jacobian - - BFGS: Broyden–Fletcher–Goldfarb–Shanno algorithm (quasi-Newton) for - optimization (iterative method for unconstrained - nonlinear optimization), using Jacobian - - L-BFGS-B: Limited memory Broyden–Fletcher–Goldfarb–Shanno algorithm - with box constraints (for large number of variables) - - TNC: Truncated Newton method (Hessian free) for nonlinear - optimization with bounds involving a large number of variables. - Jacobian necessary - - SLSQP: Sequential Least Squares Programming minimization with bounds - and constraints using Jacobian - - LSQ: Nonlinear least squares with bounds using Jacobian - - COBYLA: Constrained Optimization BY Linear Approximation - (no Jacobian) - - NELDER-MEAD: Downhill-simplex search method - (unconstrained, unbounded) - without the need for a Jacobian - - POWELL: Powell's conjugate direction method (unconstrained) without - the need for a Jacobian. 
Minimizes the function by a - bidirectional search along each search vector + - 'CG': Conjugate gradient optimization (unconstrained), using Jacobian + - 'BFGS': Broyden–Fletcher–Goldfarb–Shanno algorithm (quasi-Newton) for optimization + (iterative method for unconstrained nonlinear optimization), using Jacobian + - 'L-BFGS-B': Limited memory Broyden–Fletcher–Goldfarb–Shanno algorithm with box + constraints (for large number of variables) + - 'TNC': Truncated Newton method (Hessian free) for nonlinear optimization with + bounds + involving a large number of variables. Jacobian necessary + - 'SLSQP': Sequential Least Squares Programming minimization with bounds and + constraints using Jacobian + - 'COBYLA': Constrained Optimization BY Linear Approximation (no Jacobian) + - 'NELDER-MEAD': Downhill-simplex search method (unconstrained, unbounded) without + the need for a Jacobian + - 'POWELL': Powell's conjugate direction method (unconstrained) without the need + for a Jacobian. Minimizes the function by a bidirectional search along each + search vector + jac_method (str): Method to calculate a finite difference based approximation of the Jacobian matrix: - '2-point': a one-sided scheme by definition - '3-point': more exact but needs twice as many function evaluations + jac_rel_step (array_like): Relative step size to use for finite difference approximation of Jacobian matrix. If None (default) then it is selected automatically. (see SciPy documentation for details) diff --git a/src/queens/iterators/reinforcement_learning.py b/src/queens/iterators/reinforcement_learning.py index 9f341db2c..ff3eb3e8a 100644 --- a/src/queens/iterators/reinforcement_learning.py +++ b/src/queens/iterators/reinforcement_learning.py @@ -95,9 +95,11 @@ def __init__( Args: model (RLModel): Model to be evaluated by the iterator. parameters (Parameters): Parameters object. + .. 
note:: - This parameter is required by the base class, but is - currently not used in the ``ReinforcementLearning`` iterator. + This parameter is required by the base class, but is + currently not used in the ``ReinforcementLearning`` iterator. + global_settings (GlobalSettings): Settings of the QUEENS experiment including its name and the output directory. result_description (dict): Description of desired results. diff --git a/src/queens/models/finite_difference.py b/src/queens/models/finite_difference.py index 97365de40..cf2e4adcc 100644 --- a/src/queens/models/finite_difference.py +++ b/src/queens/models/finite_difference.py @@ -32,18 +32,17 @@ class FiniteDifference(Simulation): """Finite difference model. Attributes: - finite_difference_method (str): Method to calculate a finite difference - based approximation of the Jacobian matrix: - - '2-point': a one-sided scheme by definition - - '3-point': more exact but needs twice as many function - evaluations - step_size (float): Step size for the finite difference - approximation + finite_difference_method (str): Method to calculate a finite-difference-based approximation + of the Jacobian matrix: + + - '2-point': a one-sided scheme by definition + - '3-point': more exact but needs twice as many function evaluations + + step_size (float): Step size for the finite difference approximation bounds (np.array): Lower and upper bounds on independent variables. - Defaults to no bounds meaning: [-inf, inf] - Each bound must match the size of *x0* or be a scalar, in the latter case - the bound will be the same for all variables. Use it to limit the range - of function evaluation. + Defaults to no bounds meaning: [-inf, inf]. Each bound must match the size of *x0* or + be a scalar, in the latter case the bound will be the same for all variables. Use it to + limit the range of function evaluation. 
""" @log_init_args @@ -53,18 +52,17 @@ def __init__(self, scheduler, driver, finite_difference_method, step_size=1e-5, Args: scheduler (Scheduler): Scheduler for the simulations driver (Driver): Driver for the simulations - finite_difference_method (str): Method to calculate a finite difference - based approximation of the Jacobian matrix: - - '2-point': a one-sided scheme by definition - - '3-point': more exact but needs twice as many - function evaluations + finite_difference_method (str): Method to calculate a finite-difference-based + approximation of the Jacobian matrix: + + - '2-point': a one-sided scheme by definition + - '3-point': more exact but needs twice as many function evaluations + step_size (float, opt): Step size for the finite difference approximation bounds (tuple of array_like, opt): Lower and upper bounds on independent variables. - Defaults to no bounds meaning: [-inf, inf] - Each bound must match the size of *x0* or be a - scalar, in the latter case the bound will be the - same for all variables. Use it to limit the - range of function evaluation. + Defaults to no bounds meaning: [-inf, inf]. Each bound must match the size of *x0* + or be a scalar, in the latter case the bound will be the same for all variables. + Use it to limit the range of function evaluation. """ super().__init__(scheduler=scheduler, driver=driver) diff --git a/src/queens/models/likelihoods/bmf_gaussian.py b/src/queens/models/likelihoods/bmf_gaussian.py index cd038a138..a070c5a95 100644 --- a/src/queens/models/likelihoods/bmf_gaussian.py +++ b/src/queens/models/likelihoods/bmf_gaussian.py @@ -675,42 +675,36 @@ def evaluate_and_gradient_per_coordinate( Args: z_lf (np.array): Low-fidelity feature vector that contains the corresponding Monte-Carlo - points on which the probabilistic mapping should be evaluated. - Dimensions: Rows: different multi-fidelity vector/points - (each row is one multi-fidelity point). - Columns: different model outputs/informative features. 
- Note: z_lf might be a 3d tensor here - Dims z_lf: gamma_dim x num_samples x coord_dim - Dims z_lf.T: coord_dim x num_samples x gamma_dim --> iterate over - coord_dim + points on which the probabilistic mapping should be evaluated. + Dimensions: Rows: different multi-fidelity vector/points + (each row is one multi-fidelity point). + Columns: different model outputs/informative features. + Note: z_lf might be a 3d tensor here: + Dims z_lf: gamma_dim x num_samples x coord_dim or + Dims z_lf.T: coord_dim x num_samples x gamma_dim --> iterate over coord_dim. support (str): Support/variable for which we predict the mean and (co)variance. For - `support=f` the Gaussian process predicts w.r.t. the latent function - `f`. For the choice of `support=y` we predict w.r.t. to the - simulation/experimental output `y`, + `support=f`, the Gaussian process predicts w.r.t. the latent function + `f`. For the choice of `support=y` we predict w.r.t. the + simulation/experimental output `y`. probabilistic_mapping_obj_lst (list): List of probabilistic mapping objects _time_vec (np.array): Vector of time points _coords_mat (np.array): Matrix of spatial coordinates Returns: mean (np.array): Vector of mean predictions - :math:`\mathbb{E}_{f^*}[p(y_{HF}^*|f^*,z_{LF}^*, \mathcal{D}_{f})]` - for the HF model given the low-fidelity feature input. Different HF - predictions per row. Each row corresponds to one multi-fidelity - input vector in :math:`\Omega_{y_{lf}\times\gamma_i}`. - + :math:`\mathbb{E}_{f^*}[p(y_{HF}^*|f^*,z_{LF}^*, \mathcal{D}_{f})]` + for the HF model given the low-fidelity feature input. Different HF + predictions per row. Each row corresponds to one multi-fidelity + input vector in :math:`\Omega_{y_{lf}\times\gamma_i}`. variance (np.array): Vector of variance predictions - :math:`\mathbb{V}_{f^*}[p(y_{HF}^*|f^*,z_{LF}^*,\mathcal{D}_{f})]` - for the HF model given the low-fidelity feature input. - Different HF predictions per row.
Each row corresponds to one - multi-fidelity input vector in - :math:`\Omega_{y_{lf}\times\gamma_i}`. - - grad_mean (np.array): Gradient matrix for the mean prediction. - Different HF predictions per row, and gradient - vector entries per column - grad_variance (np.array): Gradient matrix for the mean prediction. - Different HF predictions per row, and gradient - vector entries per column + :math:`\mathbb{V}_{f^*}[p(y_{HF}^*|f^*,z_{LF}^*,\mathcal{D}_{f})]` + for the HF model given the low-fidelity feature input. + Different HF predictions per row. Each row corresponds to one + multi-fidelity input vector in :math:`\Omega_{y_{lf}\times\gamma_i}`. + grad_mean (np.array): Gradient matrix for the mean prediction. + Different HF predictions per row, and gradient vector entries per column. + grad_variance (np.array): Gradient matrix for the variance prediction. + Different HF predictions per row, and gradient vector entries per column. """ mean_Y_HF_given_Z_LF = [] var_Y_HF_given_Z_LF = [] @@ -781,24 +775,20 @@ def evaluate_and_gradient_per_time_step( Returns: mean (np.array): Vector of mean predictions - :math:`\mathbb{E}_{f^*}[p(y_{HF}^*|f^*,z_{LF}^*, \mathcal{D}_{f})]` - for the HF model given the low-fidelity feature input. Different HF - predictions per row. Each row corresponds to one multi-fidelity - input vector in :math:`\Omega_{y_{lf}\times\gamma_i}`. - + :math:`\mathbb{E}_{f^*}[p(y_{HF}^*|f^*,z_{LF}^*, \mathcal{D}_{f})]` + for the HF model given the low-fidelity feature input. Different HF + predictions per row. Each row corresponds to one multi-fidelity + input vector in :math:`\Omega_{y_{lf}\times\gamma_i}`. variance (np.array): Vector of variance predictions - :math:`\mathbb{V}_{f^*}[p(y_{HF}^*|f^*,z_{LF}^*,\mathcal{D}_{f})]` - for the HF model given the low-fidelity feature input. - Different HF predictions per row. Each row corresponds to one - multi-fidelity input vector in - :math:`\Omega_{y_{lf}\times\gamma_i}`. 
- - grad_mean (np.array): Gradient matrix for the mean prediction. - Different HF predictions per row, and gradient - vector entries per column - grad_variance (np.array): Gradient matrix for the mean prediction. - Different HF predictions per row, and gradient - vector entries per column + :math:`\mathbb{V}_{f^*}[p(y_{HF}^*|f^*,z_{LF}^*,\mathcal{D}_{f})]` + for the HF model given the low-fidelity feature input. + Different HF predictions per row. Each row corresponds to one + multi-fidelity input vector in + :math:`\Omega_{y_{lf}\times\gamma_i}`. + grad_mean (np.array): Gradient matrix for the mean prediction. + Different HF predictions per row, and gradient vector entries per column. + grad_variance (np.array): Gradient matrix for the variance prediction. + Different HF predictions per row, and gradient vector entries per column. """ # determine the number of time steps and check coordinate compliance num_coords, t_size = BmfiaInterface.check_coordinates_return_dimensions( diff --git a/src/queens/models/surrogates/jitted_gaussian_process.py b/src/queens/models/surrogates/jitted_gaussian_process.py index 84b5deb78..1cafff53d 100644 --- a/src/queens/models/surrogates/jitted_gaussian_process.py +++ b/src/queens/models/surrogates/jitted_gaussian_process.py @@ -336,11 +336,15 @@ def predict(self, x_test, support="f", gradient_bool=False): Args: x_test (np.array): Testing matrix for GP with row-wise (vector-valued) testing points support (str): Type of support for which the GP posterior is computed; If: - - 'f': Posterior w.r.t. the latent function f - - 'y': Latent function is marginalized such that posterior is defined - w.r.t. the output y (introduces extra variance) + + - 'f': + Posterior w.r.t. the latent function f + - 'y': + Latent function is marginalized such that posterior is defined w.r.t. 
the + output y (introduces extra variance) + gradient_bool (bool, optional): Boolean to configure whether gradients should be - returned as well + returned as well Returns: output (dict): Output dictionary containing the posterior of the GP diff --git a/src/queens/schedulers/_dask.py b/src/queens/schedulers/_dask.py index ab6ee5968..d993f15b3 100644 --- a/src/queens/schedulers/_dask.py +++ b/src/queens/schedulers/_dask.py @@ -28,8 +28,6 @@ _logger = logging.getLogger(__name__) -SHUTDOWN_CLIENTS = [] - class Dask(Scheduler): """Abstract base class for schedulers in QUEENS. @@ -83,21 +81,7 @@ def _start_cluster_and_connect_client(self): def start_cluster_and_connect_client(self): """Start a Dask cluster and a client that connects to it.""" if self.client is None or self.client.status == "closed": - client = self._start_cluster_and_connect_client() - self.register_shutdown(client) - self.client = client - - def register_shutdown(self, client): - """Register shutdown callback. - - The Dask client and cluster will be shut down when leaving the GlobalSettings context. - - Args: - client (Client): Dask client that is connected to and submits computations to a Dask - cluster. 
- """ - global SHUTDOWN_CLIENTS # pylint: disable=global-variable-not-assigned - SHUTDOWN_CLIENTS.append(client.shutdown) + self.client = self._start_cluster_and_connect_client() def evaluate( self, samples: Iterable, function: SchedulerCallableSignature, job_ids: Iterable = None @@ -171,6 +155,15 @@ def run_function(*args, **kwargs): def restart_worker(self, worker): """Restart a worker.""" - async def shutdown_client(self): + def shutdown_client(self): """Shutdown the DASK client.""" - await self.client.shutdown() + if self.client is not None: + try: + self.client.shutdown() + except AttributeError as e: + _logger.warning("AttributeError while shutting down Dask client: %s", e) + + def cleanup(self): + """Cleanup after QUEENS run.""" + self.shutdown_client() + self.delete_experiment_dir_if_empty(self.experiment_dir) diff --git a/src/queens/schedulers/_scheduler.py b/src/queens/schedulers/_scheduler.py index 54599005b..ffad6663f 100644 --- a/src/queens/schedulers/_scheduler.py +++ b/src/queens/schedulers/_scheduler.py @@ -29,6 +29,8 @@ _logger = logging.getLogger(__name__) +CLEANUP_SCHEDULERS = [] + class SchedulerCallableSignature(Protocol): """Signature for callables which can be used with QUEENS schedulers.""" @@ -81,6 +83,8 @@ def __init__(self, experiment_name, experiment_dir, num_jobs, verbose=True): self.next_job_id = 0 self.verbose = verbose + CLEANUP_SCHEDULERS.append(self.cleanup) + @abc.abstractmethod def evaluate( self, samples: Iterable, function: SchedulerCallableSignature, job_ids: Iterable = None @@ -179,3 +183,19 @@ def get_job_ids(self, num_samples): job_ids = self.next_job_id + np.arange(num_samples) self.next_job_id += num_samples return job_ids + + def cleanup(self): + """Cleanup after QUEENS run.""" + self.delete_experiment_dir_if_empty(self.experiment_dir) + + @staticmethod + def delete_experiment_dir_if_empty(experiment_dir): + """Delete the experiment directory if it is empty. 
+ + Args: + experiment_dir (Path): Path to the experiment directory. + """ + if experiment_dir.exists() and experiment_dir.is_dir(): + if not any(experiment_dir.iterdir()): + experiment_dir.rmdir() + _logger.debug("Deleted empty experiment directory '%s'.", experiment_dir) diff --git a/src/queens/schedulers/cluster.py b/src/queens/schedulers/cluster.py index aa6607373..b00cadb6a 100644 --- a/src/queens/schedulers/cluster.py +++ b/src/queens/schedulers/cluster.py @@ -17,6 +17,8 @@ import logging import time from datetime import timedelta +from pathlib import Path +from typing import Sequence from dask.distributed import Client from dask_jobqueue import PBSCluster, SLURMCluster @@ -87,6 +89,7 @@ def __init__( verbose=True, experiment_base_dir=None, overwrite_existing_experiment=False, + job_script_prologue=None, ): """Init method for the cluster scheduler. @@ -110,6 +113,8 @@ def __init__( experiment_base_dir (str, Path): Base directory for the simulation outputs overwrite_existing_experiment (bool): If True, overwrite experiment directory if it exists already. If False, prompt user for confirmation before overwriting. + job_script_prologue (list, opt): List of commands to be executed before starting a + worker. """ self.remote_connection = remote_connection self.remote_connection.open() @@ -124,6 +129,7 @@ def __init__( self.queue = queue self.cluster_internal_address = cluster_internal_address self.allowed_failures = allowed_failures + self.job_script_prologue = job_script_prologue # get the path of the experiment directory on remote host experiment_dir = self.remote_experiment_dir( @@ -169,24 +175,6 @@ def remote_experiment_dir( return experiment_dir - def local_experiment_dir( - self, experiment_name, experiment_base_dir, overwrite_existing_experiment - ): - """Get the local experiment directory. 
- - Args: - experiment_name (str): name of the current experiment - experiment_base_dir (str, Path): Base directory for the simulation outputs - overwrite_existing_experiment (bool): If true, continue and overwrite experiment - directory. If false, prompt user for confirmation before continuing and overwriting. - - Returns: - experiment_dir (Path): Path to local experiment directory. - """ - raise NotImplementedError( - "The Cluster scheduler should not use the local but the remote experiment directory." - ) - def _start_cluster_and_connect_client(self): """Start a Dask cluster and a client that connects to it. @@ -232,6 +220,7 @@ def _start_cluster_and_connect_client(self): "job_directives_skip": job_directives_skip, "job_extra_directives": [job_extra_directives], "worker_extra_args": ["--lifetime", worker_lifetime, "--lifetime-stagger", "2m"], + "job_script_prologue": self.job_script_prologue, # keep this hardcoded to 1, the number of threads for the mpi run is handled by # job_extra_directives. Note that the number of workers is not the number of # parallel simulations! @@ -294,5 +283,40 @@ def copy_files_to_experiment_dir(self, paths): paths (Path, list): Paths to files or directories that should be copied to experiment directory """ - destination = f"{self.experiment_dir}/" - self.remote_connection.copy_to_remote(paths, destination) + self.remote_connection.copy_to_remote(paths, self.experiment_dir) + + def copy_files_from_experiment_dir( + self, + destination: Path | None = None, + verbose: bool = True, + exclude: str | Sequence | None = None, + filters: str | None = None, + ): + """Copy files from remote experiment directory to the local machine. + + Args: + destination (Path): Path to the local directory where the files from the remote + experiment directory should be copied to. If None, the default base directory + `~/queens-experiments/` is used. 
+ verbose: True for verbose + exclude: Options to exclude + filters: Filters for rsync + """ + if destination is None: + # We use None as experiment_base_dir to get the default base directory, since we do not + # save it explicitly in the constructor but only use it to construct the location of + # the remote experiment_dir. That said, we cannot easily retrieve the used + # experiment_base_dir from self.experiment_dir here because the remote os might be + # different from the local os, resulting in a different path structure (i.e., different + # home directory names in case of experiment_base_dir=None). If the user wants to + # specify a custom destination, they can do so via the destination argument. + destination = self.local_experiment_dir( + self.experiment_name, None, overwrite_existing_experiment=True + ).parent + self.remote_connection.copy_from_remote( + self.experiment_dir, destination, verbose, exclude, filters + ) + + @staticmethod + def delete_experiment_dir_if_empty(_): + """The remote experiment directory will never be empty, so pass.""" diff --git a/src/queens/utils/jax_minimize_wrapper.py b/src/queens/utils/jax_minimize_wrapper.py index a0c75c4c9..e19f7622a 100644 --- a/src/queens/utils/jax_minimize_wrapper.py +++ b/src/queens/utils/jax_minimize_wrapper.py @@ -58,39 +58,43 @@ def minimize( x0: Initial guess represented as a JAX PyTree. args: Extra arguments passed to the objective function and its derivative. Must consist of valid JAX types; e.g. the leaves of the PyTree must be floats. - _The remainder of the keyword arguments are inherited from + The remainder of the keyword arguments are inherited from `scipy.optimize.minimize`, and their descriptions are copied here for - convenience._ - method: Type of solver. 
Should be one of - - 'Nelder-Mead' :ref:`(see here) ` - - 'Powell' :ref:`(see here) ` - - 'CG' :ref:`(see here) ` - - 'BFGS' :ref:`(see here) ` - - 'Newton-CG' :ref:`(see here) ` - - 'L-BFGS-B' :ref:`(see here) ` - - 'TNC' :ref:`(see here) ` - - 'COBYLA' :ref:`(see here) ` - - 'SLSQP' :ref:`(see here) ` - - 'trust-constr':ref:`(see here) ` - - 'dogleg' :ref:`(see here) ` - - 'trust-ncg' :ref:`(see here) ` - - 'trust-exact' :ref:`(see here) ` - - 'trust-krylov' :ref:`(see here) ` + convenience. + method: Type of solver. Should be one of + + - 'Nelder-Mead' + - 'Powell' + - 'CG' + - 'BFGS' + - 'Newton-CG' + - 'L-BFGS-B' + - 'TNC' + - 'COBYLA' + - 'SLSQP' + - 'trust-constr' + - 'dogleg' + - 'trust-ncg' + - 'trust-exact' + - 'trust-krylov' - custom - a callable object (added in version 0.14.0), see below for description. - If not given, chosen to be one of ``BFGS``, ``L-BFGS-B``, ``SLSQP``, - depending on if the problem has constraints or bounds. + + If not given, chosen to be one of ``BFGS``, ``L-BFGS-B``, ``SLSQP``, depending on if + the problem has constraints or bounds. bounds: Bounds on variables for L-BFGS-B, TNC, SLSQP, Powell, and trust-constr methods. There are two ways to specify the bounds: - 1. Instance of `Bounds` class. - 2. Sequence of ``(min, max)`` pairs for each element in `x`. + + 1. Instance of `Bounds` class. + 2. Sequence of ``(min, max)`` pairs for each element in `x`. + None is used to specify no bounds. Note that in order to use `bounds` you will need to manually flatten them in the same order as your inputs `x0`. constraints: Constraints definition (only for COBYLA, SLSQP and trust-constr). Constraints for 'trust-constr' are defined as a single object or a list of objects - specifying constraints to the optimization problem. - Constraints for COBYLA, SLSQP are defined as a list of dictionaries. - Each dictionary with fields: + specifying constraints to the optimization problem. 
Constraints for COBYLA, SLSQP are + defined as a list of dictionaries. Each dictionary with fields: + type : str Constraint type: 'eq' for equality, 'ineq' for inequality. fun : callable @@ -99,6 +103,7 @@ def minimize( The Jacobian of `fun` (only for SLSQP). args : sequence, optional Extra arguments to be passed to the function and Jacobian. + Equality constraint means that the constraint function result is to be zero whereas inequality means that it is to be non-negative. Note that COBYLA only supports inequality constraints. @@ -106,27 +111,31 @@ def minimize( same order as your inputs `x0`. tol: Tolerance for termination. For detailed control, use solver-specific options. options: A dictionary of solver options. All methods accept the following generic options: + maxiter : int Maximum number of iterations to perform. Depending on the method each iteration may use several function evaluations. disp : bool Set to True to print convergence messages. + For method-specific options, see :func:`show_options()`. callback: Called after each iteration. For 'trust-constr' it is a callable with the signature: ``callback(xk, OptimizeResult state) -> bool`` where ``xk`` is the current parameter vector represented as a PyTree, and ``state`` is an `OptimizeResult` object, with the same fields as the ones from the return. If callback returns True the algorithm execution is terminated. - For all the other methods, the signature is: ```callback(xk)``` where `xk` is the + For all the other methods, the signature is: ``callback(xk)`` where ``xk`` is the current parameter vector, represented as a PyTree. Returns: - The optimization result represented as a ``OptimizeResult`` object. + The optimization result represented as an OptimizeResult object. Important attributes are: - ``x``: the solution array, represented as a JAX PyTree - ``success``: a Boolean flag indicating if the optimizer exited successfully - ``message``: describes the cause of the termination. 
- See `scipy.optimize.OptimizeResult` for a description of other attributes. + + - ``x``: the solution array, represented as a JAX PyTree + - ``success``: a Boolean flag indicating if the optimizer exited successfully + - ``message``: describes the cause of the termination. + + See ``scipy.optimize.OptimizeResult`` for a description of other attributes. """ # Use tree flatten and unflatten to convert params x0 from PyTrees to flat arrays x0_flat, unravel = ravel_pytree(x0) diff --git a/src/queens/utils/remote_operations.py b/src/queens/utils/remote_operations.py index 8a0865b9e..5ccf3f8cf 100644 --- a/src/queens/utils/remote_operations.py +++ b/src/queens/utils/remote_operations.py @@ -248,6 +248,32 @@ def sync_remote_repository(self) -> None: _logger.info("Sync of remote repository was successful.") _logger.info("It took: %s s.\n", time.time() - start_time) + def _copy(self, rsync_args_dict: dict) -> None: + """Copy files or folders from a source to a destination. + + Args: + rsync_args_dict: Dictionary with the arguments for the rsync + command that takes care of the copy + """ + # retrieve the source and destination from the rsync_args_dict for + # convenience + source = rsync_args_dict["source"] + destination = rsync_args_dict["destination"] + if not is_empty(source): + _logger.debug("Copying from %s to %s", source, destination) + remote_shell_command = None + if self.gateway is not None: + remote_shell_command = f"ssh {self.gateway.user}@{self.gateway.host} ssh" + _logger.debug("Using remote shell command %s", remote_shell_command) + rsync_args_dict["rsh"] = remote_shell_command + rsync_cmd = assemble_rsync_command(**rsync_args_dict) + # Run rsync command + result = self.local(rsync_cmd, in_stream=False) + _logger.debug(result.stdout) + _logger.debug("Copying complete.") + else: + _logger.debug("List of source files was empty. 
Did not copy anything.") + def copy_to_remote( self, source: str | Path | Sequence, @@ -265,30 +291,46 @@ def copy_to_remote( exclude: Options to exclude filters: Filters for rsync """ - if not is_empty(source): - host = f"{self.user}@{self.host}" - _logger.debug("Copying from %s to %s", source, destination) - remote_shell_command = None - if self.gateway is not None: - remote_shell_command = f"ssh {self.gateway.user}@{self.gateway.host} ssh" - _logger.debug("Using remote shell command %s", remote_shell_command) - rsync_cmd = assemble_rsync_command( - source, - destination, - verbose=verbose, - archive=True, - exclude=exclude, - filters=filters, - rsh=remote_shell_command, - host=host, - rsync_options=["--out-format='%n'", "--checksum"], - ) - # Run rsync command - result = self.local(rsync_cmd, in_stream=False) - _logger.debug(result.stdout) - _logger.debug("Copying complete.") - else: - _logger.debug("List of source files was empty. Did not copy anything.") + rsync_args_dict = { + "source": source, + "destination": destination, + "verbose": verbose, + "archive": True, + "exclude": exclude, + "filters": filters, + "destination_host": f"{self.user}@{self.host}", + "rsync_options": ["--out-format='%n'", "--checksum"], + } + self._copy(rsync_args_dict) + + def copy_from_remote( + self, + source: str | Path | Sequence, + destination: Path | str, + verbose: bool = True, + exclude: str | Sequence | None = None, + filters: str | None = None, + ) -> None: + """Copy files or folders from remote to local machine. 
+ + Args: + source: Paths to copy from remote machine + destination: Destination on local machine + verbose: True for verbose + exclude: Options to exclude + filters: Filters for rsync + """ + rsync_args_dict = { + "source": source, # remote side + "destination": destination, # local side + "verbose": verbose, + "archive": True, + "exclude": exclude, + "filters": filters, + "source_host": f"{self.user}@{self.host}", + "rsync_options": ["--out-format='%n'", "--checksum"], + } + self._copy(rsync_args_dict) def build_remote_environment( self, diff --git a/src/queens/utils/rsync.py b/src/queens/utils/rsync.py index 44ebd18b5..8c7525584 100644 --- a/src/queens/utils/rsync.py +++ b/src/queens/utils/rsync.py @@ -32,7 +32,8 @@ def assemble_rsync_command( filters: str | None = None, verbose: bool = True, rsh: str | None = None, - host: str | None = None, + source_host: str | None = None, + destination_host: str | None = None, rsync_options: Sequence | None = None, ) -> str: """Assemble rsync command. 
@@ -45,7 +46,8 @@ def assemble_rsync_command( filters: Filters for rsync verbose: True for verbose rsh: Remote ssh command - host: Host to which to copy the files + source_host: Host from which to copy the files + destination_host: Host to which to copy the files rsync_options: Additional rsync options Returns: @@ -71,8 +73,10 @@ def listify(obj: Any) -> Any: options.extend(listify(rsync_options)) if rsh: options.append(f"--rsh='{rsh}'") - if host: - destination = f"{host}:{destination}" + if source_host: + source = [f"{source_host}:{file}" for file in listify(source)] + if destination_host: + destination = f"{destination_host}:{destination}" options_string = " ".join([str(option) for option in options]) source_string = " ".join([str(file) for file in listify(source)]) @@ -88,7 +92,8 @@ def rsync( filters: str | None = None, verbose: bool = True, rsh: str | None = None, - host: str | None = None, + source_host: str | None = None, + destination_host: str | None = None, rsync_options: Sequence | None = None, ) -> None: """Run rsync command. 
@@ -101,7 +106,8 @@ def rsync( filters: Filters for rsync verbose: True for verbose rsh: Remote ssh command - host: Host where to copy the files to + source_host: Host from which to copy the files + destination_host: Host to which to copy the files rsync_options: Additional rsync options """ if not is_empty(source): @@ -113,7 +119,8 @@ def rsync( filters=filters, verbose=verbose, rsh=rsh, - host=host, + source_host=source_host, + destination_host=destination_host, rsync_options=rsync_options, ) diff --git a/templates/jobscripts/fourc_charon.sh b/templates/jobscripts/fourc_charon.sh index 532023838..10ad09c0d 100644 --- a/templates/jobscripts/fourc_charon.sh +++ b/templates/jobscripts/fourc_charon.sh @@ -3,8 +3,6 @@ echo $HOME cd $HOME -source /home/cluster_tools/user/load_four_c_environment.sh - module list ########################################## # # diff --git a/tests/integration_tests/cluster/test_dask_cluster.py b/tests/integration_tests/cluster/test_dask_cluster.py index 87b20917a..1c1aaa7df 100644 --- a/tests/integration_tests/cluster/test_dask_cluster.py +++ b/tests/integration_tests/cluster/test_dask_cluster.py @@ -20,6 +20,7 @@ import os from dataclasses import asdict, dataclass from pathlib import Path +from typing import List import numpy as np import pytest @@ -116,11 +117,9 @@ def patch_experiments_directory(experiment_name, experiment_base_directory=None) return patch_experiments_directory @pytest.fixture(name="experiment_dir") - def fixture_experiment_dir(self, global_settings, remote_connection, mock_experiment_dir): + def fixture_experiment_dir(self, test_name, remote_connection, mock_experiment_dir): """Fixture providing the remote experiment directory.""" - experiment_dir, _ = remote_connection.run_function( - mock_experiment_dir, global_settings.experiment_name, None - ) + experiment_dir, _ = remote_connection.run_function(mock_experiment_dir, test_name, None) return experiment_dir @pytest.fixture(name="_create_experiment_dir") @@ -148,6 
+147,7 @@ def fixture_cluster_kwargs(self, cluster_settings, remote_connection, test_name) "cluster_internal_address": cluster_settings["cluster_internal_address"], "experiment_name": test_name, "queue": cluster_settings.get("queue"), + "job_script_prologue": cluster_settings.get("job_script_prologue"), } def test_new_experiment_dir(self, cluster_kwargs, remote_connection, experiment_dir): @@ -207,6 +207,44 @@ def test_y_prompt_input_for_existing_experiment_dir( mocker.patch("sys.stdin.readline", return_value=user_input) Cluster(**cluster_kwargs, overwrite_existing_experiment=False) + def test_deletion_of_experiment_dir_with_files( + self, global_settings, cluster_kwargs, remote_connection, experiment_dir + ): + """Test the deletion of an experiment directory containing files. + + The experiment directory should NOT be deleted when exiting the + global settings context. + """ + + def experiment_dir_exists_and_contents(experiment_dir): + """Return whether the experiment directory exists and its contents.""" + experiment_dir_exists = experiment_dir.exists() + if not experiment_dir_exists: + return experiment_dir_exists, [] + + experiment_dir_contents = list(experiment_dir.iterdir()) + return experiment_dir_exists, experiment_dir_contents + + with global_settings: + Cluster(**cluster_kwargs) + + # Check that remote experiment directory is not empty + experiment_dir_exists, experiment_dir_contents_before = remote_connection.run_function( + experiment_dir_exists_and_contents, experiment_dir + ) + assert experiment_dir_exists + assert any(experiment_dir_contents_before) + + # Check that remote experiment directory has not been changed + experiment_dir_exists, experiment_dir_contents_after = remote_connection.run_function( + experiment_dir_exists_and_contents, experiment_dir + ) + assert experiment_dir_exists + for file_before, file_after in zip( + experiment_dir_contents_before, experiment_dir_contents_after, strict=True + ): + assert file_before == file_after + def
test_fourc_mc_cluster( self, third_party_inputs, @@ -216,6 +254,7 @@ def test_fourc_mc_cluster( fourc_cluster_path, fourc_example_expected_output, global_settings, + tmp_path, ): """Test remote 4C simulations with DASK jobqueue and MC iterator. @@ -234,6 +273,7 @@ def test_fourc_mc_cluster( fourc_cluster_path (Path): paths to 4C executable on the cluster fourc_example_expected_output (np.ndarray): Expected output for the MC samples global_settings (GlobalSettings): object containing experiment name and tmp_path + tmp_path (Path): Temporary path for storing remote data locally """ fourc_input_file_template = third_party_inputs / "fourc" / "solid_runtime_hex8.4C.yaml" @@ -274,7 +314,12 @@ def test_fourc_mc_cluster( # Load results results = load_result(global_settings.result_file(".pickle")) - # The data has to be deleted before the assertion + # Copy the data from the remote location to a temporary local directory + # before it is deleted on the remote cluster + local_data_path = Path(tmp_path) / "remote_computation_data" + scheduler.copy_files_from_experiment_dir(local_data_path) + + # The remote data has to be deleted before the assertion self.delete_simulation_data(remote_connection) # assert statements @@ -282,6 +327,21 @@ def test_fourc_mc_cluster( results["raw_output_data"]["result"], fourc_example_expected_output, decimal=6 ) + # Now we test whether copying of the remote data worked correctly + + local_experiment_path = local_data_path / global_settings.experiment_name + local_data = np.zeros_like(fourc_example_expected_output) + for i in range(2): + # 1) we make sure that the result files are contained in the local copy + output_path = local_experiment_path / str(i) / "output" + assert (output_path / "output-structure.pvd").exists() + + # 2) and use a data processor to extract the data from the local copy of the remote data + local_data[i] = data_processor(output_path) + + # The extracted local data should match the expected output + 
np.testing.assert_array_almost_equal(local_data, fourc_example_expected_output, decimal=6) + def delete_simulation_data(self, remote_connection): """Delete simulation data on the cluster. @@ -323,6 +383,7 @@ class ClusterConfig: default_python_path: str cluster_script_path: Path queue: str | None = None + job_script_prologue: List[str] | None = None dict = asdict @@ -356,6 +417,7 @@ class ClusterConfig: cluster_internal_address="192.168.2.253", default_python_path="$HOME/miniconda3/envs/queens/bin/python", cluster_script_path=Path(), + job_script_prologue=["source /home/cluster_tools/user/load_four_c_environment.sh"], ) CLUSTER_CONFIGS = { diff --git a/tests/unit_tests/schedulers/test_experiment_dir.py b/tests/unit_tests/schedulers/test_experiment_dir.py index 9076e8c72..554ad0b23 100644 --- a/tests/unit_tests/schedulers/test_experiment_dir.py +++ b/tests/unit_tests/schedulers/test_experiment_dir.py @@ -115,3 +115,45 @@ def fixture_create_experiment_dir(experiment_dir): """Create the experiment directory.""" os.mkdir(experiment_dir) assert experiment_dir.exists() + + +@pytest.mark.parametrize("scheduler_class", [Local, Pool]) +def test_empty_experiment_dir_is_deleted( + global_settings, tmp_path, test_name, experiment_dir, scheduler_class +): + """Test that an empty experiment directory is deleted. + + This should happen when exiting the global settings context. + """ + with global_settings: + scheduler_class( + experiment_name=test_name, + experiment_base_dir=tmp_path, + ) + assert experiment_dir.exists() + assert not any(experiment_dir.iterdir()) + + assert not experiment_dir.exists() + + +@pytest.mark.parametrize("scheduler_class", [Local, Pool]) +def test_experiment_dir_with_files_is_not_deleted( + global_settings, tmp_path, test_name, experiment_dir, scheduler_class +): + """Test that an experiment directory containing files is not deleted. + + Such an experiment directory should NOT be deleted when exiting the + global settings context. 
+ """ + with global_settings: + scheduler_class( + experiment_name=test_name, + experiment_base_dir=tmp_path, + ) + assert experiment_dir.exists() + test_file = experiment_dir / "test_file.txt" + test_file.write_text("test content") + assert test_file.exists() + + assert experiment_dir.exists() + assert test_file.exists()