Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PwBaseWorkChain: improve restart from parent_folder #722

Merged
merged 12 commits into from
Sep 21, 2021
34 changes: 34 additions & 0 deletions aiida_quantumespresso/calculations/pw.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""`CalcJob` implementation for the pw.x code of Quantum ESPRESSO."""
import os
import warnings

from aiida import orm
from aiida.common.lang import classproperty
Expand Down Expand Up @@ -69,6 +70,8 @@ def define(cls, spec):
help='kpoint mesh or kpoint path')
spec.input('hubbard_file', valid_type=orm.SinglefileData, required=False,
help='SinglefileData node containing the output Hubbard parameters from a HpCalculation')
spec.inputs.validator = cls.validate_inputs

spec.output('output_parameters', valid_type=orm.Dict,
help='The `output_parameters` output node of the successful calculation.')
spec.output('output_structure', valid_type=orm.StructureData, required=False,
Expand Down Expand Up @@ -152,6 +155,37 @@ def define(cls, spec):
'is `False` and/or `electron_maxstep` is 0.')
# yapf: enable

@staticmethod
def validate_inputs(value, _):
    """Validate the top level namespace.

    Check that the restart-related input parameters are consistent with each other:

    1. For ``nscf`` and ``bands`` calculations a ``parent_folder`` has to be provided and ``startingpot`` has to
       be ``'file'`` (an unset ``startingpot`` is accepted); violating either is a validation error. A
       ``restart_mode`` other than ``'from_scratch'`` only triggers a warning.
    2. For any other calculation type (``scf`` is assumed when ``CONTROL.calculation`` is not set), providing a
       ``parent_folder`` without ``restart_mode = 'restart'``, ``startingpot = 'file'`` or
       ``startingwfc = 'file'`` triggers a warning, since nothing would be read from that folder.

    :param value: mapping with the inputs that were passed to the top level namespace.
    :param _: second argument passed to the validator; not used here.
    :return: a string with the error message if the inputs are invalid, ``None`` otherwise.
    """
    # Without `parameters` there is nothing to cross-check here. Return instead of raising a ``KeyError``, so the
    # validator always reports through its return value.
    if 'parameters' not in value:
        return None

    parameters = value['parameters'].get_dict()
    control = parameters.get('CONTROL', {})
    electrons = parameters.get('ELECTRONS', {})
    calculation_type = control.get('calculation', 'scf')

    if calculation_type in ('nscf', 'bands'):
        if 'parent_folder' not in value:
            return f'`parent_folder` not provided for `{calculation_type}` calculation.'
        if electrons.get('startingpot', 'file') != 'file':
            return f'`startingpot` should be set to `file` for a `{calculation_type}` calculation.'
        if control.get('restart_mode', 'from_scratch') != 'from_scratch':
            warnings.warn(f'`restart_mode` should be set to `from_scratch` for a `{calculation_type}` calculation.')
        return None

    if 'parent_folder' in value:
        uses_parent_folder = (
            control.get('restart_mode') == 'restart' or electrons.get('startingpot') == 'file' or
            electrons.get('startingwfc') == 'file'
        )
        if not uses_parent_folder:
            warnings.warn(
                '`parent_folder` input was provided for the `PwCalculation`, but no '
                'input parameters are set to restart from these files.'
            )

    return None

@classproperty
def filename_input_hubbard_parameters(cls):
"""Return the relative file name of the file containing the Hubbard parameters.
Expand Down
9 changes: 9 additions & 0 deletions aiida_quantumespresso/common/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,12 @@ class SpinType(enum.Enum):
COLLINEAR = 'collinear'
NON_COLLINEAR = 'non_collinear'
SPIN_ORBIT = 'spin_orbit'


class RestartType(enum.Enum):
    """Enumeration of ways to restart a calculation in Quantum ESPRESSO."""

    # The notes below describe how `PwBaseWorkChain.set_restart_type` translates each member into inputs.

    # `restart_mode = 'restart'`; `startingpot`/`startingwfc` are removed and the `parent_folder` is set.
    FULL = 'full'
    # `restart_mode = 'from_scratch'`; `startingpot`/`startingwfc` and the `parent_folder` input are removed.
    FROM_SCRATCH = 'from_scratch'
    # `restart_mode = 'from_scratch'` with `startingpot = 'file'`; `startingwfc` is removed, `parent_folder` is set.
    FROM_CHARGE_DENSITY = 'from_charge_density'
    # `restart_mode = 'from_scratch'` with `startingwfc = 'file'`; `startingpot` is removed, `parent_folder` is set.
    FROM_WAVE_FUNCTIONS = 'from_wave_functions'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi guys, I just wanted to point out that it could also be useful to have FROM_FILES (or something like that), so that one can restart from both the charge density and the wave functions. I know that 'full' is meant to do so, but it will also read the atomic positions.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My first inclination would be to say that, when restarting from the wave functions, it should be pretty fast for QE to recalculate the charge density. But if I remember correctly, QE doesn't actually do this; instead it just plugs in the wave functions, but with the potential calculated from the atomic charge density, correct?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it should be like this. Two possible use cases would be:

  • electric enthalpy routine: it uses the wave functions to build the polarization operator (essentially you start with a slightly different Hamiltonian)
  • maybe in some restart when vc-relaxing?

But I do agree that in the end it does not make much of a difference. I was just wondering, since the new implementation is so cool (great job!!! :D ), whether it would be quite easy to add that one more, just in case one needs it.
Or do you think that the "experienced" user can still always tweak the inputs if that is the case?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the clarification, @bastonero! Note that the restart types are only used for setting the restarts after an error has been handled using the set_restart_type method. So if a user wants to restart from a previous calculation, the correct inputs have to be provided, not the restart type.

Happy to implement another restart type (not sure about the name, FROM_FILES seems a bit too general, maybe FROM_CHARGE_AND_WFC?). Do you think there is already a current error handler where this restart type would be used though?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah ok, I see, thanks! For the moment, actually, I still don't have an example of handler where it would be useful. FROM_CHARGE_AND_WFC sounds good anyway.

119 changes: 73 additions & 46 deletions aiida_quantumespresso/workflows/pw/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from aiida.plugins import CalculationFactory, GroupFactory

from aiida_quantumespresso.calculations.functions.create_kpoints_from_distance import create_kpoints_from_distance
from aiida_quantumespresso.common.types import ElectronicType, SpinType
from aiida_quantumespresso.common.types import ElectronicType, SpinType, RestartType
from aiida_quantumespresso.utils.defaults.calculation import pw as qe_defaults
from aiida_quantumespresso.utils.mapping import update_mapping, prepare_process_inputs
from aiida_quantumespresso.utils.pseudopotential import validate_and_prepare_pseudos_inputs
Expand Down Expand Up @@ -73,7 +73,6 @@ def define(cls, spec):

spec.outline(
cls.setup,
cls.validate_parameters,
cls.validate_kpoints,
cls.validate_pseudos,
if_(cls.should_run_init)(
Expand Down Expand Up @@ -230,30 +229,25 @@ def get_builder_from_protocol(
return builder

def setup(self):
"""Call the `setup` of the `BaseRestartWorkChain` and then create the inputs dictionary in `self.ctx.inputs`.
"""Call the ``setup`` of the ``BaseRestartWorkChain`` and create the inputs dictionary in ``self.ctx.inputs``.

This `self.ctx.inputs` dictionary will be used by the `BaseRestartWorkChain` to submit the calculations in the
internal loop.
This ``self.ctx.inputs`` dictionary will be used by the ``BaseRestartWorkChain`` to submit the calculations
in the internal loop.

The ``parameters`` and ``settings`` input ``Dict`` nodes are converted into a regular dictionary and the
default namelists for the ``parameters`` are set to empty dictionaries if not specified.
"""
super().setup()
self.ctx.restart_calc = None
mbercx marked this conversation as resolved.
Show resolved Hide resolved
self.ctx.inputs = AttributeDict(self.exposed_inputs(PwCalculation, 'pw'))

def validate_parameters(self):
"""Validate inputs that might depend on each other and cannot be validated by the spec.

Also define dictionary `inputs` in the context, that will contain the inputs for the calculation that will be
launched in the `run_calculation` step.
"""
self.ctx.inputs.parameters = self.ctx.inputs.parameters.get_dict()
self.ctx.inputs.settings = self.ctx.inputs.settings.get_dict() if 'settings' in self.ctx.inputs else {}

if 'parent_folder' in self.ctx.inputs:
self.ctx.restart_calc = self.ctx.inputs.parent_folder.creator

mbercx marked this conversation as resolved.
Show resolved Hide resolved
self.ctx.inputs.parameters.setdefault('CONTROL', {})
self.ctx.inputs.parameters.setdefault('ELECTRONS', {})
self.ctx.inputs.parameters.setdefault('SYSTEM', {})
self.ctx.inputs.parameters['CONTROL'].setdefault('calculation', 'scf')

self.ctx.inputs.settings = self.ctx.inputs.settings.get_dict() if 'settings' in self.ctx.inputs else {}

def validate_kpoints(self):
"""Validate the inputs related to k-points.

Expand Down Expand Up @@ -305,6 +299,36 @@ def set_max_seconds(self, max_wallclock_seconds):
max_seconds = max_wallclock_seconds * max_seconds_factor
self.ctx.inputs.parameters['CONTROL']['max_seconds'] = max_seconds

def set_restart_type(self, restart_type, parent_folder=None):
    """Set the restart type for the next iteration.

    The inputs in ``self.ctx.inputs`` are updated in place: ``CONTROL.restart_mode`` is always set, while
    ``ELECTRONS.startingpot`` and ``ELECTRONS.startingwfc`` are either set to ``'file'`` or removed, so that no
    setting of a previously selected restart type is left behind. The ``parent_folder`` input is set to the
    provided folder, or removed when restarting from scratch.

    :param restart_type: a ``RestartType`` member, or the string value of one (e.g. ``'full'``).
    :param parent_folder: the node to set as the ``parent_folder`` input; callers in this module pass the
        ``remote_folder`` output of the previous calculation. Required unless restarting from scratch.
    :raises ValueError: if ``restart_type`` is not a valid ``RestartType``, or if no ``parent_folder`` is provided
        for a restart type other than ``RestartType.FROM_SCRATCH``.
    """
    # Normalise first: an unknown restart type used to fall through every branch and silently leave the inputs
    # untouched. ``RestartType(...)`` returns members unchanged, maps a valid string value onto its member and
    # raises ``ValueError`` for anything else.
    restart_type = RestartType(restart_type)

    if parent_folder is None and restart_type is not RestartType.FROM_SCRATCH:
        raise ValueError('When not restarting from scratch, a `parent_folder` must be provided.')

    # Per restart type: (`restart_mode`, `startingpot`, `startingwfc`), where `None` means "remove the keyword".
    restart_mode, startingpot, startingwfc = {
        RestartType.FROM_SCRATCH: ('from_scratch', None, None),
        RestartType.FULL: ('restart', None, None),
        RestartType.FROM_CHARGE_DENSITY: ('from_scratch', 'file', None),
        RestartType.FROM_WAVE_FUNCTIONS: ('from_scratch', None, 'file'),
    }[restart_type]

    self.ctx.inputs.parameters['CONTROL']['restart_mode'] = restart_mode

    electrons = self.ctx.inputs.parameters['ELECTRONS']
    for keyword, setting in (('startingpot', startingpot), ('startingwfc', startingwfc)):
        if setting is None:
            electrons.pop(keyword, None)
        else:
            electrons[keyword] = setting

    if restart_type is RestartType.FROM_SCRATCH:
        self.ctx.inputs.pop('parent_folder', None)
    else:
        self.ctx.inputs.parent_folder = parent_folder

def should_run_init(self):
"""Return whether an initialization calculation should be run.

Expand Down Expand Up @@ -407,24 +431,12 @@ def inspect_init(self):
return

def prepare_process(self):
"""Prepare the inputs for the next calculation.

If a `restart_calc` has been set in the context, its `remote_folder` will be used as the `parent_folder` input
for the next calculation and the `restart_mode` is set to `restart`. Otherwise, no `parent_folder` is used and
`restart_mode` is set to `from_scratch`.
"""
"""Prepare the inputs for the next calculation."""
max_wallclock_seconds = self.ctx.inputs.metadata.options.get('max_wallclock_seconds', None)

if max_wallclock_seconds is not None and 'max_seconds' not in self.ctx.inputs.parameters['CONTROL']:
self.set_max_seconds(max_wallclock_seconds)

if self.ctx.restart_calc:
self.ctx.inputs.parameters['CONTROL']['restart_mode'] = 'restart'
self.ctx.inputs.parent_folder = self.ctx.restart_calc.outputs.remote_folder
else:
self.ctx.inputs.parameters['CONTROL']['restart_mode'] = 'from_scratch'
self.ctx.inputs.pop('parent_folder', None)
mbercx marked this conversation as resolved.
Show resolved Hide resolved

def report_error_handled(self, calculation, action):
"""Report an action taken for a calculation that has failed.

Expand All @@ -443,7 +455,8 @@ def sanity_check_insufficient_bands(self, calculation):

Verify that the occupation of the last band is below a certain threshold, unless `occupations` was explicitly
set to `fixed` in the input parameters. If this is violated, the calculation used too few bands and cannot be
trusted. The number of bands is increased and the calculation is restarted, starting from the last.
trusted. The number of bands is increased and the calculation is restarted, using the charge density from the
previous calculation.
"""
from aiida_quantumespresso.utils.bands import get_highest_occupied_band

Expand Down Expand Up @@ -476,10 +489,14 @@ def sanity_check_insufficient_bands(self, calculation):

nbnd_cur = calculation.outputs.output_parameters.get_dict()['number_of_bands']
nbnd_new = nbnd_cur + max(int(nbnd_cur * self.defaults.delta_factor_nbnd), self.defaults.delta_minimum_nbnd)
self.ctx.inputs.parameters['SYSTEM']['nbnd'] = nbnd_new

self.ctx.inputs.parameters.setdefault('SYSTEM', {})['nbnd'] = nbnd_new
self.set_restart_type(RestartType.FROM_CHARGE_DENSITY, calculation.outputs.remote_folder)
self.report(
f'Action taken: increased number of bands to {nbnd_new} and restarting from the previous charge '
'density.'
)

self.report(f'Action taken: increased number of bands to {nbnd_new} and restarting from scratch')
return ProcessHandlerReport(True)

@process_handler(priority=600)
Expand All @@ -504,14 +521,20 @@ def handle_known_unrecoverable_failure(self, calculation):
PwCalculation.exit_codes.ERROR_OUT_OF_WALLTIME,
])
def handle_out_of_walltime(self, calculation):
"""Handle `ERROR_OUT_OF_WALLTIME` exit code: calculation shut down neatly and we can simply restart."""
"""Handle `ERROR_OUT_OF_WALLTIME` exit code.

In this case the calculation shut down neatly and we can simply restart. We consider two cases:

1. If the structure is unchanged, we do a full restart.
2. If the structure has changed during the calculation, we restart from scratch.
mbercx marked this conversation as resolved.
Show resolved Hide resolved
"""
try:
self.ctx.inputs.structure = calculation.outputs.output_structure
except exceptions.NotExistent:
self.ctx.restart_calc = calculation
self.set_restart_type(RestartType.FULL, calculation.outputs.remote_folder)
self.report_error_handled(calculation, 'simply restart from the last calculation')
else:
self.ctx.restart_calc = None
self.set_restart_type(RestartType.FROM_SCRATCH)
self.report_error_handled(calculation, 'out of walltime: structure changed so restarting from scratch')

return ProcessHandlerReport(True)
Expand All @@ -527,7 +550,6 @@ def handle_vcrelax_converged_except_final_scf(self, calculation):
Convergence reached in `vc-relax` except thresholds exceeded in final scf: consider as converged.
"""
self.ctx.is_finished = True
self.ctx.restart_calc = calculation
action = 'ionic convergence thresholds met except in final scf: consider structure relaxed.'
self.report_error_handled(calculation, action)
self.results() # Call the results method to attach the output nodes
Expand All @@ -548,9 +570,10 @@ def handle_relax_recoverable_ionic_convergence_error(self, calculation):
These exit codes signify that the ionic convergence thresholds were not met, but the output structure is usable,
so the solution is to simply restart from scratch but from the output structure.
"""
self.ctx.restart_calc = None
self.ctx.inputs.structure = calculation.outputs.output_structure
action = 'no ionic convergence but clean shutdown: restarting from scratch but using output structure.'

self.set_restart_type(RestartType.FROM_SCRATCH)
self.report_error_handled(calculation, action)
return ProcessHandlerReport(True)

Expand All @@ -565,33 +588,38 @@ def handle_relax_recoverable_electronic_convergence_error(self, calculation):
"""Handle various exit codes for recoverable `relax` calculations with failed electronic convergence.

These exit codes signify that the electronic convergence thresholds were not met, but the output structure is
usable, so the solution is to simply restart from scratch but from the output structure.
usable, so the solution is to simply restart from scratch but from the output structure and with a reduced
``mixing_beta``.
"""
factor = self.defaults.delta_factor_mixing_beta
mixing_beta = self.ctx.inputs.parameters.get('ELECTRONS', {}).get('mixing_beta', self.defaults.qe.mixing_beta)
mixing_beta_new = mixing_beta * factor

self.ctx.restart_calc = None
self.ctx.inputs.parameters.setdefault('ELECTRONS', {})['mixing_beta'] = mixing_beta_new
self.ctx.inputs.parameters['ELECTRONS']['mixing_beta'] = mixing_beta_new
self.ctx.inputs.structure = calculation.outputs.output_structure
action = 'no electronic convergence but clean shutdown: reduced beta mixing from {} to {} restarting from ' \
'scratch but using output structure.'.format(mixing_beta, mixing_beta_new)

self.set_restart_type(RestartType.FROM_SCRATCH)
self.report_error_handled(calculation, action)
return ProcessHandlerReport(True)

@process_handler(priority=410, exit_codes=[
PwCalculation.exit_codes.ERROR_ELECTRONIC_CONVERGENCE_NOT_REACHED,
])
def handle_electronic_convergence_not_achieved(self, calculation):
"""Handle `ERROR_ELECTRONIC_CONVERGENCE_NOT_REACHED`: decrease the mixing beta and restart from scratch."""
"""Handle `ERROR_ELECTRONIC_CONVERGENCE_NOT_REACHED` error.

Decrease the mixing beta and fully restart from the previous calculation.
"""
factor = self.defaults.delta_factor_mixing_beta
mixing_beta = self.ctx.inputs.parameters.get('ELECTRONS', {}).get('mixing_beta', self.defaults.qe.mixing_beta)
mixing_beta_new = mixing_beta * factor

self.ctx.restart_calc = None
self.ctx.inputs.parameters.setdefault('ELECTRONS', {})['mixing_beta'] = mixing_beta_new

self.ctx.inputs.parameters['ELECTRONS']['mixing_beta'] = mixing_beta_new
action = f'reduced beta mixing from {mixing_beta} to {mixing_beta_new} and restarting from the last calculation'

self.set_restart_type(RestartType.FULL, calculation.outputs.remote_folder)
self.report_error_handled(calculation, action)
return ProcessHandlerReport(True)

Expand All @@ -601,7 +629,6 @@ def handle_electronic_convergence_not_achieved(self, calculation):
def handle_electronic_convergence_warning(self, calculation):
"""Handle `WARNING_ELECTRONIC_CONVERGENCE_NOT_REACHED': consider finished."""
self.ctx.is_finished = True
self.ctx.restart_calc = calculation
action = 'electronic convergence not reached but inputs say this is ok: consider finished.'
self.report_error_handled(calculation, action)
self.results() # Call the results method to attach the output nodes
Expand Down
Loading