Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Core] restart save process #2283

Merged
merged 8 commits into from
Jun 8, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 43 additions & 32 deletions kratos/python_scripts/restart_utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ def __init__(self, model_part, settings):
raise Exception(err_msg)
self.serializer_flag = __serializer_flags[serializer_trace]

self.next_output = 0.0

self.echo_level = settings["echo_level"].GetInt()

# load settings
Expand All @@ -66,11 +64,14 @@ def __init__(self, model_part, settings):
self.restart_control_type_is_time = True
elif restart_control_type == "step":
self.restart_control_type_is_time = False
self.restart_save_frequency = int(self.restart_save_frequency) # STEP is an integer
else:
err_msg = 'The requested restart_control_type "' + restart_control_type + '" is not available!\n'
err_msg += 'Available options are: "time", "step"'
raise Exception(err_msg)

self.next_output = self.restart_save_frequency # Schedule the first output to avoid printing in first step

self.save_restart_files_in_folder = settings["save_restart_files_in_folder"].GetBool()

#### Public functions ####
Expand Down Expand Up @@ -107,31 +108,40 @@ def LoadRestart(self, restart_file_name=""):
def SaveRestart(self):
"""
This function saves the restart file. It should be called at the end of a time-step.
Whether a restart file is being written or not is decided internally
Use "IsRestartOutputStep" to check if a restart file should be written in this time-step
"""
if self.__IsRestartOutputStep():
if self.save_restart_files_in_folder:
folder_path = self.__GetFolderPathSave()
if not os.path.isdir(folder_path):
os.makedirs(folder_path)

if self.restart_control_type_is_time:
control_label = self.model_part.ProcessInfo[KratosMultiphysics.TIME]
else:
control_label = self.model_part.ProcessInfo[KratosMultiphysics.STEP]

file_name = self.__GetFileNameSave(control_label)

# Save the ModelPart
serializer = KratosMultiphysics.Serializer(file_name, self.serializer_flag)
serializer.Save(self.model_part.Name, self.model_part)
if self.echo_level > 0:
self._PrintOnRankZero("::[Restart Utility]::", "Saved restart file", file_name + ".rest")

# Schedule next output
if self.restart_save_frequency > 0.0: # Note: if == 0, we'll just always print
while self.next_output <= control_label:
self.next_output += self.restart_save_frequency
if self.save_restart_files_in_folder:
folder_path = self.__GetFolderPathSave()
if not os.path.isdir(folder_path):
os.makedirs(folder_path)

if self.restart_control_type_is_time:
time = self.model_part.ProcessInfo[KratosMultiphysics.TIME]
control_label = self.__GetPrettyTime(time)
else:
control_label = self.model_part.ProcessInfo[KratosMultiphysics.STEP]

file_name = self.__GetFileNameSave(control_label)

# Save the ModelPart
serializer = KratosMultiphysics.Serializer(file_name, self.serializer_flag)
serializer.Save(self.model_part.Name, self.model_part)
if self.echo_level > 0:
self._PrintOnRankZero("::[Restart Utility]::", "Saved restart file", file_name + ".rest")

# Schedule next output
if self.restart_save_frequency > 0.0: # Note: if == 0, we'll just always print
while self.next_output <= control_label:
self.next_output += self.restart_save_frequency

def IsRestartOutputStep(self):
    """
    Tell the caller whether a restart file is due in the current time-step.

    For the "time" control type the current TIME has to be strictly larger
    than the scheduled output; for the "step" control type the current STEP
    has to be larger than or equal to it.
    """
    process_info = self.model_part.ProcessInfo
    if not self.restart_control_type_is_time:
        return (process_info[KratosMultiphysics.STEP] >= self.next_output)
    return (process_info[KratosMultiphysics.TIME] > self.next_output)

#### Protected functions ####

Expand All @@ -151,12 +161,6 @@ def _PrintOnRankZero(self, *args):

#### Private functions ####

def __IsRestartOutputStep(self):
    # Private check used by SaveRestart: is the scheduled output reached?
    # TIME is compared strictly (>), STEP inclusively (>=).
    current = self.model_part.ProcessInfo
    return (
        (current[KratosMultiphysics.TIME] > self.next_output)
        if self.restart_control_type_is_time
        else (current[KratosMultiphysics.STEP] >= self.next_output)
    )

def __GetFolderPathLoad(self):
if self.load_restart_files_from_folder:
return os.path.join(self.raw_path, self.folder_name)
Expand All @@ -178,3 +182,10 @@ def __GetFileNameSave(self, file_label):
restart_file_name = self.raw_file_name + '_' + self._GetFileLabelSave(file_label)

return os.path.join(self.__GetFolderPathSave(), restart_file_name)

def __GetPrettyTime(self, time):
    """Return "time" rounded to 12 significant digits, as a float.

    The value is formatted with the general format ("g") and parsed back,
    which drops floating-point noise beyond the 12th significant digit.
    """
    return float("{0:.12g}".format(time))
75 changes: 75 additions & 0 deletions kratos/python_scripts/save_restart_process.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from __future__ import print_function, absolute_import, division #makes KratosMultiphysics backward compatible with python 2.6 and 2.7
# Importing the Kratos Library
import KratosMultiphysics


def Factory(settings, Model):
    """Create a SaveRestartProcess from the given settings.

    settings -- KratosMultiphysics.Parameters object; its "Parameters" entry
                is forwarded to the process
    Model    -- forwarded unchanged to the process as its model

    Raises an Exception if "settings" is not a Parameters object.
    """
    # isinstance (instead of an exact type comparison) also accepts
    # subclasses of Parameters
    if not isinstance(settings, KratosMultiphysics.Parameters):
        raise Exception("Expected input shall be a Parameters object, encapsulating a json string")
    return SaveRestartProcess(Model, settings["Parameters"])


class SaveRestartProcess(KratosMultiphysics.Process):
    """Process that writes restart files by delegating to a restart utility."""

    def __init__(self, model, params):
        """This process saves restart files
        It works both in OpenMP and MPI
        see the "default_settings" for available options

        model  -- container from which the model part is retrieved by name
        params -- Parameters object with the user settings

        Raises an Exception if no "model_part_name" is given.
        """
        # The base class has to be initialized explicitly since __init__ is overridden
        KratosMultiphysics.Process.__init__(self)

        ## Settings string in json format
        default_settings = KratosMultiphysics.Parameters("""{
            "model_part_name"              : "",
            "echo_level"                   : 0,
            "serializer_trace"             : "no_trace",
            "restart_save_frequency"       : 0.0,
            "restart_control_type"         : "time",
            "save_restart_files_in_folder" : true
        }""")

        ## Overwrite the default settings with user-provided parameters
        params.ValidateAndAssignDefaults(default_settings)
        self.params = params
        self.model = model

        if self.params["model_part_name"].GetString() == "":
            raise Exception('No "model_part_name" was specified!')

    def ExecuteInitialize(self):
        """Retrieve the model part and construct the matching restart utility.

        The Trilinos-based utility is used when the communicator of the model
        part reports more than one process, the serial one otherwise.
        """
        model_part = self.model[self.params["model_part_name"].GetString()]

        is_mpi_execution = (model_part.GetCommunicator().TotalProcesses() > 1)

        if is_mpi_execution:
            import KratosMultiphysics.TrilinosApplication
            from trilinos_restart_utility import TrilinosRestartUtility as Restart
        else:
            from restart_utility import RestartUtility as Restart

        # The settings are handed to the utility with the model part name stored
        # under "input_filename". Note that this modifies the stored Parameters
        # object in place.
        self.params.AddValue("input_filename", self.params["model_part_name"])
        self.params.RemoveValue("model_part_name")

        self.restart_utility = Restart(model_part, self.params)

    def ExecuteBeforeSolutionLoop(self):
        """Nothing to do before the solution loop."""
        pass

    def ExecuteInitializeSolutionStep(self):
        """Nothing to do at the beginning of a solution step."""
        pass

    def ExecuteFinalizeSolutionStep(self):
        """Nothing to do at the end of a solution step."""
        pass

    def ExecuteBeforeOutputStep(self):
        """Nothing to do before the output step."""
        pass

    def IsOutputStep(self):
        """Return whether the restart utility reports an output step."""
        return self.restart_utility.IsRestartOutputStep()

    def PrintOutput(self):
        """Save a restart file through the restart utility."""
        self.restart_utility.SaveRestart()

    def ExecuteAfterOutputStep(self):
        """Nothing to do after the output step."""
        pass

    def ExecuteFinalize(self):
        """Nothing to do at finalization."""
        pass
Copy link
Member

@jcotela jcotela Jun 7, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor suggestion (maybe for a future PR): When I have had to use restart, it was often because I was running on a cluster with some limit on the run-time of the cases. In such situations, I would just estimate how many time steps I could run in a single batch, then launch a job for that and ensure that the model part was serialized at the end of the job. I think it would make sense for the process to allow this, which could be achieved by having an extra save on ExecuteFinalize (maybe only if explicitly requested via json?).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds fine to me; we can talk about this tomorrow.

82 changes: 72 additions & 10 deletions kratos/tests/test_restart.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import KratosMultiphysics.kratos_utilities as kratos_utils
import restart_utility
import save_restart_process as save_rest_proc

import os
import sys
Expand All @@ -21,6 +22,9 @@ def ReadModelPart(file_path):
model_part_io.ReadModelPart(model_part)
return model_part

def IsRestartFile(file_name):
    # A restart file is an existing regular file carrying the ".rest" extension
    if not os.path.isfile(file_name):
        return False
    return file_name.endswith('.rest')

class TestRestart(KratosUnittest.TestCase):

def setUp(self):
Expand All @@ -29,7 +33,8 @@ def setUp(self):

def tearDown(self):
kratos_utils.DeleteFileIfExisting("test_restart_file.rest")
kratos_utils.DeleteFileIfExisting("test_restart_file_5.3.rest")
kratos_utils.DeleteFileIfExisting("test_restart_file_15.0.rest")
kratos_utils.DeleteDirectoryIfExisting("MainRestart__restart_files")

def _check_modelpart(self, model_part):
self.assertEqual(model_part.NumberOfSubModelParts(), 2)
Expand Down Expand Up @@ -161,28 +166,35 @@ def __execute_restart_test(self, serializer_flag):
def __execute_restart_utility_save(self, model_part_name, restart_time):
model_part = ReadModelPart(GetFilePath("test_model_part_io_read"))

model_part.ProcessInfo[KratosMultiphysics.TIME] = restart_time # saving is only done if time > 0.0
model_part.ProcessInfo[KratosMultiphysics.TIME] = 0.0 # saving is only done if time > 0.0

restart_parameters = KratosMultiphysics.Parameters("""
{
"input_filename" : "test_restart_file",
"restart_save_frequency" : 0.0,
"save_restart_files_in_folder" : false
"input_filename" : "test_restart_file",
"restart_save_frequency" : 10.0,
"save_restart_files_in_folder" : false
}
""")

rest_utility = restart_utility.RestartUtility(model_part, restart_parameters)

rest_utility.SaveRestart()
self.assertFalse(rest_utility.IsRestartOutputStep())

model_part.ProcessInfo[KratosMultiphysics.TIME] = restart_time

self.assertTrue(rest_utility.IsRestartOutputStep())

if rest_utility.IsRestartOutputStep():
rest_utility.SaveRestart()

def __execute_restart_utility_load(self, model_part_name, restart_time):
loaded_model_part = KratosMultiphysics.ModelPart(model_part_name)

restart_parameters = KratosMultiphysics.Parameters("""
{
"input_filename" : "test_restart_file",
"restart_load_file_label" : "",
"load_restart_files_from_folder" : false
"input_filename" : "test_restart_file",
"restart_load_file_label" : "",
"load_restart_files_from_folder" : false
}
""")

Expand All @@ -195,6 +207,7 @@ def __execute_restart_utility_load(self, model_part_name, restart_time):
return loaded_model_part



def test_restart_NOTRACE(self):
    # Run the restart test with the serializer flag SERIALIZER_NO_TRACE
    no_trace_flag = KratosMultiphysics.SerializerTraceType.SERIALIZER_NO_TRACE
    self.__execute_restart_test(no_trace_flag)

Expand All @@ -207,12 +220,61 @@ def test_restart_TRACE_ALL(self):
def test_restart_utility(self):
# Here we only test SERIALIZER_NO_TRACE since the others are tested in the simple tests
model_part_name = "MainRestart"
restart_time = 5.3
restart_time = 15.0
self.__execute_restart_utility_save(model_part_name, restart_time)
loaded_model_part = self.__execute_restart_utility_load(model_part_name, restart_time)

self._check_modelpart(loaded_model_part)

def test_save_restart_process(self):
    # Drives the SaveRestartProcess through a mock solution loop and checks
    # the restart files it writes
    model_part = ReadModelPart(GetFilePath("test_model_part_io_read"))
    model = KratosMultiphysics.Model()
    model.AddModelPart(model_part)

    # Here "step" is used as control type, since "time" (=> default) is covered in the tests above
    process_settings = KratosMultiphysics.Parameters("""{
        "Parameters" : {
            "model_part_name"        : "MainRestart",
            "restart_save_frequency" : 2,
            "restart_control_type"   : "step"
        }
    }""")

    model_part.ProcessInfo[KratosMultiphysics.TIME] = 0.0
    model_part.ProcessInfo[KratosMultiphysics.STEP] = 0

    delta_time = 0.35
    end_time = 17.1

    process = save_rest_proc.Factory(process_settings, model)
    process.ExecuteInitialize()
    process.ExecuteBeforeSolutionLoop()
    # Mock solution loop: advance TIME and STEP, save whenever the process asks for it
    while model_part.ProcessInfo[KratosMultiphysics.TIME] < end_time:
        model_part.ProcessInfo[KratosMultiphysics.TIME] += delta_time
        model_part.ProcessInfo[KratosMultiphysics.STEP] += 1
        process.ExecuteInitializeSolutionStep()
        if process.IsOutputStep():
            process.PrintOutput()
        process.ExecuteFinalizeSolutionStep()
    process.ExecuteFinalize()

    # Every second step (2, 4, ..., 48) must have produced a file
    restart_dir = "MainRestart__restart_files"
    file_prefix = os.path.join(restart_dir, "MainRestart_")
    for step in range(2, 50, 2):
        expected_file = file_prefix + str(step) + ".rest"
        self.assertTrue(os.path.isfile(expected_file))

    # The folder must contain exactly the expected number of restart files
    expected_num_files = 24
    num_files = sum(
        1 for entry in os.listdir(restart_dir)
        if IsRestartFile(os.path.join(restart_dir, entry))
    )
    self.assertEqual(expected_num_files, num_files)

    # Load one of the files and verify the loaded model part
    loaded_model_part = self.__execute_restart_load(
        file_prefix + "16",
        KratosMultiphysics.SerializerTraceType.SERIALIZER_NO_TRACE)

    self._check_modelpart(loaded_model_part)


if __name__ == '__main__':
KratosUnittest.main()