From 847c54befea7bc1e2c72773721d392d0f2fab0ba Mon Sep 17 00:00:00 2001 From: Benedikt Volkel Date: Fri, 1 Mar 2024 08:55:13 +0100 Subject: [PATCH] [Anchor] Make tests return with error code if QC fails --- MC/run/ANCHOR/anchorMC.sh | 16 ++++++++++------ .../ANCHOR/tests/test_anchor_2023_apass2_PbPb.sh | 3 +++ .../ANCHOR/tests/test_anchor_2023_apass2_pp.sh | 2 ++ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/MC/run/ANCHOR/anchorMC.sh b/MC/run/ANCHOR/anchorMC.sh index 13512fea7..6d944b2cd 100755 --- a/MC/run/ANCHOR/anchorMC.sh +++ b/MC/run/ANCHOR/anchorMC.sh @@ -46,8 +46,9 @@ print_help() echo "ALIEN_JDL_SIMENGINE or SIMENGINE, choose the transport engine, default: TGeant4," echo "ALIEN_JDL_WORKFLOWDETECTORS, set detectors to be taken into account, default: ITS,TPC,TOF,FV0,FT0,FDD,MID,MFT,MCH,TRD,EMC,PHS,CPV,HMP,CTP," echo "ALIEN_JDL_ANCHOR_SIM_OPTIONS, additional options that are passed to the workflow creation, default: -gen pythia8," - echo "ALIEN_JDL_ADDTIMESERIESINMC, run TPC time series. Default: 1, switch off by setting to 0," - echo "DISABLE_QC, set this to disable QC, e.g. to 1" + echo "ALIEN_JDL_ADDTIMESERIESINMC, run TPC time series. Switch off by setting to 0, default: 1," + echo "ALIEN_JDL_ANCHOR_SIM_DISABLE_QC|ANCHOR_SIM_DISABLE_QC, set this to disable QC, e.g. to 1, default: 0," + echo "ALIEN_JDL_ANCHOR_SIM_STOP_ON_FAILURE_QC|ANCHOR_SIM_STOP_ON_FAILURE_QC, set this different from 0 to abort as soon as an error occurs in QC, default: 0." } # Prevent the script from being soured to omit unexpected surprises when exit is used @@ -85,6 +86,8 @@ export ALIEN_JDL_SIMENGINE=${ALIEN_JDL_SIMENGINE:-${SIMENGINE:-TGeant4}} export ALIEN_JDL_WORKFLOWDETECTORS=${ALIEN_JDL_WORKFLOWDETECTORS:-ITS,TPC,TOF,FV0,FT0,FDD,MID,MFT,MCH,TRD,EMC,PHS,CPV,HMP,CTP} # can be passed to contain additional options that will be passed to o2dpg_sim_workflow_anchored.py and eventually to o2dpg_sim_workflow.py export ALIEN_JDL_ANCHOR_SIM_OPTIONS=${ALIEN_JDL_ANCHOR_SIM_OPTIONS:--gen pythia8} +export ALIEN_JDL_ANCHOR_SIM_DISABLE_QC=${ALIEN_JDL_ANCHOR_SIM_DISABLE_QC:-${ANCHOR_SIM_DISABLE_QC:-0}} +export ALIEN_JDL_ANCHOR_SIM_STOP_ON_FAILURE_QC=${ALIEN_JDL_ANCHOR_SIM_STOP_ON_FAILURE_QC:-${ANCHOR_SIM_STOP_ON_FAILURE_QC:-0}} # all others MUST be set by the user/on the outside export ALIEN_JDL_LPMANCHORPASSNAME=${ALIEN_JDL_LPMANCHORPASSNAME:-${ANCHORPASSNAME}} export ALIEN_JDL_MCANCHOR=${ALIEN_JDL_MCANCHOR:-${MCANCHOR}} @@ -237,7 +240,7 @@ export FAIRMQ_IPC_PREFIX=./ echo "Ready to start main workflow" -${O2DPG_ROOT}/MC/bin/o2_dpg_workflow_runner.py -f workflow.json -tt ${ALIEN_JDL_O2DPGWORKFLOWTARGET:-aod} --cpu-limit ${ALIEN_JDL_CPULIMIT:-8} +${O2DPG_ROOT}/MC/bin/o2_dpg_workflow_runner.py -f workflow.json -tt ${ALIEN_JDL_O2DPGWORKFLOWTARGET:-aod} --cpu-limit ${ALIEN_JDL_CPULIMIT} MCRC=$? # <--- we'll report back this code if [[ "${ALIEN_JDL_ADDTIMESERIESINMC}" != "0" ]]; then @@ -246,12 +249,13 @@ if [[ "${ALIEN_JDL_ADDTIMESERIESINMC}" != "0" ]]; then ${O2DPG_ROOT}/MC/bin/o2_dpg_workflow_runner.py -f workflow.json -tt tpctimes fi -[[ ! -z "${DISABLE_QC}" ]] && echo "INFO: QC is disabled, skip it." +[[ "${ALIEN_JDL_ANCHOR_SIM_DISABLE_QC}" != "0" ]] && echo "INFO: QC is disabled, skip it." -if [[ -z "${DISABLE_QC}" && "${MCRC}" = "0" && "${remainingargs}" == *"--include-local-qc"* ]] ; then +if [[ "${ALIEN_JDL_ANCHOR_SIM_DISABLE_QC}" == "0" && "${MCRC}" = "0" && "${remainingargs}" == *"--include-local-qc"* ]] ; then # do QC tasks echo "Doing QC" - ${O2DPG_ROOT}/MC/bin/o2_dpg_workflow_runner.py -f workflow.json --target-labels QC --cpu-limit ${ALIEN_JDL_CPULIMIT:-8} -k + [[ "${ALIEN_JDL_ANCHOR_SIM_STOP_ON_FAILURE_QC}" != "0" ]] && keep_going="" || keep_going="-k" + ${O2DPG_ROOT}/MC/bin/o2_dpg_workflow_runner.py -f workflow.json --target-labels QC --cpu-limit ${ALIEN_JDL_CPULIMIT} ${keep_going} # NOTE that with the -k|--keep-going option, the runner will try to keep on executing even if some tasks fail. # That means, even if there is a failing QC task, the return code will be 0 MCRC=$? diff --git a/MC/run/ANCHOR/tests/test_anchor_2023_apass2_PbPb.sh b/MC/run/ANCHOR/tests/test_anchor_2023_apass2_PbPb.sh index 095908e4b..68738bc96 100755 --- a/MC/run/ANCHOR/tests/test_anchor_2023_apass2_PbPb.sh +++ b/MC/run/ANCHOR/tests/test_anchor_2023_apass2_PbPb.sh @@ -28,6 +28,9 @@ export CYCLE=0 # on the GRID, this is set, for our use case, we can mimic any job ID export ALIEN_PROC_ID=2963436952 +# this test should return with != 0 if QC shows issues +export ANCHOR_SIM_STOP_ON_FAILURE_QC=1 + # run the central anchor steering script; this includes # * derive timestamp # * derive interaction rate diff --git a/MC/run/ANCHOR/tests/test_anchor_2023_apass2_pp.sh b/MC/run/ANCHOR/tests/test_anchor_2023_apass2_pp.sh index e528b8a0a..2e34d3192 100755 --- a/MC/run/ANCHOR/tests/test_anchor_2023_apass2_pp.sh +++ b/MC/run/ANCHOR/tests/test_anchor_2023_apass2_pp.sh @@ -30,6 +30,8 @@ export ALIEN_PROC_ID=2963436952 # for pp and 50 events per TF, we launch only 4 workers. export NWORKERS=4 +# this test should return with != 0 if QC shows issues +export ANCHOR_SIM_STOP_ON_FAILURE_QC=1 # run the central anchor steering script; this includes # * derive timestamp