From 1d0ef2dafe2446c0a504014a87f70e83ad06460b Mon Sep 17 00:00:00 2001 From: Valentin Kuznetsov Date: Tue, 15 Feb 2022 10:31:39 -0500 Subject: [PATCH 01/16] Remove PFNs from a final document we send to WMArchive --- .../WMCore/Services/WMArchive/DataMap.py | 35 +++++++++++++------ 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/python/WMCore/Services/WMArchive/DataMap.py b/src/python/WMCore/Services/WMArchive/DataMap.py index 06ddfbfa16b..1523a4cb890 100644 --- a/src/python/WMCore/Services/WMArchive/DataMap.py +++ b/src/python/WMCore/Services/WMArchive/DataMap.py @@ -1,6 +1,5 @@ from __future__ import (division, print_function) from builtins import str -from past.builtins import basestring from future.utils import viewitems, listvalues, listitems import copy @@ -13,18 +12,17 @@ # convert data format under stpes["cmsRun1"/"logArch1"/"stageOut1"]["output"] WMARCHIVE_REMOVE_OUTSIDE_LAYER = ["checksum", "dataset"] # convert to list from str -WMARCHIVE_CONVERT_TO_LIST = ["OutputPFN", "lfn"] +WMARCHIVE_CONVERT_TO_LIST = ["lfn"] -WMARCHIVE_DATA_MAP = {"OutputPFN": "outputPFNs", "inputPath": "inputDataset", - "lfn": "outputLFNs", "input": "inputLFNs", "inputpfns": "inputPFNs"} +WMARCHIVE_DATA_MAP = {"inputPath": "inputDataset", + "lfn": "outputLFNs", "input": "inputLFNs"} -WMARCHIVE_REMOVE_FIELD = ["InputPFN", "pfn", "user_dn", "user_vogroup", "user_vorole"] +WMARCHIVE_REMOVE_FIELD = ["user_dn", "user_vogroup", "user_vorole"] WMARCHIVE_COMBINE_FIELD = {"outputDataset": ["primaryDataset", "processedDataset", "dataTier"]} WMARCHIVE_LFN_REF_KEY = ["lfn", "files"] -WMARCHIVE_PFN_REF_KEY = ["pfn"] -WMARCHIVE_FILE_REF_KEY = {"LFN": WMARCHIVE_LFN_REF_KEY, - "PFN": WMARCHIVE_PFN_REF_KEY} +WMARCHIVE_PFN_REF_KEY = [] +WMARCHIVE_FILE_REF_KEY = {"LFN": WMARCHIVE_LFN_REF_KEY} ERROR_TYPE = {'exitCode': int} @@ -94,7 +92,6 @@ # 'guid': '', # 'inputDataset': '', 'inputLFNs': [], - 'inputPFNs': [], # TODO change to empty string from None # 'location': '', # 'merged': False, @@ -102,7 +99,6 @@ # 'output_module_class': '', # 'outputDataset': '', 'outputLFNs': [], - 'outputPFNs': [], # 'prep_id': '', # 'processingStr': '', # 'processingVer': -1, @@ -125,6 +121,23 @@ } +def cleanStep(idict): + """ + perform clean-up of PFNs attributes in place for given dictionary + + :param idict: a FWJR report dictionary + :return: a dictionary without PFNs + """ + for step in ['input', 'output']: + data = idict.get(step, {}) + for key, values in data.items(): + for elem in values: + for skip in ['pfn', 'InputPFN', 'OutputPFN', 'inputpfns']: + if skip in elem: + del elem[skip] + data[key] = values + return idict + def combineDataset(dataset): dataset["outputDataset"] = "/%s/%s/%s" % (dataset.pop("primaryDataset"), dataset.pop("processedDataset"), @@ -300,12 +313,12 @@ def convertStepValue(stepValue): return stepValue - def convertSteps(steps): stepList = [] for key, value in viewitems(steps): stepItem = {} stepItem['name'] = key + value = cleanStep(value) stepItem.update(convertStepValue(value)) _validateTypeAndSetDefault(stepItem, TOP_LEVEL_STEP_DEFAULT) stepList.append(stepItem) From 7dab9d803c7b0599ff4f82a70ee5671345b6dee6 Mon Sep 17 00:00:00 2001 From: germanfgv Date: Thu, 21 Apr 2022 13:55:29 +0200 Subject: [PATCH 02/16] Check CMSSW version before getting sim-datatier map --- src/python/WMCore/WMSpec/StdSpecs/StdBase.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/python/WMCore/WMSpec/StdSpecs/StdBase.py b/src/python/WMCore/WMSpec/StdSpecs/StdBase.py index e128375a696..32c53d814be 
100644 --- a/src/python/WMCore/WMSpec/StdSpecs/StdBase.py +++ b/src/python/WMCore/WMSpec/StdSpecs/StdBase.py @@ -14,6 +14,7 @@ from Utils.PythonVersion import PY3 from Utils.Utilities import decodeBytesToUnicodeConditional from Utils.Utilities import makeList, makeNonEmptyList, strToBool, safeStr +from WMCore.WMRuntime.Tools.Scram import isCMSSWSupported from WMCore.Cache.WMConfigCache import ConfigCache, ConfigCacheException from WMCore.Lexicon import (couchurl, procstring, activity, procversion, primdataset, gpuParameters, lfnBase, identifier, acqname, cmsname, @@ -160,7 +161,7 @@ def skimToDataTier(cmsswVersion, skim): command += "cd %s\n" % scramBaseDirs[0] command += "eval `scramv1 runtime -sh`\n" - if PY3: + if isCMSSWSupported(cmsswVersion, "CMSSW_10_3_0"): command += """python3 -c 'from Configuration.StandardSequences.Skims_cff import getSkimDataTier\n""" else: command += """python -c 'from Configuration.StandardSequences.Skims_cff import getSkimDataTier\n""" From 7d91ca7425e1322d231a0fb5148f9a31da715329 Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Mon, 25 Apr 2022 15:11:57 +0200 Subject: [PATCH 03/16] 2.0.3.pre2 Signed-off-by: Alan Malta Rodrigues --- CHANGES | 11 +++++++++++ src/python/WMCore/__init__.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 2c7acd2f64c..d98ef5c381e 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,14 @@ +2.0.3.pre1 to 2.0.3.pre2: + - move ckey/cert functions to Utils.CertTools (Valentin Kuznetsov) #11101 + - Add bin and dependencies to wmcore PyPI package (Erik Gough) #11103 + - Remove cPickle support (Erik Gough) #11096 + - Update wmagent deploy script to add opportunistic resources one by one (Alan Malta Rodrigues) #11097 + - Support adding diskless resources into the database (Alan Malta Rodrigues) #11097 + - Remove no longer needed code in the StatusChangeTasks ReqMgr2 thread (Alan Malta Rodrigues) #11095 + - Bump WMAgent deployment example to the latest stable version (Alan Malta Rodrigues) #11091 + - Remove PFNs from a final document we send to WMArchive (Valentin Kuznetsov) #10998 + + 2.0.2 to 2.0.3.pre1: - Enhance logic to map ScramArch to OS (Alan Malta Rodrigues) #11088 - When ScramArch is empty str/list/None, return any as required_os (Alan Malta Rodrigues) #11083 diff --git a/src/python/WMCore/__init__.py b/src/python/WMCore/__init__.py index ae1a90bd0a7..f549f1625f8 100644 --- a/src/python/WMCore/__init__.py +++ b/src/python/WMCore/__init__.py @@ -6,5 +6,5 @@ """ -__version__ = '2.0.3.pre1' +__version__ = '2.0.3.pre2' __all__ = [] From 0cf08c3266dad1bb2cd5070f8f2a736fd2f57bd6 Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Tue, 12 Oct 2021 14:53:51 +0200 Subject: [PATCH 04/16] Minor test json template updates EL8 template; replacement for SC 6 steps tweak EL8 workflows new templates final tweaks to Memory and Campaign --- .../ReqMgr/requests/DMWM/SC_6Steps_PU.json | 187 +++++++++++++++++ .../requests/DMWM/SC_6Steps_Scratch.json | 134 ------------ test/data/ReqMgr/requests/DMWM/SC_EL8.json | 112 ++++++++++ .../ReqMgr/requests/DMWM/TC_6Tasks_PU.json | 198 ++++++++++++++++++ test/data/ReqMgr/requests/DMWM/TC_EL8.json | 117 +++++++++++ .../Integration/SC_LumiMask_Rules.json | 182 +++++++--------- .../requests/Integration/SC_PY3_PURecyc.json | 5 +- .../Integration/TaskChain_PUMCRecyc.json | 5 +- 8 files changed, 698 insertions(+), 242 deletions(-) create mode 100644 test/data/ReqMgr/requests/DMWM/SC_6Steps_PU.json delete mode 100644 
test/data/ReqMgr/requests/DMWM/SC_6Steps_Scratch.json create mode 100644 test/data/ReqMgr/requests/DMWM/SC_EL8.json create mode 100644 test/data/ReqMgr/requests/DMWM/TC_6Tasks_PU.json create mode 100644 test/data/ReqMgr/requests/DMWM/TC_EL8.json diff --git a/test/data/ReqMgr/requests/DMWM/SC_6Steps_PU.json b/test/data/ReqMgr/requests/DMWM/SC_6Steps_PU.json new file mode 100644 index 00000000000..6949660df50 --- /dev/null +++ b/test/data/ReqMgr/requests/DMWM/SC_6Steps_PU.json @@ -0,0 +1,187 @@ +{ + "assignRequest": { + "AcquisitionEra": { + "HIG_RunIISummer20UL16DIGIPremixAPV_02791_0": "AcquisitionEra-OVERRIDE-ME", + "HIG_RunIISummer20UL16HLTAPV_02812_0": "AcquisitionEra-OVERRIDE-ME", + "HIG_RunIISummer20UL16MiniAODAPVv2_02457_0": "AcquisitionEra-OVERRIDE-ME", + "HIG_RunIISummer20UL16RECOAPV_02812_0": "AcquisitionEra-OVERRIDE-ME", + "HIG_RunIISummer20UL16SIMAPV_02812_0": "AcquisitionEra-OVERRIDE-ME", + "HIG_RunIISummer20UL16wmLHEGENAPV_03178_0": "AcquisitionEra-OVERRIDE-ME" + }, + "Dashboard": "Dashboard-OVERRIDE-ME", + "GracePeriod": 300, + "MergedLFNBase": "/store/backfill/1", + "Override": { + "eos-lfn-prefix": "root://eoscms.cern.ch//eos/cms/store/logs/prod/recent/TESTBED" + }, + "ProcessingString": { + "HIG_RunIISummer20UL16DIGIPremixAPV_02791_0": "ProcessingString-OVERRIDE-ME", + "HIG_RunIISummer20UL16HLTAPV_02812_0": "ProcessingString-OVERRIDE-ME", + "HIG_RunIISummer20UL16MiniAODAPVv2_02457_0": "ProcessingString-OVERRIDE-ME", + "HIG_RunIISummer20UL16RECOAPV_02812_0": "ProcessingString-OVERRIDE-ME", + "HIG_RunIISummer20UL16SIMAPV_02812_0": "ProcessingString-OVERRIDE-ME", + "HIG_RunIISummer20UL16wmLHEGENAPV_03178_0": "ProcessingString-OVERRIDE-ME" + }, + "ProcessingVersion": 19, + "SiteBlacklist": [], + "SiteWhitelist": [ + "SiteWhitelist-OVERRIDE-ME" + ], + "SoftTimeout": 129600, + "Team": "Team-OVERRIDE-ME", + "UnmergedLFNBase": "/store/unmerged" + }, + "createRequest": { + "AcquisitionEra": "DEFAULT_AcqEra", + "CMSSWVersion": "CMSSW_10_6_25", + "Campaign": "Campaign-OVERRIDE-ME", + "Comments": { + "CheckList": ["StepChain request from scratch; StepChain with duplicate output module; StepChain request with KeepOutput false", + "StepChain multicore/eventStreams; StepChain with different CMSSW/ScramArch and PrepID"], + "WorkFlowDesc": ["StepChain from scratch with PU at Step3; KeepOutput of Step5/6 only - AOD and MINI; SC with dup outputModule", + "Diff CMSSW/ScramArch and PrepID; 100 jobs 2LpJ; Diff Multicore and EventStreams, 4GB RAM"] + }, + "ConfigCacheUrl": "https://cmsweb.cern.ch/couchdb", + "CouchDBName": "reqmgr_config_cache", + "DQMUploadUrl": "https://cmsweb.cern.ch/dqm/dev", + "DbsUrl": "https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader/", + "EventStreams": 0, + "GlobalTag": "106X_mcRun2_asymptotic_preVFP_v11", + "IncludeParents": false, + "Memory": 4000, + "Multicore": 4, + "PrepID": "HIG-TopLevel", + "ProcessingString": "DEFAULT_ProcStr", + "ProcessingVersion": 11, + "RequestPriority": 400000, + "RequestString": "RequestString-OVERRIDE-ME", + "RequestType": "StepChain", + "ScramArch": [ + "slc7_amd64_gcc700" + ], + "SizePerEvent": 3437.5822380952377, + "Step1": { + "AcquisitionEra": "RunIISummer20UL16wmLHEGENAPV", + "CMSSWVersion": "CMSSW_10_6_28_patch1", + "Campaign": "RunIISummer20UL16wmLHEGENAPV", + "ConfigCacheID": "abd4cc458ae724060c82a8c7e8535570", + "EventsPerJob": 1000, + "EventsPerLumi": 500, + "FilterEfficiency": 1, + "GlobalTag": "106X_mcRun2_asymptotic_preVFP_v8", + "KeepOutput": false, + "LheInputFiles": false, + "Multicore": 2, + "EventStreams": 2, + 
"PrepID": "HIG-Step1", + "PrimaryDataset": "VBF_HHTo2G2Tau_CV_1_C2V_1_C3_0_TuneCP5_13TeV-powheg-pythia8", + "ProcessingString": "Step1_WMCore_TEST", + "RequestNumEvents": 100000, + "ScramArch": [ + "slc7_amd64_gcc700" + ], + "Seeding": "AutomaticSeeding", + "SplittingAlgo": "EventBased", + "StepName": "HIG_RunIISummer20UL16wmLHEGENAPV_03178_0" + }, + "Step2": { + "AcquisitionEra": "RunIISummer20UL16SIMAPV", + "CMSSWVersion": "CMSSW_10_6_17_patch1", + "Campaign": "RunIISummer20UL16SIMAPV", + "ConfigCacheID": "08a4940e14b0b7a2f0f4720a0f22bacb", + "FilterEfficiency": 1, + "GlobalTag": "106X_mcRun2_asymptotic_preVFP_v8", + "InputFromOutputModule": "RAWSIMoutput", + "InputStep": "HIG_RunIISummer20UL16wmLHEGENAPV_03178_0", + "KeepOutput": false, + "PrepID": "HIG-Step2", + "PrimaryDataset": "VBF_HHTo2G2Tau_CV_1_C2V_1_C3_0_TuneCP5_13TeV-powheg-pythia8", + "ProcessingString": "Step2_WMCore_TEST", + "ScramArch": [ + "slc7_amd64_gcc700" + ], + "SplittingAlgo": "EventAwareLumiBased", + "StepName": "HIG_RunIISummer20UL16SIMAPV_02812_0" + }, + "Step3": { + "AcquisitionEra": "RunIISummer20UL16DIGIPremixAPV", + "CMSSWVersion": "CMSSW_10_6_17_patch1", + "Campaign": "RunIISummer20UL16DIGIPremixAPV", + "ConfigCacheID": "08a4940e14b0b7a2f0f4720a0f23bdce", + "FilterEfficiency": 1, + "GlobalTag": "106X_mcRun2_asymptotic_preVFP_v8", + "InputFromOutputModule": "RAWSIMoutput", + "InputStep": "HIG_RunIISummer20UL16SIMAPV_02812_0", + "KeepOutput": false, + "MCPileup": "/Neutrino_E-10_gun/RunIISummer20ULPrePremix-UL16_106X_mcRun2_asymptotic_v13-v1/PREMIX", + "PrepID": "HIG-Step3", + "PrimaryDataset": "VBF_HHTo2G2Tau_CV_1_C2V_1_C3_0_TuneCP5_13TeV-powheg-pythia8", + "ProcessingString": "Step3_WMCore_TEST", + "ScramArch": [ + "slc7_amd64_gcc700" + ], + "SplittingAlgo": "EventAwareLumiBased", + "StepName": "HIG_RunIISummer20UL16DIGIPremixAPV_02791_0" + }, + "Step4": { + "AcquisitionEra": "RunIISummer20UL16HLTAPV", + "CMSSWVersion": "CMSSW_8_0_36_UL_patch1", + "Campaign": "RunIISummer20UL16HLTAPV", + "ConfigCacheID": "1b96d80d6061d572208f6a0ef554734c", + "FilterEfficiency": 1, + "GlobalTag": "80X_mcRun2_asymptotic_2016_TrancheIV_v6", + "InputFromOutputModule": "PREMIXRAWoutput", + "InputStep": "HIG_RunIISummer20UL16DIGIPremixAPV_02791_0", + "KeepOutput": false, + "Multicore": 2, + "EventStreams": 2, + "PrepID": "HIG-Step4", + "PrimaryDataset": "VBF_HHTo2G2Tau_CV_1_C2V_1_C3_0_TuneCP5_13TeV-powheg-pythia8", + "ProcessingString": "Step4_WMCore_TEST", + "ScramArch": [ + "slc7_amd64_gcc530" + ], + "SplittingAlgo": "EventAwareLumiBased", + "StepName": "HIG_RunIISummer20UL16HLTAPV_02812_0" + }, + "Step5": { + "AcquisitionEra": "RunIISummer20UL16RECOAPV", + "CMSSWVersion": "CMSSW_10_6_17_patch1", + "Campaign": "RunIISummer20UL16RECOAPV", + "ConfigCacheID": "08a4940e14b0b7a2f0f4720a0f2436d0", + "FilterEfficiency": 1, + "GlobalTag": "106X_mcRun2_asymptotic_preVFP_v8", + "InputFromOutputModule": "RAWSIMoutput", + "InputStep": "HIG_RunIISummer20UL16HLTAPV_02812_0", + "KeepOutput": true, + "PrepID": "HIG-Step5", + "PrimaryDataset": "VBF_HHTo2G2Tau_CV_1_C2V_1_C3_0_TuneCP5_13TeV-powheg-pythia8", + "ProcessingString": "Step5_WMCore_TEST", + "ScramArch": ["slc7_amd64_gcc700"], + "SplittingAlgo": "EventAwareLumiBased", + "StepName": "HIG_RunIISummer20UL16RECOAPV_02812_0" + }, + "Step6": { + "AcquisitionEra": "RunIISummer20UL16MiniAODAPVv2", + "CMSSWVersion": "CMSSW_10_6_25", + "Campaign": "RunIISummer20UL16MiniAODAPVv2", + "ConfigCacheID": "c121f9d5ea13ca2b315c408778e5693a", + "FilterEfficiency": 1, + "GlobalTag": 
"106X_mcRun2_asymptotic_preVFP_v11", + "InputFromOutputModule": "AODSIMoutput", + "InputStep": "HIG_RunIISummer20UL16RECOAPV_02812_0", + "KeepOutput": true, + "Multicore": 1, + "EventStreams": 2, + "PrepID": "HIG-Step6", + "PrimaryDataset": "VBF_HHTo2G2Tau_CV_1_C2V_1_C3_0_TuneCP5_13TeV-powheg-pythia8", + "ProcessingString": "Step6_WMCore_TEST", + "ScramArch": ["slc7_amd64_gcc820"], + "SplittingAlgo": "EventAwareLumiBased", + "StepName": "HIG_RunIISummer20UL16MiniAODAPVv2_02457_0" + }, + "StepChain": 6, + "SubRequestType": "ReDigi", + "TimePerEvent": 15.951107623690724 + } +} diff --git a/test/data/ReqMgr/requests/DMWM/SC_6Steps_Scratch.json b/test/data/ReqMgr/requests/DMWM/SC_6Steps_Scratch.json deleted file mode 100644 index 097a5df2a05..00000000000 --- a/test/data/ReqMgr/requests/DMWM/SC_6Steps_Scratch.json +++ /dev/null @@ -1,134 +0,0 @@ -{ - "assignRequest": { - "AcquisitionEra": "AcquisitionEra-OVERRIDE-ME", - "Dashboard": "Dashboard-OVERRIDE-ME", - "GracePeriod": 300, - "MergedLFNBase": "/store/backfill/1", - "Override": { - "eos-lfn-prefix": "root://eoscms.cern.ch//eos/cms/store/logs/prod/recent/TESTBED" - }, - "ProcessingString": "ProcessingString-OVERRIDE-ME", - "ProcessingVersion": 19, - "SiteWhitelist": [], - "SiteBlacklist": [], - "SoftTimeout": 129600, - "Team": "Team-OVERRIDE-ME", - "UnmergedLFNBase": "/store/unmerged" - }, - "createRequest": { - "AcquisitionEra": "DEFAULT_AcqEra", - "CMSSWVersion": "CMSSW_10_6_1_patch1", - "Campaign": "Campaign-OVERRIDE-ME", - "Comments": { - "WorkFlowDesc": ["SC pileup from scratch, fork-setup, with 6 steps; Keep output of tasks 1-3-6, drop GEN-SIM-DIGI-RAW", - "Automatic job spltting ~50EpJ, 1LpJ; Step6 single core, others 8; 16GB of RSS memory", - "Diff PrepID; Steps1-2-3 with CMSSW_10.6.1, others with 10.6.1.patch1", - "Step2-4 with same output module and datatier, dropped;"], - "CheckList": "SC from scratch with PU; SC KeepOutput=False; SC with dup outputModule; SC multicore; SC with different CMSSW/ScramArch and PrepID" - }, - "ConfigCacheUrl": "https://cmsweb.cern.ch/couchdb", - "CouchDBName": "reqmgr_config_cache", - "DbsUrl": "https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader", - "GlobalTag": "106X_mcRun3_2021_realistic_v1", - "Memory": 16000, - "Multicore": 1, - "PrepID": "TEST-TopStep-CMSSW_10_6_1", - "ProcessingString": "DEFAULT_ProcStr", - "ProcessingVersion": 1, - "RequestPriority": 600000, - "RequestString": "RequestString-OVERRIDE-ME", - "RequestType": "StepChain", - "ScramArch": [ - "slc7_amd64_gcc700" - ], - "SizePerEvent": 1234, - "SubRequestType": "RelVal", - "Step1": { - "AcquisitionEra": "CMSSW_10_6_1", - "Campaign": "RelVal_Generic_Campaign", - "CMSSWVersion": "CMSSW_10_6_1", - "ConfigCacheID": "a2f53143f6931808046972d233af51b6", - "GlobalTag": "106X_mcRun3_2021_realistic_v1", - "KeepOutput": true, - "Multicore": 8, - "PrepID": "TEST-Step1-CMSSW_10_6_1", - "PrimaryDataset": "RelValH125GGgluonfusion_14", - "ProcessingString": "Step1_WMCore_TEST", - "RequestNumEvents": 9000, - "Seeding": "AutomaticSeeding", - "SplittingAlgo": "EventBased", - "StepName": "H125GGgluonfusion_GenSim" - }, - "Step2": { - "AcquisitionEra": "CMSSW_10_6_1", - "Campaign": "RelVal_Generic_Campaign", - "CMSSWVersion": "CMSSW_10_6_1", - "ConfigCacheID": "a2f53143f6931808046972d233a37590", - "GlobalTag": "106X_mcRun3_2021_realistic_v1", - "InputFromOutputModule": "FEVTDEBUGoutput", - "InputStep": "H125GGgluonfusion_GenSim", - "KeepOutput": false, - "Multicore": 8, - "ProcessingString": "Step2_WMCore_TEST", - "StepName": "DigiFull_Step2" - }, - 
"Step3": { - "AcquisitionEra": "CMSSW_10_6_1", - "Campaign": "RelVal_Generic_Campaign", - "CMSSWVersion": "CMSSW_10_6_1", - "ConfigCacheID": "a2f53143f6931808046972d23391f53b", - "GlobalTag": "106X_mcRun3_2021_realistic_v1", - "InputFromOutputModule": "FEVTDEBUGHLToutput", - "InputStep": "DigiFull_Step2", - "KeepOutput": true, - "Multicore": 8, - "PrepID": "TEST-Step3-CMSSW_10_6_1", - "ProcessingString": "Step3_WMCore_TEST", - "StepName": "RecoFull_Step3" - }, - "Step4": { - "AcquisitionEra": "CMSSW_10_6_1_patch1", - "Campaign": "RelVal_Generic_Campaign", - "CMSSWVersion": "CMSSW_10_6_1_patch1", - "ConfigCacheID": "da2ff8e07187c0fd7b9de5c47b4cabd2", - "GlobalTag": "106X_mcRun3_2021_realistic_v3", - "InputFromOutputModule": "FEVTDEBUGoutput", - "InputStep": "H125GGgluonfusion_GenSim", - "KeepOutput": false, - "MCPileup": "/RelValMinBias_14TeV/CMSSW_10_6_1-106X_mcRun3_2021_realistic_v1_rsb-v1/GEN-SIM", - "Multicore": 8, - "ProcessingString": "Step4_WMCore_TEST", - "StepName": "DigiFullPU_Step4" - }, - "Step5": { - "AcquisitionEra": "CMSSW_10_6_1_patch1", - "Campaign": "RelVal_Generic_Campaign", - "CMSSWVersion": "CMSSW_10_6_1_patch1", - "ConfigCacheID": "da2ff8e07187c0fd7b9de5c47b8daa78", - "GlobalTag": "106X_mcRun3_2021_realistic_v3", - "InputFromOutputModule": "FEVTDEBUGHLToutput", - "InputStep": "DigiFullPU_Step4", - "KeepOutput": false, - "MCPileup": "/RelValMinBias_14TeV/CMSSW_10_6_1-106X_mcRun3_2021_realistic_v1_rsb-v1/GEN-SIM", - "Multicore": 8, - "ProcessingString": "Step5_WMCore_TEST", - "StepName": "RecoFullPU_Step5" - }, - "Step6": { - "AcquisitionEra": "CMSSW_10_6_1_patch1", - "Campaign": "RelVal_Generic_Campaign", - "CMSSWVersion": "CMSSW_10_6_1_patch1", - "ConfigCacheID": "da2ff8e07187c0fd7b9de5c47b373d9a", - "GlobalTag": "106X_mcRun3_2021_realistic_v3", - "InputFromOutputModule": "MINIAODSIMoutput", - "InputStep": "RecoFullPU_Step5", - "KeepOutput": true, - "Multicore": 1, - "PrepID": "TEST-Step6-CMSSW_10_6_1", - "ProcessingString": "Step6_WMCore_TEST", - "StepName": "NanoFull_Step6" - }, - "StepChain": 6, - "TimePerEvent": 576.0 - } -} diff --git a/test/data/ReqMgr/requests/DMWM/SC_EL8.json b/test/data/ReqMgr/requests/DMWM/SC_EL8.json new file mode 100644 index 00000000000..bc591b12599 --- /dev/null +++ b/test/data/ReqMgr/requests/DMWM/SC_EL8.json @@ -0,0 +1,112 @@ +{ + "assignRequest": { + "AcquisitionEra": { + "GenSimFull": "AcquisitionEra-OVERRIDE-ME", + "Digi_2021": "AcquisitionEra-OVERRIDE-ME", + "RecoNano_2021": "AcquisitionEra-OVERRIDE-ME" + }, + "Dashboard": "Dashboard-OVERRIDE-ME", + "GracePeriod": 300, + "MergedLFNBase": "/store/backfill/1", + "Override": {"eos-lfn-prefix": "root://eoscms.cern.ch//eos/cms/store/logs/prod/recent/TESTBED"}, + "ProcessingString": { + "GenSimFull": "ProcessingString-OVERRIDE-ME", + "Digi_2021": "ProcessingString-OVERRIDE-ME", + "RecoNano_2021": "ProcessingString-OVERRIDE-ME" + }, + "ProcessingVersion": 19, + "SiteBlacklist": [], + "SiteWhitelist": ["SiteWhitelist-OVERRIDE-ME"], + "SoftTimeout": 129600, + "Team": "Team-OVERRIDE-ME", + "UnmergedLFNBase": "/store/unmerged" + }, + "createRequest": { + "AcquisitionEra": "DEFAULT_AcqEra", + "CMSSWVersion": "CMSSW_12_4_0_pre2", + "Campaign": "Campaign-OVERRIDE-ME", + "Comments": { + "CheckList": "StepChain: Py3 MC from scratch; StepChain EL8 workflows", + "WorkFlowDesc": ["SC from scratch, no PU; Running on EL8 OS, with alma8 for Step1/3 and cs8 for Step2;", + "Diff PrepID/Multicore/Mem; Harvesting enabled"] + }, + "ConfigCacheUrl": "https://cmsweb.cern.ch/couchdb", + "CouchDBName": 
"reqmgr_config_cache", + "DQMConfigCacheID": "9d1d1906571cda6b360f8486bd8f422e", + "DQMHarvestUnit": "byRun", + "DQMUploadUrl": "https://cmsweb.cern.ch/dqm/relval", + "DbsUrl": "https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader/", + "EnableHarvesting": true, + "EventStreams": 0, + "GPUParams": "null", + "GlobalTag": "123X_mcRun3_2021_realistic_v12", + "IncludeParents": false, + "Memory": 6000, + "Multicore": 1, + "PrepID": "TEST-CMSSW_12_4_0_pre2_TopLevel", + "ProcessingString": "DEFAULT_ProcStr", + "ProcessingVersion": 2, + "RequestPriority": 600000, + "RequestString": "RequestString-OVERRIDE-ME", + "RequestType": "StepChain", + "ScramArch": ["alma8_amd64_gcc10"], + "SizePerEvent": 1, + "SubRequestType": "RelVal", + "Step1": { + "AcquisitionEra": "CMSSW_12_4_0_pre2", + "CMSSWVersion": "CMSSW_12_4_0_pre2", + "Campaign": "CMSSW_12_4_0_pre2__fullsim_noPU_2021_14TeV-1649399635", + "ConfigCacheID": "9d1d1906571cda6b360f8486bd8ee581", + "EventStreams": 2, + "EventsPerJob": 200, + "EventsPerLumi": 100, + "GlobalTag": "123X_mcRun3_2021_realistic_v12", + "KeepOutput": true, + "Multicore": 4, + "PrepID": "TEST-CMSSW_12_4_0_pre2_Step1", + "PrimaryDataset": "RelValWToLNu_14TeV", + "ProcessingString": "Step1_WMCore_TEST", + "RequestNumEvents": 10000, + "ScramArch": ["alma8_amd64_gcc10"], + "Seeding": "AutomaticSeeding", + "SplittingAlgo": "EventBased", + "StepName": "GenSimFull" + }, + "Step2": { + "AcquisitionEra": "CMSSW_12_4_0_pre2", + "CMSSWVersion": "CMSSW_12_4_0_pre2", + "Campaign": "CMSSW_12_4_0_pre2__fullsim_noPU_2021_14TeV-1649399635", + "ConfigCacheID": "9d1d1906571cda6b360f8486bd8ef9b3", + "EventStreams": 2, + "GlobalTag": "123X_mcRun3_2021_realistic_v12", + "InputFromOutputModule": "FEVTDEBUGoutput", + "InputStep": "GenSimFull", + "KeepOutput": true, + "Multicore": 4, + "PrepID": "TEST-CMSSW_12_4_0_pre2_Step2", + "ProcessingString": "Step2_WMCore_TEST", + "ScramArch": ["cs8_amd64_gcc10"], + "SplittingAlgo": "LumiBased", + "StepName": "Digi_2021" + }, + "Step3": { + "AcquisitionEra": "CMSSW_12_4_0_pre2", + "CMSSWVersion": "CMSSW_12_4_0_pre2", + "Campaign": "CMSSW_12_4_0_pre2__fullsim_noPU_2021_14TeV-1649399635", + "ConfigCacheID": "9d1d1906571cda6b360f8486bd8f33ba", + "EventStreams": 2, + "GlobalTag": "123X_mcRun3_2021_realistic_v12", + "InputFromOutputModule": "FEVTDEBUGHLToutput", + "InputStep": "Digi_2021", + "KeepOutput": true, + "Multicore": 8, + "PrepID": "TEST-CMSSW_12_4_0_pre2_Step3", + "ProcessingString": "Step3_WMCore_TEST", + "ScramArch": ["alma8_amd64_gcc10"], + "SplittingAlgo": "LumiBased", + "StepName": "RecoNano_2021" + }, + "StepChain": 3, + "TimePerEvent": 1 + } +} diff --git a/test/data/ReqMgr/requests/DMWM/TC_6Tasks_PU.json b/test/data/ReqMgr/requests/DMWM/TC_6Tasks_PU.json new file mode 100644 index 00000000000..d3dad0e4719 --- /dev/null +++ b/test/data/ReqMgr/requests/DMWM/TC_6Tasks_PU.json @@ -0,0 +1,198 @@ +{ + "assignRequest": { + "AcquisitionEra": { + "HIG_RunIISummer20UL16DIGIPremixAPV_02791_0": "AcquisitionEra-OVERRIDE-ME", + "HIG_RunIISummer20UL16HLTAPV_02812_0": "AcquisitionEra-OVERRIDE-ME", + "HIG_RunIISummer20UL16MiniAODAPVv2_02457_0": "AcquisitionEra-OVERRIDE-ME", + "HIG_RunIISummer20UL16RECOAPV_02812_0": "AcquisitionEra-OVERRIDE-ME", + "HIG_RunIISummer20UL16SIMAPV_02812_0": "AcquisitionEra-OVERRIDE-ME", + "HIG_RunIISummer20UL16wmLHEGENAPV_03178_0": "AcquisitionEra-OVERRIDE-ME" + }, + "Dashboard": "Dashboard-OVERRIDE-ME", + "GracePeriod": 300, + "MergedLFNBase": "/store/backfill/1", + "Override": { + "eos-lfn-prefix": 
"root://eoscms.cern.ch//eos/cms/store/logs/prod/recent/TESTBED" + }, + "ProcessingString": { + "HIG_RunIISummer20UL16DIGIPremixAPV_02791_0": "ProcessingString-OVERRIDE-ME", + "HIG_RunIISummer20UL16HLTAPV_02812_0": "ProcessingString-OVERRIDE-ME", + "HIG_RunIISummer20UL16MiniAODAPVv2_02457_0": "ProcessingString-OVERRIDE-ME", + "HIG_RunIISummer20UL16RECOAPV_02812_0": "ProcessingString-OVERRIDE-ME", + "HIG_RunIISummer20UL16SIMAPV_02812_0": "ProcessingString-OVERRIDE-ME", + "HIG_RunIISummer20UL16wmLHEGENAPV_03178_0": "ProcessingString-OVERRIDE-ME" + }, + "ProcessingVersion": 19, + "SiteBlacklist": [], + "SiteWhitelist": [ + "SiteWhitelist-OVERRIDE-ME" + ], + "SoftTimeout": 129600, + "Team": "Team-OVERRIDE-ME", + "UnmergedLFNBase": "/store/unmerged" + }, + "createRequest": { + "AcquisitionEra": "DEFAULT_AcqEra", + "CMSSWVersion": "CMSSW_10_6_25", + "Campaign": "Campaign-OVERRIDE-ME", + "Comments": { + "CheckList": ["TaskChain request from scratch; TaskChain with duplicate output module; TaskChain request with KeepOutput false", + "TaskChain multicore/eventStreams; TaskChain with different CMSSW/ScramArch and PrepID"], + "WorkFlowDesc": ["TC from scratch with PU at Task3; KeepOutput of Task5/6 only - AOD and MINI;", + "Diff CMSSW/ScramArch and PrepID; 2000EpJ and 4LpJ for Task1; Diff Multicore and EventStreams, 4GB RAM"] + }, + "ConfigCacheUrl": "https://cmsweb.cern.ch/couchdb", + "CouchDBName": "reqmgr_config_cache", + "DQMUploadUrl": "https://cmsweb.cern.ch/dqm/dev", + "DbsUrl": "https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader/", + "EventStreams": 0, + "GlobalTag": "106X_mcRun2_asymptotic_preVFP_v11", + "IncludeParents": false, + "Memory": 4000, + "Multicore": 4, + "PrepID": "HIG-TopLevel", + "ProcessingString": "DEFAULT_ProcStr", + "ProcessingVersion": 11, + "RequestPriority": 400000, + "RequestString": "RequestString-OVERRIDE-ME", + "RequestType": "TaskChain", + "ScramArch": [ + "slc7_amd64_gcc700" + ], + "SizePerEvent": 3437.5822380952377, + "Task1": { + "AcquisitionEra": "RunIISummer20UL16wmLHEGENAPV", + "CMSSWVersion": "CMSSW_10_6_28_patch1", + "Campaign": "RunIISummer20UL16wmLHEGENAPV", + "ConfigCacheID": "abd4cc458ae724060c82a8c7e8535570", + "EventsPerLumi": 500, + "FilterEfficiency": 1, + "GlobalTag": "106X_mcRun2_asymptotic_preVFP_v8", + "KeepOutput": false, + "LheInputFiles": false, + "Memory": 1000, + "Multicore": 2, + "EventStreams": 2, + "PrepID": "HIG-Task1", + "PrimaryDataset": "VBF_HHTo2G2Tau_CV_1_C2V_1_C3_0_TuneCP5_13TeV-powheg-pythia8", + "ProcessingString": "Task1_WMCore_TEST", + "RequestNumEvents": 100000, + "ScramArch": [ + "slc7_amd64_gcc700" + ], + "Seeding": "AutomaticSeeding", + "SplittingAlgo": "EventBased", + "TaskName": "HIG_RunIISummer20UL16wmLHEGENAPV_03178_0", + "TimePerEvent": 14.4 + }, + "Task2": { + "AcquisitionEra": "RunIISummer20UL16SIMAPV", + "CMSSWVersion": "CMSSW_10_6_17_patch1", + "Campaign": "RunIISummer20UL16SIMAPV", + "ConfigCacheID": "08a4940e14b0b7a2f0f4720a0f22bacb", + "FilterEfficiency": 1, + "GlobalTag": "106X_mcRun2_asymptotic_preVFP_v8", + "InputFromOutputModule": "RAWSIMoutput", + "InputTask": "HIG_RunIISummer20UL16wmLHEGENAPV_03178_0", + "KeepOutput": false, + "Memory": 2000, + "PrepID": "HIG-Task2", + "PrimaryDataset": "VBF_HHTo2G2Tau_CV_1_C2V_1_C3_0_TuneCP5_13TeV-powheg-pythia8", + "ProcessingString": "Task2_WMCore_TEST", + "ScramArch": [ + "slc7_amd64_gcc700" + ], + "SplittingAlgo": "EventAwareLumiBased", + "TaskName": "HIG_RunIISummer20UL16SIMAPV_02812_0", + "TimePerEvent": 15 + }, + "Task3": { + "AcquisitionEra": 
"RunIISummer20UL16DIGIPremixAPV", + "CMSSWVersion": "CMSSW_10_6_17_patch1", + "Campaign": "RunIISummer20UL16DIGIPremixAPV", + "ConfigCacheID": "08a4940e14b0b7a2f0f4720a0f23bdce", + "FilterEfficiency": 1, + "GlobalTag": "106X_mcRun2_asymptotic_preVFP_v8", + "InputFromOutputModule": "RAWSIMoutput", + "InputTask": "HIG_RunIISummer20UL16SIMAPV_02812_0", + "KeepOutput": false, + "MCPileup": "/Neutrino_E-10_gun/RunIISummer20ULPrePremix-UL16_106X_mcRun2_asymptotic_v13-v1/PREMIX", + "Memory": 3000, + "PrepID": "HIG-Task3", + "PrimaryDataset": "VBF_HHTo2G2Tau_CV_1_C2V_1_C3_0_TuneCP5_13TeV-powheg-pythia8", + "ProcessingString": "Task3_WMCore_TEST", + "ScramArch": [ + "slc7_amd64_gcc700" + ], + "SplittingAlgo": "EventAwareLumiBased", + "TaskName": "HIG_RunIISummer20UL16DIGIPremixAPV_02791_0", + "TimePerEvent": 15 + }, + "Task4": { + "AcquisitionEra": "RunIISummer20UL16HLTAPV", + "CMSSWVersion": "CMSSW_8_0_36_UL_patch1", + "Campaign": "RunIISummer20UL16HLTAPV", + "ConfigCacheID": "1b96d80d6061d572208f6a0ef554734c", + "FilterEfficiency": 1, + "GlobalTag": "80X_mcRun2_asymptotic_2016_TrancheIV_v6", + "InputFromOutputModule": "PREMIXRAWoutput", + "InputTask": "HIG_RunIISummer20UL16DIGIPremixAPV_02791_0", + "KeepOutput": false, + "Memory": 3000, + "Multicore": 2, + "EventStreams": 2, + "PrepID": "HIG-Task4", + "PrimaryDataset": "VBF_HHTo2G2Tau_CV_1_C2V_1_C3_0_TuneCP5_13TeV-powheg-pythia8", + "ProcessingString": "Task4_WMCore_TEST", + "ScramArch": [ + "slc7_amd64_gcc530" + ], + "SplittingAlgo": "EventAwareLumiBased", + "TaskName": "HIG_RunIISummer20UL16HLTAPV_02812_0", + "TimePerEvent": 15 + }, + "Task5": { + "AcquisitionEra": "RunIISummer20UL16RECOAPV", + "CMSSWVersion": "CMSSW_10_6_17_patch1", + "Campaign": "RunIISummer20UL16RECOAPV", + "ConfigCacheID": "08a4940e14b0b7a2f0f4720a0f2436d0", + "FilterEfficiency": 1, + "GlobalTag": "106X_mcRun2_asymptotic_preVFP_v8", + "InputFromOutputModule": "RAWSIMoutput", + "InputTask": "HIG_RunIISummer20UL16HLTAPV_02812_0", + "KeepOutput": true, + "Memory": 3000, + "PrepID": "HIG-Task5", + "PrimaryDataset": "VBF_HHTo2G2Tau_CV_1_C2V_1_C3_0_TuneCP5_13TeV-powheg-pythia8", + "ProcessingString": "Task5_WMCore_TEST", + "ScramArch": ["slc7_amd64_gcc700"], + "SplittingAlgo": "EventAwareLumiBased", + "TaskName": "HIG_RunIISummer20UL16RECOAPV_02812_0", + "TimePerEvent": 15 + }, + "Task6": { + "AcquisitionEra": "RunIISummer20UL16MiniAODAPVv2", + "CMSSWVersion": "CMSSW_10_6_25", + "Campaign": "RunIISummer20UL16MiniAODAPVv2", + "ConfigCacheID": "c121f9d5ea13ca2b315c408778e5693a", + "FilterEfficiency": 1, + "GlobalTag": "106X_mcRun2_asymptotic_preVFP_v11", + "InputFromOutputModule": "AODSIMoutput", + "InputTask": "HIG_RunIISummer20UL16RECOAPV_02812_0", + "KeepOutput": true, + "Memory": 3000, + "Multicore": 1, + "EventStreams": 2, + "PrepID": "HIG-Task6", + "PrimaryDataset": "VBF_HHTo2G2Tau_CV_1_C2V_1_C3_0_TuneCP5_13TeV-powheg-pythia8", + "ProcessingString": "Task6_WMCore_TEST", + "ScramArch": ["slc7_amd64_gcc820"], + "SplittingAlgo": "EventAwareLumiBased", + "TaskName": "HIG_RunIISummer20UL16MiniAODAPVv2_02457_0", + "TimePerEvent": 15 + }, + "TaskChain": 6, + "SubRequestType": "ReDigi", + "TimePerEvent": 15.951107623690724 + } +} diff --git a/test/data/ReqMgr/requests/DMWM/TC_EL8.json b/test/data/ReqMgr/requests/DMWM/TC_EL8.json new file mode 100644 index 00000000000..bde845c1bd3 --- /dev/null +++ b/test/data/ReqMgr/requests/DMWM/TC_EL8.json @@ -0,0 +1,117 @@ +{ + "assignRequest": { + "AcquisitionEra": { + "GenSimFull": "AcquisitionEra-OVERRIDE-ME", + "Digi_2021": 
"AcquisitionEra-OVERRIDE-ME", + "RecoNano_2021": "AcquisitionEra-OVERRIDE-ME" + }, + "Dashboard": "Dashboard-OVERRIDE-ME", + "GracePeriod": 300, + "MergedLFNBase": "/store/backfill/1", + "Override": {"eos-lfn-prefix": "root://eoscms.cern.ch//eos/cms/store/logs/prod/recent/TESTBED"}, + "ProcessingString": { + "GenSimFull": "ProcessingString-OVERRIDE-ME", + "Digi_2021": "ProcessingString-OVERRIDE-ME", + "RecoNano_2021": "ProcessingString-OVERRIDE-ME" + }, + "ProcessingVersion": 19, + "SiteBlacklist": [], + "SiteWhitelist": ["SiteWhitelist-OVERRIDE-ME"], + "SoftTimeout": 129600, + "Team": "Team-OVERRIDE-ME", + "UnmergedLFNBase": "/store/unmerged" + }, + "createRequest": { + "AcquisitionEra": "DEFAULT_AcqEra", + "CMSSWVersion": "CMSSW_12_4_0_pre2", + "Campaign": "Campaign-OVERRIDE-ME", + "Comments": { + "CheckList": "TaskChain: Py3 MC from scratch; TaskChain EL8 workflows", + "WorkFlowDesc": ["TC from scratch, no PU; Running on EL8 OS, with alma8 for Task1/3 and cs8 for Task2;", + "Diff PrepID/Multicore/Mem; Harvesting enabled"] + }, + "ConfigCacheUrl": "https://cmsweb.cern.ch/couchdb", + "CouchDBName": "reqmgr_config_cache", + "DQMConfigCacheID": "9d1d1906571cda6b360f8486bd8f422e", + "DQMHarvestUnit": "byRun", + "DQMUploadUrl": "https://cmsweb.cern.ch/dqm/relval", + "DbsUrl": "https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader/", + "EnableHarvesting": true, + "EventStreams": 0, + "GPUParams": "null", + "GlobalTag": "123X_mcRun3_2021_realistic_v12", + "IncludeParents": false, + "Memory": 3000, + "Multicore": 1, + "PrepID": "TEST-CMSSW_12_4_0_pre2_TopLevel", + "ProcessingString": "DEFAULT_ProcStr", + "ProcessingVersion": 2, + "RequestPriority": 600000, + "RequestString": "RequestString-OVERRIDE-ME", + "RequestType": "TaskChain", + "ScramArch": ["alma8_amd64_gcc10"], + "SizePerEvent": 1, + "SubRequestType": "RelVal", + "Task1": { + "AcquisitionEra": "CMSSW_12_4_0_pre2", + "CMSSWVersion": "CMSSW_12_4_0_pre2", + "Campaign": "CMSSW_12_4_0_pre2__fullsim_noPU_2021_14TeV-1649399635", + "ConfigCacheID": "9d1d1906571cda6b360f8486bd8ee581", + "EventStreams": 2, + "EventsPerJob": 200, + "EventsPerLumi": 100, + "GlobalTag": "123X_mcRun3_2021_realistic_v12", + "KeepOutput": true, + "Memory": 1500, + "Multicore": 4, + "PrepID": "TEST-CMSSW_12_4_0_pre2_Task1", + "PrimaryDataset": "RelValWToLNu_14TeV", + "ProcessingString": "Task1_WMCore_TEST", + "RequestNumEvents": 10000, + "ScramArch": ["alma8_amd64_gcc10"], + "Seeding": "AutomaticSeeding", + "SplittingAlgo": "EventBased", + "TaskName": "GenSimFull" + }, + "Task2": { + "AcquisitionEra": "CMSSW_12_4_0_pre2", + "CMSSWVersion": "CMSSW_12_4_0_pre2", + "Campaign": "CMSSW_12_4_0_pre2__fullsim_noPU_2021_14TeV-1649399635", + "ConfigCacheID": "9d1d1906571cda6b360f8486bd8ef9b3", + "EventStreams": 2, + "GlobalTag": "123X_mcRun3_2021_realistic_v12", + "InputFromOutputModule": "FEVTDEBUGoutput", + "InputTask": "GenSimFull", + "KeepOutput": true, + "LumisPerJob": 5, + "Memory": 4000, + "Multicore": 4, + "PrepID": "TEST-CMSSW_12_4_0_pre2_Task2", + "ProcessingString": "Task2_WMCore_TEST", + "ScramArch": ["cs8_amd64_gcc10"], + "SplittingAlgo": "LumiBased", + "TaskName": "Digi_2021" + }, + "Task3": { + "AcquisitionEra": "CMSSW_12_4_0_pre2", + "CMSSWVersion": "CMSSW_12_4_0_pre2", + "Campaign": "CMSSW_12_4_0_pre2__fullsim_noPU_2021_14TeV-1649399635", + "ConfigCacheID": "9d1d1906571cda6b360f8486bd8f33ba", + "EventStreams": 2, + "GlobalTag": "123X_mcRun3_2021_realistic_v12", + "InputFromOutputModule": "FEVTDEBUGHLToutput", + "InputTask": "Digi_2021", + "KeepOutput": true, + 
"LumisPerJob": 5, + "Memory": 6000, + "Multicore": 8, + "PrepID": "TEST-CMSSW_12_4_0_pre2_Task3", + "ProcessingString": "Task3_WMCore_TEST", + "ScramArch": ["alma8_amd64_gcc10"], + "SplittingAlgo": "LumiBased", + "TaskName": "RecoNano_2021" + }, + "TaskChain": 3, + "TimePerEvent": 1 + } +} diff --git a/test/data/ReqMgr/requests/Integration/SC_LumiMask_Rules.json b/test/data/ReqMgr/requests/Integration/SC_LumiMask_Rules.json index 2e45e10542f..5da7355e783 100644 --- a/test/data/ReqMgr/requests/Integration/SC_LumiMask_Rules.json +++ b/test/data/ReqMgr/requests/Integration/SC_LumiMask_Rules.json @@ -1,106 +1,82 @@ { - "assignRequest": { - "SiteWhitelist": [], - "Team": "Team-OVERRIDE-ME", - "AcquisitionEra": "AcquisitionEra-OVERRIDE-ME", - "ProcessingString": "ProcessingString-OVERRIDE-ME", - "Dashboard": "Dashboard-OVERRIDE-ME", - "SiteBlacklist": [], - "MergedLFNBase": "/store/backfill/1", - "UnmergedLFNBase": "/store/unmerged", - "MinMergeSize": 2147483648, - "MaxMergeSize": 4294967296, - "MaxMergeEvents": 50000, - "Override": { - "eos-lfn-prefix": "root://eoscms.cern.ch//eos/cms/store/logs/prod/recent/TESTBED" + "assignRequest": { + "AcquisitionEra": { + "HLTDR2_2018": "AcquisitionEra-OVERRIDE-ME" + }, + "Dashboard": "Dashboard-OVERRIDE-ME", + "GracePeriod": 300, + "MergedLFNBase": "/store/backfill/1", + "Override": { + "eos-lfn-prefix": "root://eoscms.cern.ch//eos/cms/store/logs/prod/recent/TESTBED" + }, + "ProcessingString": { + "HLTDR2_2018": "ProcessingString-OVERRIDE-ME" + }, + "ProcessingVersion": 19, + "SiteBlacklist": [], + "SiteWhitelist": [ + "SiteWhitelist-OVERRIDE-ME" + ], + "SoftTimeout": 129600, + "Team": "Team-OVERRIDE-ME", + "UnmergedLFNBase": "/store/unmerged", + "CustodialSites": ["T1_US_FNAL_MSS"], + "NonCustodialSites": ["T1_US_FNAL_Disk"], + "AutoApproveSubscriptionSites": ["T1_US_FNAL_Disk"], + "SubscriptionPriority": "Normal", + "CustodialSubType": "Replica", + "NonCustodialSubType": "Move", + "CustodialGroup": "FacOps", + "NonCustodialGroup": "AnalysisOps" }, - "ProcessingVersion": 1, - "SoftTimeout": 129600, - "GracePeriod": 1000, - "CustodialSites": ["T1_US_FNAL_MSS"], - "NonCustodialSites": ["T1_US_FNAL_Disk"], - "AutoApproveSubscriptionSites": ["T1_US_FNAL_Disk"], - "SubscriptionPriority": "Normal", - "CustodialSubType": "Replica", - "NonCustodialSubType": "Move", - "CustodialGroup": "FacOps", - "NonCustodialGroup": "AnalysisOps", - "BlockCloseMaxWaitTime": 72000, - "BlockCloseMaxFiles": 500, - "BlockCloseMaxEvents": 200000000, - "BlockCloseMaxSize": 5000000000000 - }, - "createRequest": { - "AcquisitionEra": "DMWM_Test", - "CMSSWVersion": "CMSSW_10_6_1_patch1", - "Campaign": "Agent124_Val", - "ConfigCacheUrl": "https://cmsweb.cern.ch/couchdb", - "Comments": {"WorkFlowDesc": ["SLC7 SC with 3 steps and LumiList with 20 lumis; PU step1/step2; 16GB/4CPU; save only NANOAODSIM", - "FacOps Custodial Replica to MSS; AnalysisOps NonCustodial Move to Disk auto-approved;"], - "CheckList": ["StepChain request with input + PU", "StepChain request with lumiMask", "StepChain with KeepOutput False", - "Container automatic rule to Disk", "Bypass container automatic rule for Tape"]}, - "DbsUrl": "https://cmsweb-prod.cern.ch/dbs/prod/global/DBSReader", - "GlobalTag": "106X_mcRun3_2021_realistic_v3", - "Memory": 16000, - "Multicore": 1, - "PrepID": "CMSSW_10_6_1_patch1__fullsim_PU_2021_14TeV-1562592614-H125GGgluonfusion_14", - "ProcessingString": "DEFAULT_ProcStr", - "ProcessingVersion": 1, - "RequestPriority": 600000, - "RequestString": "RequestString-OVERRIDE-ME", - 
"RequestType": "StepChain", - "ScramArch": [ - "slc7_amd64_gcc700" - ], - "SizePerEvent": 1234, - "Step1": { - "AcquisitionEra": "CMSSW_10_6_1_patch1", - "Campaign": "CMSSW_10_6_1_patch1_Step1", - "ConfigCacheID": "da2ff8e07187c0fd7b9de5c47b4cabd2", - "GlobalTag": "106X_mcRun3_2021_realistic_v3", - "InputDataset": "/RelValH125GGgluonfusion_14/CMSSW_10_6_1-106X_mcRun3_2021_realistic_v1_rsb-v1/GEN-SIM", - "KeepOutput": false, - "LumiList": {"1": [[26, 30], [36, 40], [171, 180]]}, - "LumisPerJob": 2, - "MCPileup": "/RelValMinBias_14TeV/CMSSW_10_6_1-106X_mcRun3_2021_realistic_v1_rsb-v1/GEN-SIM", - "Memory": 16000, - "Multicore": 4, - "ProcessingString": "Step1_WMCore_TEST", - "SplittingAlgo": "LumiBased", - "StepName": "DigiFullPU_2021PU" - }, - "Step2": { - "AcquisitionEra": "CMSSW_10_6_1_patch1", - "Campaign": "CMSSW_10_6_1_patch1_Step2", - "ConfigCacheID": "da2ff8e07187c0fd7b9de5c47b8daa78", - "GlobalTag": "106X_mcRun3_2021_realistic_v3", - "InputFromOutputModule": "FEVTDEBUGHLToutput", - "InputStep": "DigiFullPU_2021PU", - "KeepOutput": false, - "LumisPerJob": 2, - "MCPileup": "/RelValMinBias_14TeV/CMSSW_10_6_1-106X_mcRun3_2021_realistic_v1_rsb-v1/GEN-SIM", - "Memory": 16000, - "Multicore": 4, - "ProcessingString": "Step2_WMCore_TEST", - "SplittingAlgo": "LumiBased", - "StepName": "RecoFullPU_2021PU" - }, - "Step3": { - "AcquisitionEra": "CMSSW_10_6_1_patch1", - "Campaign": "CMSSW_10_6_1_patch1_Step3", - "ConfigCacheID": "da2ff8e07187c0fd7b9de5c47b373d9a", - "GlobalTag": "106X_mcRun3_2021_realistic_v3", - "InputFromOutputModule": "MINIAODSIMoutput", - "InputStep": "RecoFullPU_2021PU", - "KeepOutput": true, - "LumisPerJob": 2, - "Memory": 16000, - "Multicore": 4, - "ProcessingString": "Step3_WMCore_TEST", - "SplittingAlgo": "LumiBased", - "StepName": "NanoFull_2021PU" - }, - "StepChain": 3, - "TimePerEvent": 10 - } + "createRequest": { + "AcquisitionEra": "DEFAULT_AcqEra", + "CMSSWVersion": "CMSSW_12_3_0", + "Campaign": "Campaign-OVERRIDE-ME", + "Comments": { + "CheckList": "StepChain with lumiMask; StepChain with PrimaryDataset overriden; Rucio automatic data placement", + "WorkFlowDesc": "SC with input and 100 lumis masked; SC with PrimaryDataset overriden; Create container rules at FNAL_Disk" + }, + "ConfigCacheUrl": "https://cmsweb.cern.ch/couchdb", + "CouchDBName": "reqmgr_config_cache", + "DQMUploadUrl": "https://cmsweb.cern.ch/dqm/relval", + "DbsUrl": "https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader/", + "DeterministicPileup": false, + "EventStreams": 0, + "GlobalTag": "123X_dataRun2_HLT_relval_v2", + "Memory": 13500, + "Multicore": 1, + "PrepID": "TEST-CMSSW_12_3_0__PPC-RunDoubleMuon2018C-00001", + "PrimaryDataset": "UPDATEME", + "ProcessingString": "DEFAULT_ProcStr", + "ProcessingVersion": 1, + "RequestPriority": 700000, + "RequestString": "RequestString-OVERRIDE-ME", + "RequestType": "StepChain", + "ScramArch": [ + "slc7_amd64_gcc11" + ], + "SizePerEvent": 1, + "Step1": { + "AcquisitionEra": "CMSSW_12_3_0", + "CMSSWVersion": "CMSSW_12_3_0", + "Campaign": "DMWM_Test", + "ConfigCacheID": "2f912ab5586fb42d5603f65ade81775c", + "GlobalTag": "123X_dataRun2_HLT_relval_v2", + "InputDataset": "/DoubleMuon/Run2018C-v1/RAW", + "KeepOutput": true, + "LumiList": {"319450": [[1, 100]]}, + "LumisPerJob": 1, + "Multicore": 8, + "PrimaryDataset": "DoubleMuon_New", + "ProcessingString": "Step1_WMCore_TEST", + "ScramArch": [ + "slc7_amd64_gcc11" + ], + "SplittingAlgo": "LumiBased", + "StepName": "HLTDR2_2018" + }, + "StepChain": 1, + "TimePerEvent": 1 + } } diff --git 
a/test/data/ReqMgr/requests/Integration/SC_PY3_PURecyc.json b/test/data/ReqMgr/requests/Integration/SC_PY3_PURecyc.json index 205e461fcbf..d6bcf887a9e 100644 --- a/test/data/ReqMgr/requests/Integration/SC_PY3_PURecyc.json +++ b/test/data/ReqMgr/requests/Integration/SC_PY3_PURecyc.json @@ -46,8 +46,7 @@ "Memory": 10000, "Multicore": 1, "PrepID": "TEST-CMSSW_12_0_0__fullsim_PU_2021_14TeV-ZMM_14-00004", - "PrimaryDataset": "UPDATEME", - "ProcessingString": "DEFAULT_ProcStr", + "ProcessingString": "DEFAULT_ProcStr", "ProcessingVersion": 1, "RequestPriority": 600000, "RequestString": "RequestString-OVERRIDE-ME", @@ -67,7 +66,7 @@ "KeepOutput": true, "MCPileup": "/RelValMinBias_14TeV/CMSSW_12_0_0_pre6-120X_mcRun3_2021_realistic_v4-v1/GEN-SIM", "Multicore": 8, - "ProcessingString": "Step1_WMCore_TEST", + "ProcessingString": "Step1_WMCore_TEST", "ScramArch": [ "slc7_amd64_gcc900" ], diff --git a/test/data/ReqMgr/requests/Integration/TaskChain_PUMCRecyc.json b/test/data/ReqMgr/requests/Integration/TaskChain_PUMCRecyc.json index b05411c9711..facc51a81c8 100644 --- a/test/data/ReqMgr/requests/Integration/TaskChain_PUMCRecyc.json +++ b/test/data/ReqMgr/requests/Integration/TaskChain_PUMCRecyc.json @@ -42,7 +42,7 @@ "GlobalTag": "112X_mcRun3_2024_realistic_v10", "Memory": 3000, "Multicore": 1, - "PrepID": "TEST-CMSSW_11_2_0_pre8__PDMVRELVALS-100-QCD_Pt_600_800_14-00002", + "PrepID": "TEST-PrepID-TopLevel", "ProcessingString": "DEFAULT_ProcStr", "ProcessingVersion": 1, "RequestPriority": 600000, @@ -87,7 +87,8 @@ "MCPileup": "/RelValMinBias_14TeV/CMSSW_11_2_0_pre8-112X_mcRun3_2024_realistic_v10_forTrk-v1/GEN-SIM", "Memory": 12000, "Multicore": 4, - "ProcessingString": "Task2_WMCore_TEST", + "PrepID": "TEST-PrepID-Task2", + "ProcessingString": "Task2_WMCore_TEST", "ScramArch": [ "slc7_amd64_gcc820" ], From 448e4fa11403dde190e9adac0b6de97510aeed57 Mon Sep 17 00:00:00 2001 From: khurtado Date: Mon, 25 Apr 2022 23:00:06 +0200 Subject: [PATCH 05/16] Remove support for Unpacking user tarballs. No longer used by CRAB. --- .../WMRuntime/Scripts/UnpackUserTarball.py | 136 ------------- .../WMCore/WMSpec/Steps/Executors/CMSSW.py | 28 +-- .../Scripts_t/UnpackUserTarball_t.py | 179 ------------------ 3 files changed, 2 insertions(+), 341 deletions(-) delete mode 100755 src/python/WMCore/WMRuntime/Scripts/UnpackUserTarball.py delete mode 100644 test/python/WMCore_t/WMRuntime_t/Scripts_t/UnpackUserTarball_t.py diff --git a/src/python/WMCore/WMRuntime/Scripts/UnpackUserTarball.py b/src/python/WMCore/WMRuntime/Scripts/UnpackUserTarball.py deleted file mode 100755 index 9765be877b7..00000000000 --- a/src/python/WMCore/WMRuntime/Scripts/UnpackUserTarball.py +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env python -""" -_UnpackUserTarball_ - -Unpack the user tarball and put it's contents in the right place -""" -from __future__ import print_function -from future import standard_library - -from Utils.Utilities import encodeUnicodeToBytes - -standard_library.install_aliases() - -import logging -import os -import hashlib -import re -import shutil -import subprocess -import sys -import tempfile - -import urllib.request -from urllib.parse import urlsplit -from subprocess import getstatusoutput - - -def setHttpProxy(url): - """ - Use frontier to figure out the http_proxies. 
- Pick one deterministically based on the url and loadbalance settings - """ - if 'http_proxy' in os.environ: - return os.environ['http_proxy'] - - status, output = getstatusoutput('cmsGetFnConnect frontier://smallfiles') - if status: - return None - - proxyList = re.findall('\(proxyurl=([\w\d\.\-\:\/]+)\)', output) - if 'loadbalance=proxies' in output: - urlHash = int(hashlib.sha1(encodeUnicodeToBytes(url)).hexdigest()[:15], 16) - proxy = proxyList[urlHash % len(proxyList)] - else: - proxy = proxyList[0] - os.environ['http_proxy'] = proxy - return proxy - - -def getRetriever(scheme): - """ - Get the right retriever function depending on the scheme. - If scheme is 'http' return urllib.urlretrieve, else if the scheme is https create a URLOpener - with certificates taken from the X509_USER_PROXY variable. If certificates are not available return - urllib.urlretrieve as for the http case. - """ - if 'X509_USER_PROXY' in os.environ and os.path.isfile(os.environ['X509_USER_PROXY']): - certfile = os.environ['X509_USER_PROXY'] - else: - if scheme == 'https': - logging.info("User proxy not found. Trying to retrieve the file without using certificates") - certfile = None - - if scheme == 'http' or not certfile: - retriever = urllib.request.urlretrieve - else: - logging.info("Using %s as X509 certificate", certfile) - op = urllib.request.URLopener(None, key_file=certfile, cert_file=certfile) - op.addheader('Accept', 'application/octet-stream') - retriever = op.retrieve - - return retriever - - -def UnpackUserTarball(): - tarballs = [] - userFiles = [] - if len(sys.argv) > 1: - tarballs = sys.argv[1].split(',') - if len(sys.argv) > 2: - userFiles = sys.argv[2].split(',') - - jobDir = os.environ['WMAGENTJOBDIR'] - - for tarball in tarballs: - splitResult = urlsplit(tarball) - tarFile = os.path.join(jobDir, os.path.basename(tarball)) - - # Is it a URL or a file that exists in the jobDir? 
- if splitResult[0] in ['xrootd', 'root']: - logging.info("Fetching tarball %s through xrootd", tarball) - try: - subprocess.check_call(['xrdcp', '-d', '1', '-f', tarball, 'TEMP_TARBALL.tgz']) - subprocess.check_call(['tar', 'xf', 'TEMP_TARBALL.tgz']) - except subprocess.CalledProcessError: - logging.error("Couldn't retrieve/extract file from xrootd") - raise - finally: - if os.path.exists('TEMP_TARBALL.tgz'): - os.unlink('TEMP_TARBALL.tgz') - - elif splitResult[0] in ['http', 'https'] and splitResult[1]: - retriever = getRetriever(splitResult[0]) - with tempfile.NamedTemporaryFile() as tempFile: - if setHttpProxy(tarball): - try: - logging.info('Fetching URL tarball %s through proxy server', tarball) - fileName, headers = retriever(tarball, tempFile.name) - except (RuntimeError, IOError): - del os.environ['http_proxy'] - logging.warning('Fetching URL tarball %s after proxy server failure', tarball) - fileName, headers = retriever(tarball, tempFile.name) - else: - logging.info('Fetching URL tarball %s without proxy server', tarball) - fileName, headers = retriever(tarball, tempFile.name) - - try: - subprocess.check_call(['tar', 'xf', fileName]) - except subprocess.CalledProcessError: - raise RuntimeError('Error extracting %s' % tarball) - elif os.path.isfile(tarFile): - logging.info("Untarring %s", tarFile) - subprocess.check_call(['tar', 'xf', tarFile]) - else: - raise IOError('%s does not exist' % tarFile) - - for userFile in userFiles: - if userFile: - logging.info("Moving '%s' to execution directory.", userFile) - shutil.move(userFile, '..') - - return 0 - - -if __name__ == '__main__': - sys.exit(UnpackUserTarball()) diff --git a/src/python/WMCore/WMSpec/Steps/Executors/CMSSW.py b/src/python/WMCore/WMSpec/Steps/Executors/CMSSW.py index 84795988180..2e4d045fcc7 100644 --- a/src/python/WMCore/WMSpec/Steps/Executors/CMSSW.py +++ b/src/python/WMCore/WMSpec/Steps/Executors/CMSSW.py @@ -126,10 +126,6 @@ def execute(self, emulator=None): cmsswCommand = self.step.application.command.executable cmsswConfig = self.step.application.command.configuration cmsswArguments = self.step.application.command.arguments - userTarball = ','.join(self.step.user.inputSandboxes) - userFiles = ','.join(self.step.user.userFiles) - logging.info('User files are %s', userFiles) - logging.info('User sandboxes are %s', userTarball) scramArch = getSingleScramArch(scramArch) @@ -256,8 +252,6 @@ def execute(self, emulator=None): jobReportXML, cmsswCommand, cmsswConfig, - userTarball, - userFiles, cmsswArguments] logging.info("Executing CMSSW. args: %s", args) @@ -394,7 +388,7 @@ def post(self, emulator=None): if [ $# -lt $REQUIRED_ARGUMENT_COUNT ] then echo "Usage: `basename $0` \ - [Arguments for cmsRun]" + [Arguments for cmsRun]" exit 70 fi @@ -408,12 +402,8 @@ def post(self, emulator=None): JOB_REPORT=$6 EXECUTABLE=$7 CONFIGURATION=$8 -USER_TARBALL=$9 shift;shift;shift;shift;shift; -shift;shift;shift;shift; -# Can only do nine parameters at a time -USER_FILES=$1 -shift; +shift;shift;shift; echo "Setting up Frontier log level" export FRONTIER_LOG_LEVEL=warning @@ -438,20 +428,6 @@ def post(self, emulator=None): eval `$SCRAM_COMMAND runtime -sh` EXIT_STATUS=$? if [ $EXIT_STATUS -ne 0 ]; then echo "***\nCouldn't get scram runtime: $EXIT_STATUS\n*"; exit 73; fi -if [ -n "$USER_TARBALL" ] ; then - python2 -m WMCore.WMRuntime.Scripts.UnpackUserTarball $USER_TARBALL $USER_FILES - EXIT_STATUS=$? 
- if [ $EXIT_STATUS -ne 0 ]; then - echo "***\nCouldn't untar sandbox with python2: $EXIT_STATUS\n"; - echo "***\nWill try with python2.6 as it might be an old CMSSW release!" - python2.6 -m WMCore.WMRuntime.Scripts.UnpackUserTarball $USER_TARBALL $USER_FILES - EXIT_STATUS=$? - if [ $EXIT_STATUS -ne 0 ]; then - echo "***\nCouldn't untar sandbox with python2.6: $EXIT_STATUS\n"; - exit 74; - fi - fi -fi echo "Completed SCRAM project" cd .. echo "Executing CMSSW" diff --git a/test/python/WMCore_t/WMRuntime_t/Scripts_t/UnpackUserTarball_t.py b/test/python/WMCore_t/WMRuntime_t/Scripts_t/UnpackUserTarball_t.py deleted file mode 100644 index c1525653dcb..00000000000 --- a/test/python/WMCore_t/WMRuntime_t/Scripts_t/UnpackUserTarball_t.py +++ /dev/null @@ -1,179 +0,0 @@ -#!/usr/bin/env python -""" -_UnpackUserTarball_t.py - -Tests for the user tarball unpacker and additional file mover - -""" - -from future import standard_library -standard_library.install_aliases() - -import urllib - -import logging -import os -import subprocess -import sys -import tempfile -import unittest - -from WMCore.WMRuntime.Scripts.UnpackUserTarball import UnpackUserTarball - -class UnpackUserTarballTest(unittest.TestCase): - """ - unittest for UnpackUserTarball script - - """ - - - # Set up a dummy logger - logger = logging.getLogger('UNITTEST') - logger.setLevel(logging.DEBUG) - ch = logging.StreamHandler() - ch.setLevel(logging.ERROR) - logger.addHandler(ch) - - - def setUp(self): - """ - Set up for unit tests - """ - - os.environ['WMAGENTJOBDIR'] = '/tmp/' - self.arch = 'slc5_ia32_gcc434' - self.version = 'CMSSW_3_8_7' - self.base = '/tmp/%s/' % self.version - self.localFile = '/%s/unittestTarball.tgz' % os.environ['WMAGENTJOBDIR'] - self.tempDir = tempfile.mkdtemp() - self.logger.debug("Using temp directory %s" % self.tempDir) - - self.origDir = os.getcwd() - - # Make a dummy CMSSW environment - - commands = [ - 'rm -rf %s' % self.base, - 'mkdir -p %s/lib/%s/' % (self.base, self.arch), - 'touch %s/lib/%s/libSomething.so' % (self.base, self.arch), - 'mkdir -p %s/src/Module/Submodule/data/' % (self.base), - 'touch %s/src/Module/Submodule/data/datafile.txt' % (self.base), - 'touch %s/extra_file.txt' % (self.base), - 'touch %s/extra_file2.txt' % (self.base), - 'touch %s/additional_file.txt' % (self.base), - 'tar -C %s -czf %s .' 
% (self.base, self.localFile), - ] - - for command in commands: - self.logger.debug("Executing command %s" % command) - subprocess.check_call(command.split(' ')) - os.mkdir(os.path.join(self.tempDir, self.version)) - os.chdir(os.path.join(self.tempDir, self.version)) - return - - - def tearDown(self): - """ - Clean up the files we've spewed all over - """ - os.chdir(self.origDir) - - subprocess.check_call(['rm', '-rf', self.tempDir]) - subprocess.check_call(['rm', '-rf', self.base]) - subprocess.check_call(['rm', '-f', self.localFile]) - - return - - - def testFileSandbox(self): - """ - _testFileSandbox_ - - Test a single sandbox that is a file - - """ - sys.argv = ['scriptName','unittestTarball.tgz',''] - UnpackUserTarball() - self.assertTrue(os.path.isfile('lib/slc5_ia32_gcc434/libSomething.so')) - - - def testBadFile(self): - """ - _testBadFile_ - - Test we get an exception from a non-existent file - - """ - sys.argv = ['scriptName','doesNotExist.tgz',''] - self.assertRaises(IOError, UnpackUserTarball) - - - def testUrlSandbox(self): - """ - _testUrlSandbox_ - - Test a single sandbox that is a URL - - """ - - sys.argv = ['scriptName','http://home.fnal.gov/~ewv/unittestTarball.tgz',''] - UnpackUserTarball() - self.assertTrue(os.path.isfile('lib/slc5_ia32_gcc434/libSomething.so')) - - - def testUrlNotTar(self): - """ - _testUrlSandbox_ - - Test a single sandbox that is a URL - - """ - - sys.argv = ['scriptName','http://home.fnal.gov/~ewv/index.html',''] - self.assertRaises(RuntimeError, UnpackUserTarball) - - - def testBadUrl(self): - """ - _testBadUrl_ - - Test a single sandbox that is a URL - - """ - - sys.argv = ['scriptName','http://home.fnal.gov/~ewv/not-there.txt',''] - self.assertRaises(urllib.error.HTTPError, UnpackUserTarball) - - - def testFileAndURLSandbox(self): - """ - _testFileAndURLSandbox_ - - Test two sandboxes. One a file, one a URL - - """ - - sys.argv = ['scriptName','unittestTarball.tgz,http://home.fnal.gov/~ewv/unittestTarball.tgz',''] - UnpackUserTarball() - # First is only in web tarball, second only in local - self.assertTrue(os.path.isfile('lib/slc5_ia32_gcc434/libSomething.so')) - self.assertTrue(os.path.isfile('lib/slc5_ia32_gcc434/libSomefile.so')) - - - def testFileRelocation(self): - """ - _testFileRelocation_ - - Test our ability to relocate files within the sandbox - - """ - - sys.argv = ['scriptName','unittestTarball.tgz','extra_file.txt,additional_file.txt'] - UnpackUserTarball() - self.assertTrue(os.path.isfile('../extra_file.txt')) - self.assertTrue(os.path.isfile('../additional_file.txt')) - - - -if __name__ == "__main__": - unittest.main() From 766d9f5a67e7d4776d10b4aa82ea182b6dfc110f Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Mon, 25 Apr 2022 14:28:03 -0400 Subject: [PATCH 06/16] Create script to cancel GQEs --- bin/kill-workflow-in-global | 44 +++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100755 bin/kill-workflow-in-global diff --git a/bin/kill-workflow-in-global b/bin/kill-workflow-in-global new file mode 100755 index 00000000000..d83c3a8b336 --- /dev/null +++ b/bin/kill-workflow-in-global @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +""" +When to use this script: when a workflow is in status "aborted" but +it still has active GQEs. + +Use this script to mimic exactly the same action as the one taken +by ReqMgr2 when aborting a workflow (without a state transition). 
+ +This script will mark the global workqueue elements - for a given +workflow - as CancelRequested, such that the agents can proceed +and acknowledge it, moving elements to status Canceled. +""" +from __future__ import print_function + +import os +import sys + +from WMCore.Configuration import loadConfigurationFile +from WMCore.Services.WorkQueue.WorkQueue import WorkQueue + + +def main(): + args = sys.argv[1:] + if not len(args) == 1: + print("usage: kill-workflow-in-global workflowname") + sys.exit(0) + wflowName = args[0] + + # get configuration file path + if "WMAGENT_CONFIG" not in os.environ: + os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py' + + # load config + wmConfig = loadConfigurationFile(os.environ['WMAGENT_CONFIG']) + + gqService = WorkQueue(wmConfig.WorkloadSummary.couchurl, + wmConfig.WorkQueueManager.dbname) + + gqService.cancelWorkflow(wflowName) + print("Cancel requested for workflow: {}".format(wflowName)) + + +if __name__ == "__main__": + main() From 6ef58dc6737d348456ed75f641d9efffa8668232 Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Mon, 25 Apr 2022 16:01:37 -0400 Subject: [PATCH 07/16] Update ReqMgr2 CP thread to properly deal with aborted/force-complete workflows CancelRequested plus final state should be considered as aborted renamed constant for work cancelation rename workqueue aggregate status from aborted to canceled --- .../CherryPyThreads/StatusChangeTasks.py | 70 ++++++++++--------- .../ReqMgr/DataStructs/RequestStatus.py | 15 ++-- .../WMCore/Services/WorkQueue/WorkQueue.py | 20 ++++-- 3 files changed, 56 insertions(+), 49 deletions(-) diff --git a/src/python/WMCore/ReqMgr/CherryPyThreads/StatusChangeTasks.py b/src/python/WMCore/ReqMgr/CherryPyThreads/StatusChangeTasks.py index f20d4b180bf..03c355b60f0 100644 --- a/src/python/WMCore/ReqMgr/CherryPyThreads/StatusChangeTasks.py +++ b/src/python/WMCore/ReqMgr/CherryPyThreads/StatusChangeTasks.py @@ -1,24 +1,21 @@ """ Created on May 19, 2015 """ -from __future__ import (division, print_function) -from builtins import range -from future.utils import viewitems from WMCore.REST.CherryPyPeriodicTask import CherryPyPeriodicTask -from WMCore.ReqMgr.DataStructs.RequestStatus import AUTO_TRANSITION +from WMCore.ReqMgr.DataStructs.RequestStatus import AUTO_TRANSITION, CANCEL_AUTO_TRANSITION from WMCore.Services.WorkQueue.WorkQueue import WorkQueue from WMCore.Services.ReqMgr.ReqMgr import ReqMgr def moveForwardStatus(reqmgrSvc, wfStatusDict, logger): - for status, nextStatus in viewitems(AUTO_TRANSITION): + for status, nextStatus in AUTO_TRANSITION.items(): count = 0 requests = reqmgrSvc.getRequestByStatus([status], detail=False) for wf in requests: stateFromGQ = wfStatusDict.get(wf, None) - if stateFromGQ is None: + if stateFromGQ in [None, "canceled"]: continue elif stateFromGQ == status: continue @@ -38,40 +35,45 @@ def moveForwardStatus(reqmgrSvc, wfStatusDict, logger): except ValueError: # No state change needed continue - # special case for aborted workflow - aborted-completed instead of completed - if status == "aborted" and i == 0: + for j in range(i + 1): count += 1 - reqmgrSvc.updateRequestStatus(wf, "aborted-completed") - logger.info("%s in %s moved to %s", wf, status, "aborted-completed") - else: - for j in range(i + 1): - count += 1 - reqmgrSvc.updateRequestStatus(wf, nextStatus[j]) - logger.info("%s in %s moved to %s", wf, status, nextStatus[j]) + reqmgrSvc.updateRequestStatus(wf, nextStatus[j]) + logger.info("%s in %s moved to %s", wf, status, 
nextStatus[j]) logger.info("%s requests moved to new state from %s", count, status) return -def moveToCompletedForNoWQJobs(reqmgrSvc, wfStatusDict, logger): +def moveToCompletedForNoWQJobs(reqmgrSvc, globalQSvc, wfStatusDict, logger): """ - Handle the case when request is aborted/rejected before elements are created in GQ + Handle workflows that have been either aborted or force-completed. + This will ensure that no global workqueue elements will be left behind. + + :param reqmgrSvc: object instance of the ReqMgr class + :param globalQSvc: object instance of the WorkQueue class + :param wfStatusDict: workflow status according to the workqueue elements + :param logger: a logger object instance + :return: None object """ - - statusTransition = {"aborted": ["aborted-completed"]} - - for status, nextStatusList in viewitems(statusTransition): + for status, nextStatus in CANCEL_AUTO_TRANSITION.items(): requests = reqmgrSvc.getRequestByStatus([status], detail=False) count = 0 - for wf in requests: - # check whether wq elements exists for given request - # if not, it means - if wf not in wfStatusDict: - for nextStatus in nextStatusList: - reqmgrSvc.updateRequestStatus(wf, nextStatus) - count += 1 - logger.info("Total aborted-completed: %d", count) - - return + for wflowName in requests: + stateFromGQ = wfStatusDict.get(wflowName, None) + if stateFromGQ == "canceled": + # elements still in CancelRequested, wait for the agent to do his job + continue + elif stateFromGQ in ["acquired", "running-open", "running-closed"]: + # then something went wrong with the workflow abortion/force-completion + # trigger another cancel request + logger.info("%s in %s but WQEs in %s, cancelling it again!", + wflowName, status, stateFromGQ) + globalQSvc.cancelWorkflow(wflowName) + elif stateFromGQ in ["completed", None]: + # all elements are already in a final state or no longer exist, advance status + count += 1 + reqmgrSvc.updateRequestStatus(wflowName, nextStatus) + logger.info("%s in %s moved to %s", wflowName, status, nextStatus) + logger.info("Total %s: %d", nextStatus, count) class StatusChangeTasks(CherryPyPeriodicTask): @@ -89,14 +91,14 @@ def advanceStatus(self, config): Advance the request status based on the global workqueue elements status """ reqmgrSvc = ReqMgr(config.reqmgr2_url, logger=self.logger) - gqService = WorkQueue(config.workqueue_url) + globalQSvc = WorkQueue(config.workqueue_url) self.logger.info("Getting GQ data for status check") - wfStatusDict = gqService.getWorkflowStatusFromWQE() + wfStatusDict = globalQSvc.getWorkflowStatusFromWQE() self.logger.info("Advancing statuses") moveForwardStatus(reqmgrSvc, wfStatusDict, self.logger) - moveToCompletedForNoWQJobs(reqmgrSvc, wfStatusDict, self.logger) + moveToCompletedForNoWQJobs(reqmgrSvc, globalQSvc, wfStatusDict, self.logger) self.logger.info("Done advancing status") diff --git a/src/python/WMCore/ReqMgr/DataStructs/RequestStatus.py b/src/python/WMCore/ReqMgr/DataStructs/RequestStatus.py index 138c31160cf..281dee4e7bd 100644 --- a/src/python/WMCore/ReqMgr/DataStructs/RequestStatus.py +++ b/src/python/WMCore/ReqMgr/DataStructs/RequestStatus.py @@ -146,18 +146,17 @@ "rejected-archived": [], } -# transition automatically controlled by ReqMgr2 -# aborted to completed instead of aborted-completed -# since workqueue mapping doesn't have aborted-completed status. 
-# but it need to be converted to aborted-completed whenever update db -### NOTE: the order of the list matters and it's used for status transition +# Workflow state transition automatically controlled by ReqMgr2 +### NOTE: order of this list matters and it's used for status transition AUTO_TRANSITION = {"staged": ["acquired", "running-open", "running-closed", "completed"], "acquired": ["running-open", "running-closed", "completed"], "running-open": ["running-closed", "completed"], - "aborted": ["completed"], - "running-closed": ["completed"], - "force-complete": ["completed"]} + "running-closed": ["completed"]} +# Workflow state transition automatically controlled by ReqMgr2 +# Specific to workflows either aborted or force-completed +CANCEL_AUTO_TRANSITION = {"aborted": "aborted-completed", + "force-complete": "completed"} # list of destination states which doesn't allow any additional argument update STATES_ALLOW_ONLY_STATE_TRANSITION = [key for key, val in viewitems(ALLOWED_ACTIONS_FOR_STATUS) if len(val) == 0] diff --git a/src/python/WMCore/Services/WorkQueue/WorkQueue.py b/src/python/WMCore/Services/WorkQueue/WorkQueue.py index e95505b3b72..f3e75f43969 100644 --- a/src/python/WMCore/Services/WorkQueue/WorkQueue.py +++ b/src/python/WMCore/Services/WorkQueue/WorkQueue.py @@ -14,7 +14,7 @@ def convertWQElementsStatusToWFStatus(elementsStatusSet): :param: elementsStatusSet - dictionary of {request_name: set of all WQE status of this request, ...} :returns: request status - Here is the mapping between request status and it GQE status + Here is the mapping between request status and the GQE status 1. acquired: all the GQEs are either Available or Negotiating. Work is still in GQ, but not LQ. 2. running-open: at least one of the GQEs are in Acquired status. @@ -24,9 +24,9 @@ def convertWQElementsStatusToWFStatus(elementsStatusSet): All work is finished in WMBS (excluding cleanup and logcollect) 5. failed: all the GQEs are in Failed status. If the workflow has multiple GQEs and only a few are in Failed status, then just follow the usual request status. - - NOTE: CancelRequested status is a transient status and it should not trigger - any request status transition (thus, None gets returned). + 6. canceled: used to distinguish requests that have been correctly canceled, + coming from workflows either aborted or force-complete. This state does not + trigger a workflow status transition. 
""" if not elementsStatusSet: return None @@ -37,11 +37,15 @@ def convertWQElementsStatusToWFStatus(elementsStatusSet): running = set(["Running"]) runningOpen = set(["Available", "Negotiating", "Acquired"]) runningClosed = set(["Running", "Done", "Canceled"]) + canceled = set(["CancelRequested", "Done", "Canceled", "Failed"]) completed = set(["Done", "Canceled", "Failed"]) failed = set(["Failed"]) - if forceCompleted <= elementsStatusSet: # at least 1 WQE in CancelRequested - return None + # Just a reminder: + # <= every element in the left set is also in the right set + # & return elements common between the left and right set + if elementsStatusSet == forceCompleted: # all WQEs in CancelRequested + return "canceled" elif elementsStatusSet == acquired: # all WQEs in Acquired return "running-open" elif elementsStatusSet == running: # all WQEs in Running @@ -52,6 +56,8 @@ def convertWQElementsStatusToWFStatus(elementsStatusSet): return "acquired" elif elementsStatusSet <= completed: # all WQEs in a final state return "completed" + elif elementsStatusSet <= canceled: # some WQEs still waiting to be cancelled + return "canceled" elif elementsStatusSet & runningOpen: # at least 1 WQE still in GQ return "running-open" elif elementsStatusSet & runningClosed: # all WQEs already in LQ and WMBS @@ -213,7 +219,7 @@ def getAvailableWorkflows(self): def cancelWorkflow(self, wf): """Cancel a workflow""" - nonCancelableElements = ['Done', 'Canceled', 'Failed'] + nonCancelableElements = ['Done', 'CancelRequested', 'Canceled', 'Failed'] data = self.db.loadView('WorkQueue', 'elementsDetailByWorkflowAndStatus', {'startkey': [wf], 'endkey': [wf, {}], 'reduce': False}) From ebaf3eb3fe0b7e1bb8ce2ebf63d33b1516a38c28 Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Mon, 25 Apr 2022 16:53:26 -0400 Subject: [PATCH 08/16] fix unit tests update unit tests --- .../Services_t/WorkQueue_t/WorkQueue_t.py | 41 +++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/test/python/WMCore_t/Services_t/WorkQueue_t/WorkQueue_t.py b/test/python/WMCore_t/Services_t/WorkQueue_t/WorkQueue_t.py index 3434e47a54d..2f09b701de5 100644 --- a/test/python/WMCore_t/Services_t/WorkQueue_t/WorkQueue_t.py +++ b/test/python/WMCore_t/Services_t/WorkQueue_t/WorkQueue_t.py @@ -214,17 +214,11 @@ def testConvertWQElementsStatusToWFStatus(self): self.assertEqual(convertWQElementsStatusToWFStatus(set(["Available", "Negotiating", "Acquired"])), "running-open") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Available", "Negotiating", "Acquired", "Running"])), "running-open") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Available", "Negotiating", "Acquired", "Running", "Done"])), "running-open") - self.assertIsNone(convertWQElementsStatusToWFStatus(set(["Available", "Negotiating", "Acquired", "Running", "Done", "CancelRequested"]))) - self.assertIsNone(convertWQElementsStatusToWFStatus(set(["Available", "Negotiating", "Acquired", "Running", "Done", "CancelRequested", "Canceled"]))) self.assertEqual(convertWQElementsStatusToWFStatus(set(["Negotiating", "Acquired"])), "running-open") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Negotiating", "Acquired", "Running"])), "running-open") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Negotiating", "Acquired", "Running", "Done"])), "running-open") - self.assertIsNone(convertWQElementsStatusToWFStatus(set(["Negotiating", "Acquired", "Running", "Done", "CancelRequested"]))) - 
self.assertIsNone(convertWQElementsStatusToWFStatus(set(["Negotiating", "Acquired", "Running", "Done", "CancelRequested", "Canceled"]))) self.assertEqual(convertWQElementsStatusToWFStatus(set(["Acquired", "Running"])), "running-open") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Acquired", "Running", "Done"])), "running-open") - self.assertIsNone(convertWQElementsStatusToWFStatus(set(["Acquired", "Running", "Done", "CancelRequested"]))) - self.assertIsNone(convertWQElementsStatusToWFStatus(set(["Acquired", "Running", "Done", "CancelRequested", "Canceled"]))) self.assertEqual(convertWQElementsStatusToWFStatus(set(["Available", "Done"])), "running-open") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Available", "Running", "Done", "Canceled"])), "running-open") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Acquired", "Done"])), "running-open") @@ -234,12 +228,10 @@ def testConvertWQElementsStatusToWFStatus(self): # workflows completely acquired by the agents self.assertEqual(convertWQElementsStatusToWFStatus(set(["Running"])), "running-closed") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Running", "Done"])), "running-closed") - self.assertIsNone(convertWQElementsStatusToWFStatus(set(["Running", "Done", "CancelRequested"]))) - self.assertIsNone(convertWQElementsStatusToWFStatus(set(["Running", "Done", "CancelRequested", "Canceled"]))) self.assertEqual(convertWQElementsStatusToWFStatus(set(["Running", "Done", "Canceled"])), "running-closed") # workflows completed/aborted/force-completed, thus existent elements - # but no more active workqueue elements in the system + # but no more active workqueue elements in the system self.assertEqual(convertWQElementsStatusToWFStatus(set(["Done"])), "completed") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Canceled"])), "completed") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Done", "Canceled"])), "completed") @@ -256,9 +248,26 @@ def testConvertWQElementsStatusToWFStatus(self): self.assertEqual(convertWQElementsStatusToWFStatus(set(["Done", "Failed"])), "completed") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Canceled", "Failed"])), "completed") - # workflows in a temporary state, nothing to do with them yet - self.assertIsNone(convertWQElementsStatusToWFStatus(set(["Done", "CancelRequested"]))) - self.assertIsNone(convertWQElementsStatusToWFStatus(set(["CancelRequested"]))) + # workflows that have been aborted but still have workqueue elements around + self.assertEqual("running-open", convertWQElementsStatusToWFStatus( + set(["Available", "Negotiating", "Acquired", "Running", "Done", "CancelRequested"]))) + self.assertEqual("running-open", convertWQElementsStatusToWFStatus( + set(["Available", "Negotiating", "Acquired", "Running", "Done", "CancelRequested", "Canceled"]))) + self.assertEqual("running-open", convertWQElementsStatusToWFStatus( + set(["Negotiating", "Acquired", "Running", "Done", "CancelRequested"]))) + self.assertEqual("running-open", convertWQElementsStatusToWFStatus( + set(["Negotiating", "Acquired", "Running", "Done", "CancelRequested", "Canceled"]))) + self.assertEqual("running-open", convertWQElementsStatusToWFStatus( + set(["Acquired", "Running", "Done", "CancelRequested"]))) + self.assertEqual("running-open", convertWQElementsStatusToWFStatus( + set(["Acquired", "Running", "Done", "CancelRequested", "Canceled"]))) + self.assertEqual("running-closed", convertWQElementsStatusToWFStatus( + set(["Running", "Done", "CancelRequested"]))) + 
self.assertEqual("running-closed", convertWQElementsStatusToWFStatus( + set(["Running", "Done", "CancelRequested", "Canceled"]))) + self.assertEqual("canceled", convertWQElementsStatusToWFStatus( + set(["Done", "CancelRequested"]))) + self.assertEqual("canceled", convertWQElementsStatusToWFStatus(set(["CancelRequested"]))) def test2ConvertWQElementsStatusToWFStatus(self): """ @@ -271,7 +280,7 @@ def test2ConvertWQElementsStatusToWFStatus(self): self.assertEqual(convertWQElementsStatusToWFStatus(set(["Acquired"])), "running-open") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Running"])), "running-closed") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Done"])), "completed") - self.assertIsNone(convertWQElementsStatusToWFStatus(set(["CancelRequested"]))) + self.assertEqual(convertWQElementsStatusToWFStatus(set(["CancelRequested"])), "canceled") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Canceled"])), "completed") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Failed"])), "failed") @@ -291,9 +300,9 @@ def test2ConvertWQElementsStatusToWFStatus(self): self.assertEqual(convertWQElementsStatusToWFStatus(set(["Running", "Done"])), 'running-closed') self.assertEqual(convertWQElementsStatusToWFStatus(set(["Running", "Failed"])), "running-closed") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Done", "Failed"])), "completed") - self.assertIsNone(convertWQElementsStatusToWFStatus(set(["Done", "CancelRequested"]))) + self.assertEqual(convertWQElementsStatusToWFStatus(set(["Done", "CancelRequested"])), "canceled") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Done", "Canceled"])), "completed") - self.assertIsNone(convertWQElementsStatusToWFStatus(set(["CancelRequested", "Canceled"]))) + self.assertEqual(convertWQElementsStatusToWFStatus(set(["CancelRequested", "Canceled"])), "canceled") # triple WQE with standard state transition self.assertEqual(convertWQElementsStatusToWFStatus(set(["Available", "Negotiating", "Acquired"])), "running-open") @@ -316,7 +325,7 @@ def test2ConvertWQElementsStatusToWFStatus(self): self.assertEqual(convertWQElementsStatusToWFStatus(set(["Acquired", "Running", "Failed"])), "running-open") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Acquired", "Done", "Failed"])), "running-open") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Running", "Done", "Failed"])), "running-closed") - self.assertIsNone(convertWQElementsStatusToWFStatus(set(["CancelRequested", "Done", "Failed"]))) + self.assertEqual(convertWQElementsStatusToWFStatus(set(["CancelRequested", "Done", "Failed"])), "canceled") self.assertEqual(convertWQElementsStatusToWFStatus(set(["Canceled", "Done", "Failed"])), "completed") From 73eba96a55476bcbffb785af4151ce82890e3cd6 Mon Sep 17 00:00:00 2001 From: Dirk Hufnagel Date: Wed, 21 Jul 2021 23:40:47 +0200 Subject: [PATCH 09/16] change fallback for missing system xrootd-client Update fallback COMP xrootd library to OSG-based repository remove xrdcp-old; fix check for xrdcp/xrdfs utils --- .../WMCore/Storage/Backends/XRDCPImpl.py | 60 ++++++++----------- 1 file changed, 26 insertions(+), 34 deletions(-) diff --git a/src/python/WMCore/Storage/Backends/XRDCPImpl.py b/src/python/WMCore/Storage/Backends/XRDCPImpl.py index 8fdc9692810..bbd8ec7c3f9 100644 --- a/src/python/WMCore/Storage/Backends/XRDCPImpl.py +++ b/src/python/WMCore/Storage/Backends/XRDCPImpl.py @@ -10,6 +10,7 @@ from __future__ import print_function import argparse +import logging import os from WMCore.Storage.Registry import 
registerStageOutImpl @@ -43,6 +44,21 @@ def createOutputDirectory(self, targetPFN): """ return + def _checkXRDUtilsExist(self): + """ + Verifies whether xrdcp and xrdfs utils exist in the job path. + + :return: True if both exist, otherwise False + """ + foundXrdcp, foundXrdfs = False, False + for path in os.environ["PATH"].split(os.pathsep): + if os.access(os.path.join(path, "xrdcp"), os.X_OK): + foundXrdcp = True + if os.access(os.path.join(path, "xrdfs"), os.X_OK): + foundXrdfs = True + + return foundXrdcp & foundXrdfs + def createStageOutCommand(self, sourcePFN, targetPFN, options=None, checksums=None): """ _createStageOutCommand_ @@ -60,7 +76,6 @@ def createStageOutCommand(self, sourcePFN, targetPFN, options=None, checksums=No parser = argparse.ArgumentParser() parser.add_argument('--wma-cerncastor', action='store_true') - parser.add_argument('--wma-old', action='store_true') parser.add_argument('--wma-disablewriterecovery', action='store_true') parser.add_argument('--wma-preload') args, unknown = parser.parse_known_args(options.split()) @@ -82,46 +97,23 @@ def createStageOutCommand(self, sourcePFN, targetPFN, options=None, checksums=No useChecksum = (checksums is not None and 'adler32' in checksums and not self.stageIn) - xrdcpExec = "xrdcp" - if args.wma_old: - xrdcpExec = "xrdcp-old" - - # check if xrdcp(-old) and xrdfs are in path - # fallback to xrootd 4.0.4 from COMP externals if not - xrootdInPath = False - if any(os.access(os.path.join(path, xrdcpExec), os.X_OK) for path in os.environ["PATH"].split(os.pathsep)): - if any(os.access(os.path.join(path, "xrdfs"), os.X_OK) for path in os.environ["PATH"].split(os.pathsep)): - xrootdInPath = True - - if not xrootdInPath: - # COMP software can be in many place, check all of them - cmsSoftDir = os.environ.get("VO_CMS_SW_DIR", None) - if not cmsSoftDir: - cmsSoftDir = os.environ.get("OSG_APP", None) - if cmsSoftDir: - cmsSoftDir = os.path.join(cmsSoftDir, "cmssoft/cms") - else: - cmsSoftDir = os.environ.get("CVMFS", None) - - if cmsSoftDir: - - initFiles = [] - initFiles.append(os.path.join(cmsSoftDir, "COMP/slc6_amd64_gcc493/external/xrootd/4.0.4-comp/etc/profile.d/init.sh")) - initFiles.append(os.path.join(cmsSoftDir, "COMP/slc6_amd64_gcc493/external/libevent/2.0.22/etc/profile.d/init.sh")) - initFiles.append(os.path.join(cmsSoftDir, "COMP/slc6_amd64_gcc493/external/gcc/4.9.3/etc/profile.d/init.sh")) - - if all(os.path.isfile(initFile) for initFile in initFiles): - for initFile in initFiles: - copyCommand += "source %s\n" % initFile + # If not, check whether OSG cvmfs repo is available. + # This likely only works on RHEL7 x86-64 (and compatible) OS, but this + # represents most of our resources and it's a fallback mechanism anyways + if not self._checkXRDUtilsExist(): + logging.warning("Failed to find XRootD in the path. 
Trying fallback to OSG CVMFS...") + initFile = "/cvmfs/oasis.opensciencegrid.org/osg-software/osg-wn-client/current/el7-x86_64/setup.sh" + if os.path.isfile(initFile): + copyCommand += "source %s\n" % initFile if args.wma_disablewriterecovery: copyCommand += "env XRD_WRITERECOVERY=0 " if args.wma_preload: - xrdcpCmd = "%s %s" % (args.wma_preload, xrdcpExec) + xrdcpCmd = "%s xrdcp" % args.wma_preload self.xrdfsCmd = "%s xrdfs" % args.wma_preload else: - xrdcpCmd = xrdcpExec + xrdcpCmd = "xrdcp" self.xrdfsCmd = "xrdfs" copyCommand += "%s --force --nopbar " % xrdcpCmd From 8a784292b3297a594ee69831a7f7b350d6913fac Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Tue, 26 Apr 2022 23:47:37 -0400 Subject: [PATCH 10/16] fix unit tests --- test/python/WMCore_t/Storage_t/Backends_t/XRDCPImpl_t.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/python/WMCore_t/Storage_t/Backends_t/XRDCPImpl_t.py b/test/python/WMCore_t/Storage_t/Backends_t/XRDCPImpl_t.py index 77d659db188..f2deeeec169 100644 --- a/test/python/WMCore_t/Storage_t/Backends_t/XRDCPImpl_t.py +++ b/test/python/WMCore_t/Storage_t/Backends_t/XRDCPImpl_t.py @@ -1,9 +1,9 @@ from __future__ import (print_function, division) import unittest +import os from mock import mock - from WMCore.Storage.Backends.XRDCPImpl import XRDCPImpl @@ -87,6 +87,9 @@ def createStageOutCommandResults(self, stageIn, sourcePFN, targetPFN, localPFN, copyCommand += "echo \"Local File Size is: $LOCAL_SIZE\"\n" if copyCommandOptions: targetPFN += "?svcClass=t0cms" + initFile = "/cvmfs/oasis.opensciencegrid.org/osg-software/osg-wn-client/current/el7-x86_64/setup.sh" + if not self.XRDCPImpl._checkXRDUtilsExist() and os.path.isfile(initFile): + copyCommand += "source {}\n".format(initFile) copyCommand += "xrdcp --force --nopbar " if unknow: copyCommand += "%s " % unknow From c878a6f7077e431ee6d6729602efe520000058e7 Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Thu, 28 Apr 2022 11:29:27 +0200 Subject: [PATCH 11/16] 2.0.3.pre3 Signed-off-by: Alan Malta Rodrigues --- CHANGES | 9 +++++++++ src/python/WMCore/__init__.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index d98ef5c381e..fafe596abb9 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,12 @@ +2.0.3.pre2 to 2.0.3.pre3: + - change fallback for missing system xrootd-client (Dirk Hufnagel) #11117 + - Update ReqMgr2 CP thread to properly deal with aborted/force-complete workflows (Alan Malta Rodrigues) #11113 + - Create script to cancel GQEs (Alan Malta Rodrigues) #11113 + - Remove support for Unpacking user tarballs. No longer used by CRAB. 
(khurtado) #11114 + - Minor test json template updates (Alan Malta Rodrigues) #10864 + - Check CMSSW version before getting sim-datatier map (germanfgv) #11110 + + 2.0.3.pre1 to 2.0.3.pre2: - move ckey/cert functions to Utils.CertTools (Valentin Kuznetsov) #11101 - Add bin and dependencies to wmcore PyPI package (Erik Gough) #11103 diff --git a/src/python/WMCore/__init__.py b/src/python/WMCore/__init__.py index f549f1625f8..cd9c66a6a7d 100644 --- a/src/python/WMCore/__init__.py +++ b/src/python/WMCore/__init__.py @@ -6,5 +6,5 @@ """ -__version__ = '2.0.3.pre2' +__version__ = '2.0.3.pre3' __all__ = [] From 2d721f59365adb03609e75593491d88caa21814f Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Thu, 28 Apr 2022 15:25:17 -0400 Subject: [PATCH 12/16] Remove block open logic from DBS3Reader and WorkQueue more OpenForWriting removals fix DataBlockGenerator removed DBS3Reader listOpenFileBlocks method new line at the end of the module --- src/python/WMCore/Services/DBS/DBS3Reader.py | 61 ++------------- .../WMCore/Services/DBS/DBSWriterObjects.py | 74 +------------------ .../WMCore/WorkQueue/Policy/Start/Block.py | 11 ++- src/python/WMCore/WorkQueue/WMBSHelper.py | 5 +- src/python/WMCore/WorkQueue/WorkQueue.py | 33 +-------- .../DataBlockGenerator/DataBlockGenerator.py | 22 ++---- .../Emulators/DataBlockGenerator/Globals.py | 8 -- 7 files changed, 24 insertions(+), 190 deletions(-) diff --git a/src/python/WMCore/Services/DBS/DBS3Reader.py b/src/python/WMCore/Services/DBS/DBS3Reader.py index 5865b6ed3cf..488c3117f46 100644 --- a/src/python/WMCore/Services/DBS/DBS3Reader.py +++ b/src/python/WMCore/Services/DBS/DBS3Reader.py @@ -37,8 +37,7 @@ def remapDBS3Keys(data, stringify=False, **others): 'num_block': 'NumberOfBlocks', 'num_lumi': 'NumberOfLumis', 'event_count': 'NumberOfEvents', 'run_num': 'RunNumber', 'file_size': 'FileSize', 'block_size': 'BlockSize', - 'file_count': 'NumberOfFiles', 'open_for_writing': 'OpenForWriting', - 'logical_file_name': 'LogicalFileName', + 'file_count': 'NumberOfFiles', 'logical_file_name': 'LogicalFileName', 'adler32': 'Adler32', 'check_sum': 'Checksum', 'md5': 'Md5', 'block_name': 'BlockName', 'lumi_section_num': 'LumiSectionNumber'} @@ -395,7 +394,7 @@ def getDBSSummaryInfo(self, dataset=None, block=None): result['block'] = block if block else '' return result - def listFileBlocks(self, dataset, onlyClosedBlocks=False, blockName=None): + def listFileBlocks(self, dataset, blockName=None): """ _listFileBlocks_ @@ -406,7 +405,6 @@ def listFileBlocks(self, dataset, onlyClosedBlocks=False, blockName=None): args = {'dataset': dataset, 'detail': False} if blockName: args['block_name'] = blockName - if onlyClosedBlocks: args['detail'] = True try: blocks = self.dbs.listBlocks(**args) @@ -415,30 +413,7 @@ def listFileBlocks(self, dataset, onlyClosedBlocks=False, blockName=None): msg += "%s\n" % formatEx3(ex) raise DBSReaderError(msg) - if onlyClosedBlocks: - result = [x['block_name'] for x in blocks if str(x['open_for_writing']) != "1"] - - else: - result = [x['block_name'] for x in blocks] - - return result - - def listOpenFileBlocks(self, dataset): - """ - _listOpenFileBlocks_ - - Retrieve a list of open fileblock names for a dataset - - """ - self.checkDatasetPath(dataset) - try: - blocks = self.dbs.listBlocks(dataset=dataset, detail=True) - except dbsClientException as ex: - msg = "Error in DBSReader.listFileBlocks(%s)\n" % dataset - msg += "%s\n" % formatEx3(ex) - raise DBSReaderError(msg) - - result = [x['block_name'] for x in blocks if 
str(x['open_for_writing']) == "1"] + result = [x['block_name'] for x in blocks] return result @@ -634,12 +609,10 @@ def getFileBlock(self, fileBlockName): :return: a dictionary in the format of: {"PhEDExNodeNames" : [], - "Files" : { LFN : Events }, - "IsOpen" : True|False} + "Files" : { LFN : Events }} """ result = {"PhEDExNodeNames": [], # FIXME: we better get rid of this line! - "Files": self.listFilesInBlock(fileBlockName), - "IsOpen": self.blockIsOpen(fileBlockName)} + "Files": self.listFilesInBlock(fileBlockName)} return result def getFileBlockWithParents(self, fileBlockName): @@ -650,8 +623,7 @@ def getFileBlockWithParents(self, fileBlockName): :return: a dictionary in the format of: {"PhEDExNodeNames" : [], - "Files" : { LFN : Events }, - "IsOpen" : True|False} + "Files" : { LFN : Events }} """ fileBlockName = decodeBytesToUnicode(fileBlockName) @@ -660,8 +632,7 @@ def getFileBlockWithParents(self, fileBlockName): raise DBSReaderError(msg % fileBlockName) result = {"PhEDExNodeNames": [], # FIXME: we better get rid of this line! - "Files": self.listFilesInBlockWithParents(fileBlockName), - "IsOpen": self.blockIsOpen(fileBlockName)} + "Files": self.listFilesInBlockWithParents(fileBlockName)} return result def listBlockParents(self, blockName): @@ -675,24 +646,6 @@ def listBlockParents(self, blockName): result = [block['parent_block_name'] for block in blocks] return result - def blockIsOpen(self, blockName): - """ - _blockIsOpen_ - - Return True if named block is open, false if not, or if block - doenst exist - - """ - self.checkBlockName(blockName) - blockInstance = self.dbs.listBlocks(block_name=blockName, detail=True) - if len(blockInstance) == 0: - return False - blockInstance = blockInstance[0] - isOpen = blockInstance.get('open_for_writing', 1) - if isOpen == 0: - return False - return True - def blockToDatasetPath(self, blockName): """ _blockToDatasetPath_ diff --git a/src/python/WMCore/Services/DBS/DBSWriterObjects.py b/src/python/WMCore/Services/DBS/DBSWriterObjects.py index ff4cffcbead..2bcdb757170 100644 --- a/src/python/WMCore/Services/DBS/DBSWriterObjects.py +++ b/src/python/WMCore/Services/DBS/DBSWriterObjects.py @@ -378,76 +378,4 @@ def createDBSFileBlock(blockName): NOTE: This method DOES NOT create a new block in DBS """ - return DbsFileBlock( Name = blockName) - -def getDBSFileBlock(dbsApiRef, procDataset, pnn): - """ - _getDBSFileBlock_ - - Given the procDataset and pnn provided, get the currently open - file block for that dataset/se pair. 
- If an open block does not exist, then create a new block and - return that - - """ - logging.warning("getDBSFileBlocks(): dset, pnn: %s, %s", procDataset, pnn) - - allBlocks = dbsApiRef.listBlocks(procDataset, block_name = "*", - phedex_node_name = "*") - - logging.warning("getDBSFileBlock(): all blocks %s", allBlocks) - - openBlocks = [b for b in allBlocks if str(b['OpenForWriting']) == "1"] - - logging.warning("getDBSFileBlocks(): open blocks %s", openBlocks) - - blockRef = None - if len(openBlocks) > 1: - msg = "Too many open blocks for dataset:\n" - msg += "PNN: %s\n" % pnn - msg += "Dataset: %s\n" %procDataset - msg += "Using last open block\n" - logging.warning(msg) - blockRef = openBlocks[-1] - elif len(openBlocks) == 1: - blockRef = openBlocks[0] - - if blockRef == None: - # // - # // Need to create new block - #// - - logging.warning("getDBSFileBlock(): Creating a new block...") - - newBlockName = dbsApiRef.insertBlock (procDataset, None , - phedex_node_list = [pnn]) - - # get from DBS listBlocks API the DbsFileBlock newly inserted - blocks = dbsApiRef.listBlocks(procDataset, block_name = newBlockName ) - if len(blocks) > 1: - msg = "Too many blocks with the same name: %s:\n" % newBlockName - msg += "Using last block\n" - logging.warning(msg) - blockRef = blocks[-1] - elif len(blocks) == 1: - blockRef = blocks[0] - else: - msg = "No FileBlock found to add files to" - logging.error(msg) - # FIXME: throw an error ? - -## StorageElementList below is wrong: it should be a list of dictionary [ { 'Name': pnn } ] -## In order to define the DbsFileBlock it should be enough to specify its blockname and -## it shouldn't be needed to specify the SE and Dataset again, -## however since this is not the case, it's safer to get the DbsFileBlock from listBlocks DBS API -## rather then defining a DbsFileBlock. 
-# blockRef = DbsFileBlock( -# Name = newBlockName, -# Dataset = procDataset, -# PhEDExNodeList = [ pnn ] -# ) - - - - logging.warning("Open FileBlock located at PNN: %s to use is FileBlock: %s ", pnn, blockRef['Name']) - return blockRef + return DbsFileBlock(Name=blockName) diff --git a/src/python/WMCore/WorkQueue/Policy/Start/Block.py b/src/python/WMCore/WorkQueue/Policy/Start/Block.py index 505bcf91e1a..2be25a70c46 100644 --- a/src/python/WMCore/WorkQueue/Policy/Start/Block.py +++ b/src/python/WMCore/WorkQueue/Policy/Start/Block.py @@ -59,7 +59,7 @@ def split(self): NumberOfFiles=int(block['NumberOfFiles']), NumberOfEvents=int(block['NumberOfEvents']), Jobs=estimateJobs, - OpenForNewData=True if str(block.get('OpenForWriting')) == '1' else False, + OpenForNewData=False, NoInputUpdate=self.initialTask.getTrustSitelists().get('trustlists'), NoPileupUpdate=self.initialTask.getTrustSitelists().get('trustPUlists') ) @@ -102,7 +102,7 @@ def validBlocks(self, task, dbs): blocks.append(str(data)) else: Lexicon.dataset(data) # check dataset name - for block in dbs.listFileBlocks(data, onlyClosedBlocks=True): + for block in dbs.listFileBlocks(data): blocks.append(str(block)) for blockName in blocks: @@ -248,10 +248,9 @@ def newDataAvailable(self, task, inbound): """ self.initialTask = task dbs = self.dbs() - openBlocks = dbs.listOpenFileBlocks(task.getInputDatasetPath()) - if openBlocks: - return True - return False + allBlocks = dbs.listFileBlocks(task.getInputDatasetPath()) + newBlocks = set(allBlocks) - set(self.rejectedWork) - set(self.badWork) + return bool(newBlocks) @staticmethod def supportsWorkAddition(): diff --git a/src/python/WMCore/WorkQueue/WMBSHelper.py b/src/python/WMCore/WorkQueue/WMBSHelper.py index 4a2aa664d7f..daadaf815d6 100644 --- a/src/python/WMCore/WorkQueue/WMBSHelper.py +++ b/src/python/WMCore/WorkQueue/WMBSHelper.py @@ -445,8 +445,6 @@ def addFiles(self, block): create wmbs files from given dbs block. 
as well as run lumi update """ - blockOpen = block.get('IsOpen', False) - if self.topLevelTask.getInputACDC(): self.isDBS = False logging.info('Adding ACDC files into WMBS for %s', self.wmSpec.name()) @@ -470,7 +468,8 @@ def addFiles(self, block): self.wmSpec.name()) self._createFilesInDBSBuffer() - self.topLevelFileset.markOpen(blockOpen) + # DBS blocks are always closed, so mark fileset as closed as well + self.topLevelFileset.markOpen(False) return totalFiles def getMergeOutputMapping(self): diff --git a/src/python/WMCore/WorkQueue/WorkQueue.py b/src/python/WMCore/WorkQueue/WorkQueue.py index 3d319dc08da..f214b3e817b 100644 --- a/src/python/WMCore/WorkQueue/WorkQueue.py +++ b/src/python/WMCore/WorkQueue/WorkQueue.py @@ -387,14 +387,14 @@ def _getDBSDataset(self, match): dbs = self._getDbs(match['Dbs']) datasetName = list(match['Inputs'])[0] - blocks = dbs.listFileBlocks(datasetName, onlyClosedBlocks=True) + blocks = dbs.listFileBlocks(datasetName) for blockName in blocks: blockSummary = dbs.getFileBlock(blockName) blockSummary['PhEDExNodeNames'] = self.rucio.getDataLockedAndAvailable(name=blockName, account=self.params['rucioAccount']) tmpDsetDict[blockName] = blockSummary - dbsDatasetDict = {'Files': [], 'IsOpen': False, 'PhEDExNodeNames': []} + dbsDatasetDict = {'Files': [], 'PhEDExNodeNames': []} dbsDatasetDict['Files'] = [f for block in listvalues(tmpDsetDict) for f in block['Files']] dbsDatasetDict['PhEDExNodeNames'].extend( [f for block in listvalues(tmpDsetDict) for f in block['PhEDExNodeNames']]) @@ -464,33 +464,6 @@ def _wmbsPreparation(self, match, wmspec, blockName, dbsBlock): return sub - def addNewFilesToOpenSubscriptions(self, *elements): - """Inject new files to wmbs for running elements that have new files. - Assumes elements are from the same workflow""" - if not self.params['LocalQueueFlag']: - return - - from WMCore.WorkQueue.WMBSHelper import WMBSHelper - wmspec = None - for ele in elements: - if not ele.isRunning() or not ele['SubscriptionId'] or not ele: - continue - if not ele['Inputs'] or not ele['OpenForNewData'] or ele['StartPolicy'] == 'Dataset': - continue - if not wmspec: - wmspec = self.backend.getWMSpec(ele['RequestName']) - blockName, dbsBlock = self._getDBSBlock(ele, wmspec) - if ele['NumOfFilesAdded'] != len(dbsBlock['Files']): - self.logger.info("Adding new files to open block %s (%s)", blockName, ele.id) - wmbsHelper = WMBSHelper(wmspec, ele['TaskName'], blockName, ele['Mask'], self.params['CacheDir']) - ele['NumOfFilesAdded'] += wmbsHelper.createSubscriptionAndAddFiles(block=dbsBlock)[1] - self.backend.updateElements(ele.id, NumOfFilesAdded=ele['NumOfFilesAdded']) - if dbsBlock['IsOpen'] != ele['OpenForNewData']: - self.logger.info("Closing open block %s (%s)", blockName, ele.id) - self.backend.updateInboxElements(ele['ParentQueueId'], OpenForNewData=dbsBlock['IsOpen']) - self.backend.updateElements(ele.id, OpenForNewData=dbsBlock['IsOpen']) - ele['OpenForNewData'] = dbsBlock['IsOpen'] - def _assignToChildQueue(self, queue, *elements): """Assign work from parent to queue""" workByRequest = {} @@ -1018,8 +991,6 @@ def performSyncAndCancelAction(self, skipWMBS): parentQueueDeleted = False continue - self.addNewFilesToOpenSubscriptions(*elements) - updated_elements = [x for x in result['Elements'] if x.modified] for x in updated_elements: self.logger.debug("Updating progress %s (%s): %s", x['RequestName'], x.id, x.statusMetrics()) diff --git a/src/python/WMQuality/Emulators/DataBlockGenerator/DataBlockGenerator.py 
b/src/python/WMQuality/Emulators/DataBlockGenerator/DataBlockGenerator.py index 300145ff634..698e451a26e 100644 --- a/src/python/WMQuality/Emulators/DataBlockGenerator/DataBlockGenerator.py +++ b/src/python/WMQuality/Emulators/DataBlockGenerator/DataBlockGenerator.py @@ -22,23 +22,15 @@ def _blockGenerator(self, dataset): blockName = "%s#%s" % (dataset, i+1) size = GlobalParams.numOfFilesPerBlock() * GlobalParams.sizeOfFile() - blocks.append( - {'Name' : blockName, - 'NumberOfEvents' : numOfEvents, - 'NumberOfFiles' : GlobalParams.numOfFilesPerBlock(), - 'NumberOfLumis' : GlobalParams.numOfLumisPerBlock(), - 'Size' : size, - 'Parents' : (), - 'OpenForWriting' : '1' if self._openForWriting() else '0'} - ) + blocks.append({'Name' : blockName, + 'NumberOfEvents' : numOfEvents, + 'NumberOfFiles' : GlobalParams.numOfFilesPerBlock(), + 'NumberOfLumis' : GlobalParams.numOfLumisPerBlock(), + 'Size' : size, + 'Parents' : ()} + ) return blocks - def _openForWriting(self): - """Is block open or closed? - Should do this on a block by block basis but so far not needed, - just make a global state""" - return GlobalParams.blocksOpenForWriting() - def getParentBlock(self, block, numberOfParents = 1): blocks = [] numOfEvents = GlobalParams.numOfFilesPerBlock() * GlobalParams.numOfEventsPerFile() diff --git a/src/python/WMQuality/Emulators/DataBlockGenerator/Globals.py b/src/python/WMQuality/Emulators/DataBlockGenerator/Globals.py index 96ad5fe37ae..caba6458792 100644 --- a/src/python/WMQuality/Emulators/DataBlockGenerator/Globals.py +++ b/src/python/WMQuality/Emulators/DataBlockGenerator/Globals.py @@ -103,14 +103,6 @@ def getRunNumberForBlock(blockName): #assumes blockName is contains number after '#' return int(blockName.split('#')[-1]) - @staticmethod - def setBlocksOpenForWriting(blocksOpenForWriting): - GlobalParams._blocks_open_for_writing = blocksOpenForWriting - - @staticmethod - def blocksOpenForWriting(): - return GlobalParams._blocks_open_for_writing - @staticmethod def resetParams(): """ From edd72ba9ed9add7499f1637a3e9f47c90c626e5a Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Thu, 28 Apr 2022 15:33:50 -0400 Subject: [PATCH 13/16] remove open blocks logic from unit tests update unit tests --- .../WMCore_t/Services_t/DBS_t/DBSReader_t.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/test/python/WMCore_t/Services_t/DBS_t/DBSReader_t.py b/test/python/WMCore_t/Services_t/DBS_t/DBSReader_t.py index e421a1f16ee..fd4dc53e17c 100644 --- a/test/python/WMCore_t/Services_t/DBS_t/DBSReader_t.py +++ b/test/python/WMCore_t/Services_t/DBS_t/DBSReader_t.py @@ -208,16 +208,10 @@ def testListFileBlocks(self): blocks = self.dbs.listFileBlocks(DATASET) self.assertTrue(BLOCK in blocks) # block is closed - block = self.dbs.listFileBlocks(DATASET, blockName=BLOCK, onlyClosedBlocks=True)[0] + block = self.dbs.listFileBlocks(DATASET, blockName=BLOCK)[0] self.assertEqual(block, BLOCK) self.assertTrue(BLOCK in block) - def testListOpenFileBlocks(self): - """listOpenFileBlocks finds open blocks""" - # hard to find a dataset with open blocks, so don't bother - self.dbs = DBSReader(self.endpoint) - self.assertFalse(self.dbs.listOpenFileBlocks(DATASET)) - def testBlockExists(self): """blockExists returns existence of blocks""" self.dbs = DBSReader(self.endpoint) @@ -283,7 +277,7 @@ def testGetFileBlock(self): """getFileBlock returns block""" self.dbs = DBSReader(self.endpoint) block = self.dbs.getFileBlock(BLOCK) - self.assertEqual(len(block), 3) + 
self.assertEqual(len(block), 2) self.assertEqual(2, len(block['Files'])) self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas') @@ -292,7 +286,7 @@ def testGetFileBlockWithParents(self): """getFileBlockWithParents returns block and parents""" self.dbs = DBSReader(self.endpoint) block = self.dbs.getFileBlockWithParents(BLOCK_WITH_PARENTS) - self.assertEqual(len(block), 3) + self.assertEqual(len(block), 2) self.assertEqual(PARENT_FILE, block['Files'][0]['ParentList'][0]['LogicalFileName']) self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas') @@ -305,11 +299,6 @@ def testListBlockParents(self): self.assertFalse(self.dbs.listBlockParents(PARENT_BLOCK)) - def testBlockIsOpen(self): - """blockIsOpen checks if a block is open""" - self.dbs = DBSReader(self.endpoint) - self.assertFalse(self.dbs.blockIsOpen(BLOCK)) - def testBlockToDatasetPath(self): """blockToDatasetPath extracts path from block name""" self.dbs = DBSReader(self.endpoint) From 356a00c168274346b9b21209162b1beaf2c2605e Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Mon, 25 Apr 2022 17:51:21 -0400 Subject: [PATCH 14/16] Enforce dataset_lifetime column to be integer and not null ensure move and custodial are not None; fix order of db constraints address some of Valentins concerns add docstring and extra comment for the param/validation --- .../WMComponent/DBS3Buffer/MySQL/Create.py | 2 +- .../DBS3Buffer/MySQL/NewSubscription.py | 32 +++++++++++++------ .../WMComponent/DBS3Buffer/Oracle/Create.py | 2 +- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/python/WMComponent/DBS3Buffer/MySQL/Create.py b/src/python/WMComponent/DBS3Buffer/MySQL/Create.py index 5b16a354062..bd3afaeabb0 100644 --- a/src/python/WMComponent/DBS3Buffer/MySQL/Create.py +++ b/src/python/WMComponent/DBS3Buffer/MySQL/Create.py @@ -49,7 +49,7 @@ def __init__(self, logger=None, dbi=None, params=None): subscribed INTEGER DEFAULT 0, phedex_group VARCHAR(100), delete_blocks INTEGER, - dataset_lifetime INTEGER DEFAULT 0, + dataset_lifetime INTEGER DEFAULT 0 NOT NULL, PRIMARY KEY (id), CONSTRAINT uq_dbs_dat_sub UNIQUE (dataset_id, site, custodial, auto_approve, move, priority))""" diff --git a/src/python/WMComponent/DBS3Buffer/MySQL/NewSubscription.py b/src/python/WMComponent/DBS3Buffer/MySQL/NewSubscription.py index 6fffbffc541..1529e1aa278 100644 --- a/src/python/WMComponent/DBS3Buffer/MySQL/NewSubscription.py +++ b/src/python/WMComponent/DBS3Buffer/MySQL/NewSubscription.py @@ -24,6 +24,18 @@ class NewSubscription(DBFormatter): """ def _createPhEDExSubBinds(self, datasetID, subscriptionInfo, custodialFlag): + """ + Creates the database binds for both custodial and non custodial data + placements. + + :param datasetID: integer with the dataset id + :param subscriptionInfo: dictionary object from the request spec + :param custodialFlag: boolean flag defining whether it's custodial or not + :return: a list of dictionary binds + """ + # NOTE: the subscription information is already validated upstream, at + # the request factory. Thus, there is no need to cast/validate anything + # at this level. 
# DeleteFromSource is not supported for move subscriptions delete_blocks = None @@ -40,15 +52,17 @@ def _createPhEDExSubBinds(self, datasetID, subscriptionInfo, custodialFlag): binds = [] for site in sites: - binds.append({'id': datasetID, - 'site': site, - 'custodial': custodialFlag, - 'auto_approve': 1 if site in subscriptionInfo['AutoApproveSites'] else 0, - 'move': isMove, - 'priority': subscriptionInfo['Priority'], - 'phedex_group': phedex_group, - 'delete_blocks': delete_blocks, - 'dataset_lifetime': subscriptionInfo['DatasetLifetime']}) + bind = {'id': datasetID, + 'site': site, + 'custodial': 1 if custodialFlag else 0, + 'auto_approve': 1 if site in subscriptionInfo['AutoApproveSites'] else 0, + 'move': isMove, + 'priority': subscriptionInfo['Priority'], + 'phedex_group': phedex_group, + 'delete_blocks': delete_blocks} + if subscriptionInfo['DatasetLifetime'] is not None: + bind.update(dict(dataset_lifetime=subscriptionInfo['DatasetLifetime'])) + binds.append(bind) return binds def execute(self, datasetID, subscriptionInfo, conn=None, transaction=False): diff --git a/src/python/WMComponent/DBS3Buffer/Oracle/Create.py b/src/python/WMComponent/DBS3Buffer/Oracle/Create.py index 3b38036bbe6..f516e20fe47 100644 --- a/src/python/WMComponent/DBS3Buffer/Oracle/Create.py +++ b/src/python/WMComponent/DBS3Buffer/Oracle/Create.py @@ -49,7 +49,7 @@ def __init__(self, logger = None, dbi = None, params = None): subscribed INTEGER DEFAULT 0, phedex_group VARCHAR(100), delete_blocks INTEGER, - dataset_lifetime INTEGER DEFAULT 0, + dataset_lifetime INTEGER DEFAULT 0 NOT NULL, PRIMARY KEY (id), CONSTRAINT uq_dbs_dat_sub UNIQUE (dataset_id, site, custodial, auto_approve, move, priority) )""" From 5abb5af8c87f4103734a38f09680e1808779b390 Mon Sep 17 00:00:00 2001 From: Alan Malta Rodrigues Date: Tue, 3 May 2022 02:27:53 +0200 Subject: [PATCH 15/16] 2.0.3.pre4 Signed-off-by: Alan Malta Rodrigues --- CHANGES | 5 +++++ src/python/WMCore/__init__.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index fafe596abb9..4749103e172 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,8 @@ +2.0.3.pre3 to 2.0.3.pre4: + - Enforce dataset_lifetime column to be integer and not null (Alan Malta Rodrigues) #11115 + - Remove block open logic from DBS3Reader and WorkQueue (Alan Malta Rodrigues) #11123 + + 2.0.3.pre2 to 2.0.3.pre3: - change fallback for missing system xrootd-client (Dirk Hufnagel) #11117 - Update ReqMgr2 CP thread to properly deal with aborted/force-complete workflows (Alan Malta Rodrigues) #11113 diff --git a/src/python/WMCore/__init__.py b/src/python/WMCore/__init__.py index cd9c66a6a7d..6291eda9e0c 100644 --- a/src/python/WMCore/__init__.py +++ b/src/python/WMCore/__init__.py @@ -6,5 +6,5 @@ """ -__version__ = '2.0.3.pre3' +__version__ = '2.0.3.pre4' __all__ = [] From dd24a370b29c2835cbbbda3981ca5c85fad9202b Mon Sep 17 00:00:00 2001 From: Todor Ivanov Date: Tue, 3 May 2022 19:00:59 +0300 Subject: [PATCH 16/16] Add T3_US_Lancium. Add T3_US_Lancium to deploy-wmagent.sh as well. 
--- deploy/addUSOpportunistic.sh | 2 +- deploy/deploy-wmagent.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/addUSOpportunistic.sh b/deploy/addUSOpportunistic.sh index 55d7374cfd2..4f10158f1c2 100755 --- a/deploy/addUSOpportunistic.sh +++ b/deploy/addUSOpportunistic.sh @@ -3,7 +3,7 @@ PEND_JOBS=3000 RUNN_JOBS=3000 # default manage location manage=/data/srv/wmagent/current/config/wmagent/manage -for site in {T3_US_NERSC,T3_US_OSG,T3_US_PSC,T3_US_SDSC,T3_US_TACC,T3_US_Anvil}; +for site in {T3_US_NERSC,T3_US_OSG,T3_US_PSC,T3_US_SDSC,T3_US_TACC,T3_US_Anvil,T3_US_Lancium}; do echo "Adding site: $site into the resource-control with $PEND_JOBS pending and $RUNN_JOBS running slots" $manage execute-agent wmagent-resource-control --site-name=$site --cms-name=$site --ce-name=$site --pnn=$site --plugin=SimpleCondorPlugin --pending-slots=1000 --running-slots=1000; diff --git a/deploy/deploy-wmagent.sh b/deploy/deploy-wmagent.sh index 933c7ba9d8d..7a52b83e788 100644 --- a/deploy/deploy-wmagent.sh +++ b/deploy/deploy-wmagent.sh @@ -374,7 +374,7 @@ echo "Done!" && echo echo "*** Setting up US opportunistic resources ***" if [[ "$HOSTNAME" == *fnal.gov ]]; then sed -i "s+forceSiteDown = \[\]+forceSiteDown = \[$FORCEDOWN\]+" $MANAGE_DIR/config.py - for resourceName in {T3_US_NERSC,T3_US_OSG,T3_US_PSC,T3_US_SDSC,T3_US_TACC,T3_US_Anvil,T3_ES_PIC_BSC}; + for resourceName in {T3_US_NERSC,T3_US_OSG,T3_US_PSC,T3_US_SDSC,T3_US_TACC,T3_US_Anvil,T3_US_Lancium,T3_ES_PIC_BSC}; do ./manage execute-agent wmagent-resource-control --plugin=SimpleCondorPlugin --opportunistic \ --pending-slots=$HPC_PEND_JOBS --running-slots=$HPC_RUNN_JOBS --add-one-site $resourceName
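
A closing note on the WorkQueue status mapping introduced in PATCH 07/16 and covered by the tests in PATCH 08/16: the new "canceled" aggregate state can be checked in isolation with the snippet below. This is a minimal sketch, assuming a WMCore checkout with src/python on the PYTHONPATH; the element-status sets are illustrative and mirror the unit tests above.

    from WMCore.Services.WorkQueue.WorkQueue import convertWQElementsStatusToWFStatus

    # all elements already asked to cancel -> the new "canceled" aggregate state
    print(convertWQElementsStatusToWFStatus(set(["CancelRequested"])))          # canceled
    # cancelled and finished elements mixed together still map to "canceled"
    print(convertWQElementsStatusToWFStatus(set(["Done", "CancelRequested"])))  # canceled
    # work still active in the agents keeps the usual running states
    print(convertWQElementsStatusToWFStatus(set(["Running", "Done"])))          # running-closed

With this mapping in place, the StatusChangeTasks thread only moves an aborted request to "aborted-completed" (or a force-complete request to "completed") once every element of the workflow reports "completed" or no longer exists, and re-issues the cancel request whenever elements are still active in the agents.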