Skip to content

Commit

Permalink
Add change dbs dataset file status fix #5204 (#5241)
Browse files Browse the repository at this point in the history
* add setdataset.py for #5204

* refactor and add setfiles

* add Content-type arg to HTTPRequests

* setdataset to use contentType

* rename commands to setdatasetstatus setfilestatus

* add autocomplete

* list of LFNs not supported yet

* some pylint and pep8

* add logging for setfilestatus

* move HTTPRequest, CRABRest, getDbsREST to new RestInterfaces.py

* do not pass version to REST clients, it is set in HTTPRequests

* fix use of version and UserAgent

* simply make userAgent=CRABClient/__version__ the default

* cleanup use of Content-type

* warn users that setdatasetstatus does not change file status
  • Loading branch information
belforte authored Oct 27, 2023
1 parent 0bc1ad0 commit 33afb42
Show file tree
Hide file tree
Showing 9 changed files with 371 additions and 34 deletions.
26 changes: 23 additions & 3 deletions etc/crab-bash-completion.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ _UseCrab ()
"")
case "$cur" in
"")
COMPREPLY=( $(compgen -W '--version --help -h --quiet --debug status tasks proceed checkwrite getlog checkusername checkdataset submit getoutput resubmit kill uploadlog remake report preparelocal createmyproxy' -- $cur) )
COMPREPLY=( $(compgen -W '--version --help -h --quiet --debug status tasks proceed checkwrite getlog checkusername checkdataset submit getoutput resubmit kill uploadlog remake report preparelocal createmyproxy setdatasetstatus setfilestatus' -- $cur) )
;;
-*)
COMPREPLY=( $(compgen -W '--version --help -h --quiet --debug' -- $cur) )
;;
*)
COMPREPLY=( $(compgen -W 'status tasks proceed checkwrite getlog checkusername checkdataset submit getoutput resubmit kill uploadlog remake report preparelocal createmyproxy' -- $cur) )
COMPREPLY=( $(compgen -W 'status tasks proceed checkwrite getlog checkusername checkdataset submit getoutput resubmit kill uploadlog remake report preparelocal createmyproxy setdatasetstatus setfilestatus' -- $cur) )
;;
esac
;;
Expand Down Expand Up @@ -284,9 +284,29 @@ _UseCrab ()
esac
;;

"setdatasetstatus")
case "$cur" in
-*)
COMPREPLY=( $(compgen -W '--help -h --status --dataset' -- $cur) )
;;
*)
COMPREPLY=( $(compgen -f $cur) )
esac
;;

"setfilestatus")
case "$cur" in
-*)
COMPREPLY=( $(compgen -W '--help -h --status --dataset --files' -- $cur) )
;;
*)
COMPREPLY=( $(compgen -f $cur) )
esac
;;


*)
COMPREPLY=( $(compgen -W 'status tasks proceed checkwrite getlog checkusername submit getoutput resubmit kill uploadlog remake report preparelocal' -- $cur) )
COMPREPLY=( $(compgen -W 'status tasks proceed checkwrite getlog checkusername submit getoutput resubmit kill uploadlog remake report preparelocal setdatasetstatus setfilestatus' -- $cur) )
;;
esac

Expand Down
1 change: 0 additions & 1 deletion src/python/CRABClient/CRABOptParser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from optparse import OptionParser

from CRABClient import __version__ as client_version
from CRABClient.ClientUtilities import getAvailCommands
from ServerUtilities import SERVICE_INSTANCES


Expand Down
2 changes: 2 additions & 0 deletions src/python/CRABClient/ClientMapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@
'checkusername' : {'acceptsArguments': False, 'requiresREST': False, 'requiresRucio': False, 'requiresDirOption': False, 'useCache': False, 'requiresProxyVOOptions': False, 'requiresLocalCache': False},
'checkwrite' : {'acceptsArguments': False, 'requiresREST': False, 'requiresRucio': True, 'requiresDirOption': False, 'useCache': False, 'requiresProxyVOOptions': True, 'requiresLocalCache': False},
'checkdataset' : {'acceptsArguments': False, 'requiresREST': False, 'requiresRucio': True, 'requiresDirOption': False, 'useCache': False, 'requiresProxyVOOptions': False, 'requiresLocalCache': False},
'setdatasetstatus' : {'acceptsArguments': False, 'requiresREST': False, 'requiresRucio': False, 'requiresDirOption': False, 'useCache': False, 'requiresProxyVOOptions': False, 'requiresLocalCache': False},
'setfilestatus' : {'acceptsArguments': False, 'requiresREST': False, 'requiresRucio': False, 'requiresDirOption': False, 'useCache': False, 'requiresProxyVOOptions': False, 'requiresLocalCache': False},
'getlog' : {'acceptsArguments': False, 'requiresREST': True, 'requiresRucio': False, 'requiresDirOption': True, 'useCache': True, 'requiresProxyVOOptions': True, 'requiresLocalCache': True },
'getoutput' : {'acceptsArguments': False, 'requiresREST': True, 'requiresRucio': True, 'requiresDirOption': True, 'useCache': True, 'requiresProxyVOOptions': True, 'requiresLocalCache': True },
'kill' : {'acceptsArguments': False, 'requiresREST': True, 'requiresRucio': False, 'requiresDirOption': True, 'useCache': False, 'requiresProxyVOOptions': False, 'requiresLocalCache': True },
Expand Down
4 changes: 1 addition & 3 deletions src/python/CRABClient/ClientUtilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,8 +230,7 @@ def uploadlogfile(logger, proxyfilename, taskname=None, logfilename=None, logpat
# so it needs its own REST server instantiation
restClass = CRABClient.Emulator.getEmulator('rest')
crabserver = restClass(hostname=serverurl, localcert=proxyfilename, localkey=proxyfilename,
retry=2, logger=logger, verbose=False, version=__version__,
userAgent='CRABClient')
retry=2, logger=logger, verbose=False)
crabserver.setDbInstance(instance)
cacheurl = server_info(crabserver=crabserver, subresource='backendurls')['cacheSSL']

Expand Down Expand Up @@ -646,7 +645,6 @@ def validateSubmitOptions(options, args):
#Since server_info class needs SubCommand, and SubCommand needs server_info for
#delegating the proxy then we are screwed
#If anyone has a better solution please go on, otherwise live with that one :) :)
from CRABClient import __version__

def server_info(crabserver=None, subresource=None):
"""
Expand Down
8 changes: 3 additions & 5 deletions src/python/CRABClient/Commands/SubCommand.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,18 +362,16 @@ def __init__(self, logger, cmdargs=None, disable_interspersed_args=False):
# this is usually the first time that a call to the server is made, so where Emulator('rest') is initialized
# arguments to Emulator('rest') call must match those for HTTPRequest.__init__ in RESTInteractions.py
#server = CRABClient.Emulator.getEmulator('rest')(url=serverurl, localcert=proxyfilename, localkey=proxyfilename,
# version=__version__, retry=2, logger=logger)
# retry=2, logger=logger)
if self.cmdconf['requiresREST']:
crabRest = CRABClient.Emulator.getEmulator('rest')
self.crabserver = crabRest(hostname=self.serverurl, localcert=self.proxyfilename, localkey=self.proxyfilename,
retry=2, logger=self.logger, verbose=False, version=__version__,
userAgent='CRABClient')
retry=2, logger=self.logger, verbose=False)
self.crabserver.setDbInstance(self.instance)
# prepare also a test crabserver instance which will send tarballs to S3
self.s3tester = crabRest(hostname='cmsweb-testbed.cern.ch',
localcert=self.proxyfilename, localkey=self.proxyfilename,
retry=0, logger=self.logger, verbose=False, version=__version__,
userAgent='CRABClient')
retry=0, logger=self.logger, verbose=False)
self.s3tester.setDbInstance('preprod')
self.handleMyProxy()

Expand Down
120 changes: 120 additions & 0 deletions src/python/CRABClient/Commands/setdatasetstatus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# pylint: disable=consider-using-f-string, unspecified-encoding
"""
allow users to (in)validate their own DBS USER datasets
"""

import sys
import json

from CRABClient.Commands.SubCommand import SubCommand
from CRABClient.ClientExceptions import MissingOptionException, ConfigurationException, CommandFailedException
from CRABClient.ClientUtilities import colors
from CRABClient.RestInterfaces import getDbsREST

if sys.version_info >= (3, 0):
from urllib.parse import urlencode # pylint: disable=E0611
if sys.version_info < (3, 0):
from urllib import urlencode


class setdatasetstatus(SubCommand):
    """
    Set the access type (status) of a USER dataset in DBS (default instance: prod/phys03).

    Only the dataset record is changed: the status of the files inside the
    dataset is NOT touched, and the --recursive option is accepted but not
    implemented yet (a warning is printed and the option is ignored).

    Meant to replace
    https://github.com/dmwm/DBS/blob/master/Client/utils/DataOpsScripts/DBS3SetDatasetStatus.py
    and to work wherever CRAB is supported, i.e. with both python2 and python3.
    """

    name = 'setdatasetstatus'

    def __init__(self, logger, cmdargs=None):
        SubCommand.__init__(self, logger, cmdargs)

    def __call__(self):
        """
        Look up the dataset in DBS, set its new status and report the status read back.

        Returns:
            dict: {'commandStatus': 'SUCCESS'} when the status change was accepted.

        Raises:
            ConfigurationException: the dataset is not known to the DBS instance.
            CommandFailedException: DBS did not accept the status change.
        """
        instance = self.options.instance
        dataset = self.options.dataset
        status = self.options.status
        recursive = self.options.recursive
        self.logger.debug('instance = %s', instance)
        self.logger.debug('dataset = %s', dataset)
        self.logger.debug('status = %s', status)
        self.logger.debug('recursive = %s', recursive)

        if recursive:
            self.logger.warning("ATTENTION: recursive option is not implemented yet. Ignoring it")

        # from DBS instance, to DBS REST services
        dbsReader, dbsWriter = getDbsREST(instance=instance, logger=self.logger,
                                          cert=self.proxyfilename, key=self.proxyfilename)

        self.logger.info("looking up Dataset %s in DBS %s" % (dataset, instance))
        # dataset_access_type='*' so that the dataset is found whatever its current status
        datasetStatusQuery = {'dataset': dataset, 'dataset_access_type': '*', 'detail': True}
        ds, rc, msg = dbsReader.get(uri="datasets", data=urlencode(datasetStatusQuery))
        self.logger.debug('exitcode= %s', rc)
        if not ds:
            # carry the explanation in the exception itself, like validateOptions does
            raise ConfigurationException("ERROR: dataset %s not found in DBS" % dataset)
        self.logger.info("Dataset status in DBS is %s" % ds[0]['dataset_access_type'])

        self.logger.info("Will set it to %s" % status)
        # PUT wants its payload in JSON format
        jdata = json.dumps({'dataset': dataset, 'dataset_access_type': status})
        out, rc, msg = dbsWriter.put(uri='datasets', data=jdata)
        if rc != 200 or msg != 'OK':
            raise CommandFailedException("Dataset status change failed: %s" % out)
        self.logger.info("Dataset status changed successfully")

        # read the status back so that the user sees what DBS holds now
        ds, rc, msg = dbsReader.get(uri="datasets", data=urlencode(datasetStatusQuery))
        self.logger.debug('exitcode= %s', rc)
        if ds:
            self.logger.info("Dataset status in DBS now is %s" % ds[0]['dataset_access_type'])
        else:
            # the change itself succeeded: do not crash on an empty read-back
            self.logger.warning("Could not read back the dataset status from DBS")

        self.logger.info("NOTE: status of files inside the dataset has NOT been changed")

        return {'commandStatus': 'SUCCESS'}

    def setOptions(self):
        """
        __setOptions__
        Declare the command line options specific to this command
        """
        self.parser.add_option('--instance', dest='instance', default='prod/phys03',
                               help=("DBS instance. e.g. prod/phys03 (default) or int/phys03."
                                     " Use at your own risk."
                                     " Unless you really know what you are doing, stay with the default"))
        self.parser.add_option('--dataset', dest='dataset', default=None,
                               help='dataset name')
        self.parser.add_option('--status', dest='status', default=None,
                               help="New status of the dataset: VALID/INVALID/DELETED/DEPRECATED",
                               choices=['VALID', 'INVALID', 'DELETED', 'DEPRECATED'])
        self.parser.add_option('--recursive', dest='recursive', default=False, action="store_true",
                               help=("Apply status to children datasets and sets all files status in those"
                                     " to VALID if status=VALID, INVALID otherwise"))

    def validateOptions(self):
        """
        Check that --dataset and --status were given and that --instance looks sane.

        Raises:
            MissingOptionException: --dataset or --status is missing.
            ConfigurationException: --instance is neither server/db nor an https:// URL.
        """
        SubCommand.validateOptions(self)

        if self.options.dataset is None:
            msg = "%sError%s: Please specify the dataset to check." % (colors.RED, colors.NORMAL)
            msg += " Use the --dataset option."
            ex = MissingOptionException(msg)
            ex.missingOption = "dataset"
            raise ex
        if self.options.status is None:
            msg = "%sError%s: Please specify the new dataset status." % (colors.RED, colors.NORMAL)
            msg += " Use the --status option."
            ex = MissingOptionException(msg)
            ex.missingOption = "status"
            raise ex

        # minimal sanity check: accept "server/db" or a full https:// URL
        instance = self.options.instance
        hasSlash = '/' in instance
        tooManyParts = len(instance.split('/')) > 2
        isUrl = instance.startswith('https://')
        if (not hasSlash) or (tooManyParts and not isUrl):
            msg = "Bad instance value %s. " % instance
            msg += "Use either server/db format or full URL"
            raise ConfigurationException(msg)
150 changes: 150 additions & 0 deletions src/python/CRABClient/Commands/setfilestatus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# pylint: disable=consider-using-f-string, unspecified-encoding
"""
allow users to (in)validate some files in their USER datasets in phys03
"""

import json

from CRABClient.Commands.SubCommand import SubCommand
from CRABClient.ClientExceptions import MissingOptionException, ConfigurationException, CommandFailedException
from CRABClient.ClientUtilities import colors
from CRABClient.RestInterfaces import getDbsREST


class setfilestatus(SubCommand):
    """
    Set the status (VALID/INVALID) of files of a USER dataset in DBS
    (default instance: prod/phys03).

    The files are selected either with --dataset (all files of that dataset)
    or with --files (one LFN, given directly or inside a text file).
    A list of several LFNs is not supported yet.
    Meant to work wherever CRAB is supported, i.e. with both python2 and python3.
    """

    name = 'setfilestatus'

    def __init__(self, logger, cmdargs=None):
        SubCommand.__init__(self, logger, cmdargs)

    def __call__(self):
        """
        Change the file status in DBS, logging the dataset file tally before and after.

        Returns:
            dict: {'commandStatus': 'SUCCESS'} when the status change was accepted.

        Raises:
            ConfigurationException: no LFN could be extracted from --files, or
                the LFN is not known to the DBS instance.
            NotImplementedError: --files resolves to more than one LFN.
            CommandFailedException: DBS did not accept the status change.
        """
        # initialize, and validate args
        instance = self.options.instance
        dataset = self.options.dataset
        files = self.options.files
        status = self.options.status
        self.logger.debug('instance = %s', instance)
        self.logger.debug('dataset = %s', dataset)
        self.logger.debug('files = %s', files)
        self.logger.debug('status = %s', status)

        statusToSet = 1 if status == 'VALID' else 0

        filesToChange = None
        if files:
            filesToChange = self._lfnsFromOption(files)
            # files and dataset options are mutually exclusive
            dataset = None
            if not filesToChange:
                # e.g. --files pointed to an empty file
                raise ConfigurationException("ERROR: no LFN found in --files argument %s" % files)
            if ',' in filesToChange:
                raise NotImplementedError('list of LFNs is not supported yet')

        # from DBS instance, to DBS REST services
        dbsReader, dbsWriter = getDbsREST(instance=instance, logger=self.logger,
                                          cert=self.proxyfilename, key=self.proxyfilename)
        # we will need the dataset name
        if dataset:
            datasetName = dataset
        else:
            # get it from DBS
            lfn = filesToChange.split(',')[0]
            query = {'logical_file_name': lfn}
            # NOTE(review): query is passed as a dict here while setdatasetstatus
            # urlencode()s its GET query; confirm the REST client accepts both
            out, rc, msg = dbsReader.get(uri='datasets', data=query)
            if not out:
                raise ConfigurationException("ERROR: file %s not found in DBS" % lfn)
            datasetName = out[0]['dataset']
            self.logger.info('LFN to be changed belongs to dataset %s' % datasetName)

        # when acting on a list of LFN's, can't print status of all files before/after
        # best we can do is to print the number of valid/invalid file in the dataset
        # before/after.
        self.logFilesTally(dataset=datasetName, dbs=dbsReader)

        if dataset:
            data = {'dataset': dataset, 'is_file_valid': statusToSet}
        else:
            data = {'logical_file_name': filesToChange, 'is_file_valid': statusToSet}
        jdata = json.dumps(data)  # PUT requires data in JSON format
        out, rc, msg = dbsWriter.put(uri='files', data=jdata)
        if rc != 200 or msg != 'OK':
            raise CommandFailedException("File(s) status change failed: %s" % out)
        self.logger.info("File(s) status changed successfully")

        self.logFilesTally(dataset=datasetName, dbs=dbsReader)

        return {'commandStatus': 'SUCCESS'}

    @staticmethod
    def _lfnsFromOption(files):
        """
        Turn the value of --files into a comma separated string of LFNs.

        The value is first tried as the name of a text file with one LFN per
        line; if it can not be opened it is taken as a comma separated list of
        LFNs (a single LFN is also OK). Surrounding blanks and empty entries
        (blank lines, doubled or trailing commas) are dropped, so they can not
        make a single LFN look like a list.
        """
        try:
            with open(files, 'r') as lfnFile:
                candidates = [line.strip() for line in lfnFile]
        except IOError:
            # not a readable file: assume LFN(s) were given on the command line
            candidates = [item.strip() for item in files.split(',')]
        return ','.join(lfn for lfn in candidates if lfn)

    def logFilesTally(self, dataset=None, dbs=None):
        """
        Log the number of total/valid/invalid files in a dataset.

        Args:
            dataset: name of the dataset to look up.
            dbs: DBS REST client used for the 'files' queries.
        """
        validFiles, _, _ = dbs.get(uri='files', data={'dataset': dataset, 'validFileOnly': 1})
        allFiles, _, _ = dbs.get(uri='files', data={'dataset': dataset, 'validFileOnly': 0})
        valid = len(validFiles)
        total = len(allFiles)
        invalid = total - valid
        self.logger.info("Dataset file count total/valid/invalid = %d/%d/%d" % (total, valid, invalid))

    def setOptions(self):
        """
        __setOptions__
        Declare the command line options specific to this command
        """
        self.parser.add_option('-i', '--instance', dest='instance', default='prod/phys03',
                               help='DBS instance. e.g. prod/phys03 (default) or int/phys03')
        self.parser.add_option('-d', '--dataset', dest='dataset', default=None,
                               help=('Will apply status to all files in this dataset.'
                                     ' Use either --files or --dataset'),
                               metavar='<dataset_name>')
        self.parser.add_option('-s', '--status', dest='status', default=None,
                               help='New status of the file(s): VALID/INVALID',
                               choices=['VALID', 'INVALID'])
        self.parser.add_option('-f', '--files', dest='files', default=None,
                               help=('List of files to be validated/invalidated.'
                                     ' Can be either a simple LFN or a file containing LFNs or'
                                     ' a comma separated list of LFNs. Use either --files or --dataset'),
                               metavar="<lfn1[,..,lfnx] or filename>")

    def validateOptions(self):
        """
        Check that exactly one of --files/--dataset was given, and that --status is set.

        Raises:
            MissingOptionException: neither --files nor --dataset, or no --status.
            ConfigurationException: both --files and --dataset were given.
        """
        SubCommand.validateOptions(self)

        if not self.options.files and not self.options.dataset:
            msg = "%sError%s: Please specify the files to change." % (colors.RED, colors.NORMAL)
            msg += " Use either the --files or the --dataset option."
            ex = MissingOptionException(msg)
            ex.missingOption = "files"
            raise ex
        if self.options.files and self.options.dataset:
            msg = "%sError%s: You can not use both --files and --dataset at same time" % (colors.RED, colors.NORMAL)
            raise ConfigurationException(msg)
        if self.options.status is None:
            msg = "%sError%s: Please specify the new file(s) status." % (colors.RED, colors.NORMAL)
            msg += " Use the --status option."
            ex = MissingOptionException(msg)
            ex.missingOption = "status"
            raise ex
4 changes: 2 additions & 2 deletions src/python/CRABClient/Emulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ def setEmulator(name, value):
overrideDict[name] = value

def getDefaults():
import CRABClient.CrabRestInterface
return {'rest' : CRABClient.CrabRestInterface.CRABRest,
import CRABClient.RestInterfaces
return {'rest' : CRABClient.RestInterfaces.CRABRest,
'ufc' : 'dummy_ufc'}
Loading

0 comments on commit 33afb42

Please sign in to comment.