From 137b13df2972a9d1fd1f8ef4769f3b05a164d5f4 Mon Sep 17 00:00:00 2001 From: Boris Sadkhin Date: Mon, 23 Mar 2020 16:21:22 -0500 Subject: [PATCH 1/5] Added concierge --- deployment/conf/deployment.cfg | 3 ++- prune_acls.py | 0 staging_service/app.py | 13 +++++++++++++ staging_service/utils.py | 11 +++++++++-- 4 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 prune_acls.py diff --git a/deployment/conf/deployment.cfg b/deployment/conf/deployment.cfg index 0c7da04..5884ce9 100644 --- a/deployment/conf/deployment.cfg +++ b/deployment/conf/deployment.cfg @@ -1,4 +1,5 @@ [staging_service] META_DIR = /kb/deployment/lib/src/data/metadata/ DATA_DIR = /kb/deployment/lib/src/data/bulk/ -AUTH_URL = https://ci.kbase.us/services/auth/api/V2/token \ No newline at end of file +AUTH_URL = https://ci.kbase.us/services/auth/api/V2/token +CONCIERGE_DIR = /kb/deployment/lib/src/data/bulk/kbaseconcierge \ No newline at end of file diff --git a/prune_acls.py b/prune_acls.py new file mode 100644 index 0000000..e69de29 diff --git a/staging_service/app.py b/staging_service/app.py index 264acae..0e6f893 100644 --- a/staging_service/app.py +++ b/staging_service/app.py @@ -12,6 +12,15 @@ VERSION = '1.1.1' +@routes.get('/add-acl-concierge') +async def add_acl_concierge(request: web.Request): + username = await authorize_request(request) + concierge_path = Path.validate_path(username, concierge=True).full_path + result = AclManager().add_acl(concierge_path+ "/" + username) + result['msg'] = 'Please transfer your data to the KBase Globus Endpoint in the following directory: kbaseconcierge/%s'.format(username) + return web.json_response(result) + + @routes.get('/add-acl') async def add_acl(request: web.Request): username = await authorize_request(request) @@ -388,12 +397,16 @@ def app_factory(config): # potentially some type of code restructure would allow this without a bunch of globals DATA_DIR = config['staging_service']['DATA_DIR'] META_DIR = config['staging_service']['META_DIR'] + CONCIERGE_DIR = config['staging_service']['CONCIERGE_DIR'] if DATA_DIR.startswith('.'): DATA_DIR = os.path.normpath(os.path.join(os.getcwd(), DATA_DIR)) if META_DIR.startswith('.'): META_DIR = os.path.normpath(os.path.join(os.getcwd(), META_DIR)) + if CONCIERGE_DIR.startswith('.'): + CONCIERGE_DIR = os.path.normpath(os.path.join(os.getcwd(), CONCIERGE_DIR)) Path._DATA_DIR = DATA_DIR Path._META_DIR = META_DIR + Path._CONCIERGE_DIR = CONCIERGE_DIR global auth_client auth_client = KBaseAuth2(config['staging_service']['AUTH_URL']) return app diff --git a/staging_service/utils.py b/staging_service/utils.py index 9dca095..ad2f5e6 100644 --- a/staging_service/utils.py +++ b/staging_service/utils.py @@ -40,6 +40,7 @@ async def run_command(*args): class Path(object): _META_DIR = None # expects to be set by config _DATA_DIR = None # expects to be set by config + _CONCIERGE_DIR_ = None # expects to be set by config __slots__ = ['full_path', 'metadata_path', 'user_path', 'name', 'jgi_metadata'] def __init__(self, full_path, metadata_path, user_path, name, jgi_metadata): @@ -49,8 +50,9 @@ def __init__(self, full_path, metadata_path, user_path, name, jgi_metadata): self.name = name self.jgi_metadata = jgi_metadata + @staticmethod - def validate_path(username: str, path: str = ''): + def validate_path(username: str, path: str = '', concierge=False): """ @returns a path object based on path that must start with username throws an exeception for an invalid path or username @@ -64,7 +66,12 @@ def validate_path(username: str, path: str = ''): while path.startswith('/'): path = path[1:] user_path = os.path.join(username, path) - full_path = os.path.join(Path._DATA_DIR, user_path) + + if concierge: + full_path = os.path.join(Path._CONCIERGE_DIR_, user_path) + else: + full_path = os.path.join(Path._DATA_DIR, user_path) + metadata_path = os.path.join(Path._META_DIR, user_path) name = os.path.basename(path) jgi_metadata = os.path.join(os.path.dirname(full_path), '.' + name + '.jgi') From fa11014d49350a26d7792983984bf5c2e0227a8c Mon Sep 17 00:00:00 2001 From: Boris Sadkhin Date: Mon, 23 Mar 2020 16:39:20 -0500 Subject: [PATCH 2/5] Added concierge --- deployment/conf/testing.cfg | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deployment/conf/testing.cfg b/deployment/conf/testing.cfg index 45149bc..13ce98c 100644 --- a/deployment/conf/testing.cfg +++ b/deployment/conf/testing.cfg @@ -1,4 +1,5 @@ [staging_service] META_DIR = ./data/metadata/ DATA_DIR = ./data/bulk/ -AUTH_URL = https://ci.kbase.us/services/auth/api/V2/token \ No newline at end of file +AUTH_URL = https://ci.kbase.us/services/auth/api/V2/token +CONCIERGE_DIR = /kb/deployment/lib/src/data/bulk/kbaseconcierge \ No newline at end of file From 8d448f3d871906405b0f14313fb7e9381af407ab Mon Sep 17 00:00:00 2001 From: Boris Sadkhin Date: Mon, 23 Mar 2020 18:04:06 -0500 Subject: [PATCH 3/5] Dir --- Dockerfile | 3 +- RELEASE_NOTES.md | 3 + deployment/conf/deployment.cfg | 2 +- deployment/conf/local.cfg | 3 +- deployment/conf/testing.cfg | 2 +- docker-compose.yml | 4 +- prune_acls.py | 129 +++++++++++++++++++++++++++++++++ staging_service/app.py | 60 ++++++++++----- staging_service/utils.py | 23 ++++-- 9 files changed, 197 insertions(+), 32 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5482c5b..7301991 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,10 +14,11 @@ RUN pip install -r /requirements.txt COPY ./ /kb/module COPY ./globus.cfg /etc/globus.cfg - +RUN touch /var/log/globus.log && chmod 777 /var/log/globus.log RUN cp -r /kb/module/staging_service /kb/deployment/lib RUN cp -r /kb/module/deployment /kb + EXPOSE 3000 WORKDIR /kb/deployment/lib diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 276ad33..10dc336 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,2 +1,5 @@ +### Version 1.1.1 +- Add a add-acl-concierge endpoint + ### Version 1.1.0 - Added a `download` endpoint for files \ No newline at end of file diff --git a/deployment/conf/deployment.cfg b/deployment/conf/deployment.cfg index 5884ce9..7442433 100644 --- a/deployment/conf/deployment.cfg +++ b/deployment/conf/deployment.cfg @@ -2,4 +2,4 @@ META_DIR = /kb/deployment/lib/src/data/metadata/ DATA_DIR = /kb/deployment/lib/src/data/bulk/ AUTH_URL = https://ci.kbase.us/services/auth/api/V2/token -CONCIERGE_DIR = /kb/deployment/lib/src/data/bulk/kbaseconcierge \ No newline at end of file +CONCIERGE_PATH = /kbaseconcierge \ No newline at end of file diff --git a/deployment/conf/local.cfg b/deployment/conf/local.cfg index 45149bc..b0033d2 100644 --- a/deployment/conf/local.cfg +++ b/deployment/conf/local.cfg @@ -1,4 +1,5 @@ [staging_service] META_DIR = ./data/metadata/ DATA_DIR = ./data/bulk/ -AUTH_URL = https://ci.kbase.us/services/auth/api/V2/token \ No newline at end of file +AUTH_URL = https://ci.kbase.us/services/auth/api/V2/token +CONCIERGE_PATH = /kbaseconcierge \ No newline at end of file diff --git a/deployment/conf/testing.cfg b/deployment/conf/testing.cfg index 13ce98c..b0033d2 100644 --- a/deployment/conf/testing.cfg +++ b/deployment/conf/testing.cfg @@ -2,4 +2,4 @@ META_DIR = ./data/metadata/ DATA_DIR = ./data/bulk/ AUTH_URL = https://ci.kbase.us/services/auth/api/V2/token -CONCIERGE_DIR = /kb/deployment/lib/src/data/bulk/kbaseconcierge \ No newline at end of file +CONCIERGE_PATH = /kbaseconcierge \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 87758e8..f14b742 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,6 +9,8 @@ services: # it further assumes that there is a pre-existing /data/metadata directory volumes: - "./data:/kb/deployment/lib/src/data" + - "./:/staging_service" + environment: - KB_DEPLOYMENT_CONFIG=/kb/deployment/conf/deployment.cfg - - FILE_LIFETIME="90" \ No newline at end of file + - FILE_LIFETIME="90" diff --git a/prune_acls.py b/prune_acls.py index e69de29..1ddca4c 100644 --- a/prune_acls.py +++ b/prune_acls.py @@ -0,0 +1,129 @@ +#!/root/bulk/acl_manager/py3globus/bin/python + +""" +Deletes ACLS from globus, and then clears out directories older than THRESHOLD (60) days +""" +from __future__ import print_function # for python 2 + +import logging +import time +import shutil +from collections import namedtuple + +from os.path import getmtime + +import globus_sdk +from globus_sdk import TransferAPIError +import configparser + +""" +Setup clients and read token +""" +current_time = time.time() +THRESHOLD_DAYS = 60 + +admin_acls = ['9cb619d0-4417-11e8-8e06-0a6d4e044368', '580118b2-dc53-11e6-9d02-22000a1e3b52'] +admin_names = ['dolsonadmin', 'dolson'] + +config = configparser.ConfigParser() +config.read("globus.cfg") +cf = config['globus'] +endpoint_id = cf['endpoint_id'] + +client = globus_sdk.NativeAppAuthClient(cf['client_id']) +try: + transfer_authorizer = globus_sdk.RefreshTokenAuthorizer(cf['transfer_token'], client) + globus_transfer_client = globus_sdk.TransferClient(authorizer=transfer_authorizer) + auth_authorizer = globus_sdk.RefreshTokenAuthorizer(cf['auth_token'], client) + globus_auth_client = globus_sdk.AuthClient(authorizer=auth_authorizer) +except globus_sdk.GlobusAPIError as error: + logging.error(str(error.code) + error.raw_text) + raise Exception(str("Invalid Token Specified in globus.cfg file")) + + +def remove_directory(directory): + """ + :param directory: Directory to DELETE + :return: Log success or failure of deleting this directory to the log + """ + try: + logging.info("About to delete {}".format(directory)) + #shutil.rmtree(directory) + except OSError as error: + logging.error("Couldn't delete {} {} {}".format(directory, error.message, error.filename)) + + +def remove_acl(acl): + """ + :param acl: ACL To Delete + :return: Logs success or failure of deleting this ACL to the log + """ + logging.info( + "{}:About to remove ACL {} for {} (> {} days)".format(current_time, acl['id'], acl['path'], + THRESHOLD_DAYS)) + # try: + # resp = globus_transfer_client.delete_endpoint_acl_rule(endpoint_id, acl['id']) + # except TransferAPIError as error: + # logging.error(error.raw_text) + + +def main(): + logging.basicConfig(filename='prune_acl.log', level=logging.INFO) + logging.info("{}:BEGIN RUN".format(current_time)) + + old_acls = get_old_acls() + + logging.info("{}:ATTEMPTING TO DELETE {} OLD ACLS".format(current_time, len(old_acls))) + for acl in old_acls: + remove_acl(acl.acl) + remove_directory(acl.dir) + logging.info("{}:END RUN".format(current_time)) + + +def get_endpoint_acls(): + """ + :return: Return a dictionary of endpoint ACLS using the Globus API + """ + try: + return globus_transfer_client.endpoint_acl_list(endpoint_id)['DATA'] + except TransferAPIError as error: + print(error) + + +def directory_is_old(directory): + """ + :param directory: + :return: True or False depending on whether the directory has not been modified in more than THRESHOLD days + """ + try: + age = current_time - getmtime(directory) + except OSError: + return False + + days = age / 60 / 60 / 24 + if days > THRESHOLD_DAYS: + return True + return False + + +def get_old_acls(): + """ + Get the size and modified date of the directories for each ACL + If the directory > threshold days, add it to the list of old_acls to be removed + :return: A list of ACLs to be removed + """ + acls = get_endpoint_acls() + logging.info("{}:FOUND {} acls".format(current_time, len(acls))) + old_acls = [] + old_acl_and_dir = namedtuple("old_acl_and_dir", "acl dir") + for acl in acls: + directory = "/dtn/disk0/bulk" + acl['path'] + if directory_is_old(directory) and acl['id'] not in admin_acls: + oad = old_acl_and_dir(acl, directory) + old_acls.append(oad) + + return old_acls + + +if __name__ == '__main__': + main() diff --git a/staging_service/app.py b/staging_service/app.py index 0e6f893..b8bd73d 100644 --- a/staging_service/app.py +++ b/staging_service/app.py @@ -1,12 +1,14 @@ -from aiohttp import web -import aiohttp_cors import os -from .metadata import stat_data, some_metadata, dir_info, add_upa, similar import shutil -from .utils import Path, run_command, AclManager + +import aiohttp_cors +from aiohttp import web + +from .JGIMetadata import read_metadata_for, translate_for_importer from .auth2Client import KBaseAuth2 from .globus import assert_globusid_exists, is_globusid -from .JGIMetadata import read_metadata_for, translate_for_importer +from .metadata import some_metadata, dir_info, add_upa, similar +from .utils import Path, run_command, AclManager routes = web.RouteTableDef() VERSION = '1.1.1' @@ -15,9 +17,11 @@ @routes.get('/add-acl-concierge') async def add_acl_concierge(request: web.Request): username = await authorize_request(request) - concierge_path = Path.validate_path(username, concierge=True).full_path - result = AclManager().add_acl(concierge_path+ "/" + username) - result['msg'] = 'Please transfer your data to the KBase Globus Endpoint in the following directory: kbaseconcierge/%s'.format(username) + user_dir = Path.validate_path(username).full_path + concierge_path = f"{Path._CONCIERGE_PATH}/{username}/" + result = AclManager().add_acl_concierge(shared_directory=user_dir, + concierge_path=concierge_path) + result['msg'] = f'Requesting Globus Perms for the following globus dir: {concierge_path}' return web.json_response(result) @@ -28,6 +32,7 @@ async def add_acl(request: web.Request): result = AclManager().add_acl(user_dir) return web.json_response(result) + @routes.get('/remove-acl') async def remove_acl(request: web.Request): username = await authorize_request(request) @@ -102,6 +107,7 @@ async def list_files(request: web.Request): data = await dir_info(path, show_hidden, recurse=True) return web.json_response(data) + @routes.get('/download/{path:.*}') async def download_files(request: web.Request): """ @@ -246,7 +252,7 @@ async def upload_files_chunked(request: web.Request): if not os.path.exists(path.full_path): error_msg = 'We are sorry but upload was interrupted. Please try again.'.format( - path=path.full_path) + path=path.full_path) raise web.HTTPNotFound(text=error_msg) response = await some_metadata( @@ -277,7 +283,7 @@ async def define_UPA(request: web.Request): await add_upa(path, UPA) return web.Response( text='succesfully updated UPA {UPA} for file {path}'.format(UPA=UPA, path=path.user_path) - ) + ) @routes.delete('/delete/{path:.+}') @@ -356,7 +362,7 @@ async def decompress(request: web.Request): elif file_extension == '.zip' or file_extension == '.ZIP': await run_command('unzip', path.full_path, '-d', destination) elif file_extension == '.tar': - await run_command('tar', 'xf', path.full_path, '-C', destination) + await run_command('tar', 'xf', path.full_path, '-C', destination) elif file_extension == '.gz': await run_command('gzip', '-d', path.full_path) elif file_extension == '.bz2' or file_extension == 'bzip2': @@ -385,10 +391,10 @@ def app_factory(config): app.router.add_routes(routes) cors = aiohttp_cors.setup(app, defaults={ "*": aiohttp_cors.ResourceOptions( - allow_credentials=True, - expose_headers="*", - allow_headers="*", - ) + allow_credentials=True, + expose_headers="*", + allow_headers="*", + ) }) # Configure CORS on all routes. for route in list(app.router.routes()): @@ -397,16 +403,32 @@ def app_factory(config): # potentially some type of code restructure would allow this without a bunch of globals DATA_DIR = config['staging_service']['DATA_DIR'] META_DIR = config['staging_service']['META_DIR'] - CONCIERGE_DIR = config['staging_service']['CONCIERGE_DIR'] + CONCIERGE_PATH = config['staging_service']['CONCIERGE_PATH'] if DATA_DIR.startswith('.'): DATA_DIR = os.path.normpath(os.path.join(os.getcwd(), DATA_DIR)) if META_DIR.startswith('.'): META_DIR = os.path.normpath(os.path.join(os.getcwd(), META_DIR)) - if CONCIERGE_DIR.startswith('.'): - CONCIERGE_DIR = os.path.normpath(os.path.join(os.getcwd(), CONCIERGE_DIR)) + if CONCIERGE_PATH.startswith('.'): + CONCIERGE_PATH = os.path.normpath(os.path.join(os.getcwd(), CONCIERGE_PATH)) Path._DATA_DIR = DATA_DIR Path._META_DIR = META_DIR - Path._CONCIERGE_DIR = CONCIERGE_DIR + Path._CONCIERGE_PATH = CONCIERGE_PATH + + if Path._DATA_DIR is None: + raise Exception("Please provide DATA_DIR in the config file ") + else: + print("Setting DATA_DIR to", DATA_DIR) + + if Path._META_DIR is None: + raise Exception("Please provide META_DIR in the config file ") + else: + print("Setting META_DIR dir to", META_DIR) + + if Path._CONCIERGE_PATH is None: + raise Exception("Please provide CONCIERGE_PATH in the config file ") + else: + print("Setting CONCIERGE_PATH dir to", CONCIERGE_PATH) + global auth_client auth_client = KBaseAuth2(config['staging_service']['AUTH_URL']) return app diff --git a/staging_service/utils.py b/staging_service/utils.py index ad2f5e6..fe5ea77 100644 --- a/staging_service/utils.py +++ b/staging_service/utils.py @@ -1,11 +1,11 @@ import asyncio import configparser +import json import logging import os import globus_sdk from aiohttp.web import HTTPInternalServerError, HTTPOk -import json async def run_command(*args): @@ -40,7 +40,7 @@ async def run_command(*args): class Path(object): _META_DIR = None # expects to be set by config _DATA_DIR = None # expects to be set by config - _CONCIERGE_DIR_ = None # expects to be set by config + _CONCIERGE_PATH = None # expects to be set by config __slots__ = ['full_path', 'metadata_path', 'user_path', 'name', 'jgi_metadata'] def __init__(self, full_path, metadata_path, user_path, name, jgi_metadata): @@ -52,7 +52,7 @@ def __init__(self, full_path, metadata_path, user_path, name, jgi_metadata): @staticmethod - def validate_path(username: str, path: str = '', concierge=False): + def validate_path(username: str, path: str = ''): """ @returns a path object based on path that must start with username throws an exeception for an invalid path or username @@ -66,11 +66,7 @@ def validate_path(username: str, path: str = '', concierge=False): while path.startswith('/'): path = path[1:] user_path = os.path.join(username, path) - - if concierge: - full_path = os.path.join(Path._CONCIERGE_DIR_, user_path) - else: - full_path = os.path.join(Path._DATA_DIR, user_path) + full_path = os.path.join(Path._DATA_DIR, user_path) metadata_path = os.path.join(Path._META_DIR, user_path) name = os.path.basename(path) @@ -196,6 +192,17 @@ def _remove_acl(self, user_identity_id: str): 'user_identity_id': user_identity_id} raise HTTPInternalServerError(text=json.dumps(response), content_type='application/json') + def add_acl_concierge(self, shared_directory: str, concierge_path: str): + """ + Add ACL to the concierge globus share via the globus API + :param shared_directory: Dir to get globus ID from and to generate id to create ACL for share + :param shared_concierge_directory: KBase Concierge Dir to add acl for + :return: Result of attempt to add acl + """ + user_identity_id = self._get_globus_identity(shared_directory) + return self._add_acl(user_identity_id, concierge_path) + + def add_acl(self, shared_directory: str): """ Add ACL to the globus share via the globus API From c64b61e61fb4262ed88db56db1caa80cfb2570d9 Mon Sep 17 00:00:00 2001 From: Boris Sadkhin Date: Mon, 23 Mar 2020 18:43:20 -0500 Subject: [PATCH 4/5] Create dir --- staging_service/utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/staging_service/utils.py b/staging_service/utils.py index fe5ea77..55da82d 100644 --- a/staging_service/utils.py +++ b/staging_service/utils.py @@ -200,6 +200,13 @@ def add_acl_concierge(self, shared_directory: str, concierge_path: str): :return: Result of attempt to add acl """ user_identity_id = self._get_globus_identity(shared_directory) + cp_full = f"{Path._DATA_DIR}/{concierge_path}" + try: + os.mkdir(cp_full) + print(f"Attempting to create concierge dir {cp_full}") + except FileExistsError as e: + print(e) + return self._add_acl(user_identity_id, concierge_path) From ce870312dd72bfe079459b6bbce865cc772c5c8d Mon Sep 17 00:00:00 2001 From: Boris Sadkhin Date: Mon, 23 Mar 2020 18:57:22 -0500 Subject: [PATCH 5/5] Added link: --- staging_service/app.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/staging_service/app.py b/staging_service/app.py index b8bd73d..e63955b 100644 --- a/staging_service/app.py +++ b/staging_service/app.py @@ -19,9 +19,12 @@ async def add_acl_concierge(request: web.Request): username = await authorize_request(request) user_dir = Path.validate_path(username).full_path concierge_path = f"{Path._CONCIERGE_PATH}/{username}/" - result = AclManager().add_acl_concierge(shared_directory=user_dir, - concierge_path=concierge_path) + aclm = AclManager() + result = aclm.add_acl_concierge(shared_directory=user_dir, + concierge_path=concierge_path) result['msg'] = f'Requesting Globus Perms for the following globus dir: {concierge_path}' + result[ + 'link'] = f"https://app.globus.org/file-manager?destination_id={aclm.endpoint_id}&destination_path={concierge_path}" return web.json_response(result)