diff --git a/api/auth/containerauth.py b/api/auth/containerauth.py index b6265bc2b641c8c1a8599299518fa3e0faa78274..50ed8b7140f6b8219e0bb0c385b22f48b220fbdb 100644 --- a/api/auth/containerauth.py +++ b/api/auth/containerauth.py @@ -26,15 +26,9 @@ def default_container(handler, container=None, target_parent_container=None): required_perm = 'admin' has_access = _get_access(handler.uid, target_parent_container) >= INTEGER_PERMISSIONS[required_perm] elif method == 'DELETE': - required_perm = 'rw' - if container.get('has_children'): - # If the container has children or files, admin is required to delete - required_perm = 'admin' - additional_error_msg = 'Container is not empty.' - if target_parent_container: - has_access = _get_access(handler.uid, target_parent_container) >= INTEGER_PERMISSIONS[required_perm] - else: - has_access = _get_access(handler.uid, container) >= INTEGER_PERMISSIONS[required_perm] + # Container deletion always requires admin + required_perm = 'admin' + has_access = _get_access(handler.uid, target_parent_container) >= INTEGER_PERMISSIONS[required_perm] elif method == 'PUT' and target_parent_container is not None: has_access = ( _get_access(handler.uid, container) >= INTEGER_PERMISSIONS['admin'] and diff --git a/api/dao/basecontainerstorage.py b/api/dao/basecontainerstorage.py index febe11f2d7e08b132850a593e6b28f769d87b1ac..9484833f4f393b749400bd95e018637045310326 100644 --- a/api/dao/basecontainerstorage.py +++ b/api/dao/basecontainerstorage.py @@ -47,9 +47,10 @@ class ContainerStorage(object): Examples: projects, sessions, acquisitions and collections """ - def __init__(self, cont_name, use_object_id=False): + def __init__(self, cont_name, use_object_id=False, use_delete_tag=False): self.cont_name = cont_name self.use_object_id = use_object_id + self.use_delete_tag = use_delete_tag self.dbc = config.db[cont_name] @classmethod @@ -195,6 +196,8 @@ class ContainerStorage(object): _id = bson.ObjectId(_id) except bson.errors.InvalidId as e: raise APIStorageException(e.message) + if self.use_delete_tag: + return self.dbc.update_one({'_id': _id}, {'$set': {'deleted': datetime.datetime.utcnow()}}) return self.dbc.delete_one({'_id':_id}) def get_el(self, _id, projection=None, fill_defaults=False): @@ -203,18 +206,23 @@ class ContainerStorage(object): _id = bson.ObjectId(_id) except bson.errors.InvalidId as e: raise APIStorageException(e.message) - cont = self.dbc.find_one(_id, projection) + cont = self.dbc.find_one({'_id': _id, 'deleted': {'$exists': False}}, projection) self._from_mongo(cont) if fill_defaults: self._fill_default_values(cont) + if cont is not None and cont.get('files', []): + cont['files'] = [f for f in cont['files'] if 'deleted' not in f] return cont def get_all_el(self, query, user, projection, fill_defaults=False): + if query is None: + query = {} if user: if query.get('permissions'): query['$and'] = [{'permissions': {'$elemMatch': user}}, {'permissions': query.pop('permissions')}] else: query['permissions'] = {'$elemMatch': user} + query['deleted'] = {'$exists': False} # if projection includes files.info, add new key `info_exists` and allow reserved info keys through if projection and ('info' in projection or 'files.info' in projection or 'subject.info' in projection): @@ -232,6 +240,8 @@ class ContainerStorage(object): results = list(self.dbc.find(query, projection)) for cont in results: + if cont.get('files', []): + cont['files'] = [f for f in cont['files'] if 'deleted' not in f] self._from_mongo(cont) if fill_defaults: self._fill_default_values(cont) diff 
--git a/api/dao/containerstorage.py b/api/dao/containerstorage.py index 3c4961b8d19bf172930e84c88464d8b008069c8c..de279038563f3223c6cacd1ec24814e6001562d1 100644 --- a/api/dao/containerstorage.py +++ b/api/dao/containerstorage.py @@ -42,7 +42,7 @@ class GroupStorage(ContainerStorage): class ProjectStorage(ContainerStorage): def __init__(self): - super(ProjectStorage,self).__init__('projects', use_object_id=True) + super(ProjectStorage,self).__init__('projects', use_object_id=True, use_delete_tag=True) def create_el(self, payload): result = super(ProjectStorage, self).create_el(payload) @@ -100,7 +100,7 @@ class ProjectStorage(ContainerStorage): class SessionStorage(ContainerStorage): def __init__(self): - super(SessionStorage,self).__init__('sessions', use_object_id=True) + super(SessionStorage,self).__init__('sessions', use_object_id=True, use_delete_tag=True) def _fill_default_values(self, cont): cont = super(SessionStorage,self)._fill_default_values(cont) @@ -230,7 +230,7 @@ class SessionStorage(ContainerStorage): class AcquisitionStorage(ContainerStorage): def __init__(self): - super(AcquisitionStorage,self).__init__('acquisitions', use_object_id=True) + super(AcquisitionStorage,self).__init__('acquisitions', use_object_id=True, use_delete_tag=True) def create_el(self, payload): result = super(AcquisitionStorage, self).create_el(payload) @@ -301,13 +301,13 @@ class AcquisitionStorage(ContainerStorage): class CollectionStorage(ContainerStorage): def __init__(self): - super(CollectionStorage, self).__init__('collections', use_object_id=True) + super(CollectionStorage, self).__init__('collections', use_object_id=True, use_delete_tag=True) class AnalysisStorage(ContainerStorage): def __init__(self): - super(AnalysisStorage, self).__init__('analyses', use_object_id=True) + super(AnalysisStorage, self).__init__('analyses', use_object_id=True, use_delete_tag=True) def get_parent(self, parent_type, parent_id): diff --git a/api/dao/containerutil.py b/api/dao/containerutil.py index 7de564aabe5690c3a6e46efc2876c1063a55b413..bc748a0c5f737e5ab93714cbaeec9b14689cff20 100644 --- a/api/dao/containerutil.py +++ b/api/dao/containerutil.py @@ -20,47 +20,34 @@ SINGULAR_TO_PLURAL = { } PLURAL_TO_SINGULAR = {p: s for s, p in SINGULAR_TO_PLURAL.iteritems()} -def propagate_changes(cont_name, _id, query, update): - """ - Propagates changes down the heirarchy tree. - cont_name and _id refer to top level container (which will not be modified here) +def propagate_changes(cont_name, cont_ids, query, update, include_refs=False): """ + Propagates changes down the heirarchy tree recursively. 
+ cont_name and cont_ids refer to top level containers (which will not be modified here) + """ - if cont_name == 'groups': - project_ids = [p['_id'] for p in config.db.projects.find({'group': _id}, [])] - session_ids = [s['_id'] for s in config.db.sessions.find({'project': {'$in': project_ids}}, [])] - - project_q = copy.deepcopy(query) - project_q['_id'] = {'$in': project_ids} - session_q = copy.deepcopy(query) - session_q['_id'] = {'$in': session_ids} - acquisition_q = copy.deepcopy(query) - acquisition_q['session'] = {'$in': session_ids} - - config.db.projects.update_many(project_q, update) - config.db.sessions.update_many(session_q, update) - config.db.acquisitions.update_many(acquisition_q, update) - + containers = ['groups', 'projects', 'sessions', 'acquisitions'] + if not isinstance(cont_ids, list): + cont_ids = [cont_ids] + if query is None: + query = {} - # Apply change to projects - elif cont_name == 'projects': - session_ids = [s['_id'] for s in config.db.sessions.find({'project': _id}, [])] + if include_refs: + analysis_query = copy.deepcopy(query) + analysis_query.update({'parent.type': singularize(cont_name), 'parent.id': {'$in': cont_ids}}) + config.db.analyses.update_many(analysis_query, update) - session_q = copy.deepcopy(query) - session_q['project'] = _id - acquisition_q = copy.deepcopy(query) - acquisition_q['session'] = {'$in': session_ids} + if cont_name in ('groups', 'projects', 'sessions'): + child_cont = containers[containers.index(cont_name) + 1] + child_ids = [c['_id'] for c in config.db[child_cont].find({singularize(cont_name): {'$in': cont_ids}}, [])] + child_query = copy.deepcopy(query) + child_query['_id'] = {'$in': child_ids} + config.db[child_cont].update_many(child_query, update) - config.db.sessions.update_many(session_q, update) - config.db.acquisitions.update_many(acquisition_q, update) - - elif cont_name == 'sessions': - query['session'] = _id - config.db.acquisitions.update_many(query, update) - else: - raise ValueError('changes can only be propagated from group, project or session level') + # Recurse to the next hierarchy level + propagate_changes(child_cont, child_ids, query, update, include_refs=include_refs) def add_id_to_subject(subject, pid): @@ -105,9 +92,9 @@ def get_stats(cont, cont_type): # Get session and non-compliant session count match_q = {} if cont_type == 'projects': - match_q = {'project': cont['_id'], 'archived': {'$in': [None, False]}} + match_q = {'project': cont['_id'], 'archived': {'$in': [None, False]}, 'deleted': {'$exists': False}} elif cont_type == 'collections': - result = config.db.acquisitions.find({'collections': cont['_id'], 'archived': {'$in': [None, False]}}, {'session': 1}) + result = config.db.acquisitions.find({'collections': cont['_id'], 'archived': {'$in': [None, False]}, 'deleted': {'$exists': False}}, {'session': 1}) session_ids = list(set([s['session'] for s in result])) match_q = {'_id': {'$in': session_ids}} @@ -158,6 +145,24 @@ def sanitize_info(info): return sanitized_info +def get_referring_analyses(cont_name, cont_id, filename=None): + """ + Get all (non-deleted) analyses that reference any file from the container as their input. + If filename is given, only return analyses that have that specific file as their input. 
+ """ + query = { + 'destination.type': 'analysis', + 'inputs.type': singularize(cont_name), + 'inputs.id': str(cont_id), + } + if filename: + query['inputs.name'] = filename + jobs = config.db.jobs.find(query, {'destination.id': True}) + analysis_ids = [bson.ObjectId(job['destination']['id']) for job in jobs] + analyses = config.db.analyses.find({'_id': {'$in': analysis_ids}, 'deleted': {'$exists': False}}) + return list(analyses) + + class ContainerReference(object): # pylint: disable=redefined-builtin # TODO: refactor to resolve pylint warning @@ -198,7 +203,7 @@ class ContainerReference(object): def get(self): collection = pluralize(self.type) - result = config.db[collection].find_one({'_id': bson.ObjectId(self.id)}) + result = config.db[collection].find_one({'_id': bson.ObjectId(self.id), 'deleted': {'$exists': False}}) if result is None: raise APINotFoundException('No such {} {} in database'.format(self.type, self.id)) if 'parent' in result: diff --git a/api/dao/hierarchy.py b/api/dao/hierarchy.py index dbc26dd03e5b263d04860389703a22182427be03..46837d336ef1170c22f06f4fcce6ea295a18d978 100644 --- a/api/dao/hierarchy.py +++ b/api/dao/hierarchy.py @@ -134,48 +134,6 @@ def get_parent_tree(cont_name, _id): return tree -def propagate_changes(cont_name, _id, query, update): - """ - Propagates changes down the heirarchy tree. - - cont_name and _id refer to top level container (which will not be modified here) - """ - - - if cont_name == 'groups': - project_ids = [p['_id'] for p in config.db.projects.find({'group': _id}, [])] - session_ids = [s['_id'] for s in config.db.sessions.find({'project': {'$in': project_ids}}, [])] - - project_q = copy.deepcopy(query) - project_q['_id'] = {'$in': project_ids} - session_q = copy.deepcopy(query) - session_q['_id'] = {'$in': session_ids} - acquisition_q = copy.deepcopy(query) - acquisition_q['session'] = {'$in': session_ids} - - config.db.projects.update_many(project_q, update) - config.db.sessions.update_many(session_q, update) - config.db.acquisitions.update_many(acquisition_q, update) - - - # Apply change to projects - elif cont_name == 'projects': - session_ids = [s['_id'] for s in config.db.sessions.find({'project': _id}, [])] - - session_q = copy.deepcopy(query) - session_q['project'] = _id - acquisition_q = copy.deepcopy(query) - acquisition_q['session'] = {'$in': session_ids} - - config.db.sessions.update_many(session_q, update) - config.db.acquisitions.update_many(acquisition_q, update) - - elif cont_name == 'sessions': - query['session'] = _id - config.db.acquisitions.update_many(query, update) - else: - raise ValueError('changes can only be propagated from group, project or session level') - def is_session_compliant(session, template): """ Given a project-level session template and a session, @@ -221,7 +179,7 @@ def is_session_compliant(session, template): min_count = fr_temp.pop('minimum') count = 0 for f in cont.get('files', []): - if not check_cont(f, fr_temp): + if 'deleted' in f or not check_cont(f, fr_temp): # Didn't find a match, on to the next one continue else: @@ -248,7 +206,7 @@ def is_session_compliant(session, template): if not session.get('_id'): # New session, won't have any acquisitions. 
Compliance check fails return False - acquisitions = list(config.db.acquisitions.find({'session': session['_id'], 'archived':{'$ne':True}})) + acquisitions = list(config.db.acquisitions.find({'session': session['_id'], 'archived': {'$ne': True}, 'deleted': {'$exists': False}})) for req in a_requirements: req_temp = copy.deepcopy(req) min_count = req_temp.pop('minimum') @@ -276,11 +234,19 @@ def upsert_fileinfo(cont_name, _id, fileinfo): for f in container_before.get('files',[]): # Fine file in result and set to file_after if f['name'] == fileinfo['name']: - file_before = f + if 'deleted' in f: + # Ugly hack: remove already existing file that has the 'deleted' tag + # This creates a gap in the delete functionality, ie. this file cannot be restored from this point on. + # Note that the previous file in storage will be unreferenced from the DB (unless CAS edge case...) + config.db[cont_name].find_one_and_update( + {'_id': _id, 'files.name': fileinfo['name']}, + {'$pull': {'files': {'name': fileinfo['name']}}} + ) + else: + file_before = f break if file_before is None: - fileinfo['created'] = fileinfo['modified'] container_after = add_fileinfo(cont_name, _id, fileinfo) else: @@ -338,7 +304,7 @@ def _find_or_create_destination_project(group_id, project_label, timestamp, user project_label = 'Unknown' project_regex = '^'+re.escape(project_label)+'$' - project = config.db.projects.find_one({'group': group['_id'], 'label': {'$regex': project_regex, '$options': 'i'}}) + project = config.db.projects.find_one({'group': group['_id'], 'label': {'$regex': project_regex, '$options': 'i'}, 'deleted': {'$exists': False}}) if project: # If the project already exists, check the user's access @@ -459,20 +425,20 @@ def find_existing_hierarchy(metadata, type_='uid', user=None): raise APIStorageException(str(e)) # Confirm session and acquisition exist - session_obj = config.db.sessions.find_one({'uid': session_uid}, ['project', 'permissions']) + session_obj = config.db.sessions.find_one({'uid': session_uid, 'deleted': {'$exists': False}}, ['project', 'permissions']) if session_obj is None: raise APINotFoundException('Session with uid {} does not exist'.format(session_uid)) if user and not has_access(user, session_obj, 'rw'): raise APIPermissionException('User {} does not have read-write access to session {}'.format(user, session_uid)) - a = config.db.acquisitions.find_one({'uid': acquisition_uid}, ['_id']) + a = config.db.acquisitions.find_one({'uid': acquisition_uid, 'deleted': {'$exists': False}}, ['_id']) if a is None: raise APINotFoundException('Acquisition with uid {} does not exist'.format(acquisition_uid)) now = datetime.datetime.utcnow() project_files = dict_fileinfos(project.pop('files', [])) - project_obj = config.db.projects.find_one({'_id': session_obj['project']}, projection=PROJECTION_FIELDS + ['name']) + project_obj = config.db.projects.find_one({'_id': session_obj['project'], 'deleted': {'$exists': False}}, projection=PROJECTION_FIELDS + ['name']) target_containers = _get_targets(project_obj, session, acquisition, type_, now) target_containers.append( (TargetContainer(project_obj, 'project'), project_files) @@ -496,7 +462,7 @@ def upsert_bottom_up_hierarchy(metadata, type_='uid', user=None): log.error(metadata) raise APIStorageException(str(e)) - session_obj = config.db.sessions.find_one({'uid': session_uid}) + session_obj = config.db.sessions.find_one({'uid': session_uid, 'deleted': {'$exists': False}}) if session_obj: # skip project creation, if session exists if user and not has_access(user, 
session_obj, 'rw'): @@ -504,7 +470,7 @@ def upsert_bottom_up_hierarchy(metadata, type_='uid', user=None): now = datetime.datetime.utcnow() project_files = dict_fileinfos(project.pop('files', [])) - project_obj = config.db.projects.find_one({'_id': session_obj['project']}, projection=PROJECTION_FIELDS + ['name']) + project_obj = config.db.projects.find_one({'_id': session_obj['project'], 'deleted': {'$exists': False}}, projection=PROJECTION_FIELDS + ['name']) target_containers = _get_targets(project_obj, session, acquisition, type_, now) target_containers.append( (TargetContainer(project_obj, 'project'), project_files) diff --git a/api/dao/liststorage.py b/api/dao/liststorage.py index 50df34cc33ef31c8093e0441905399ca719552fc..12d38ca6101c864db49f88c74835617432376011 100644 --- a/api/dao/liststorage.py +++ b/api/dao/liststorage.py @@ -20,10 +20,11 @@ class ListStorage(object): Examples: permissions in projects, permissions in groups, notes in projects, sessions, acquisitions, etc """ - def __init__(self, cont_name, list_name, use_object_id = False): + def __init__(self, cont_name, list_name, use_object_id=False, use_delete_tag=False): self.cont_name = cont_name self.list_name = list_name self.use_object_id = use_object_id + self.use_delete_tag = use_delete_tag self.dbc = config.db[cont_name] def get_container(self, _id, query_params=None): @@ -103,16 +104,9 @@ class ListStorage(object): query = {'_id': _id} update = { '$pull': {self.list_name: query_params}, - '$set': { 'modified': datetime.datetime.utcnow()} - } - result = self.dbc.update_one(query, update) - if self.list_name is 'files' and self.cont_name in ['sessions', 'acquisitions']: - if self.cont_name == 'sessions': - session_id = _id - else: - session_id = AcquisitionStorage().get_container(_id).get('session') - SessionStorage().recalc_session_compliance(session_id) - return result + '$set': {'modified': datetime.datetime.utcnow()} + } + return self.dbc.update_one(query, update) def _get_el(self, _id, query_params): query = {'_id': _id, self.list_name: {'$elemMatch': query_params}} @@ -162,6 +156,28 @@ class FileStorage(ListStorage): 'jobs_triggered': len(jobs_spawned) } + def _delete_el(self, _id, query_params): + files = self.get_container(_id).get('files', []) + for f in files: + if f['name'] == query_params['name']: + f['deleted'] = datetime.datetime.utcnow() + result = self.dbc.update_one({'_id': _id}, {'$set': {'files': files, 'modified': datetime.datetime.utcnow()}}) + if self.cont_name in ['sessions', 'acquisitions']: + if self.cont_name == 'sessions': + session_id = _id + else: + session_id = AcquisitionStorage().get_container(_id).get('session') + SessionStorage().recalc_session_compliance(session_id) + return result + + def _get_el(self, _id, query_params): + query_params_nondeleted = query_params.copy() + query_params_nondeleted['deleted'] = {'$exists': False} + query = {'_id': _id, 'files': {'$elemMatch': query_params_nondeleted}} + projection = {'files.$': 1} + result = self.dbc.find_one(query, projection) + if result and result.get(self.list_name): + return result.get(self.list_name)[0] def modify_info(self, _id, query_params, payload): update = {} diff --git a/api/download.py b/api/download.py index 31914625f5a2e6ec01c448e83730de3cc2ef9966..fa9ee58910ff896db7cc10aff327af6c1418ab0a 100644 --- a/api/download.py +++ b/api/download.py @@ -119,7 +119,7 @@ class Download(base.RequestHandler): filename = None ids_of_paths = {} - base_query = {} + base_query = {'deleted': {'$exists': False}} if not self.superuser_request: 
base_query['permissions._id'] = self.uid @@ -138,9 +138,9 @@ class Download(base.RequestHandler): prefix = '/'.join([arc_prefix, project['group'], project['label']]) total_size, file_cnt = self._append_targets(targets, 'projects', project, prefix, total_size, file_cnt, data_path, req_spec.get('filters')) - sessions = config.db.sessions.find({'project': item_id}, ['label', 'files', 'uid', 'timestamp', 'timezone', 'subject']) + sessions = config.db.sessions.find({'project': item_id, 'deleted': {'$exists': False}}, ['label', 'files', 'uid', 'timestamp', 'timezone', 'subject']) session_dict = {session['_id']: session for session in sessions} - acquisitions = config.db.acquisitions.find({'session': {'$in': session_dict.keys()}}, ['label', 'files', 'session', 'uid', 'timestamp', 'timezone']) + acquisitions = config.db.acquisitions.find({'session': {'$in': session_dict.keys()}, 'deleted': {'$exists': False}}, ['label', 'files', 'session', 'uid', 'timestamp', 'timezone']) session_prefixes = {} subject_dict = {} @@ -187,7 +187,7 @@ class Download(base.RequestHandler): total_size, file_cnt = self._append_targets(targets, 'sessions', session, prefix, total_size, file_cnt, data_path, req_spec.get('filters')) # If the param `collection` holding a collection id is not None, filter out acquisitions that are not in the collection - a_query = {'session': item_id} + a_query = {'session': item_id, 'deleted': {'$exists': False}} if collection: a_query['collections'] = bson.ObjectId(collection) acquisitions = config.db.acquisitions.find(a_query, ['label', 'files', 'uid', 'timestamp', 'timezone']) @@ -285,7 +285,7 @@ class Download(base.RequestHandler): yield chunk if len(chunk) % BLOCKSIZE != 0: yield (BLOCKSIZE - (len(chunk) % BLOCKSIZE)) * b'\0' - self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=cont_id, multifile=True, origin_override=ticket['origin']) # log download + self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=cont_id, filename=os.path.basename(arcpath), multifile=True, origin_override=ticket['origin']) # log download yield stream.getvalue() # get tar stream trailer stream.close() @@ -295,7 +295,7 @@ class Download(base.RequestHandler): t.type = tarfile.SYMTYPE t.linkname = os.path.relpath(filepath, data_path) yield t.tobuf() - self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=cont_id, multifile=True, origin_override=ticket['origin']) # log download + self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=cont_id, filename=os.path.basename(arcpath), multifile=True, origin_override=ticket['origin']) # log download stream = cStringIO.StringIO() with tarfile.open(mode='w|', fileobj=stream) as _: pass @@ -347,9 +347,9 @@ class Download(base.RequestHandler): if level == 'project': # Grab sessions and their ids - sessions = config.db.sessions.find({'project': node['_id']}, {'_id': 1}) + sessions = config.db.sessions.find({'project': node['_id'], 'deleted': {'$exists': False}}, {'_id': 1}) session_ids = [s['_id'] for s in sessions] - acquisitions = config.db.acquisitions.find({'session': {'$in': session_ids}}, {'_id': 1}) + acquisitions = config.db.acquisitions.find({'session': {'$in': session_ids}, 'deleted': {'$exists': False}}, {'_id': 1}) acquisition_ids = [a['_id'] for a in acquisitions] containers['projects']=1 @@ -362,7 +362,7 @@ class Download(base.RequestHandler): cont_query['acquisitions']['_id']['$in'] = cont_query['acquisitions']['_id']['$in'] + acquisition_ids elif level == 'session': - 
acquisitions = config.db.acquisitions.find({'session': node['_id']}, {'_id': 1}) + acquisitions = config.db.acquisitions.find({'session': node['_id'], 'deleted': {'$exists': False}}, {'_id': 1}) acquisition_ids = [a['_id'] for a in acquisitions] @@ -415,4 +415,3 @@ class Download(base.RequestHandler): else: res[type_] = doc return res - diff --git a/api/handlers/collectionshandler.py b/api/handlers/collectionshandler.py index 39b07a08f9ed691c1fef70c2ab830edc5741fff6..5a8dd473d07a753a43a6a1c43c3e3ad849fe2ea6 100644 --- a/api/handlers/collectionshandler.py +++ b/api/handlers/collectionshandler.py @@ -19,7 +19,7 @@ class CollectionsHandler(ContainerHandler): container_handler_configurations['collections'] = { 'permchecker': containerauth.collection_permissions, - 'storage': containerstorage.ContainerStorage('collections', use_object_id=True), + 'storage': containerstorage.ContainerStorage('collections', use_object_id=True, use_delete_tag=True), 'storage_schema_file': 'collection.json', 'payload_schema_file': 'collection.json', 'list_projection': {'info': 0} @@ -84,10 +84,10 @@ class CollectionsHandler(ContainerHandler): self.abort(400, 'not a valid object id') item_id = bson.ObjectId(item['_id']) if item['level'] == 'project': - sess_ids = [s['_id'] for s in config.db.sessions.find({'project': item_id}, [])] - acq_ids += [a['_id'] for a in config.db.acquisitions.find({'session': {'$in': sess_ids}}, [])] + sess_ids = [s['_id'] for s in config.db.sessions.find({'project': item_id, 'deleted': {'$exists': False}}, [])] + acq_ids += [a['_id'] for a in config.db.acquisitions.find({'session': {'$in': sess_ids}, 'deleted': {'$exists': False}}, [])] elif item['level'] == 'session': - acq_ids += [a['_id'] for a in config.db.acquisitions.find({'session': item_id}, [])] + acq_ids += [a['_id'] for a in config.db.acquisitions.find({'session': item_id, 'deleted': {'$exists': False}}, [])] elif item['level'] == 'acquisition': acq_ids += [item_id] operator = '$addToSet' if contents['operation'] == 'add' else '$pull' @@ -95,10 +95,25 @@ class CollectionsHandler(ContainerHandler): self.abort(400, 'not a valid object id') config.db.acquisitions.update_many({'_id': {'$in': acq_ids}}, {operator: {'collections': bson.ObjectId(_id)}}) + def delete(self, **kwargs): - _id = kwargs.get('cid') - super(CollectionsHandler, self).delete('collections', **kwargs) - config.db.acquisitions.update_many({'collections': bson.ObjectId(_id)}, {'$pull': {'collections': bson.ObjectId(_id)}}) + _id = bson.ObjectId(kwargs.pop('cid')) + self.config = self.container_handler_configurations['collections'] + self.storage = self.config['storage'] + container = self._get_container(_id) + container['has_children'] = container.get('files') or container.get('analyses') + permchecker = self._get_permchecker(container, None) + try: + # This line exec the actual delete checking permissions using the decorator permchecker + result = permchecker(self.storage.exec_op)('DELETE', _id) + config.db.acquisitions.update_many({'collections': _id}, {'$pull': {'collections': _id}}) + except APIStorageException as e: + self.abort(400, e.message) + + if result.modified_count == 1: + return {'deleted': 1} + else: + self.abort(404, 'Element not removed from container {} {}'.format(self.storage.cont_name, _id)) def get_all(self): projection = self.container_handler_configurations['collections']['list_projection'] diff --git a/api/handlers/containerhandler.py b/api/handlers/containerhandler.py index 
e45f157be12a062fa319875879f75d3b3f68243f..7d420b2aa151600c719e5e28210ca769faf98b4f 100644 --- a/api/handlers/containerhandler.py +++ b/api/handlers/containerhandler.py @@ -529,6 +529,7 @@ class ContainerHandler(base.RequestHandler): return + @log_access(AccessType.delete_container) def delete(self, cont_name, **kwargs): _id = kwargs.pop('cid') self.config = self.container_handler_configurations[cont_name] @@ -540,6 +541,13 @@ class ContainerHandler(base.RequestHandler): container['has_children'] = False if container.get('files') or container.get('analyses'): container['has_children'] = True + + if cont_name == 'acquisitions': + analyses = containerutil.get_referring_analyses(cont_name, _id) + if analyses: + analysis_ids = [str(a['_id']) for a in analyses] + self.abort(400, 'Cannot delete acquisition {} referenced by analyses {}'.format(_id, analysis_ids)) + target_parent_container, _ = self._get_parent_container(container) permchecker = self._get_permchecker(container, target_parent_container) try: @@ -547,9 +555,13 @@ class ContainerHandler(base.RequestHandler): result = permchecker(self.storage.exec_op)('DELETE', _id) except APIStorageException as e: self.abort(400, e.message) - - if result.deleted_count == 1: - return {'deleted': result.deleted_count} + if result.modified_count == 1: + deleted_at = config.db[cont_name].find_one({'_id': bson.ObjectId(_id)})['deleted'] + # Don't overwrite deleted timestamp for already deleted children + query = {'deleted': {'$exists': False}} + update = {'$set': {'deleted': deleted_at}} + containerutil.propagate_changes(cont_name, bson.ObjectId(_id), query, update, include_refs=True) + return {'deleted': 1} else: self.abort(404, 'Element not removed from container {} {}'.format(self.storage.cont_name, _id)) @@ -638,6 +650,9 @@ class ContainerHandler(base.RequestHandler): except APIStorageException as e: self.abort(400, e.message) if container is not None: + files = container.get('files', []) + if files: + container['files'] = [f for f in files if 'deleted' not in f] return container else: self.abort(404, 'Element {} not found in container {}'.format(_id, self.storage.cont_name)) diff --git a/api/handlers/listhandler.py b/api/handlers/listhandler.py index c6cea1c40ea0783bfef7558813990fc1b8f48eaf..fd1ab0405b0bf809c1810adcb5b1f945cb91713d 100644 --- a/api/handlers/listhandler.py +++ b/api/handlers/listhandler.py @@ -17,7 +17,7 @@ from ..dao import noop from ..dao import liststorage from ..dao import containerutil from ..web.errors import APIStorageException -from ..web.request import log_access, AccessType +from ..web.request import AccessType def initialize_list_configurations(): @@ -248,8 +248,6 @@ class PermissionsListHandler(ListHandler): """ method to propagate permissions from a container/group to its sessions and acquisitions """ - if query is None: - query = {} if cont_name == 'groups': try: containerutil.propagate_changes(cont_name, _id, query, update) @@ -259,7 +257,7 @@ class PermissionsListHandler(ListHandler): try: oid = bson.ObjectId(_id) update = {'$set': { - 'permissions': config.db[cont_name].find_one({'_id': oid},{'permissions': 1})['permissions'] + 'permissions': config.db[cont_name].find_one({'_id': oid}, {'permissions': 1})['permissions'] }} containerutil.propagate_changes(cont_name, oid, {}, update) except APIStorageException: @@ -475,10 +473,10 @@ class FileListHandler(ListHandler): # log download if we haven't already for this ticket if ticket: if not ticket.get('logged', False): - self.log_user_access(AccessType.download_file, 
cont_name=cont_name, cont_id=_id) + self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=_id, filename=fileinfo['name']) config.db.downloads.update_one({'_id': ticket_id}, {'$set': {'logged': True}}) else: - self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=_id) + self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=_id, filename=fileinfo['name']) # Authenticated or ticketed download request else: @@ -495,14 +493,16 @@ class FileListHandler(ListHandler): # recheck ticket for logged flag ticket = config.db.downloads.find_one({'_id': ticket_id}) if not ticket.get('logged', False): - self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=_id) + self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=_id, filename=fileinfo['name']) config.db.downloads.update_one({'_id': ticket_id}, {'$set': {'logged': True}}) else: - self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=_id) + self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=_id, filename=fileinfo['name']) - @log_access(AccessType.view_file) def get_info(self, cont_name, list_name, **kwargs): + _id = kwargs['cid'] + filename = kwargs['name'] + self.log_user_access(AccessType.view_file, cont_name=cont_name, cont_id=_id, filename=filename) return super(FileListHandler,self).get(cont_name, list_name, **kwargs) def modify_info(self, cont_name, list_name, **kwargs): @@ -526,19 +526,11 @@ class FileListHandler(ListHandler): def post(self, cont_name, list_name, **kwargs): _id = kwargs.pop('cid') - - # Ugly hack: ensure cont_name is singular. Pass singular or plural to code that expects it. - if cont_name.endswith('s'): - cont_name_plural = cont_name - cont_name = cont_name[:-1] - else: - cont_name_plural = cont_name + 's' - # Authorize - permchecker, _, _, _, _ = self._initialize_request(cont_name_plural, list_name, _id) + permchecker, _, _, _, _ = self._initialize_request(containerutil.pluralize(cont_name), list_name, _id) permchecker(noop)('POST', _id=_id) - return upload.process_upload(self.request, upload.Strategy.targeted, container_type=cont_name, id_=_id, origin=self.origin) + return upload.process_upload(self.request, upload.Strategy.targeted, container_type=containerutil.singularize(cont_name), id_=_id, origin=self.origin) @validators.verify_payload_exists def put(self, cont_name, list_name, **kwargs): @@ -554,10 +546,18 @@ class FileListHandler(ListHandler): def delete(self, cont_name, list_name, **kwargs): # Overriding base class delete to audit action before completion _id = kwargs.pop('cid') + filename = kwargs['name'] permchecker, storage, _, _, keycheck = self._initialize_request(cont_name, list_name, _id, query_params=kwargs) permchecker(noop)('DELETE', _id=_id, query_params=kwargs) - self.log_user_access(AccessType.delete_file, cont_name=cont_name, cont_id=_id) + + if cont_name == 'acquisitions': + analyses = containerutil.get_referring_analyses(cont_name, _id, filename=filename) + if analyses: + analysis_ids = [str(a['_id']) for a in analyses] + self.abort(400, 'Cannot delete file {} referenced by analyses {}'.format(filename, analysis_ids)) + + self.log_user_access(AccessType.delete_file, cont_name=cont_name, cont_id=_id, filename=filename) try: result = keycheck(storage.exec_op)('DELETE', _id, query_params=kwargs) except APIStorageException as e: @@ -613,7 +613,7 @@ class FileListHandler(ListHandler): raise Exception('Packfiles can only be targeted at projects') # 
Authorize: confirm project exists - project = config.db['projects'].find_one({ '_id': bson.ObjectId(_id)}) + project = config.db['projects'].find_one({'_id': bson.ObjectId(_id), 'deleted': {'$exists': False}}) if project is None: raise Exception('Project ' + _id + ' does not exist') diff --git a/api/handlers/refererhandler.py b/api/handlers/refererhandler.py index 628300935fb3c7ae71e58252e50fc3fdd68952c6..112b1e7836f64f147dfbc1fe9d0f1a992c8b57ae 100644 --- a/api/handlers/refererhandler.py +++ b/api/handlers/refererhandler.py @@ -186,14 +186,13 @@ class AnalysesHandler(RefererHandler): parent = self.storage.get_parent(cont_name, cid) permchecker = self.get_permchecker(parent) permchecker(noop)('DELETE') - self.log_user_access(AccessType.delete_file, cont_name=cont_name, cont_id=cid) try: result = self.storage.delete_el(_id) except APIStorageException as e: self.abort(400, e.message) - if result.deleted_count == 1: - return {'deleted': result.deleted_count} + if result.modified_count == 1: + return {'deleted': result.modified_count} else: self.abort(404, 'Analysis {} not removed from container {} {}'.format(_id, cont_name, cid)) @@ -377,10 +376,10 @@ class AnalysesHandler(RefererHandler): # log download if we haven't already for this ticket if ticket: if not ticket.get('logged', False): - self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=cid) + self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=cid, filename=fileinfo['name']) config.db.downloads.update_one({'_id': ticket_id}, {'$set': {'logged': True}}) else: - self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=cid) + self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=cid, filename=fileinfo['name']) # Request to download the file itself else: @@ -396,10 +395,10 @@ class AnalysesHandler(RefererHandler): if ticket: ticket = config.db.downloads.find_one({'_id': ticket_id}) if not ticket.get('logged', False): - self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=cid) + self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=cid, filename=fileinfo['name']) config.db.downloads.update_one({'_id': ticket_id}, {'$set': {'logged': True}}) else: - self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=cid) + self.log_user_access(AccessType.download_file, cont_name=cont_name, cont_id=cid, filename=fileinfo['name']) def _check_ticket(self, ticket_id, _id, filename): diff --git a/api/handlers/reporthandler.py b/api/handlers/reporthandler.py index d15f3b0b1032bc2a4e165d35c3f21dcec885403b..b1986292f443c7b7b36464061e9183ed1ae0c4a3 100644 --- a/api/handlers/reporthandler.py +++ b/api/handlers/reporthandler.py @@ -42,6 +42,7 @@ ACCESS_LOG_FIELDS = [ "context.analysis.label", "context.collection.id", "context.collection.label", + "context.file.name", "context.ticket_id", "request_method", "request_path" @@ -178,10 +179,10 @@ class SiteReport(Report): group = {} group['label'] = g.get('label') - project_ids = [p['_id'] for p in config.db.projects.find({'group': g['_id']}, [])] + project_ids = [p['_id'] for p in config.db.projects.find({'group': g['_id'], 'deleted': {'$exists': False}}, [])] group['project_count'] = len(project_ids) - group['session_count'] = config.db.sessions.count({'project': {'$in': project_ids}}) + group['session_count'] = config.db.sessions.count({'project': {'$in': project_ids}, 'deleted': {'$exists': False}}) report['groups'].append(group) return report @@ -250,7 +251,7 @@ 
class ProjectReport(Report): return False def _base_query(self, pid): - base_query = {'project': pid} + base_query = {'project': pid, 'deleted': {'$exists': False}} if self.start_date is not None or self.end_date is not None: base_query['created'] = {} @@ -358,7 +359,7 @@ class ProjectReport(Report): report = {} report['projects'] = [] - projects = config.db.projects.find({'_id': {'$in': self.projects}}) + projects = config.db.projects.find({'_id': {'$in': self.projects}, 'deleted': {'$exists': False}}) for p in projects: project = self._base_project_report() project['name'] = p.get('label') @@ -751,7 +752,7 @@ class UsageReport(Report): report[key]['session_count'] = r['session_count'] - file_q = {} + file_q = {'deleted': {'$exists': False}} analysis_q = {'analyses.files.output': True} if 'created' in base_query: @@ -862,24 +863,24 @@ class UsageReport(Report): 'file_mbs': 0 } """ - projects = config.db.projects.find({}) + projects = config.db.projects.find({'deleted': {'$exists': False}}) final_report_list = [] for p in projects: report_obj = self._create_default(project=p) # Grab sessions and their ids - sessions = config.db.sessions.find({'project': p['_id']}, {'_id': 1}) + sessions = config.db.sessions.find({'project': p['_id'], 'deleted': {'$exists': False}}, {'_id': 1}) session_ids = [s['_id'] for s in sessions] # Grab acquisitions and their ids - acquisitions = config.db.acquisitions.find({'session': {'$in': session_ids}}, {'_id': 1}) + acquisitions = config.db.acquisitions.find({'session': {'$in': session_ids}, 'deleted': {'$exists': False}}, {'_id': 1}) acquisition_ids = [a['_id'] for a in acquisitions] # For the project and each session and acquisition, create a list of analysis ids parent_ids = session_ids + acquisition_ids + [p['_id']] - analysis_ids = [an['_id'] for an in config.db.analyses.find({'parent.id': {'$in': parent_ids}})] - + analysis_ids = [an['_id'] for an in config.db.analyses.find({'parent.id': {'$in': parent_ids}, 'deleted': {'$exists': False}})] + report_obj['session_count'] = len(session_ids) # for each type of container below it will have a slightly modified match query @@ -891,7 +892,7 @@ class UsageReport(Report): } # Create queries for files and analyses based on created date if a range was provided - file_q = {} + file_q = {'deleted': {'$exists': False}} analysis_q = {'analyses.files.output': True} if 'created' in base_query: diff --git a/api/resolver.py b/api/resolver.py index 36c31de9f064fcbb7f31a3541a3064c765b9513a..0dfb4c5b36816034cdfa2133bdb331f32e38bf77 100644 --- a/api/resolver.py +++ b/api/resolver.py @@ -58,6 +58,8 @@ def _get_files(table, match): return files def _get_docs(table, label, match): + match_nondeleted = match.copy() + match_nondeleted['deleted'] = {'$exists': False} - results = list(config.db[table].find(match, Node.projection, sort=Node.sorting)) + results = list(config.db[table].find(match_nondeleted, Node.projection, sort=Node.sorting)) for y in results: y.update({'node_type': label}) diff --git a/api/web/base.py b/api/web/base.py index b2a5c0b1fc3b1e6b3b0fc19d3ab8f178ee023e37..2423142b04fdd11b43e3edf1b44ef10a98f58422 100644 --- a/api/web/base.py +++ b/api/web/base.py @@ -379,7 +379,7 @@ class RequestHandler(webapp2.RequestHandler): util.send_json_http_exception(self.response, message, code, request_id, custom=custom_errors) - def log_user_access(self, access_type, cont_name=None, cont_id=None, multifile=False, origin_override=None): + def log_user_access(self, access_type, cont_name=None, cont_id=None, filename=None, multifile=False, origin_override=None): if not config.get_item('core', 'access_log_enabled'): return @@
-412,6 +412,8 @@ class RequestHandler(webapp2.RequestHandler): context[k] = {'id': str(v['_id']), 'label': v.get('label')} if k == 'subject': context[k]['label'] = v.get('code') + if filename: + context['file'] = {'name': filename} log_map['context'] = context if access_type is AccessType.download_file and self.get_param('ticket') and not multifile: diff --git a/api/web/request.py b/api/web/request.py index 67dca3589ae94bf1d7d94237714d349a55ce9d68..68e7975265387fbd746ade6c2bbc0b85123682ce 100644 --- a/api/web/request.py +++ b/api/web/request.py @@ -8,6 +8,7 @@ from .. import util AccessType = util.Enum('AccessType', { 'accept_failed_output': 'accept_failed_output', + 'delete_container': 'delete_container', 'view_container': 'view_container', 'view_subject': 'view_subject', 'view_file': 'view_file', @@ -45,7 +46,7 @@ def get_request_logger(request_id): return logger -def log_access(access_type, cont_kwarg='cont_name', cont_id_kwarg='cid'): +def log_access(access_type, cont_kwarg='cont_name', cont_id_kwarg='cid', filename_kwarg='name'): """ A decorator to log a user or drone's access to an endpoint """ @@ -55,17 +56,19 @@ def log_access(access_type, cont_kwarg='cont_name', cont_id_kwarg='cid'): cont_name = None cont_id = None + filename = None if access_type not in [AccessType.user_login, AccessType.user_logout]: cont_name = kwargs.get(cont_kwarg) cont_id = kwargs.get(cont_id_kwarg) + filename = kwargs.get(filename_kwarg) - # Only log view_container events when the container is a session - if access_type is AccessType.view_container and cont_name not in ['sessions', 'session']: + # Only log view_container events when the container is a project/session/acquisition + if access_type is AccessType.view_container and cont_name not in ['project', 'projects', 'sessions', 'session', 'acquisition', 'acquisitions']: return result - self.log_user_access(access_type, cont_name, cont_id) + self.log_user_access(access_type, cont_name=cont_name, cont_id=cont_id, filename=filename) return result return log_user_access_from_request diff --git a/bin/undelete.py b/bin/undelete.py new file mode 100755 index 0000000000000000000000000000000000000000..120cf3d110674281ab9bdbf24ef3cd1fa6502458 --- /dev/null +++ b/bin/undelete.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python +""" +Undo a container or file deletion action previously performed via the API. + +Container undeletion is propagated to all child/referring containers that +were deleted at the same time as the target container (i.e. as part of the +same API call). + +Undeleting a container that has deleted parent(s) raises an error, unless +`--include-parents` is specified, which also restores all parent containers +(without further propagation) regardless of whether they were deleted at the +same time or not. + +Undeleting a container without a deleted timestamp doesn't propagate, unless +`--always-propagate` is specified.
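+ +Example invocations (the ObjectIds below are hypothetical, shown for +illustration only): + + ./bin/undelete.py sessions 5a0c0b06a9a7a8000f3a4b10 + ./bin/undelete.py acquisitions 5a0c0b06a9a7a8000f3a4b11 file.csv --include-parents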
+""" +import argparse +import logging +import sys + +import bson + +from api import config +from api.dao.containerutil import pluralize, propagate_changes + + +log = logging.getLogger('scitran.undelete') +cont_names = ['projects', 'sessions', 'acquisitions', 'analyses', 'collections'] +cont_names_str = '|'.join(cont_names) + + +def main(*argv): + ap = argparse.ArgumentParser(description=sys.modules[__name__].__doc__) + ap.add_argument('cont_name', help='container name to undelete ({})'.format(cont_names_str)) + ap.add_argument('cont_id', help='container id to undelete (bson.ObjectId)') + ap.add_argument('filename', nargs='?', help='filename within container (optional)') + ap.add_argument('--include-parents', action='store_true', help='restore deleted parent containers') + ap.add_argument('--always-propagate', action='store_true', help='propagate even without deleted tag') + args = ap.parse_args(argv or sys.argv[1:] or ['--help']) + + try: + undelete(args.cont_name, args.cont_id, filename=args.filename, + include_parents=args.include_parents, + always_propagate=args.always_propagate) + log.info('Done.') + except (AssertionError, RuntimeError, ValueError) as exc: + log.error(exc.message) + sys.exit(1) + + +def undelete(cont_name, cont_id, filename=None, include_parents=False, always_propagate=False): + if cont_name not in cont_names: + raise ValueError('Invalid cont_name "{}" (must be one of {})'.format(cont_name, cont_names_str)) + if not bson.ObjectId.is_valid(cont_id): + raise ValueError('Invalid cont_id "{}" (must be parseable ObjectId)'.format(cont_id)) + + cont_id = bson.ObjectId(cont_id) + cont_str = '{}/{}'.format(cont_name, cont_id) + container = get_container(cont_name, cont_id) + if container is None: + raise RuntimeError('Cannot find {}'.format(cont_str)) + + if cont_name == 'collections': + log.warning('Undeleting collections is limited such that any acquisitions or sessions' + 'will have to be re-added to the collection again. 
The files and notes of ' + 'the collection are fully restored.') + + unset_deleted = {'$unset': {'deleted': True}} + for parent_name, parent_id in get_parent_refs(cont_name, cont_id, filename=filename): + parent_str = '{}/{}'.format(parent_name, parent_id) + parent = get_container(parent_name, parent_id) + if 'deleted' in parent: + assert parent['deleted'] >= container['deleted'] + if not include_parents: + msg = ('Found parent {}\n' + 'which was deleted {} {}.\n' + 'Run undelete against the parent first to restore it with propagation,\n' + 'or use `--include-parents` to restore parents without propagation.') + deleted_time = 'at the same time as' if parent['deleted'] == container['deleted'] else 'after' + raise RuntimeError(msg.format(parent_str, deleted_time, cont_str)) + log.info('Removing "deleted" tag from parent %s...', parent_str) + config.db[parent_name].update_one({'_id': parent_id}, unset_deleted) + + if filename is None: + # Undeleting a container (and any children/referrers) + if 'deleted' in container: + log.info('Removing "deleted" tag from %s...', cont_str) + config.db[cont_name].update_one({'_id': cont_id}, unset_deleted) + propagate_query = {'deleted': container['deleted']} + elif always_propagate: + propagate_query = {} + else: + log.info('Skipping %s - has no "deleted" tag', cont_str) + return + log.info('Removing "deleted" tag from child/referring containers...') + propagate_changes(cont_name, cont_id, propagate_query, unset_deleted, include_refs=True) + + else: + # Undeleting a single file + file_str = '{}/{}'.format(cont_str, filename) + for f in container.get('files', []): + if f['name'] == filename: + if 'deleted' not in f: + log.info('Skipping file %s - has no "deleted" tag', file_str) + return + log.info('Removing "deleted" tag from file %s...', file_str) + del f['deleted'] + config.db[cont_name].update_one({'_id': cont_id}, {'$set': {'files': container['files']}}) + break + else: + raise RuntimeError('Cannot find file {}'.format(file_str)) + + +def get_container(cont_name, cont_id): + return config.db[cont_name].find_one({'_id': cont_id}) + + +def get_parent_refs(cont_name, cont_id, filename=None): + parent_name, parent_id = None, None + + container = get_container(cont_name, cont_id) + if filename is not None: + parent_name, parent_id = cont_name, cont_id + elif cont_name == 'analyses': + parent_name, parent_id = pluralize(container['parent']['type']), container['parent']['id'] + elif cont_name == 'acquisitions': + parent_name, parent_id = 'sessions', container['session'] + elif cont_name == 'sessions': + parent_name, parent_id = 'projects', container['project'] + + if parent_name is None: + return [] + return [(parent_name, parent_id)] + get_parent_refs(parent_name, parent_id) + + +if __name__ == '__main__': + main() diff --git a/tests/integration_tests/python/conftest.py b/tests/integration_tests/python/conftest.py index 7cd2593d53575232317e5c9e2520cf1f0d2b3e6b..1cba4892da543bb2285370c1854951e10f98c907 100644 --- a/tests/integration_tests/python/conftest.py +++ b/tests/integration_tests/python/conftest.py @@ -252,6 +252,8 @@ class DataBuilder(object): payload['_id'] = self.randstr() if resource == 'gear' and 'name' not in payload['gear']: payload['gear']['name'] = self.randstr() + if resource == 'collection' and 'label' not in payload: + payload['label'] = self.randstr() # add missing label fields using randstr # such fields are: [project.label, session.label, acquisition.label] diff --git a/tests/integration_tests/python/test_access_log.py
b/tests/integration_tests/python/test_access_log.py index 083f5f791475d99e75ba560618a00e7d4a2a83a6..b690b1bae1b1ce7fa1d166ad95c95aadf3f31b87 100644 --- a/tests/integration_tests/python/test_access_log.py +++ b/tests/integration_tests/python/test_access_log.py @@ -9,6 +9,7 @@ from api.web.request import AccessType def test_access_log_succeeds(data_builder, as_admin, log_db): project = data_builder.create_project() session = data_builder.create_session() + acquisition = data_builder.create_acquisition() file_name = 'one.csv' ### @@ -50,6 +51,25 @@ def test_access_log_succeeds(data_builder, as_admin, log_db): assert most_recent_log['origin']['id'] == 'admin@user.com' + ### + # Test project access is logged + ### + + log_records_count_before = log_db.access_log.count({}) + + r = as_admin.get('/projects/' + project) + assert r.ok + + log_records_count_after = log_db.access_log.count({}) + assert log_records_count_before+1 == log_records_count_after + + most_recent_log = log_db.access_log.find({}).sort([('_id', -1)]).limit(1)[0] + + assert most_recent_log['context']['project']['id'] == project + assert most_recent_log['access_type'] == AccessType.view_container.value + assert most_recent_log['origin']['id'] == 'admin@user.com' + + ### # Test session access is logged ### @@ -64,7 +84,26 @@ def test_access_log_succeeds(data_builder, as_admin, log_db): most_recent_log = log_db.access_log.find({}).sort([('_id', -1)]).limit(1)[0] - assert most_recent_log['context']['session']['id'] == str(session) + assert most_recent_log['context']['session']['id'] == session + assert most_recent_log['access_type'] == AccessType.view_container.value + assert most_recent_log['origin']['id'] == 'admin@user.com' + + + ### + # Test acquisition access is logged + ### + + log_records_count_before = log_db.access_log.count({}) + + r = as_admin.get('/acquisitions/' + acquisition) + assert r.ok + + log_records_count_after = log_db.access_log.count({}) + assert log_records_count_before+1 == log_records_count_after + + most_recent_log = log_db.access_log.find({}).sort([('_id', -1)]).limit(1)[0] + + assert most_recent_log['context']['acquisition']['id'] == acquisition assert most_recent_log['access_type'] == AccessType.view_container.value assert most_recent_log['origin']['id'] == 'admin@user.com' @@ -125,6 +164,7 @@ def test_access_log_succeeds(data_builder, as_admin, log_db): most_recent_log = log_db.access_log.find({}).sort([('_id', -1)]).limit(1)[0] assert most_recent_log['context']['project']['id'] == project + assert most_recent_log['context']['file']['name'] == file_name assert most_recent_log['access_type'] == AccessType.download_file.value assert most_recent_log['origin']['id'] == 'admin@user.com' @@ -151,7 +191,8 @@ def test_access_log_succeeds(data_builder, as_admin, log_db): most_recent_log = log_db.access_log.find({}).sort([('_id', -1)]).limit(1)[0] - assert most_recent_log['context']['project']['id'] == str(project) + assert most_recent_log['context']['project']['id'] == project + assert most_recent_log['context']['file']['name'] == file_name assert most_recent_log['context']['ticket_id'] == ticket_id assert most_recent_log['access_type'] == AccessType.download_file.value assert most_recent_log['origin']['id'] == 'admin@user.com' @@ -181,6 +222,7 @@ def test_access_log_succeeds(data_builder, as_admin, log_db): most_recent_logs = log_db.access_log.find({}).sort([('_id', -1)]).limit(file_count) for l in most_recent_logs: + assert l['context']['file']['name'] == file_name assert l['access_type'] == 
AccessType.download_file.value assert l['origin']['id'] == 'admin@user.com' @@ -203,6 +245,7 @@ def test_access_log_succeeds(data_builder, as_admin, log_db): most_recent_logs = log_db.access_log.find({}).sort([('_id', -1)]).limit(file_count) for l in most_recent_logs: + assert l['context']['file']['name'] == file_name assert l['access_type'] == AccessType.download_file.value assert l['origin']['id'] == 'admin@user.com' @@ -222,7 +265,8 @@ def test_access_log_succeeds(data_builder, as_admin, log_db): most_recent_log = log_db.access_log.find({}).sort([('_id', -1)]).limit(1)[0] - assert most_recent_log['context']['project']['id'] == str(project) + assert most_recent_log['context']['project']['id'] == project + assert most_recent_log['context']['file']['name'] == file_name assert most_recent_log['access_type'] == AccessType.view_file.value assert most_recent_log['origin']['id'] == 'admin@user.com' @@ -241,11 +285,70 @@ def test_access_log_succeeds(data_builder, as_admin, log_db): most_recent_log = log_db.access_log.find({}).sort([('_id', -1)]).limit(1)[0] - assert most_recent_log['context']['project']['id'] == str(project) + assert most_recent_log['context']['project']['id'] == project + assert most_recent_log['context']['file']['name'] == file_name assert most_recent_log['access_type'] == AccessType.delete_file.value assert most_recent_log['origin']['id'] == 'admin@user.com' + ### + # Test acquisition delete is logged + ### + + log_records_count_before = log_db.access_log.count({}) + + r = as_admin.delete('/acquisitions/' + acquisition) + assert r.ok + + log_records_count_after = log_db.access_log.count({}) + assert log_records_count_before+1 == log_records_count_after + + most_recent_log = log_db.access_log.find({}).sort([('_id', -1)]).limit(1)[0] + + assert most_recent_log['context']['acquisition']['id'] == acquisition + assert most_recent_log['access_type'] == AccessType.delete_container.value + assert most_recent_log['origin']['id'] == 'admin@user.com' + + + ### + # Test session delete is logged + ### + + log_records_count_before = log_db.access_log.count({}) + + r = as_admin.delete('/sessions/' + session) + assert r.ok + + log_records_count_after = log_db.access_log.count({}) + assert log_records_count_before+1 == log_records_count_after + + most_recent_log = log_db.access_log.find({}).sort([('_id', -1)]).limit(1)[0] + + assert most_recent_log['context']['session']['id'] == session + assert most_recent_log['access_type'] == AccessType.delete_container.value + assert most_recent_log['origin']['id'] == 'admin@user.com' + + + ### + # Test project delete is logged + ### + + log_records_count_before = log_db.access_log.count({}) + + r = as_admin.delete('/projects/' + project) + assert r.ok + + log_records_count_after = log_db.access_log.count({}) + assert log_records_count_before+1 == log_records_count_after + + most_recent_log = log_db.access_log.find({}).sort([('_id', -1)]).limit(1)[0] + + assert most_recent_log['context']['project']['id'] == project + assert most_recent_log['access_type'] == AccessType.delete_container.value + assert most_recent_log['origin']['id'] == 'admin@user.com' + + + def test_access_log_fails(data_builder, as_admin, log_db): project = data_builder.create_project() file_name = 'one.csv' @@ -265,8 +368,8 @@ def test_access_log_fails(data_builder, as_admin, log_db): r = as_admin.delete('/projects/' + project + '/files/' + file_name) assert r.status_code == 500 + log_db.command('collMod', 'access_log', validator={}, validationLevel='strict') + r = 
as_admin.get('/projects/' + project) assert r.ok assert r.json()['files'] - - log_db.command('collMod', 'access_log', validator={}, validationLevel='strict') diff --git a/tests/integration_tests/python/test_containers.py b/tests/integration_tests/python/test_containers.py index a6b46c62bdc35e45b151f2c0a24c0da4434076df..259383ccefb2c73ecf08ddbc9f65d5cf9668d50c 100644 --- a/tests/integration_tests/python/test_containers.py +++ b/tests/integration_tests/python/test_containers.py @@ -1064,6 +1064,7 @@ def test_edit_subject_info(data_builder, as_admin, as_user): assert r.ok assert r.json()['info'] == {} + def test_fields_list_requests(data_builder, file_form, as_admin): # Ensure sensitive keys are not returned on list endpoints # Project: info and files.info @@ -1166,3 +1167,92 @@ def test_fields_list_requests(data_builder, file_form, as_admin): assert not a.get('info') assert not a.get('tags') assert not a['files'][0].get('info') + + +def test_container_delete_tag(data_builder, default_payload, as_admin, as_user, file_form, api_db): + gear_doc = default_payload['gear']['gear'] + gear_doc['inputs'] = {'csv': {'base': 'file'}} + gear = data_builder.create_gear(gear=gear_doc) + project = data_builder.create_project() + session = data_builder.create_session() + acquisition = data_builder.create_acquisition() + collection = data_builder.create_collection() + assert as_admin.post('/acquisitions/' + acquisition + '/files', files=file_form('test.csv')).ok + r = as_admin.post('/sessions/' + session + '/analyses', params={'job': 'true'}, json={ + 'analysis': {'label': 'with-job'}, + 'job': { + 'gear_id': gear, + 'inputs': {'csv': {'type': 'acquisition', 'id': acquisition, 'name': 'test.csv'}} + } + }) + assert r.ok + analysis = r.json()['_id'] + r = as_admin.put('/collections/' + collection, json={ + 'contents': {'operation': 'add', 'nodes': [{'level': 'session', '_id': session}]} + }) + assert r.ok + + # try to delete project without admin perms + r = as_user.delete('/projects/' + project) + assert r.status_code == 403 + + # try to delete acquisition referenced by analysis + r = as_admin.delete('/acquisitions/' + acquisition) + assert r.status_code == 400 + + # try to delete acquisition file referenced by analysis + r = as_admin.delete('/acquisitions/' + acquisition + '/files/test.csv') + assert r.status_code == 400 + + # verify that a non-referenced file _can_ be deleted from the same acquisition + assert as_admin.post('/acquisitions/' + acquisition + '/files', files=file_form('unrelated.csv')).ok + assert as_admin.delete('/acquisitions/' + acquisition + '/files/unrelated.csv').ok + + # delete collection + assert collection in as_admin.get('/acquisitions/' + acquisition).json()['collections'] + assert as_admin.delete('/collections/' + collection).ok + assert 'deleted' in api_db.collections.find_one({'_id': bson.ObjectId(collection)}) + assert as_admin.get('/collections/' + collection).status_code == 404 + assert collection not in as_admin.get('/acquisitions/' + acquisition).json()['collections'] + + # delete analysis + r = as_admin.delete('/sessions/' + session + '/analyses/' + analysis) + assert r.ok + assert 'deleted' in api_db.analyses.find_one({'_id': bson.ObjectId(analysis)}) + assert as_admin.get('/sessions/' + session + '/analyses/' + analysis).status_code == 404 + assert as_admin.get('/analyses/' + analysis).status_code == 404 + + # try to delete acquisition without admin perms + r = as_user.delete('/acquisitions/' + acquisition) + assert r.status_code == 403 + + # delete acquisition + assert 
diff --git a/tests/integration_tests/python/test_containers.py b/tests/integration_tests/python/test_containers.py
index a6b46c62bdc35e45b151f2c0a24c0da4434076df..259383ccefb2c73ecf08ddbc9f65d5cf9668d50c 100644
--- a/tests/integration_tests/python/test_containers.py
+++ b/tests/integration_tests/python/test_containers.py
@@ -1064,6 +1064,7 @@ def test_edit_subject_info(data_builder, as_admin, as_user):
     assert r.ok
     assert r.json()['info'] == {}
 
+
 def test_fields_list_requests(data_builder, file_form, as_admin):
     # Ensure sensitive keys are not returned on list endpoints
     # Project: info and files.info
@@ -1166,3 +1167,92 @@
     assert not a.get('info')
     assert not a.get('tags')
     assert not a['files'][0].get('info')
+
+
+def test_container_delete_tag(data_builder, default_payload, as_admin, as_user, file_form, api_db):
+    gear_doc = default_payload['gear']['gear']
+    gear_doc['inputs'] = {'csv': {'base': 'file'}}
+    gear = data_builder.create_gear(gear=gear_doc)
+    project = data_builder.create_project()
+    session = data_builder.create_session()
+    acquisition = data_builder.create_acquisition()
+    collection = data_builder.create_collection()
+    assert as_admin.post('/acquisitions/' + acquisition + '/files', files=file_form('test.csv')).ok
+    r = as_admin.post('/sessions/' + session + '/analyses', params={'job': 'true'}, json={
+        'analysis': {'label': 'with-job'},
+        'job': {
+            'gear_id': gear,
+            'inputs': {'csv': {'type': 'acquisition', 'id': acquisition, 'name': 'test.csv'}}
+        }
+    })
+    assert r.ok
+    analysis = r.json()['_id']
+    r = as_admin.put('/collections/' + collection, json={
+        'contents': {'operation': 'add', 'nodes': [{'level': 'session', '_id': session}]}
+    })
+    assert r.ok
+
+    # try to delete project without admin perms
+    r = as_user.delete('/projects/' + project)
+    assert r.status_code == 403
+
+    # try to delete acquisition referenced by analysis
+    r = as_admin.delete('/acquisitions/' + acquisition)
+    assert r.status_code == 400
+
+    # try to delete acquisition file referenced by analysis
+    r = as_admin.delete('/acquisitions/' + acquisition + '/files/test.csv')
+    assert r.status_code == 400
+
+    # verify that a non-referenced file _can_ be deleted from the same acquisition
+    assert as_admin.post('/acquisitions/' + acquisition + '/files', files=file_form('unrelated.csv')).ok
+    assert as_admin.delete('/acquisitions/' + acquisition + '/files/unrelated.csv').ok
+
+    # delete collection
+    assert collection in as_admin.get('/acquisitions/' + acquisition).json()['collections']
+    assert as_admin.delete('/collections/' + collection).ok
+    assert 'deleted' in api_db.collections.find_one({'_id': bson.ObjectId(collection)})
+    assert as_admin.get('/collections/' + collection).status_code == 404
+    assert collection not in as_admin.get('/acquisitions/' + acquisition).json()['collections']
+
+    # delete analysis
+    r = as_admin.delete('/sessions/' + session + '/analyses/' + analysis)
+    assert r.ok
+    assert 'deleted' in api_db.analyses.find_one({'_id': bson.ObjectId(analysis)})
+    assert as_admin.get('/sessions/' + session + '/analyses/' + analysis).status_code == 404
+    assert as_admin.get('/analyses/' + analysis).status_code == 404
+
+    # try to delete acquisition without admin perms
+    r = as_user.delete('/acquisitions/' + acquisition)
+    assert r.status_code == 403
+
+    # delete acquisition
+    assert as_admin.delete('/acquisitions/' + acquisition).ok
+    assert 'deleted' in api_db.acquisitions.find_one({'_id': bson.ObjectId(acquisition)})
+    assert as_admin.get('/acquisitions/' + acquisition).status_code == 404
+
+    # delete project as admin
+    acquisition2 = data_builder.create_acquisition()
+    r = as_admin.post('/sessions/' + session + '/analyses', files=file_form(
+        'analysis.csv', meta={'label': 'no-job', 'inputs': [{'name': 'analysis.csv'}]}))
+    analysis2 = r.json()['_id']
+
+    assert as_admin.delete('/projects/' + project).ok
+
+    # test that entries get tagged recursively
+    assert 'deleted' in api_db.projects.find_one({'_id': bson.ObjectId(project)})
+    assert 'deleted' in api_db.sessions.find_one({'_id': bson.ObjectId(session)})
+    assert 'deleted' in api_db.acquisitions.find_one({'_id': bson.ObjectId(acquisition2)})
+    assert 'deleted' in api_db.analyses.find_one({'_id': bson.ObjectId(analysis2)})
+
+    # test that tagged entries are filtered in endpoints
+    assert as_admin.get('/projects/' + project).status_code == 404
+    assert as_admin.get('/sessions/' + session).status_code == 404
+    assert as_admin.get('/acquisitions/' + acquisition2).status_code == 404
+    assert as_admin.get('/sessions/' + session + '/analyses/' + analysis2).status_code == 404
+    assert as_admin.get('/analyses/' + analysis2).status_code == 404
+
+    assert as_admin.get('/projects').json() == []
+    assert as_admin.get('/sessions').json() == []
+    assert as_admin.get('/acquisitions').json() == []
+    assert as_admin.get('/collections').json() == []
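The two 400 responses asserted above imply a referential check before tagging: deletion is refused while a live analysis still lists the acquisition, or one of its files, as an input. A rough sketch of such a guard, assuming the stored analysis documents carry an inputs list of file references like the meta payload posted in the fixture (the helper name and exact schema are assumptions, not the real handler code):

    def assert_no_analysis_refs(db, acquisition_id, filename=None):
        # Find live analyses whose inputs reference this acquisition,
        # optionally narrowed to a single file by name.
        query = {
            'inputs.id': str(acquisition_id),
            'deleted': {'$exists': False},
        }
        if filename is not None:
            query['inputs.name'] = filename
        if db.analyses.find_one(query) is not None:
            # The endpoint surfaces this condition as HTTP 400
            raise RuntimeError('cannot delete: referenced by analysis input')
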
diff --git a/tests/integration_tests/python/test_reports.py b/tests/integration_tests/python/test_reports.py
index c8d70b7b22a9b2ce1e001d03b78dd68a4edc3b8f..db37102838e956db0801c98eed5af8dd3c913bdb 100644
--- a/tests/integration_tests/python/test_reports.py
+++ b/tests/integration_tests/python/test_reports.py
@@ -30,6 +30,23 @@ def test_site_report(data_builder, randstr, as_admin, as_user):
     assert group_report['project_count'] == 1
     assert group_report['session_count'] == 1
 
+    # get site report with deleted session
+    assert as_admin.delete('/sessions/' + session).ok
+    site_report = as_admin.get('/report/site').json()
+    group_report = next((g for g in site_report['groups'] if g['label'] == group_label), None)
+    assert group_report is not None
+    assert group_report['project_count'] == 1
+    assert group_report['session_count'] == 0
+
+    # get site report with deleted project
+    session2 = data_builder.create_session()
+    assert as_admin.delete('/projects/' + project).ok
+    site_report = as_admin.get('/report/site').json()
+    group_report = next((g for g in site_report['groups'] if g['label'] == group_label), None)
+    assert group_report is not None
+    assert group_report['project_count'] == 0
+    assert group_report['session_count'] == 0
+
 
 def test_project_report(data_builder, as_admin, as_user):
     project_1 = data_builder.create_project()
@@ -296,6 +313,21 @@ def test_usage_report(data_builder, file_form, as_user, as_admin):
     r = as_admin.get('/report/usage', params={'type': 'project'})
     assert r.ok
 
+    # Test that deleted files are not counted
+    assert as_admin.post('/projects/' + project + '/files', files=file_form('project.csv')).ok
+    assert as_admin.delete('/projects/' + project + '/files/' + 'project.csv').ok
+
+    r = as_admin.get('/report/usage', params={
+        'type': 'project', 'start_date': yesterday_ts, 'end_date': tomorrow_ts
+    })
+    assert r.ok
+    usage = r.json()
+    assert len(usage) == 2
+    assert usage[1]['project']['label'] == 'project2'
+    assert usage[1]['session_count'] == 0
+    assert usage[1]['file_mbs'] == 0
+    assert usage[1]['gear_execution_count'] == 0
+
     # delete project
     r= as_admin.delete('/projects/' + project)
     assert r.ok
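The report assertions above depend on the report queries excluding tagged documents, presumably with the same 'deleted': {'$exists': False} filter the storage layer uses. A simplified sketch of the per-group counts the site-report test checks, with illustrative names rather than the real report handler:

    def site_report_counts(db, group_id):
        # Count only containers that have not been tagged as deleted
        not_deleted = {'deleted': {'$exists': False}}
        project_ids = [p['_id'] for p in db.projects.find(
            dict(not_deleted, group=group_id), {'_id': 1})]
        session_count = db.sessions.find(
            dict(not_deleted, project={'$in': project_ids})).count()
        return {'project_count': len(project_ids),
                'session_count': session_count}
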
diff --git a/tests/integration_tests/python/test_undelete.py b/tests/integration_tests/python/test_undelete.py
new file mode 100644
index 0000000000000000000000000000000000000000..0af999ef26afa8147b603e9e334ea7e3e1a0563c
--- /dev/null
+++ b/tests/integration_tests/python/test_undelete.py
@@ -0,0 +1,130 @@
+import os
+import sys
+
+import attrdict
+import bson
+import pytest
+
+
+@pytest.fixture(scope='function')
+def undelete(mocker):
+    """Enable importing from `bin` and return `undelete.undelete`."""
+    bin_path = os.path.join(os.getcwd(), 'bin')
+    mocker.patch('sys.path', [bin_path] + sys.path)
+    import undelete
+    return undelete.undelete
+
+
+@pytest.yield_fixture(scope='function')
+def containers(data_builder, as_admin, file_form, api_db):
+    """Populate DB with test dataset including deleted and non-deleted entries."""
+    p_1 = data_builder.create_project()
+    s_1_1 = data_builder.create_session(project=p_1)
+    c_1_1_1 = data_builder.create_collection()
+    an_1_1_1 = as_admin.post('/sessions/' + s_1_1 + '/analyses', files=file_form(
+        'analysis.csv', meta={'label': 'no-job', 'inputs': [{'name': 'analysis.csv'}]})).json()['_id']
+    ac_1_1_1 = data_builder.create_acquisition(session=s_1_1)
+    ac_1_1_2 = data_builder.create_acquisition(session=s_1_1)
+    s_1_2 = data_builder.create_session(project=p_1)
+    ac_1_2_1 = data_builder.create_acquisition(session=s_1_2)
+    p_2 = data_builder.create_project()
+    s_2_1 = data_builder.create_session(project=p_2)
+    ac_2_1_1 = data_builder.create_acquisition(session=s_2_1)
+    assert as_admin.post('/acquisitions/' + ac_1_1_1 + '/files', files=file_form('f_1_1_1_1')).ok
+    assert as_admin.post('/acquisitions/' + ac_1_1_1 + '/files', files=file_form('f_1_1_1_2')).ok
+    assert as_admin.post('/acquisitions/' + ac_2_1_1 + '/files', files=file_form('f_2_1_1_1')).ok
+    assert as_admin.post('/acquisitions/' + ac_1_1_1 + '/files', files=file_form('f_1_1_1_1')).ok
+
+    assert as_admin.delete('/sessions/' + s_1_1 + '/analyses/' + an_1_1_1).ok
+    assert as_admin.delete('/collections/' + c_1_1_1).ok
+    assert as_admin.delete('/acquisitions/' + ac_1_1_1 + '/files/f_1_1_1_1').ok
+    assert as_admin.delete('/acquisitions/' + ac_1_1_1).ok
+    assert as_admin.delete('/sessions/' + s_1_1).ok
+    assert as_admin.delete('/projects/' + p_1).ok
+
+    containers = attrdict.AttrDict(
+        p_1=p_1,
+        s_1_1=s_1_1,
+        c_1_1_1=c_1_1_1,
+        an_1_1_1=an_1_1_1,
+        ac_1_1_1=ac_1_1_1,
+        ac_1_1_2=ac_1_1_2,
+        s_1_2=s_1_2,
+        ac_1_2_1=ac_1_2_1,
+        p_2=p_2,
+        s_2_1=s_2_1,
+        ac_2_1_1=ac_2_1_1,
+    )
+
+    def is_deleted(cont_key, filename=None):
+        cont_name = {'p': 'projects',
+                     's': 'sessions',
+                     'ac': 'acquisitions',
+                     'an': 'analyses',
+                     'c': 'collections',
+                     }[cont_key.split('_')[0]]
+        url = '/{}/{}'.format(cont_name, containers[cont_key])
+        if filename is None:
+            return as_admin.get(url).status_code == 404
+        else:
+            return as_admin.get(url + '/files/' + filename).status_code == 404
+
+    containers['is_deleted'] = is_deleted
+    yield containers
+    api_db.analyses.delete_one({'_id': bson.ObjectId(an_1_1_1)})
+
+
+def test_undelete_noop(undelete, containers):
+    undelete('projects', containers.p_2)
+    undelete('sessions', containers.s_2_1)
+    undelete('acquisitions', containers.ac_2_1_1)
+    undelete('acquisitions', containers.ac_2_1_1, filename='f_2_1_1_1')
+
+
+def test_undelete_scope(undelete, containers, as_admin, api_db):
+    assert containers.is_deleted('p_1')
+    assert containers.is_deleted('s_1_1')
+    assert containers.is_deleted('s_1_2')
+    undelete('projects', containers.p_1)
+    assert not containers.is_deleted('p_1')
+    assert containers.is_deleted('s_1_1')
+    assert not containers.is_deleted('s_1_2')
+
+    assert containers.is_deleted('s_1_1')
+    assert containers.is_deleted('ac_1_1_1')
+    assert containers.is_deleted('ac_1_1_2')
+    undelete('sessions', containers.s_1_1)
+    assert not containers.is_deleted('s_1_1')
+    assert containers.is_deleted('ac_1_1_1')
+    assert not containers.is_deleted('ac_1_1_2')
+
+    assert containers.is_deleted('ac_1_1_1')
+    undelete('acquisitions', containers.ac_1_1_1)
+    assert not containers.is_deleted('ac_1_1_1')
+
+    assert containers.is_deleted('ac_1_1_1', filename='f_1_1_1_1')
+    undelete('acquisitions', containers.ac_1_1_1, filename='f_1_1_1_1')
+    assert not containers.is_deleted('ac_1_1_1', filename='f_1_1_1_1')
+
+    assert containers.is_deleted('c_1_1_1')
+    undelete('collections', containers.c_1_1_1)
+    assert not containers.is_deleted('c_1_1_1')
+
+    assert containers.is_deleted('an_1_1_1')
+    undelete('analyses', containers.an_1_1_1)
+    assert not containers.is_deleted('an_1_1_1')
+
+
+def test_undelete_options(undelete, containers):
+    with pytest.raises(RuntimeError, match=r'use --include-parents'):
+        undelete('acquisitions', containers.ac_1_1_1, filename='f_1_1_1_1')
+
+    undelete('acquisitions', containers.ac_1_1_1, filename='f_1_1_1_1', include_parents=True)
+    assert not containers.is_deleted('p_1')
+    assert containers.is_deleted('s_1_2')
+
+    undelete('projects', containers.p_1)
+    assert containers.is_deleted('s_1_2')
+
+    undelete('projects', containers.p_1, always_propagate=True)
+    assert not containers.is_deleted('s_1_2')
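For reference, a condensed sketch of the undelete() helper these tests pin down: it is a no-op on non-deleted targets, and it restores children only if they were tagged by the same cascading delete, unless always_propagate is set. The explicit db parameter, the hierarchy table, and single-level propagation are simplifications of this sketch; recursion below one level, the filename variant, and the include_parents check (surfaced on the CLI as --include-parents, per the RuntimeError message asserted above) are omitted:

    import bson

    # parent collection -> (child collection, foreign key), one level only
    CHILDREN = {'projects': ('sessions', 'project'),
                'sessions': ('acquisitions', 'session')}

    def undelete(db, cont_name, cont_id, always_propagate=False):
        _id = bson.ObjectId(cont_id)
        cont = db[cont_name].find_one({'_id': _id})
        if cont is None or 'deleted' not in cont:
            return  # no-op, as test_undelete_noop expects
        deleted_at = cont['deleted']
        db[cont_name].update_one({'_id': _id}, {'$unset': {'deleted': ''}})
        if cont_name in CHILDREN:
            child_name, parent_key = CHILDREN[cont_name]
            query = {parent_key: _id}
            if not always_propagate:
                # Restore only children tagged in the same cascade, which is
                # why s_1_1 stays deleted when p_1 is restored in the tests
                query['deleted'] = deleted_at
            db[child_name].update_many(query, {'$unset': {'deleted': ''}})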