From 7d7a9acddad021c08fc6843dad0c68c636c48aab Mon Sep 17 00:00:00 2001 From: Renzo Frigato <rfrigato@stanford.edu> Date: Thu, 12 Nov 2015 17:16:52 -0800 Subject: [PATCH] improvements after code review --- api/api.py | 240 +++++++++-------------------- api/dao/containerstorage.py | 14 +- api/dao/liststorage.py | 16 +- api/files.py | 5 +- api/handlers/collectionshandler.py | 4 +- api/handlers/containerhandler.py | 28 ++-- api/handlers/grouphandler.py | 2 +- api/handlers/listhandler.py | 37 ++--- api/handlers/userhandler.py | 5 +- api/mongo.py | 40 +++-- api/util.py | 2 + api/validators.py | 11 ++ bin/api.wsgi | 1 + test_bootstrap.json | 28 ++-- test_integ/test_collection.py | 2 +- test_integ/test_users.py | 6 +- 16 files changed, 179 insertions(+), 262 deletions(-) diff --git a/api/api.py b/api/api.py index 45c92ba2..4e1233f6 100644 --- a/api/api.py +++ b/api/api.py @@ -17,13 +17,33 @@ from handlers import collectionshandler #regexes used in routing table: routing_regexes = { + # group id regex + # length between 2 and 32 characters + # allowed characters are [0-9a-z.@_-] (start and ends only with [0-9a-z]) 'group_id_re': '[0-9a-z][0-9a-z.@_-]{0,30}[0-9a-z]', + # container id regex + # hexadecimal string exactly of length 24 'cid_re': '[0-9a-f]{24}', + # site id regex + # length less than 24 characters + # allowed characters are [0-9a-z] 'site_re': '[0-9a-z]{0,24}', + # user id regex + # any length, allowed chars are [0-9a-z.@_-] 'user_id_re': '[0-9a-z.@_-]*', + # container name regex + # possible values are projects, sessions, acquisitions or collections 'cont_name_re': 'projects|sessions|acquisitions|collections', + # tag regex + # length between 3 and 24 characters + # any character allowed except '/'' 'tag_re': '[^/]{3,24}', + # filename regex + # length between 3 and 60 characters + # any character allowed except '/' 'filename_re': '[^/]{3,60}', + # note id regex + # hexadecimal string exactly of length 24 'note_id_re': '[0-9a-f]{24}' } @@ -31,181 +51,63 @@ def _format(route): return route.format(**routing_regexes) routes = [ - webapp2.Route( - r'/api', - core.Core - ), + webapp2.Route(r'/api', core.Core), webapp2_extras.routes.PathPrefixRoute(r'/api', [ - webapp2.Route( - r'/download', - core.Core, handler_method='download', methods=['GET', 'POST'], name='download' - ), - webapp2.Route( - r'/reaper', - core.Core, handler_method='reaper', methods=['POST'] - ), - webapp2.Route( - r'/sites', - core.Core, handler_method='sites', methods=['GET'] - ) + webapp2.Route(r'/download', core.Core, handler_method='download', methods=['GET', 'POST'], name='download'), + webapp2.Route(r'/reaper', core.Core, handler_method='reaper', methods=['POST']), + webapp2.Route(r'/sites', core.Core, handler_method='sites', methods=['GET']) ]), - webapp2.Route( - r'/api/users', - userhandler.UserHandler, handler_method='get_all', methods=['GET'] - ), - webapp2.Route( - r'/api/users', - userhandler.UserHandler, methods=['POST'] - ), + webapp2.Route(r'/api/users', userhandler.UserHandler, handler_method='get_all', methods=['GET']), + webapp2.Route(r'/api/users', userhandler.UserHandler, methods=['POST']), webapp2_extras.routes.PathPrefixRoute(r'/api/users', [ - webapp2.Route( - r'/self', - userhandler.UserHandler, handler_method='self', methods=['GET'] - ), - webapp2.Route( - r'/roles', - userhandler.UserHandler, handler_method='roles', methods=['GET'] - ), - webapp2.Route( - _format(r'/<_id:{user_id_re}>'), - userhandler.UserHandler, name='user' - ), - webapp2.Route( - _format(r'/<uid:{user_id_re}>/groups'), - grouphandler.GroupHandler, handler_method='get_all', methods=['GET'], name='groups' - ), + webapp2.Route(r'/self', userhandler.UserHandler, handler_method='self', methods=['GET']), + webapp2.Route(_format(r'/<_id:{user_id_re}>'), userhandler.UserHandler, name='user'), + webapp2.Route(_format(r'/<uid:{user_id_re}>/groups'), grouphandler.GroupHandler, handler_method='get_all', methods=['GET'], name='groups'), ]), - webapp2.Route( - r'/api/jobs', - jobs.Jobs - ), + webapp2.Route(r'/api/jobs', jobs.Jobs), webapp2_extras.routes.PathPrefixRoute(r'/api/jobs', [ - webapp2.Route( - r'/next', - jobs.Jobs, handler_method='next', methods=['GET'] - ), - webapp2.Route( - r'/count', - jobs.Jobs, handler_method='count', methods=['GET'] - ), - webapp2.Route( - r'/addTestJob', - jobs.Jobs, handler_method='addTestJob', methods=['GET'] - ), - webapp2.Route( - r'/<:[^/]+>', - jobs.Job, name='job' - ), + webapp2.Route(r'/next', jobs.Jobs, handler_method='next', methods=['GET']), + webapp2.Route(r'/count', jobs.Jobs, handler_method='count', methods=['GET']), + webapp2.Route(r'/addTestJob', jobs.Jobs, handler_method='addTestJob', methods=['GET']), + webapp2.Route(r'/<:[^/]+>', jobs.Job, name='job'), ]), - webapp2.Route( - r'/api/groups', - grouphandler.GroupHandler, handler_method='get_all', methods=['GET'] - ), - webapp2.Route( - r'/api/groups', - grouphandler.GroupHandler, methods=['POST'] - ), - webapp2.Route( - _format(r'/api/groups/<_id:{group_id_re}>'), - grouphandler.GroupHandler, name='group' - ), - webapp2.Route( - _format(r'/api/<cont_name:groups>/<cid:{group_id_re}>/<list_name:roles>'), - listhandler.ListHandler, name='group' - ), - webapp2.Route( - _format(r'/api/<cont_name:groups>/<cid:{group_id_re}>/<list_name:roles>/<site:{site_re}>/<_id:{user_id_re}>'), - listhandler.ListHandler, name='group', methods=['GET', 'PUT', 'DELETE'] - ), - webapp2.Route( - _format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:tags>'), - listhandler.ListHandler, methods=['POST'], name='tags_post' - ), - webapp2.Route( - _format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:tags>/<value:{tag_re}>'), - listhandler.ListHandler, name='tags' - ), - webapp2.Route( - _format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:files>'), - listhandler.FileListHandler, name='files_post', methods=['POST'] - ), - webapp2.Route( - _format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:files>/<filename:{filename_re}>'), - listhandler.FileListHandler, name='files' - ), - webapp2.Route( - _format(r'/api/<cont_name:collections|projects>/<cid:{cid_re}>/<list_name:permissions>'), - listhandler.PermissionsListHandler, name='perms_post', methods=['POST'] - ), - webapp2.Route( - _format( - r'/api/<cont_name:collections|projects>/<cid:{cid_re}>' + - r'/<list_name:permissions>/<site:{site_re}>/<_id:{user_id_re}>' - ), - listhandler.PermissionsListHandler, name='perms' - ), - webapp2.Route( - _format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:notes>'), - listhandler.NotesListHandler, name='notes_post', methods=['POST'] - ), - webapp2.Route( - _format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:notes>/<_id:{note_id_re}>'), - listhandler.NotesListHandler, name='notes' - ), - - webapp2.Route( - r'/api/collections/curators', - collectionshandler.CollectionsHandler, handler_method='curators', methods=['GET'] - ), - webapp2.Route( - r'/api/<cont_name:collections>', - collectionshandler.CollectionsHandler, name='colls', handler_method='get_all', methods=['GET'] - ), - webapp2.Route( - r'/api/<cont_name:collections>', - collectionshandler.CollectionsHandler, methods=['POST'] - ), - webapp2.Route( - _format(r'/api/<cont_name:collections>/<cid:{cid_re}>'), - collectionshandler.CollectionsHandler, name='coll_details', methods=['GET', 'PUT', 'DELETE'] - ), - webapp2.Route( - _format(r'/api/<cont_name:collections>/<cid:{cid_re}>/sessions'), - collectionshandler.CollectionsHandler, name='coll_ses', handler_method='get_sessions', methods=['GET'] - ), - webapp2.Route( - _format(r'/api/<cont_name:collections>/<cid:{cid_re}>/acquisitions'), - collectionshandler.CollectionsHandler, name='coll_acq', handler_method='get_acquisitions', methods=['GET'] - ), - - webapp2.Route( - _format(r'/api/users/<uid:{user_id_re}>/<cont_name:{cont_name_re}>'), - containerhandler.ContainerHandler, name='user_conts', handler_method='get_all_for_user', methods=['GET'] - ), - webapp2.Route( - r'/api/projects/groups', - containerhandler.ContainerHandler, handler_method='get_groups_with_project', methods=['GET'] - ), - webapp2.Route( - _format(r'/api/<par_cont_name:groups>/<par_id:{group_id_re}>/<cont_name:projects>'), - containerhandler.ContainerHandler, name='cont_sublist', handler_method='get_all', methods=['GET'] - ), - webapp2.Route( - _format(r'/api/<par_cont_name:{cont_name_re}>/<par_id:{cid_re}>/<cont_name:{cont_name_re}>'), - containerhandler.ContainerHandler, name='cont_sublist', handler_method='get_all', methods=['GET'] - ), - webapp2.Route( - _format(r'/api/<cont_name:{cont_name_re}>'), - containerhandler.ContainerHandler, name='cont_list', handler_method='get_all', methods=['GET'] - ), - webapp2.Route( - _format(r'/api/<cont_name:{cont_name_re}>'), - containerhandler.ContainerHandler, methods=['POST'] - ), - webapp2.Route( - _format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>'), - containerhandler.ContainerHandler, name='cont_details', methods=['GET','PUT','DELETE'] - ), + webapp2.Route(r'/api/groups', grouphandler.GroupHandler, handler_method='get_all', methods=['GET']), + webapp2.Route(r'/api/groups', grouphandler.GroupHandler, methods=['POST']), + webapp2.Route(_format(r'/api/groups/<_id:{group_id_re}>'), grouphandler.GroupHandler, name='group'), + + webapp2.Route(_format(r'/api/<cont_name:groups>/<cid:{group_id_re}>/<list_name:roles>'), listhandler.ListHandler, name='group'), + webapp2.Route(_format(r'/api/<cont_name:groups>/<cid:{group_id_re}>/<list_name:roles>/<site:{site_re}>/<_id:{user_id_re}>'), listhandler.ListHandler, name='group', methods=['GET', 'PUT', 'DELETE']), + + webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:tags>'), listhandler.ListHandler, methods=['POST'], name='tags_post'), + webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:tags>/<value:{tag_re}>'), listhandler.ListHandler, name='tags'), + + webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:files>'), listhandler.FileListHandler, name='files_post', methods=['POST']), + webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:files>/<filename:{filename_re}>'), listhandler.FileListHandler, name='files'), + + webapp2.Route(_format(r'/api/<cont_name:collections|projects>/<cid:{cid_re}>/<list_name:permissions>'), listhandler.PermissionsListHandler, name='perms_post', methods=['POST']), + webapp2.Route(_format(r'/api/<cont_name:collections|projects>/<cid:{cid_re}>/<list_name:permissions>/<site:{site_re}>/<_id:{user_id_re}>'), listhandler.PermissionsListHandler, name='perms'), + + webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:notes>'), listhandler.NotesListHandler, name='notes_post', methods=['POST']), + webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:notes>/<_id:{note_id_re}>'), listhandler.NotesListHandler, name='notes'), + + webapp2.Route(r'/api/collections/curators', collectionshandler.CollectionsHandler, handler_method='curators', methods=['GET']), + webapp2.Route(r'/api/<cont_name:collections>', collectionshandler.CollectionsHandler, name='colls', handler_method='get_all', methods=['GET']), + webapp2.Route(r'/api/<cont_name:collections>', collectionshandler.CollectionsHandler, methods=['POST']), + + webapp2.Route(_format(r'/api/<cont_name:collections>/<cid:{cid_re}>'), collectionshandler.CollectionsHandler, name='coll_details', methods=['GET', 'PUT', 'DELETE']), + webapp2.Route(_format(r'/api/<cont_name:collections>/<cid:{cid_re}>/sessions'), collectionshandler.CollectionsHandler, name='coll_ses', handler_method='get_sessions', methods=['GET']), + webapp2.Route(_format(r'/api/<cont_name:collections>/<cid:{cid_re}>/acquisitions'), collectionshandler.CollectionsHandler, name='coll_acq', handler_method='get_acquisitions', methods=['GET']), + + webapp2.Route(_format(r'/api/users/<uid:{user_id_re}>/<cont_name:{cont_name_re}>'), containerhandler.ContainerHandler, name='user_conts', handler_method='get_all_for_user', methods=['GET']), + + webapp2.Route(r'/api/projects/groups', containerhandler.ContainerHandler, handler_method='get_groups_with_project', methods=['GET']), + + webapp2.Route(_format(r'/api/<par_cont_name:groups>/<par_id:{group_id_re}>/<cont_name:projects>'), containerhandler.ContainerHandler, name='cont_sublist', handler_method='get_all', methods=['GET']), + webapp2.Route(_format(r'/api/<par_cont_name:{cont_name_re}>/<par_id:{cid_re}>/<cont_name:{cont_name_re}>'), containerhandler.ContainerHandler, name='cont_sublist', handler_method='get_all', methods=['GET']), + + webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>'), containerhandler.ContainerHandler, name='cont_list', handler_method='get_all', methods=['GET']), + webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>'), containerhandler.ContainerHandler, methods=['POST']), + webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>'), containerhandler.ContainerHandler, name='cont_details', methods=['GET','PUT','DELETE']), ] diff --git a/api/dao/containerstorage.py b/api/dao/containerstorage.py index 6a26201e..832cbf7b 100644 --- a/api/dao/containerstorage.py +++ b/api/dao/containerstorage.py @@ -13,12 +13,14 @@ log = logging.getLogger('scitran.api') class ContainerStorage(object): """ - This class provides access to sublists of mongodb collection elements (called containers). + This class provides access to mongodb collection elements (called containers). + It is used by ContainerHandler istances for get, create, update and delete operations on containers. + Examples: projects, sessions, acquisitions and collections """ - def __init__(self, cont_name, use_oid = False): + def __init__(self, cont_name, use_object_id = False): self.cont_name = cont_name - self.use_oid = use_oid + self.use_object_id = use_object_id self.dbc = mongo.db[cont_name] def get_container(self, _id): @@ -50,7 +52,7 @@ class ContainerStorage(object): update = { '$set': util.mongo_dict(payload) } - if self.use_oid: + if self.use_object_id: try: _id = bson.objectid.ObjectId(_id) except bson.errors.InvalidId as e: @@ -58,7 +60,7 @@ class ContainerStorage(object): return self.dbc.update_one({'_id': _id}, update) def _delete_el(self, _id): - if self.use_oid: + if self.use_object_id: try: _id = bson.objectid.ObjectId(_id) except bson.errors.InvalidId as e: @@ -66,7 +68,7 @@ class ContainerStorage(object): return self.dbc.delete_one({'_id':_id}) def _get_el(self, _id, projection=None): - if self.use_oid: + if self.use_object_id: try: _id = bson.objectid.ObjectId(_id) except bson.errors.InvalidId as e: diff --git a/api/dao/liststorage.py b/api/dao/liststorage.py index 6d8f8fcb..dfa319db 100644 --- a/api/dao/liststorage.py +++ b/api/dao/liststorage.py @@ -14,12 +14,14 @@ log = logging.getLogger('scitran.api') class ListStorage(object): """ This class provides access to sublists of a mongodb collections elements (called containers). + It is used by ListHandler istances for get, create, update and delete operations on sublist of the containers. + Examples: permissions in projects, roles in groups, notes in projects, sessions, acquisitions, etc """ - def __init__(self, cont_name, list_name, use_oid = False): + def __init__(self, cont_name, list_name, use_object_id = False): self.cont_name = cont_name self.list_name = list_name - self.use_oid = use_oid + self.use_object_id = use_object_id self.dbc = mongo.db[cont_name] def get_container(self, _id, query_params=None): @@ -31,7 +33,7 @@ class ListStorage(object): For simplicity we load its full content. """ - if self.use_oid: + if self.use_object_id: _id = bson.objectid.ObjectId(_id) query = {'_id': _id} projection = None @@ -48,7 +50,7 @@ class ListStorage(object): Generic method to exec an operation. The request is dispatched to the corresponding private methods. """ - if self.use_oid: + if self.use_object_id: try: _id = bson.objectid.ObjectId(_id) except bson.errors.InvalidId as e: @@ -114,11 +116,15 @@ class ListStorage(object): class StringListStorage(ListStorage): + """ + This class provides access to string sublists of a mongodb collections elements (called containers). + The difference with other sublists is that the elements are not object but strings. + """ def get_container(self, _id, query_params=None): if self.dbc is None: raise RuntimeError('collection not initialized before calling get_container') - if self.use_oid: + if self.use_object_id: try: _id = bson.objectid.ObjectId(_id) except bson.errors.InvalidId as e: diff --git a/api/files.py b/api/files.py index 16eae437..5439469a 100644 --- a/api/files.py +++ b/api/files.py @@ -32,16 +32,17 @@ class FileRequest(object): self.mimetype = util.guess_mimetype(filename) self.filetype = util.guess_filetype(filename, self.mimetype) - def save_temp_file(self, tempdir_path): + def save_temp_file(self, tempdir_path, handler): self.tempdir_path = tempdir_path success, duration = self._save_temp_file(self.tempdir_path) if not success: - self.abort(400, 'Content-MD5 mismatch.') + return False throughput = self.filesize / duration.total_seconds() log.info('Received %s [%s, %s/s] from %s' % ( self.filename, util.hrsize(self.filesize), util.hrsize(throughput), self.client_addr)) + return success def move_temp_file(self, container_path): target_filepath = container_path + '/' + self.filename diff --git a/api/handlers/collectionshandler.py b/api/handlers/collectionshandler.py index 2b049889..127e31f2 100644 --- a/api/handlers/collectionshandler.py +++ b/api/handlers/collectionshandler.py @@ -20,8 +20,8 @@ class CollectionsHandler(ContainerHandler): container_handler_configurations['collections'] = { 'permchecker': containerauth.collection_permissions, - 'storage': containerstorage.ContainerStorage('collections', use_oid=True), - 'mongo_schema_file': 'mongo/collection.json', + 'storage': containerstorage.ContainerStorage('collections', use_object_id=True), + 'storage_schema_file': 'mongo/collection.json', 'payload_schema_file': 'input/collection.json', 'list_projection': {'metadata': 0} } diff --git a/api/handlers/containerhandler.py b/api/handlers/containerhandler.py index 8aac36d6..a3bb2bd4 100644 --- a/api/handlers/containerhandler.py +++ b/api/handlers/containerhandler.py @@ -35,7 +35,7 @@ class ContainerHandler(base.RequestHandler): Specific behaviors (permissions checking logic for authenticated and not superuser users, storage interaction) are specified in the container_handler_configurations """ - use_oid = { + use_object_id = { 'groups': False, 'projects': True, 'sessions': True, @@ -48,30 +48,31 @@ class ContainerHandler(base.RequestHandler): # # "children_cont" represents the children container. # "list projection" is used to filter data in mongo. + # "use_object_id" implies that the container ids are converted to ObjectId container_handler_configurations = { 'projects': { - 'storage': containerstorage.ContainerStorage('projects', use_oid=use_oid['projects']), + 'storage': containerstorage.ContainerStorage('projects', use_object_id=use_object_id['projects']), 'permchecker': containerauth.default_container, - 'parent_storage': containerstorage.ContainerStorage('groups', use_oid=use_oid['groups']), - 'mongo_schema_file': 'mongo/project.json', + 'parent_storage': containerstorage.ContainerStorage('groups', use_object_id=use_object_id['groups']), + 'storage_schema_file': 'mongo/project.json', 'payload_schema_file': 'input/project.json', 'list_projection': {'metadata': 0}, 'children_cont': 'sessions' }, 'sessions': { - 'storage': containerstorage.ContainerStorage('sessions', use_oid=use_oid['sessions']), + 'storage': containerstorage.ContainerStorage('sessions', use_object_id=use_object_id['sessions']), 'permchecker': containerauth.default_container, - 'parent_storage': containerstorage.ContainerStorage('projects', use_oid=use_oid['projects']), - 'mongo_schema_file': 'mongo/session.json', + 'parent_storage': containerstorage.ContainerStorage('projects', use_object_id=use_object_id['projects']), + 'storage_schema_file': 'mongo/session.json', 'payload_schema_file': 'input/session.json', 'list_projection': {'metadata': 0}, 'children_cont': 'acquisitions' }, 'acquisitions': { - 'storage': containerstorage.ContainerStorage('acquisitions', use_oid=use_oid['acquisitions']), + 'storage': containerstorage.ContainerStorage('acquisitions', use_object_id=use_object_id['acquisitions']), 'permchecker': containerauth.default_container, - 'parent_storage': containerstorage.ContainerStorage('sessions', use_oid=use_oid['sessions']), - 'mongo_schema_file': 'mongo/acquisition.json', + 'parent_storage': containerstorage.ContainerStorage('sessions', use_object_id=use_object_id['sessions']), + 'storage_schema_file': 'mongo/acquisition.json', 'payload_schema_file': 'input/acquisition.json', 'list_projection': {'metadata': 0} } @@ -128,7 +129,7 @@ class ContainerHandler(base.RequestHandler): if par_cont_name: if not par_id: self.abort(500, 'par_id is required when par_cont_name is provided') - if self.use_oid.get(par_cont_name): + if self.use_object_id.get(par_cont_name): if not bson.ObjectId.is_valid(par_id): self.abort(400, 'not a valid object id') par_id = bson.ObjectId(par_id) @@ -265,10 +266,11 @@ class ContainerHandler(base.RequestHandler): if payload['permissions'] is None: payload['permissions'] = target_parent_container['permissions'] - permchecker = self._get_permchecker(container, target_parent_container) payload['modified'] = datetime.datetime.utcnow() if payload.get('timestamp'): payload['timestamp'] = dateutil.parser.parse(payload['timestamp']) + + permchecker = self._get_permchecker(container, target_parent_container) try: # This line exec the actual request validating the payload that will update the container # and checking permissions using respectively the two decorators, mongo_validator and permchecker @@ -308,7 +310,7 @@ class ContainerHandler(base.RequestHandler): def _get_validators(self): - mongo_validator = validators.mongo_from_schema_file(self, self.config.get('mongo_schema_file')) + mongo_validator = validators.mongo_from_schema_file(self, self.config.get('storage_schema_file')) payload_validator = validators.payload_from_schema_file(self, self.config.get('payload_schema_file')) return mongo_validator, payload_validator diff --git a/api/handlers/grouphandler.py b/api/handlers/grouphandler.py index aa10427f..a97d8173 100644 --- a/api/handlers/grouphandler.py +++ b/api/handlers/grouphandler.py @@ -80,7 +80,7 @@ class GroupHandler(base.RequestHandler): self.abort(404, 'User {} not updated'.format(_id)) def _init_storage(self): - self.storage = containerstorage.ContainerStorage('groups', use_oid=False) + self.storage = containerstorage.ContainerStorage('groups', use_object_id=False) def _get_group(self, _id): group = self.storage.get_container(_id) diff --git a/api/handlers/listhandler.py b/api/handlers/listhandler.py index a40a88b3..f8242e10 100644 --- a/api/handlers/listhandler.py +++ b/api/handlers/listhandler.py @@ -21,37 +21,37 @@ def initialize_list_configurations(): This configurations are used by the ListHandler class to load the storage, the permissions checker and the json schema validators used to handle a request. - "use_oid" implies that the container ids is converted to ObjectId + "use_object_id" implies that the container ids are converted to ObjectId "get_full_container" allows the handler to load the full content of the container and not only the sublist element (this is used for permissions for example) """ container_default_configurations = { 'tags': { 'storage': liststorage.StringListStorage, 'permchecker': listauth.default_sublist, - 'use_oid': True, - 'mongo_schema_file': 'mongo/tag.json', + 'use_object_id': True, + 'storage_schema_file': 'mongo/tag.json', 'input_schema_file': 'input/tag.json' }, 'files': { 'storage': liststorage.ListStorage, 'permchecker': listauth.default_sublist, - 'use_oid': True, - 'mongo_schema_file': 'mongo/file.json', + 'use_object_id': True, + 'storage_schema_file': 'mongo/file.json', 'input_schema_file': 'input/file.json' }, 'permissions': { 'storage': liststorage.ListStorage, 'permchecker': listauth.permissions_sublist, - 'use_oid': True, + 'use_object_id': True, 'get_full_container': True, - 'mongo_schema_file': 'mongo/permission.json', + 'storage_schema_file': 'mongo/permission.json', 'input_schema_file': 'input/permission.json' }, 'notes': { 'storage': liststorage.ListStorage, 'permchecker': listauth.notes_sublist, - 'use_oid': True, - 'mongo_schema_file': 'mongo/note.json', + 'use_object_id': True, + 'storage_schema_file': 'mongo/note.json', 'input_schema_file': 'input/note.json' }, } @@ -60,9 +60,9 @@ def initialize_list_configurations(): 'roles':{ 'storage': liststorage.ListStorage, 'permchecker': listauth.group_roles_sublist, - 'use_oid': False, + 'use_object_id': False, 'get_full_container': True, - 'mongo_schema_file': 'mongo/permission.json', + 'storage_schema_file': 'mongo/permission.json', 'input_schema_file': 'input/permission.json' } }, @@ -78,7 +78,7 @@ def initialize_list_configurations(): storage = storage_class( cont_name, list_name, - use_oid=list_config.get('use_oid', False) + use_object_id=list_config.get('use_object_id', False) ) list_config['storage'] = storage return list_handler_configurations @@ -180,9 +180,9 @@ class ListHandler(base.RequestHandler): permchecker = permchecker(self, container) else: self.abort(404, 'Element {} not found in container {}'.format(_id, storage.cont_name)) - mongo_validator = validators.mongo_from_schema_file(self, config.get('mongo_schema_file')) + mongo_validator = validators.mongo_from_schema_file(self, config.get('storage_schema_file')) input_validator = validators.payload_from_schema_file(self, config.get('payload_schema_file')) - keycheck = validators.key_check(self, config.get('mongo_schema_file')) + keycheck = validators.key_check(self, config.get('storage_schema_file')) return container, permchecker, storage, mongo_validator, input_validator, keycheck @@ -357,11 +357,6 @@ class FileListHandler(ListHandler): result['removed'] = 0 return result - def put(self, cont_name, list_name, **kwargs): - fileinfo = super(FileListHandler, self).get(cont_name, list_name, **kwargs) - # TODO: implement file metadata updates - self.abort(400, 'PUT is not yet implemented') - def post(self, cont_name, list_name, **kwargs): force = self.is_true('force') _id = kwargs.pop('cid') @@ -371,7 +366,9 @@ class FileListHandler(ListHandler): file_request = files.FileRequest.from_handler(self, filename) result = None with tempfile.TemporaryDirectory(prefix='.tmp', dir=self.app.config['upload_path']) as tempdir_path: - file_request.save_temp_file(tempdir_path) + success = file_request.save_temp_file(tempdir_path) + if not success: + self.abort(400, 'Content-MD5 mismatch.') file_datetime = datetime.datetime.utcnow() file_properties = { 'name': file_request.filename, diff --git a/api/handlers/userhandler.py b/api/handlers/userhandler.py index 05224d8f..7e04992b 100644 --- a/api/handlers/userhandler.py +++ b/api/handlers/userhandler.py @@ -40,9 +40,6 @@ class UserHandler(base.RequestHandler): self.abort(400, 'no user is logged in') return user - def roles(self): - return ROLES - def get_all(self): self._init_storage() permchecker = userauth.list_permission_checker(self) @@ -99,7 +96,7 @@ class UserHandler(base.RequestHandler): self.abort(404, 'User {} not updated'.format(_id)) def _init_storage(self): - self.storage = containerstorage.ContainerStorage('users', use_oid=False) + self.storage = containerstorage.ContainerStorage('users', use_object_id=False) def _get_user(self, _id): user = self.storage.get_container(_id) diff --git a/api/mongo.py b/api/mongo.py index efd8ef7e..fd6ef7e2 100644 --- a/api/mongo.py +++ b/api/mongo.py @@ -1,26 +1,32 @@ import pymongo import datetime +import time db = None def configure_db(db_uri, site_id, site_name, api_uri): global db - db = pymongo.MongoClient(db_uri).get_default_database() - # TODO jobs indexes - # TODO review all indexes - db.projects.create_index([('gid', 1), ('name', 1)]) - db.sessions.create_index('project') - db.sessions.create_index('uid') - db.acquisitions.create_index('session') - db.acquisitions.create_index('uid') - db.acquisitions.create_index('collections') - db.authtokens.create_index('timestamp', expireAfterSeconds=600) - db.uploads.create_index('timestamp', expireAfterSeconds=60) - db.downloads.create_index('timestamp', expireAfterSeconds=60) - - now = datetime.datetime.utcnow() - db.groups.update_one({'_id': 'unknown'}, {'$setOnInsert': { 'created': now, 'modified': now, 'name': 'Unknown', 'roles': []}}, upsert=True) - db.sites.replace_one({'_id': site_id}, {'name': site_name, 'api_uri': api_uri}, upsert=True) - + for i in range(0,3): + try: + db = pymongo.MongoClient(db_uri).get_default_database() + # TODO jobs indexes + # TODO review all indexes + db.projects.create_index([('gid', 1), ('name', 1)]) + db.sessions.create_index('project') + db.sessions.create_index('uid') + db.acquisitions.create_index('session') + db.acquisitions.create_index('uid') + db.acquisitions.create_index('collections') + db.authtokens.create_index('timestamp', expireAfterSeconds=600) + db.uploads.create_index('timestamp', expireAfterSeconds=60) + db.downloads.create_index('timestamp', expireAfterSeconds=60) + now = datetime.datetime.utcnow() + db.groups.update_one({'_id': 'unknown'}, {'$setOnInsert': { 'created': now, 'modified': now, 'name': 'Unknown', 'roles': []}}, upsert=True) + db.sites.replace_one({'_id': site_id}, {'name': site_name, 'api_uri': api_uri}, upsert=True) + return + except Exception as e: + db = None + time.sleep(5) + raise e diff --git a/api/util.py b/api/util.py index fc58a1ef..37738dfb 100644 --- a/api/util.py +++ b/api/util.py @@ -114,6 +114,8 @@ def commit_file(dbc, _id, datainfo, filepath, data_path, force=False): log.debug('Replacing %s' % filename) shutil.move(filepath, target_filepath) update_set = {'files.$.dirty': True, 'files.$.modified': datetime.datetime.utcnow()} + # in this branch of the code, we are overriding an existing file. + # update_set allows to update all the fileinfo like size, hash, etc. for k,v in fileinfo.iteritems(): update_set['files.$.' + k] = v dbc.update_one({'_id':_id, 'files.filename': fileinfo['filename']}, diff --git a/api/validators.py b/api/validators.py index b84a0661..ed70826a 100644 --- a/api/validators.py +++ b/api/validators.py @@ -104,6 +104,17 @@ def payload_from_schema_file(handler, schema_file): return g def key_check(handler, schema_file): + """ + for sublists of mongo container there is no automatic key check when creating, updating or deleting an object. + We are adding a custom array field to the json schemas ("key_fields"). + The uniqueness is checked on the combination of all the elements of "key_fields". + For an example check api/schemas/input/permission.json + + So this method ensures that: + 1. after a POST and PUT request we don't have two items with the same values for the key set + 2. a GET will retrieve a single item + 3. a DELETE (most importantly) will delete a single item + """ if schema_file is None: return no_op schema = resolver.resolve(schema_file)[1] diff --git a/bin/api.wsgi b/bin/api.wsgi index 55993ebb..5f9fdbc8 100644 --- a/bin/api.wsgi +++ b/bin/api.wsgi @@ -41,6 +41,7 @@ args.upload_path = os.path.join(args.data_path, 'upload') from api import mongo mongo.configure_db(args.db_uri, args.site_id, args.site_name, args.api_uri) +# imports delayed after mongo has been fully initialized from api.util import log from api import api from api import centralclient, jobs diff --git a/test_bootstrap.json b/test_bootstrap.json index 08771343..c997fb0a 100644 --- a/test_bootstrap.json +++ b/test_bootstrap.json @@ -5,17 +5,7 @@ "name": "Scientific Transparency", "roles": [ { - "_id": "rfrigato@stanford.edu", - "access": "admin" - } - ] - }, - { - "_id": "unknown", - "name": "Unknown", - "roles": [ - { - "_id": "rfrigato@stanford.edu", + "_id": "admin@user.com", "access": "admin" } ] @@ -23,17 +13,17 @@ ], "users": [ { - "_id": "rfrigato@stanford.edu", - "email": "rfrigato@stanford.edu", - "firstname": "Renzo", - "lastname": "Frigato", + "_id": "admin@user.com", + "email": "admin@user.com", + "firstname": "Admin", + "lastname": "User", "root": true }, { - "_id": "renzo.frigato@gmail.com", - "email": "renzo.frigato@gmail.com", - "firstname": "Renzo", - "lastname": "Frigato", + "_id": "test@user.com", + "email": "test@user.com", + "firstname": "Test", + "lastname": "User", "root": true } ] diff --git a/test_integ/test_collection.py b/test_integ/test_collection.py index a2349cd3..1a0e444f 100644 --- a/test_integ/test_collection.py +++ b/test_integ/test_collection.py @@ -64,7 +64,7 @@ def teardown_db(): @with_setup(setup_db, teardown_db) -def test_sequence(): +def test_collections(): payload = { 'curator': 'admin@user.com', 'label': 'SciTran/Testing', diff --git a/test_integ/test_users.py b/test_integ/test_users.py index 69912a39..57b30b06 100644 --- a/test_integ/test_users.py +++ b/test_integ/test_users.py @@ -10,14 +10,14 @@ requests.packages.urllib3.disable_warnings() base_url = 'https://localhost:8443/api' def test_users(): - _id = 'test@user.com' + _id = 'new@user.com' r = requests.get(base_url + '/users/self?user=admin@user.com', verify=False) assert r.ok r = requests.get(base_url + '/users/' + _id + '?user=admin@user.com&root=true', verify=False) assert r.status_code == 404 payload = { '_id': _id, - 'firstname': 'Test', + 'firstname': 'New', 'lastname': 'User', } payload = json.dumps(payload) @@ -26,7 +26,7 @@ def test_users(): r = requests.get(base_url + '/users/' + _id + '?user=admin@user.com&root=true', verify=False) assert r.ok payload = { - 'firstname': 'New' + 'firstname': 'Realname' } payload = json.dumps(payload) r = requests.put(base_url + '/users/' + _id + '?user=admin@user.com&root=true', data=payload, verify=False) -- GitLab