From 7d7a9acddad021c08fc6843dad0c68c636c48aab Mon Sep 17 00:00:00 2001
From: Renzo Frigato <rfrigato@stanford.edu>
Date: Thu, 12 Nov 2015 17:16:52 -0800
Subject: [PATCH] improvements after code review

---
 api/api.py                         | 240 +++++++++--------------------
 api/dao/containerstorage.py        |  14 +-
 api/dao/liststorage.py             |  16 +-
 api/files.py                       |   5 +-
 api/handlers/collectionshandler.py |   4 +-
 api/handlers/containerhandler.py   |  28 ++--
 api/handlers/grouphandler.py       |   2 +-
 api/handlers/listhandler.py        |  37 ++---
 api/handlers/userhandler.py        |   5 +-
 api/mongo.py                       |  40 +++--
 api/util.py                        |   2 +
 api/validators.py                  |  11 ++
 bin/api.wsgi                       |   1 +
 test_bootstrap.json                |  28 ++--
 test_integ/test_collection.py      |   2 +-
 test_integ/test_users.py           |   6 +-
 16 files changed, 179 insertions(+), 262 deletions(-)

diff --git a/api/api.py b/api/api.py
index 45c92ba2..4e1233f6 100644
--- a/api/api.py
+++ b/api/api.py
@@ -17,13 +17,33 @@ from handlers import collectionshandler
 
 #regexes used in routing table:
 routing_regexes = {
+    # group id regex
+    # length between 2 and 32 characters
+    # allowed characters are [0-9a-z.@_-] (start and ends only with [0-9a-z])
     'group_id_re': '[0-9a-z][0-9a-z.@_-]{0,30}[0-9a-z]',
+    # container id regex
+    # hexadecimal string exactly of length 24
     'cid_re': '[0-9a-f]{24}',
+    # site id regex
+    # length less than 24 characters
+    # allowed characters are [0-9a-z]
     'site_re': '[0-9a-z]{0,24}',
+    # user id regex
+    # any length, allowed chars are [0-9a-z.@_-]
     'user_id_re': '[0-9a-z.@_-]*',
+    # container name regex
+    # possible values are projects, sessions, acquisitions or collections
     'cont_name_re': 'projects|sessions|acquisitions|collections',
+    # tag regex
+    # length between 3 and 24 characters
+    # any character allowed except '/''
     'tag_re': '[^/]{3,24}',
+    # filename regex
+    # length between 3 and 60 characters
+    # any character allowed except '/'
     'filename_re': '[^/]{3,60}',
+    # note id regex
+    # hexadecimal string exactly of length 24
     'note_id_re': '[0-9a-f]{24}'
 }
 
@@ -31,181 +51,63 @@ def _format(route):
     return route.format(**routing_regexes)
 
 routes = [
-    webapp2.Route(
-        r'/api',
-        core.Core
-    ),
+    webapp2.Route(r'/api',                  core.Core),
     webapp2_extras.routes.PathPrefixRoute(r'/api', [
-        webapp2.Route(
-            r'/download',
-            core.Core, handler_method='download', methods=['GET', 'POST'], name='download'
-        ),
-        webapp2.Route(
-            r'/reaper',
-            core.Core, handler_method='reaper', methods=['POST']
-        ),
-        webapp2.Route(
-            r'/sites',
-            core.Core, handler_method='sites', methods=['GET']
-        )
+        webapp2.Route(r'/download',         core.Core, handler_method='download', methods=['GET', 'POST'], name='download'),
+        webapp2.Route(r'/reaper',           core.Core, handler_method='reaper', methods=['POST']),
+        webapp2.Route(r'/sites',            core.Core, handler_method='sites', methods=['GET'])
     ]),
-    webapp2.Route(
-        r'/api/users',
-        userhandler.UserHandler, handler_method='get_all', methods=['GET']
-    ),
-    webapp2.Route(
-        r'/api/users',
-        userhandler.UserHandler, methods=['POST']
-    ),
+    webapp2.Route(r'/api/users',            userhandler.UserHandler, handler_method='get_all', methods=['GET']),
+    webapp2.Route(r'/api/users',            userhandler.UserHandler, methods=['POST']),
     webapp2_extras.routes.PathPrefixRoute(r'/api/users', [
-        webapp2.Route(
-            r'/self',
-            userhandler.UserHandler, handler_method='self', methods=['GET']
-        ),
-        webapp2.Route(
-            r'/roles',
-            userhandler.UserHandler, handler_method='roles', methods=['GET']
-        ),
-        webapp2.Route(
-            _format(r'/<_id:{user_id_re}>'),
-            userhandler.UserHandler, name='user'
-        ),
-        webapp2.Route(
-            _format(r'/<uid:{user_id_re}>/groups'),
-            grouphandler.GroupHandler, handler_method='get_all', methods=['GET'], name='groups'
-        ),
+        webapp2.Route(r'/self',                                 userhandler.UserHandler, handler_method='self', methods=['GET']),
+        webapp2.Route(_format(r'/<_id:{user_id_re}>'),          userhandler.UserHandler, name='user'),
+        webapp2.Route(_format(r'/<uid:{user_id_re}>/groups'),   grouphandler.GroupHandler, handler_method='get_all', methods=['GET'], name='groups'),
     ]),
-    webapp2.Route(
-        r'/api/jobs',
-        jobs.Jobs
-    ),
+    webapp2.Route(r'/api/jobs',             jobs.Jobs),
     webapp2_extras.routes.PathPrefixRoute(r'/api/jobs', [
-        webapp2.Route(
-            r'/next',
-            jobs.Jobs, handler_method='next', methods=['GET']
-        ),
-        webapp2.Route(
-            r'/count',
-            jobs.Jobs, handler_method='count', methods=['GET']
-        ),
-        webapp2.Route(
-            r'/addTestJob',
-            jobs.Jobs, handler_method='addTestJob', methods=['GET']
-        ),
-        webapp2.Route(
-            r'/<:[^/]+>',
-            jobs.Job,  name='job'
-        ),
+        webapp2.Route(r'/next',             jobs.Jobs, handler_method='next', methods=['GET']),
+        webapp2.Route(r'/count',            jobs.Jobs, handler_method='count', methods=['GET']),
+        webapp2.Route(r'/addTestJob',       jobs.Jobs, handler_method='addTestJob', methods=['GET']),
+        webapp2.Route(r'/<:[^/]+>',         jobs.Job,  name='job'),
     ]),
-    webapp2.Route(
-        r'/api/groups',
-        grouphandler.GroupHandler, handler_method='get_all', methods=['GET']
-    ),
-    webapp2.Route(
-        r'/api/groups',
-        grouphandler.GroupHandler, methods=['POST']
-    ),
-    webapp2.Route(
-        _format(r'/api/groups/<_id:{group_id_re}>'),
-        grouphandler.GroupHandler, name='group'
-    ),
-    webapp2.Route(
-        _format(r'/api/<cont_name:groups>/<cid:{group_id_re}>/<list_name:roles>'),
-        listhandler.ListHandler, name='group'
-    ),
-    webapp2.Route(
-        _format(r'/api/<cont_name:groups>/<cid:{group_id_re}>/<list_name:roles>/<site:{site_re}>/<_id:{user_id_re}>'),
-        listhandler.ListHandler, name='group', methods=['GET', 'PUT', 'DELETE']
-    ),
-    webapp2.Route(
-        _format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:tags>'),
-        listhandler.ListHandler, methods=['POST'], name='tags_post'
-    ),
-    webapp2.Route(
-        _format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:tags>/<value:{tag_re}>'),
-        listhandler.ListHandler, name='tags'
-    ),
-    webapp2.Route(
-        _format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:files>'),
-        listhandler.FileListHandler, name='files_post', methods=['POST']
-    ),
-    webapp2.Route(
-        _format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:files>/<filename:{filename_re}>'),
-        listhandler.FileListHandler, name='files'
-    ),
-    webapp2.Route(
-        _format(r'/api/<cont_name:collections|projects>/<cid:{cid_re}>/<list_name:permissions>'),
-        listhandler.PermissionsListHandler, name='perms_post', methods=['POST']
-    ),
-    webapp2.Route(
-        _format(
-            r'/api/<cont_name:collections|projects>/<cid:{cid_re}>' +
-            r'/<list_name:permissions>/<site:{site_re}>/<_id:{user_id_re}>'
-        ),
-        listhandler.PermissionsListHandler, name='perms'
-    ),
-    webapp2.Route(
-        _format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:notes>'),
-        listhandler.NotesListHandler, name='notes_post', methods=['POST']
-    ),
-    webapp2.Route(
-        _format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:notes>/<_id:{note_id_re}>'),
-        listhandler.NotesListHandler, name='notes'
-    ),
-
-    webapp2.Route(
-        r'/api/collections/curators',
-        collectionshandler.CollectionsHandler, handler_method='curators', methods=['GET']
-    ),
-    webapp2.Route(
-        r'/api/<cont_name:collections>',
-        collectionshandler.CollectionsHandler, name='colls', handler_method='get_all', methods=['GET']
-    ),
-    webapp2.Route(
-        r'/api/<cont_name:collections>',
-        collectionshandler.CollectionsHandler, methods=['POST']
-    ),
-    webapp2.Route(
-        _format(r'/api/<cont_name:collections>/<cid:{cid_re}>'),
-        collectionshandler.CollectionsHandler, name='coll_details', methods=['GET', 'PUT', 'DELETE']
-    ),
-    webapp2.Route(
-        _format(r'/api/<cont_name:collections>/<cid:{cid_re}>/sessions'),
-        collectionshandler.CollectionsHandler, name='coll_ses', handler_method='get_sessions', methods=['GET']
-    ),
-    webapp2.Route(
-        _format(r'/api/<cont_name:collections>/<cid:{cid_re}>/acquisitions'),
-        collectionshandler.CollectionsHandler, name='coll_acq', handler_method='get_acquisitions', methods=['GET']
-    ),
-
-    webapp2.Route(
-        _format(r'/api/users/<uid:{user_id_re}>/<cont_name:{cont_name_re}>'),
-        containerhandler.ContainerHandler, name='user_conts', handler_method='get_all_for_user', methods=['GET']
-    ),
-    webapp2.Route(
-        r'/api/projects/groups',
-        containerhandler.ContainerHandler, handler_method='get_groups_with_project', methods=['GET']
-    ),
-    webapp2.Route(
-        _format(r'/api/<par_cont_name:groups>/<par_id:{group_id_re}>/<cont_name:projects>'),
-        containerhandler.ContainerHandler, name='cont_sublist', handler_method='get_all', methods=['GET']
-    ),
-    webapp2.Route(
-        _format(r'/api/<par_cont_name:{cont_name_re}>/<par_id:{cid_re}>/<cont_name:{cont_name_re}>'),
-        containerhandler.ContainerHandler, name='cont_sublist', handler_method='get_all', methods=['GET']
-    ),
-    webapp2.Route(
-        _format(r'/api/<cont_name:{cont_name_re}>'),
-        containerhandler.ContainerHandler, name='cont_list', handler_method='get_all', methods=['GET']
-    ),
-    webapp2.Route(
-        _format(r'/api/<cont_name:{cont_name_re}>'),
-        containerhandler.ContainerHandler, methods=['POST']
-    ),
-    webapp2.Route(
-        _format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>'),
-        containerhandler.ContainerHandler, name='cont_details', methods=['GET','PUT','DELETE']
-    ),
+    webapp2.Route(r'/api/groups',                                   grouphandler.GroupHandler, handler_method='get_all', methods=['GET']),
+    webapp2.Route(r'/api/groups',                                   grouphandler.GroupHandler, methods=['POST']),
+    webapp2.Route(_format(r'/api/groups/<_id:{group_id_re}>'),      grouphandler.GroupHandler, name='group'),
+
+    webapp2.Route(_format(r'/api/<cont_name:groups>/<cid:{group_id_re}>/<list_name:roles>'),                                        listhandler.ListHandler, name='group'),
+    webapp2.Route(_format(r'/api/<cont_name:groups>/<cid:{group_id_re}>/<list_name:roles>/<site:{site_re}>/<_id:{user_id_re}>'),    listhandler.ListHandler, name='group', methods=['GET', 'PUT', 'DELETE']),
+
+    webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:tags>'),                                      listhandler.ListHandler, methods=['POST'], name='tags_post'),
+    webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:tags>/<value:{tag_re}>'),                     listhandler.ListHandler, name='tags'),
+
+    webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:files>'),                                     listhandler.FileListHandler, name='files_post', methods=['POST']),
+    webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:files>/<filename:{filename_re}>'),            listhandler.FileListHandler, name='files'),
+
+    webapp2.Route(_format(r'/api/<cont_name:collections|projects>/<cid:{cid_re}>/<list_name:permissions>'),                                     listhandler.PermissionsListHandler, name='perms_post', methods=['POST']),
+    webapp2.Route(_format(r'/api/<cont_name:collections|projects>/<cid:{cid_re}>/<list_name:permissions>/<site:{site_re}>/<_id:{user_id_re}>'), listhandler.PermissionsListHandler, name='perms'),
+
+    webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:notes>'),                                     listhandler.NotesListHandler, name='notes_post', methods=['POST']),
+    webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>/<list_name:notes>/<_id:{note_id_re}>'),                  listhandler.NotesListHandler, name='notes'),
+
+    webapp2.Route(r'/api/collections/curators',                                         collectionshandler.CollectionsHandler, handler_method='curators', methods=['GET']),
+    webapp2.Route(r'/api/<cont_name:collections>',                                      collectionshandler.CollectionsHandler, name='colls', handler_method='get_all', methods=['GET']),
+    webapp2.Route(r'/api/<cont_name:collections>',                                      collectionshandler.CollectionsHandler, methods=['POST']),
+
+    webapp2.Route(_format(r'/api/<cont_name:collections>/<cid:{cid_re}>'),              collectionshandler.CollectionsHandler, name='coll_details', methods=['GET', 'PUT', 'DELETE']),
+    webapp2.Route(_format(r'/api/<cont_name:collections>/<cid:{cid_re}>/sessions'),     collectionshandler.CollectionsHandler, name='coll_ses', handler_method='get_sessions', methods=['GET']),
+    webapp2.Route(_format(r'/api/<cont_name:collections>/<cid:{cid_re}>/acquisitions'), collectionshandler.CollectionsHandler, name='coll_acq', handler_method='get_acquisitions', methods=['GET']),
+
+    webapp2.Route(_format(r'/api/users/<uid:{user_id_re}>/<cont_name:{cont_name_re}>'), containerhandler.ContainerHandler, name='user_conts', handler_method='get_all_for_user', methods=['GET']),
+
+    webapp2.Route(r'/api/projects/groups',                                              containerhandler.ContainerHandler, handler_method='get_groups_with_project', methods=['GET']),
+
+    webapp2.Route(_format(r'/api/<par_cont_name:groups>/<par_id:{group_id_re}>/<cont_name:projects>'),          containerhandler.ContainerHandler, name='cont_sublist', handler_method='get_all', methods=['GET']),
+    webapp2.Route(_format(r'/api/<par_cont_name:{cont_name_re}>/<par_id:{cid_re}>/<cont_name:{cont_name_re}>'), containerhandler.ContainerHandler, name='cont_sublist', handler_method='get_all', methods=['GET']),
+
+    webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>'),                          containerhandler.ContainerHandler, name='cont_list', handler_method='get_all', methods=['GET']),
+    webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>'),                          containerhandler.ContainerHandler, methods=['POST']),
+    webapp2.Route(_format(r'/api/<cont_name:{cont_name_re}>/<cid:{cid_re}>'),           containerhandler.ContainerHandler, name='cont_details', methods=['GET','PUT','DELETE']),
 ]
 
 
diff --git a/api/dao/containerstorage.py b/api/dao/containerstorage.py
index 6a26201e..832cbf7b 100644
--- a/api/dao/containerstorage.py
+++ b/api/dao/containerstorage.py
@@ -13,12 +13,14 @@ log = logging.getLogger('scitran.api')
 
 class ContainerStorage(object):
     """
-    This class provides access to sublists of mongodb collection elements (called containers).
+    This class provides access to mongodb collection elements (called containers).
+    It is used by ContainerHandler istances for get, create, update and delete operations on containers.
+    Examples: projects, sessions, acquisitions and collections
     """
 
-    def __init__(self, cont_name, use_oid = False):
+    def __init__(self, cont_name, use_object_id = False):
         self.cont_name = cont_name
-        self.use_oid = use_oid
+        self.use_object_id = use_object_id
         self.dbc = mongo.db[cont_name]
 
     def get_container(self, _id):
@@ -50,7 +52,7 @@ class ContainerStorage(object):
         update = {
             '$set': util.mongo_dict(payload)
         }
-        if self.use_oid:
+        if self.use_object_id:
             try:
                 _id = bson.objectid.ObjectId(_id)
             except bson.errors.InvalidId as e:
@@ -58,7 +60,7 @@ class ContainerStorage(object):
         return self.dbc.update_one({'_id': _id}, update)
 
     def _delete_el(self, _id):
-        if self.use_oid:
+        if self.use_object_id:
             try:
                 _id = bson.objectid.ObjectId(_id)
             except bson.errors.InvalidId as e:
@@ -66,7 +68,7 @@ class ContainerStorage(object):
         return self.dbc.delete_one({'_id':_id})
 
     def _get_el(self, _id, projection=None):
-        if self.use_oid:
+        if self.use_object_id:
             try:
                 _id = bson.objectid.ObjectId(_id)
             except bson.errors.InvalidId as e:
diff --git a/api/dao/liststorage.py b/api/dao/liststorage.py
index 6d8f8fcb..dfa319db 100644
--- a/api/dao/liststorage.py
+++ b/api/dao/liststorage.py
@@ -14,12 +14,14 @@ log = logging.getLogger('scitran.api')
 class ListStorage(object):
     """
     This class provides access to sublists of a mongodb collections elements (called containers).
+    It is used by ListHandler istances for get, create, update and delete operations on sublist of the containers.
+    Examples: permissions in projects, roles in groups, notes in projects, sessions, acquisitions, etc
     """
 
-    def __init__(self, cont_name, list_name, use_oid = False):
+    def __init__(self, cont_name, list_name, use_object_id = False):
         self.cont_name = cont_name
         self.list_name = list_name
-        self.use_oid = use_oid
+        self.use_object_id = use_object_id
         self.dbc = mongo.db[cont_name]
 
     def get_container(self, _id, query_params=None):
@@ -31,7 +33,7 @@ class ListStorage(object):
 
         For simplicity we load its full content.
         """
-        if self.use_oid:
+        if self.use_object_id:
             _id = bson.objectid.ObjectId(_id)
         query = {'_id': _id}
         projection = None
@@ -48,7 +50,7 @@ class ListStorage(object):
         Generic method to exec an operation.
         The request is dispatched to the corresponding private methods.
         """
-        if self.use_oid:
+        if self.use_object_id:
             try:
                 _id = bson.objectid.ObjectId(_id)
             except bson.errors.InvalidId as e:
@@ -114,11 +116,15 @@ class ListStorage(object):
 
 
 class StringListStorage(ListStorage):
+    """
+    This class provides access to string sublists of a mongodb collections elements (called containers).
+    The difference with other sublists is that the elements are not object but strings.
+    """
 
     def get_container(self, _id, query_params=None):
         if self.dbc is None:
             raise RuntimeError('collection not initialized before calling get_container')
-        if self.use_oid:
+        if self.use_object_id:
             try:
                 _id = bson.objectid.ObjectId(_id)
             except bson.errors.InvalidId as e:
diff --git a/api/files.py b/api/files.py
index 16eae437..5439469a 100644
--- a/api/files.py
+++ b/api/files.py
@@ -32,16 +32,17 @@ class FileRequest(object):
         self.mimetype = util.guess_mimetype(filename)
         self.filetype = util.guess_filetype(filename, self.mimetype)
 
-    def save_temp_file(self, tempdir_path):
+    def save_temp_file(self, tempdir_path, handler):
         self.tempdir_path = tempdir_path
         success, duration = self._save_temp_file(self.tempdir_path)
         if not success:
-            self.abort(400, 'Content-MD5 mismatch.')
+            return False
         throughput = self.filesize / duration.total_seconds()
         log.info('Received    %s [%s, %s/s] from %s' % (
             self.filename,
             util.hrsize(self.filesize), util.hrsize(throughput),
             self.client_addr))
+        return success
 
     def move_temp_file(self, container_path):
         target_filepath = container_path + '/' + self.filename
diff --git a/api/handlers/collectionshandler.py b/api/handlers/collectionshandler.py
index 2b049889..127e31f2 100644
--- a/api/handlers/collectionshandler.py
+++ b/api/handlers/collectionshandler.py
@@ -20,8 +20,8 @@ class CollectionsHandler(ContainerHandler):
 
     container_handler_configurations['collections'] = {
         'permchecker': containerauth.collection_permissions,
-        'storage': containerstorage.ContainerStorage('collections', use_oid=True),
-        'mongo_schema_file': 'mongo/collection.json',
+        'storage': containerstorage.ContainerStorage('collections', use_object_id=True),
+        'storage_schema_file': 'mongo/collection.json',
         'payload_schema_file': 'input/collection.json',
         'list_projection': {'metadata': 0}
     }
diff --git a/api/handlers/containerhandler.py b/api/handlers/containerhandler.py
index 8aac36d6..a3bb2bd4 100644
--- a/api/handlers/containerhandler.py
+++ b/api/handlers/containerhandler.py
@@ -35,7 +35,7 @@ class ContainerHandler(base.RequestHandler):
     Specific behaviors (permissions checking logic for authenticated and not superuser users, storage interaction)
     are specified in the container_handler_configurations
     """
-    use_oid = {
+    use_object_id = {
         'groups': False,
         'projects': True,
         'sessions': True,
@@ -48,30 +48,31 @@ class ContainerHandler(base.RequestHandler):
     #
     # "children_cont" represents the children container.
     # "list projection" is used to filter data in mongo.
+    # "use_object_id" implies that the container ids are converted to ObjectId
     container_handler_configurations = {
         'projects': {
-            'storage': containerstorage.ContainerStorage('projects', use_oid=use_oid['projects']),
+            'storage': containerstorage.ContainerStorage('projects', use_object_id=use_object_id['projects']),
             'permchecker': containerauth.default_container,
-            'parent_storage': containerstorage.ContainerStorage('groups', use_oid=use_oid['groups']),
-            'mongo_schema_file': 'mongo/project.json',
+            'parent_storage': containerstorage.ContainerStorage('groups', use_object_id=use_object_id['groups']),
+            'storage_schema_file': 'mongo/project.json',
             'payload_schema_file': 'input/project.json',
             'list_projection': {'metadata': 0},
             'children_cont': 'sessions'
         },
         'sessions': {
-            'storage': containerstorage.ContainerStorage('sessions', use_oid=use_oid['sessions']),
+            'storage': containerstorage.ContainerStorage('sessions', use_object_id=use_object_id['sessions']),
             'permchecker': containerauth.default_container,
-            'parent_storage': containerstorage.ContainerStorage('projects', use_oid=use_oid['projects']),
-            'mongo_schema_file': 'mongo/session.json',
+            'parent_storage': containerstorage.ContainerStorage('projects', use_object_id=use_object_id['projects']),
+            'storage_schema_file': 'mongo/session.json',
             'payload_schema_file': 'input/session.json',
             'list_projection': {'metadata': 0},
             'children_cont': 'acquisitions'
         },
         'acquisitions': {
-            'storage': containerstorage.ContainerStorage('acquisitions', use_oid=use_oid['acquisitions']),
+            'storage': containerstorage.ContainerStorage('acquisitions', use_object_id=use_object_id['acquisitions']),
             'permchecker': containerauth.default_container,
-            'parent_storage': containerstorage.ContainerStorage('sessions', use_oid=use_oid['sessions']),
-            'mongo_schema_file': 'mongo/acquisition.json',
+            'parent_storage': containerstorage.ContainerStorage('sessions', use_object_id=use_object_id['sessions']),
+            'storage_schema_file': 'mongo/acquisition.json',
             'payload_schema_file': 'input/acquisition.json',
             'list_projection': {'metadata': 0}
         }
@@ -128,7 +129,7 @@ class ContainerHandler(base.RequestHandler):
         if par_cont_name:
             if not par_id:
                 self.abort(500, 'par_id is required when par_cont_name is provided')
-            if self.use_oid.get(par_cont_name):
+            if self.use_object_id.get(par_cont_name):
                 if not bson.ObjectId.is_valid(par_id):
                     self.abort(400, 'not a valid object id')
                 par_id = bson.ObjectId(par_id)
@@ -265,10 +266,11 @@ class ContainerHandler(base.RequestHandler):
             if payload['permissions'] is None:
                 payload['permissions'] = target_parent_container['permissions']
 
-        permchecker = self._get_permchecker(container, target_parent_container)
         payload['modified'] = datetime.datetime.utcnow()
         if payload.get('timestamp'):
             payload['timestamp'] = dateutil.parser.parse(payload['timestamp'])
+
+        permchecker = self._get_permchecker(container, target_parent_container)
         try:
             # This line exec the actual request validating the payload that will update the container
             # and checking permissions using respectively the two decorators, mongo_validator and permchecker
@@ -308,7 +310,7 @@ class ContainerHandler(base.RequestHandler):
 
 
     def _get_validators(self):
-        mongo_validator = validators.mongo_from_schema_file(self, self.config.get('mongo_schema_file'))
+        mongo_validator = validators.mongo_from_schema_file(self, self.config.get('storage_schema_file'))
         payload_validator = validators.payload_from_schema_file(self, self.config.get('payload_schema_file'))
         return mongo_validator, payload_validator
 
diff --git a/api/handlers/grouphandler.py b/api/handlers/grouphandler.py
index aa10427f..a97d8173 100644
--- a/api/handlers/grouphandler.py
+++ b/api/handlers/grouphandler.py
@@ -80,7 +80,7 @@ class GroupHandler(base.RequestHandler):
             self.abort(404, 'User {} not updated'.format(_id))
 
     def _init_storage(self):
-        self.storage = containerstorage.ContainerStorage('groups', use_oid=False)
+        self.storage = containerstorage.ContainerStorage('groups', use_object_id=False)
 
     def _get_group(self, _id):
         group = self.storage.get_container(_id)
diff --git a/api/handlers/listhandler.py b/api/handlers/listhandler.py
index a40a88b3..f8242e10 100644
--- a/api/handlers/listhandler.py
+++ b/api/handlers/listhandler.py
@@ -21,37 +21,37 @@ def initialize_list_configurations():
     This configurations are used by the ListHandler class to load the storage, the permissions checker
     and the json schema validators used to handle a request.
 
-    "use_oid" implies that the container ids is converted to ObjectId
+    "use_object_id" implies that the container ids are converted to ObjectId
     "get_full_container" allows the handler to load the full content of the container and not only the sublist element (this is used for permissions for example)
     """
     container_default_configurations = {
         'tags': {
             'storage': liststorage.StringListStorage,
             'permchecker': listauth.default_sublist,
-            'use_oid': True,
-            'mongo_schema_file': 'mongo/tag.json',
+            'use_object_id': True,
+            'storage_schema_file': 'mongo/tag.json',
             'input_schema_file': 'input/tag.json'
         },
         'files': {
             'storage': liststorage.ListStorage,
             'permchecker': listauth.default_sublist,
-            'use_oid': True,
-            'mongo_schema_file': 'mongo/file.json',
+            'use_object_id': True,
+            'storage_schema_file': 'mongo/file.json',
             'input_schema_file': 'input/file.json'
         },
         'permissions': {
             'storage': liststorage.ListStorage,
             'permchecker': listauth.permissions_sublist,
-            'use_oid': True,
+            'use_object_id': True,
             'get_full_container': True,
-            'mongo_schema_file': 'mongo/permission.json',
+            'storage_schema_file': 'mongo/permission.json',
             'input_schema_file': 'input/permission.json'
         },
         'notes': {
             'storage': liststorage.ListStorage,
             'permchecker': listauth.notes_sublist,
-            'use_oid': True,
-            'mongo_schema_file': 'mongo/note.json',
+            'use_object_id': True,
+            'storage_schema_file': 'mongo/note.json',
             'input_schema_file': 'input/note.json'
         },
     }
@@ -60,9 +60,9 @@ def initialize_list_configurations():
             'roles':{
                 'storage': liststorage.ListStorage,
                 'permchecker': listauth.group_roles_sublist,
-                'use_oid': False,
+                'use_object_id': False,
                 'get_full_container': True,
-                'mongo_schema_file': 'mongo/permission.json',
+                'storage_schema_file': 'mongo/permission.json',
                 'input_schema_file': 'input/permission.json'
             }
         },
@@ -78,7 +78,7 @@ def initialize_list_configurations():
             storage = storage_class(
                 cont_name,
                 list_name,
-                use_oid=list_config.get('use_oid', False)
+                use_object_id=list_config.get('use_object_id', False)
             )
             list_config['storage'] = storage
     return list_handler_configurations
@@ -180,9 +180,9 @@ class ListHandler(base.RequestHandler):
                 permchecker = permchecker(self, container)
         else:
             self.abort(404, 'Element {} not found in container {}'.format(_id, storage.cont_name))
-        mongo_validator = validators.mongo_from_schema_file(self, config.get('mongo_schema_file'))
+        mongo_validator = validators.mongo_from_schema_file(self, config.get('storage_schema_file'))
         input_validator = validators.payload_from_schema_file(self, config.get('payload_schema_file'))
-        keycheck = validators.key_check(self, config.get('mongo_schema_file'))
+        keycheck = validators.key_check(self, config.get('storage_schema_file'))
         return container, permchecker, storage, mongo_validator, input_validator, keycheck
 
 
@@ -357,11 +357,6 @@ class FileListHandler(ListHandler):
             result['removed'] = 0
         return result
 
-    def put(self, cont_name, list_name, **kwargs):
-        fileinfo = super(FileListHandler, self).get(cont_name, list_name, **kwargs)
-        # TODO: implement file metadata updates
-        self.abort(400, 'PUT is not yet implemented')
-
     def post(self, cont_name, list_name, **kwargs):
         force = self.is_true('force')
         _id = kwargs.pop('cid')
@@ -371,7 +366,9 @@ class FileListHandler(ListHandler):
         file_request = files.FileRequest.from_handler(self, filename)
         result = None
         with tempfile.TemporaryDirectory(prefix='.tmp', dir=self.app.config['upload_path']) as tempdir_path:
-            file_request.save_temp_file(tempdir_path)
+            success = file_request.save_temp_file(tempdir_path)
+            if not success:
+                self.abort(400, 'Content-MD5 mismatch.')
             file_datetime = datetime.datetime.utcnow()
             file_properties = {
                 'name': file_request.filename,
diff --git a/api/handlers/userhandler.py b/api/handlers/userhandler.py
index 05224d8f..7e04992b 100644
--- a/api/handlers/userhandler.py
+++ b/api/handlers/userhandler.py
@@ -40,9 +40,6 @@ class UserHandler(base.RequestHandler):
             self.abort(400, 'no user is logged in')
         return user
 
-    def roles(self):
-        return ROLES
-
     def get_all(self):
         self._init_storage()
         permchecker = userauth.list_permission_checker(self)
@@ -99,7 +96,7 @@ class UserHandler(base.RequestHandler):
             self.abort(404, 'User {} not updated'.format(_id))
 
     def _init_storage(self):
-        self.storage = containerstorage.ContainerStorage('users', use_oid=False)
+        self.storage = containerstorage.ContainerStorage('users', use_object_id=False)
 
     def _get_user(self, _id):
         user = self.storage.get_container(_id)
diff --git a/api/mongo.py b/api/mongo.py
index efd8ef7e..fd6ef7e2 100644
--- a/api/mongo.py
+++ b/api/mongo.py
@@ -1,26 +1,32 @@
 import pymongo
 import datetime
+import time
 
 db = None
 
 
 def configure_db(db_uri, site_id, site_name, api_uri):
     global db
-    db = pymongo.MongoClient(db_uri).get_default_database()
-    # TODO jobs indexes
-    # TODO review all indexes
-    db.projects.create_index([('gid', 1), ('name', 1)])
-    db.sessions.create_index('project')
-    db.sessions.create_index('uid')
-    db.acquisitions.create_index('session')
-    db.acquisitions.create_index('uid')
-    db.acquisitions.create_index('collections')
-    db.authtokens.create_index('timestamp', expireAfterSeconds=600)
-    db.uploads.create_index('timestamp', expireAfterSeconds=60)
-    db.downloads.create_index('timestamp', expireAfterSeconds=60)
-
-    now = datetime.datetime.utcnow()
-    db.groups.update_one({'_id': 'unknown'}, {'$setOnInsert': { 'created': now, 'modified': now, 'name': 'Unknown', 'roles': []}}, upsert=True)
-    db.sites.replace_one({'_id': site_id}, {'name': site_name, 'api_uri': api_uri}, upsert=True)
-
+    for i in range(0,3):
+        try:
+            db = pymongo.MongoClient(db_uri).get_default_database()
+            # TODO jobs indexes
+            # TODO review all indexes
+            db.projects.create_index([('gid', 1), ('name', 1)])
+            db.sessions.create_index('project')
+            db.sessions.create_index('uid')
+            db.acquisitions.create_index('session')
+            db.acquisitions.create_index('uid')
+            db.acquisitions.create_index('collections')
+            db.authtokens.create_index('timestamp', expireAfterSeconds=600)
+            db.uploads.create_index('timestamp', expireAfterSeconds=60)
+            db.downloads.create_index('timestamp', expireAfterSeconds=60)
 
+            now = datetime.datetime.utcnow()
+            db.groups.update_one({'_id': 'unknown'}, {'$setOnInsert': { 'created': now, 'modified': now, 'name': 'Unknown', 'roles': []}}, upsert=True)
+            db.sites.replace_one({'_id': site_id}, {'name': site_name, 'api_uri': api_uri}, upsert=True)
+            return
+        except Exception as e:
+            db = None
+            time.sleep(5)
+    raise e
diff --git a/api/util.py b/api/util.py
index fc58a1ef..37738dfb 100644
--- a/api/util.py
+++ b/api/util.py
@@ -114,6 +114,8 @@ def commit_file(dbc, _id, datainfo, filepath, data_path, force=False):
                     log.debug('Replacing   %s' % filename)
                     shutil.move(filepath, target_filepath)
                     update_set = {'files.$.dirty': True, 'files.$.modified': datetime.datetime.utcnow()}
+                    # in this branch of the code, we are overriding an existing file.
+                    # update_set allows to update all the fileinfo like size, hash, etc.
                     for k,v in fileinfo.iteritems():
                         update_set['files.$.' + k] = v
                     dbc.update_one({'_id':_id, 'files.filename': fileinfo['filename']},
diff --git a/api/validators.py b/api/validators.py
index b84a0661..ed70826a 100644
--- a/api/validators.py
+++ b/api/validators.py
@@ -104,6 +104,17 @@ def payload_from_schema_file(handler, schema_file):
     return g
 
 def key_check(handler, schema_file):
+    """
+    for sublists of mongo container there is no automatic key check when creating, updating or deleting an object.
+    We are adding a custom array field to the json schemas ("key_fields").
+    The uniqueness is checked on the combination of all the elements of "key_fields".
+    For an example check api/schemas/input/permission.json
+
+    So this method ensures that:
+    1. after a POST and PUT request we don't have two items with the same values for the key set
+    2. a GET will retrieve a single item
+    3. a DELETE (most importantly) will delete a single item
+    """
     if schema_file is None:
         return no_op
     schema = resolver.resolve(schema_file)[1]
diff --git a/bin/api.wsgi b/bin/api.wsgi
index 55993ebb..5f9fdbc8 100644
--- a/bin/api.wsgi
+++ b/bin/api.wsgi
@@ -41,6 +41,7 @@ args.upload_path = os.path.join(args.data_path, 'upload')
 from api import mongo
 mongo.configure_db(args.db_uri, args.site_id, args.site_name, args.api_uri)
 
+# imports delayed after mongo has been fully initialized
 from api.util import log
 from api import api
 from api import centralclient, jobs
diff --git a/test_bootstrap.json b/test_bootstrap.json
index 08771343..c997fb0a 100644
--- a/test_bootstrap.json
+++ b/test_bootstrap.json
@@ -5,17 +5,7 @@
                     "name": "Scientific Transparency",
                     "roles": [
                             {
-                                    "_id": "rfrigato@stanford.edu",
-                                    "access": "admin"
-                            }
-                    ]
-            },
-            {
-                    "_id": "unknown",
-                    "name": "Unknown",
-                    "roles": [
-                            {
-                                    "_id": "rfrigato@stanford.edu",
+                                    "_id": "admin@user.com",
                                     "access": "admin"
                             }
                     ]
@@ -23,17 +13,17 @@
     ],
     "users": [
             {
-                    "_id": "rfrigato@stanford.edu",
-                    "email": "rfrigato@stanford.edu",
-                    "firstname": "Renzo",
-                    "lastname": "Frigato",
+                    "_id": "admin@user.com",
+                    "email": "admin@user.com",
+                    "firstname": "Admin",
+                    "lastname": "User",
                     "root": true
             },
             {
-                    "_id": "renzo.frigato@gmail.com",
-                    "email": "renzo.frigato@gmail.com",
-                    "firstname": "Renzo",
-                    "lastname": "Frigato",
+                    "_id": "test@user.com",
+                    "email": "test@user.com",
+                    "firstname": "Test",
+                    "lastname": "User",
                     "root": true
             }
     ]
diff --git a/test_integ/test_collection.py b/test_integ/test_collection.py
index a2349cd3..1a0e444f 100644
--- a/test_integ/test_collection.py
+++ b/test_integ/test_collection.py
@@ -64,7 +64,7 @@ def teardown_db():
 
 
 @with_setup(setup_db, teardown_db)
-def test_sequence():
+def test_collections():
     payload = {
         'curator': 'admin@user.com',
         'label': 'SciTran/Testing',
diff --git a/test_integ/test_users.py b/test_integ/test_users.py
index 69912a39..57b30b06 100644
--- a/test_integ/test_users.py
+++ b/test_integ/test_users.py
@@ -10,14 +10,14 @@ requests.packages.urllib3.disable_warnings()
 base_url = 'https://localhost:8443/api'
 
 def test_users():
-    _id = 'test@user.com'
+    _id = 'new@user.com'
     r = requests.get(base_url + '/users/self?user=admin@user.com', verify=False)
     assert r.ok
     r = requests.get(base_url + '/users/' + _id + '?user=admin@user.com&root=true', verify=False)
     assert r.status_code == 404
     payload = {
         '_id': _id,
-        'firstname': 'Test',
+        'firstname': 'New',
         'lastname': 'User',
     }
     payload = json.dumps(payload)
@@ -26,7 +26,7 @@ def test_users():
     r = requests.get(base_url + '/users/' + _id + '?user=admin@user.com&root=true', verify=False)
     assert r.ok
     payload = {
-        'firstname': 'New'
+        'firstname': 'Realname'
     }
     payload = json.dumps(payload)
     r = requests.put(base_url + '/users/' + _id + '?user=admin@user.com&root=true', data=payload, verify=False)
-- 
GitLab