diff --git a/api.py b/api.py index 7fdd934a994fb8fbb15e7b2ee9c18dce9150afa3..2f2f81675848af29b1092ac0f9c2fa419d38bdd0 100755 --- a/api.py +++ b/api.py @@ -56,6 +56,9 @@ routes = [ webapp2.Route(r'/<:[0-9a-f]{24}>', projects.Project, name='project'), webapp2.Route(r'/<:[0-9a-f]{24}>/file', projects.Project, handler_method='get_file', methods=['GET', 'POST']), webapp2.Route(r'/<:[0-9a-f]{24}>/file', projects.Project, handler_method='put_file', methods=['PUT']), + webapp2.Route(r'/<:[0-9a-f]{24}>/attachment', projects.Project, handler_method='delete_attachment', methods=['DELETE']), + webapp2.Route(r'/<:[0-9a-f]{24}>/attachment', projects.Project, handler_method='get_attachment', methods=['GET', 'POST']), + webapp2.Route(r'/<:[0-9a-f]{24}>/attachment', projects.Project, handler_method='put_attachment', methods=['PUT']), webapp2.Route(r'/<:[0-9a-f]{24}>/sessions', sessions.Sessions, name='sessions'), ]), webapp2.Route(r'/api/collections', collections_.Collections), @@ -66,6 +69,9 @@ routes = [ webapp2.Route(r'/<:[0-9a-f]{24}>', collections_.Collection, name='collection'), webapp2.Route(r'/<:[0-9a-f]{24}>/file', collections_.Collection, handler_method='get_file', methods=['GET', 'POST']), webapp2.Route(r'/<:[0-9a-f]{24}>/file', collections_.Collection, handler_method='put_file', methods=['PUT']), + webapp2.Route(r'/<:[0-9a-f]{24}>/attachment', collections_.Collection, handler_method='delete_attachment', methods=['DELETE']), + webapp2.Route(r'/<:[0-9a-f]{24}>/attachment', collections_.Collection, handler_method='get_attachment', methods=['GET', 'POST']), + webapp2.Route(r'/<:[0-9a-f]{24}>/attachment', collections_.Collection, handler_method='put_attachment', methods=['PUT']), webapp2.Route(r'/<:[0-9a-f]{24}>/sessions', collections_.CollectionSessions, name='coll_sessions'), webapp2.Route(r'/<:[0-9a-f]{24}>/acquisitions', collections_.CollectionAcquisitions, name='coll_acquisitions'), ]), @@ -75,6 +81,9 @@ routes = [ webapp2.Route(r'/<:[0-9a-f]{24}>', sessions.Session, name='session'), webapp2.Route(r'/<:[0-9a-f]{24}>/file', sessions.Session, handler_method='get_file', methods=['GET', 'POST']), webapp2.Route(r'/<:[0-9a-f]{24}>/file', sessions.Session, handler_method='put_file', methods=['PUT']), + webapp2.Route(r'/<:[0-9a-f]{24}>/attachment', sessions.Session, handler_method='delete_attachment', methods=['DELETE']), + webapp2.Route(r'/<:[0-9a-f]{24}>/attachment', sessions.Session, handler_method='get_attachment', methods=['GET', 'POST']), + webapp2.Route(r'/<:[0-9a-f]{24}>/attachment', sessions.Session, handler_method='put_attachment', methods=['PUT']), webapp2.Route(r'/<:[0-9a-f]{24}>/acquisitions', acquisitions.Acquisitions, name='acquisitions'), ]), webapp2_extras.routes.PathPrefixRoute(r'/api/acquisitions', [ @@ -83,6 +92,9 @@ routes = [ webapp2.Route(r'/<:[0-9a-f]{24}>', acquisitions.Acquisition, name='acquisition'), webapp2.Route(r'/<:[0-9a-f]{24}>/file', acquisitions.Acquisition, handler_method='get_file', methods=['GET', 'POST']), webapp2.Route(r'/<:[0-9a-f]{24}>/file', acquisitions.Acquisition, handler_method='put_file', methods=['PUT']), + webapp2.Route(r'/<:[0-9a-f]{24}>/attachment', acquisitions.Acquisition, handler_method='delete_attachment', methods=['DELETE']), + webapp2.Route(r'/<:[0-9a-f]{24}>/attachment', acquisitions.Acquisition, handler_method='get_attachment', methods=['GET', 'POST']), + webapp2.Route(r'/<:[0-9a-f]{24}>/attachment', acquisitions.Acquisition, handler_method='put_attachment', methods=['PUT']), ]), ] diff --git a/containers.py b/containers.py index 14f63994518cde60587706b8300cc8d5471554b9..b25fb5cd863ca5260d268ff264d24ec9496abcc4 100644 --- a/containers.py +++ b/containers.py @@ -4,6 +4,7 @@ import logging log = logging.getLogger('scitran.api') import os +import json import hashlib import datetime import jsonschema @@ -13,6 +14,8 @@ import base import util import users +import tempdir as tempfile + FILE_SCHEMA = { '$schema': 'http://json-schema.org/draft-04/schema#', 'title': 'File', @@ -192,11 +195,14 @@ class Container(base.RequestHandler): self.abort(404, 'no such file') filename = file_info['name'] + file_info['ext'] filepath = os.path.join(self.app.config['data_path'], str(_id)[-3:] + '/' + str(_id), filename) - ticket = util.download_ticket('single', filepath, filename, file_info['size']) - tkt_id = self.app.db.downloads.insert(ticket) if self.request.method == 'GET': - self.redirect_to('download', _abort=True, ticket=tkt_id) - return {'url': self.uri_for('download', _full=True, ticket=tkt_id)} + self.response.app_iter = open(filepath, 'rb') + self.response.headers['Content-Length'] = str(file_info['size']) # must be set after setting app_iter + self.response.headers['Content-Type'] = 'application/octet-stream' + else: + ticket = util.download_ticket('single', filepath, filename, file_info['size']) + tkt_id = self.app.db.downloads.insert(ticket) + return {'url': self.uri_for('download', _full=True, ticket=tkt_id)} def put_file(self, cid=None): """ @@ -209,6 +215,7 @@ class Container(base.RequestHandler): the current container. """ + # TODO; revise how engine's upload their data to be compatible with the put_attachment fxn def receive_stream_and_validate(stream, digest, filename): # FIXME pull this out to also be used from core.Core.put() and also replace the duplicated code below hash_ = hashlib.sha1() @@ -261,3 +268,96 @@ class Container(base.RequestHandler): status, detail = util.insert_file(self.dbc, _id, file_info, filepath, file_info['sha1'], data_path, quarantine_path) if status != 200: self.abort(status, detail) + + def put_attachment(self, cid): + """ + Recieve a targetted user upload of an attachment. + + Attachments are different from files, in that they are not 'research ready'. Attachments + represent other documents that are generally not useable by the engine; documents like + consent forms, pen/paper questionnaires, study recruiting materials, etc. + + Internally, attachments are distinguished from files because of what metadata is + required. Attachments really only need a 'kinds' and 'type'. We don't expect iteration over + an attachment in a way that would require tracking 'state'. + """ + # TODO read self.request.body, using '------WebKitFormBoundary' as divider + # first line is 'content-disposition' line, extract filename + # second line is content-type, determine how to write to a file, as bytes or as string + # third linedata_path = self.app.config['data_path'], just a separator, useless + data_path = self.app.config['data_path'] + quarantine_path = self.app.config['quarantine_path'] + _id = bson.ObjectId(cid) + hashes = [] + with tempfile.TemporaryDirectory(prefix='.tmp', dir=self.app.config['data_path']) as tempdir_path: + # get and hash the metadata + metahash = hashlib.sha1() + metastr = self.request.POST.get('metadata').file.read() # returns a string? + metadata = json.loads(metastr) + metahash.update(metastr) + hashes.append({'name': 'metadata', 'sha1': metahash.hexdigest()}) + + sha1s = json.loads(self.request.POST.get('sha').file.read()) + for finfo in metadata: + fname = finfo.get('name') + finfo.get('ext') # finfo['ext'] will always be empty + fhash = hashlib.sha1() + fobj = self.request.POST.get(fname).file + filepath = os.path.join(tempdir_path, fname) + with open(filepath, 'wb') as fd: + for chunk in iter(lambda: fobj.read(2**20), ''): + fhash.update(chunk) + fd.write(chunk) + for s in sha1s: + if fname == s.get('name'): + if fhash.hexdigest() != s.get('sha1'): + self.abort(400, 'Content-MD5 mismatch %s vs %s' % (fhash.hexdigest(), s.get('sha1'))) + else: + finfo['sha1'] = s.get('sha1') + status, detail = util.insert_file(self.dbc, _id, finfo, filepath, s.get('sha1'), data_path, quarantine_path, flavor='attachment') + if status != 200: + self.abort(400, 'upload failed') + break + else: + self.abort(400, '%s is not listed in the sha1s' % fname) + + def get_attachment(self, cid): + """Download one attachment.""" + fname = self.request.get('name') + _id = bson.ObjectId(cid) + container, _ = self._get(_id, 'download') + fpath = os.path.join(self.app.config['data_path'], str(_id)[-3:] + '/' + str(_id), fname) + for a_info in container['attachments']: + if (a_info['name'] + a_info['ext']) == fname: + break + else: + self.abort(404, 'no such file') + if self.request.method == 'GET': + self.response.app_iter = open(fpath, 'rb') + self.response.headers['Content-Length'] = str(a_info['size']) # must be set after setting app_iter + self.response.headers['Content-Type'] = 'application/octet-stream' + else: + ticket = util.download_ticket('single', fpath, fname, a_info['size']) + tkt_id = self.app.db.downloads.insert(ticket) + return {'url': self.uri_for('download', _full=True, ticket=tkt_id)} + + def delete_attachment(self, cid): + """Delete one attachment.""" + fname = self.request.get('name') + _id = bson.ObjectId(cid) + container, _ = self._get(_id, 'download') + fpath = os.path.join(self.app.config['data_path'], str(_id)[-3:] + '/' + str(_id), fname) + for a_info in container['attachments']: + if (a_info['name'] + a_info['ext']) == fname: + break + else: + self.abort(404, 'no such file') + + name, ext = os.path.splitext(fname) + success = self.dbc.update({'_id': _id, 'attachments.name': fname}, {'$pull': {'attachments': {'name': fname}}}) + if not success['updatedExisting']: + log.info('could not remove database entry.') + if os.path.exists(fpath): + os.remove(fpath) + log.info('removed file %s' % fpath) + else: + log.info('could not remove file, file %s does not exist' % fpath) diff --git a/util.py b/util.py index d372f3d296fee59ff871cf52ed071291d1785af7..21df542cd41ada15591afadae07d5194119db3e5 100644 --- a/util.py +++ b/util.py @@ -15,7 +15,8 @@ import scitran.data PROJECTION_FIELDS = ['timestamp', 'permissions', 'public'] -def insert_file(dbc, _id, file_info, filepath, digest, data_path, quarantine_path): +def insert_file(dbc, _id, file_info, filepath, digest, data_path, quarantine_path, flavor='file'): + """Insert a file as an attachment or as a file.""" filename = os.path.basename(filepath) if _id is None: try: @@ -48,20 +49,21 @@ def insert_file(dbc, _id, file_info, filepath, digest, data_path, quarantine_pat ) filename = dataset.nims_file_name + dataset.nims_file_ext else: + flavor = flavor + 's' file_spec = dict( _id=_id, - files={'$elemMatch': { - 'type': file_info['type'], - 'kinds': file_info['kinds'], - 'state': file_info['state'], + flavor={'$elemMatch': { + 'type': file_info.get('type'), + 'kinds': file_info.get('kinds'), + 'state': file_info.get('state'), }}, ) container_path = os.path.join(data_path, str(_id)[-3:] + '/' + str(_id)) if not os.path.exists(container_path): os.makedirs(container_path) - success = dbc.update(file_spec, {'$set': {'files.$': file_info}}) + success = dbc.update(file_spec, {'$set': {flavor + '.$': file_info}}) if not success['updatedExisting']: - dbc.update({'_id': _id}, {'$push': {'files': file_info}}) + dbc.update({'_id': _id}, {'$push': {flavor: file_info}}) shutil.move(filepath, container_path + '/' + filename) log.debug('Done %s' % os.path.basename(filepath)) # must use filepath, since filename is updated for sorted files return 200, 'Success'