# @author:  Gunnar Schaefer, Kevin S. Hahn

import datetime
import hashlib
import json
import logging
import os

import bson.errors
import bson.json_util
import jsonschema
import tempdir as tempfile

import base
import util
import users

log = logging.getLogger('scitran.api')

# JSON Schema (draft-04) for one file record.  All seven properties are
# required and no additional properties are accepted.
FILE_SCHEMA = {
    '$schema': 'http://json-schema.org/draft-04/schema#',
    'title': 'File',
    'type': 'object',
    'properties': {
        'name': {'title': 'Name', 'type': 'string'},
        'ext': {'title': 'Extension', 'type': 'string'},
        'size': {'title': 'Size', 'type': 'integer'},
        'sha1': {'title': 'SHA-1', 'type': 'string'},
        'type': {'title': 'Type', 'type': 'string'},
        'kinds': {'title': 'Kinds', 'type': 'array'},
        'state': {'title': 'State', 'type': 'array'},
    },
    'required': ['name', 'ext', 'size', 'sha1', 'type', 'kinds', 'state'],
    'additionalProperties': False,
}

# JSON Schema (draft-04) for the 'metadata' part of an upload: an object whose
# only (and mandatory) property is 'files', an array of unique FILE_SCHEMA
# records.
FILE_UPLOAD_SCHEMA = {
    '$schema': 'http://json-schema.org/draft-04/schema#',
    'title': 'Upload',
    'type': 'object',
    'properties': {
        'files': {'title': 'Files', 'type': 'array', 'items': FILE_SCHEMA, 'uniqueItems': True},
    },
    'required': ['files'],
    'additionalProperties': False,
}

# JSON Schema (draft-04) for a file download request.  A request identifies
# the wanted file in one of two ways:
#   * by classification: 'type', 'kinds' and 'state' are all required, or
#   * by filename:       'name' and 'ext' are both required.
# Neither alternative forbids additional properties, so a request may carry
# keys from both alternatives as long as at least one alternative is complete.
FILE_DOWNLOAD_SCHEMA = {
    '$schema': 'http://json-schema.org/draft-04/schema#',
    'title': 'File Download',
    'anyOf': [
        {
            'type': 'object',
            'properties': {
                'type': {
                    'title': 'Type',
                    'type': 'string',
                },
                'kinds': {
                    'title': 'Kinds',
                    'type': 'array',
                },
                'state': {
                    'title': 'State',
                    'type': 'array',
                },
            },
            'required': ['type', 'kinds', 'state'],
        },
        {
            'type': 'object',
            'properties': {
                # titles match FILE_SCHEMA (they were previously both 'Type')
                'name': {
                    'title': 'Name',
                    'type': 'string',
                },
                'ext': {
                    'title': 'Extension',
                    'type': 'string',
                },
            },
            'required': ['name', 'ext'],
        },
    ],
}


class ContainerList(base.RequestHandler):

    """Request handler base for listing the containers a requester may see."""

    def _get(self, query, projection, admin_only=False):
        """
        Run `query` against `self.dbc`, restricted to what the requester may see.

        `query` and `projection` are modified in place:
        - public requests only match documents with `public` set to True;
        - other requests project only the requester's own permission entry, and,
          unless this is a superuser request, only match documents on which the
          requester has a permission entry (an 'admin' one if `admin_only`).

        Returns the matching documents as a list, with each `_id` converted to str.
        """
        if self.public_request:
            query['public'] = True
        else:
            own_entry = {'_id': self.uid, 'site': self.source_site}
            projection['permissions'] = {'$elemMatch': dict(own_entry)}
            if not self.superuser_request:
                required_entry = dict(own_entry)
                if admin_only:
                    required_entry['access'] = 'admin'
                query['permissions'] = {'$elemMatch': required_entry}
        results = list(self.dbc.find(query, projection))
        for doc in results:
            doc['_id'] = str(doc['_id'])
        return results


class Container(base.RequestHandler):

    def _get(self, _id, min_role=None, perm_only=False, dbc=None, dbc_name=None):
        """
        Fetch one container and enforce the requester's access rights on it.

        _id       -- `_id` of the container document to look up
        min_role  -- if given, minimum role (a key of users.INTEGER_ROLES) the
                     requester must hold; not checked for public or superuser requests
        perm_only -- fetch only the 'permissions' field of the document
        dbc       -- collection to query, defaults to self.dbc
        dbc_name  -- name used in error messages, defaults to the class name

        Returns a `(container, user_perm)` tuple, where `container['_id']` has been
        converted to str and `user_perm` is whatever util.user_perm() found for the
        requester.  Aborts with 404 if the container does not exist and with 403 if
        access is denied (this relies on self.abort() not returning).

        NOTE(review): with perm_only=True the 'public' field is not fetched, so a
        public request is always rejected in that mode -- confirm this is intended.
        """
        dbc = dbc or self.dbc
        dbc_name = dbc_name or self.__class__.__name__
        container = dbc.find_one({'_id': _id}, ['permissions'] if perm_only else None)
        if not container:
            self.abort(404, 'no such ' + dbc_name)
        user_perm = util.user_perm(container['permissions'], self.uid, self.source_site)
        if self.public_request:
            if not container.get('public', False):
                self.abort(403, 'this ' + dbc_name + ' is not public')
            del container['permissions']
        elif not self.superuser_request:
            if not user_perm:
                self.abort(403, self.uid + ' does not have permissions on this ' + dbc_name)
            if min_role and users.INTEGER_ROLES[user_perm['access']] < users.INTEGER_ROLES[min_role]:
                self.abort(403, self.uid + ' does not have at least ' + min_role + ' permissions on this ' + dbc_name)
            if user_perm['access'] != 'admin': # if not admin, mask permissions of other users
                container['permissions'] = [user_perm]
        if self.request.get('paths').lower() in ('1', 'true'):
            container_dir = str(_id)[-3:] + '/' + str(_id) + '/'
            # 'files' is absent when perm_only is set or the container has no files yet
            for file_info in container.get('files', []):
                file_info['path'] = container_dir + file_info['name'] + file_info['ext']
        container['_id'] = str(container['_id'])
        return container, user_perm
    def put(self, _id):
        """
        Validate the JSON request body and apply it to the container `_id`.

        The requester needs 'admin' access when the body contains 'permissions',
        and 'rw' access otherwise.  Returns the validated body.
        """
        updates = self.validate_json_body(_id, ['project'])
        required_role = 'rw'
        if 'permissions' in updates:
            required_role = 'admin'
        # called for its access check only; the fetched document is not needed
        self._get(_id, required_role, perm_only=True)
        self.update_db(_id, updates)
        return updates

    def validate_json_body(self, _id, oid_keys=()):
        """
        Parse the request's JSON body and validate it against self.put_schema.

        _id      -- unused here; kept so the signature stays unchanged for callers
        oid_keys -- keys whose values, when present in the body, are converted
                    to bson.ObjectId

        A 'permissions' key whose value is None is dropped from the body.

        Returns the validated (and converted) body.  Aborts with 400 if the body
        is not valid JSON, fails schema validation, or holds a value under one of
        `oid_keys` that is not a valid ObjectId.
        """
        try:
            json_body = self.request.json_body
            jsonschema.validate(json_body, self.put_schema)
        except (ValueError, jsonschema.ValidationError) as e:
            self.abort(400, str(e))
        if 'permissions' in json_body and json_body['permissions'] is None:
            json_body.pop('permissions')
        for key in oid_keys:
            if key in json_body:
                try:
                    json_body[key] = bson.ObjectId(json_body[key])
                except (bson.errors.InvalidId, TypeError) as e:
                    # a malformed id is a client error, not a server error
                    self.abort(400, str(e))
        return json_body

    def update_db(self, _id, json_body):
        """Apply `json_body`, passed through util.mongo_dict(), as a `$set` update to the document `_id`."""
        selector = {'_id': _id}
        changes = {'$set': util.mongo_dict(json_body)}
        self.dbc.update(selector, changes)

    def get_file(self, cid):
        """
        Download one file of the container `cid`, or create a download ticket for it.

        The JSON request body must satisfy FILE_DOWNLOAD_SCHEMA.  The file is
        selected by 'name' and 'ext' when both are given, otherwise by 'type',
        'kinds' and 'state'.  The first matching file wins.

        On GET the file content is streamed as the response.  On any other method
        a download ticket is stored and a dict holding the download 'url' is returned.

        Aborts with 400 on an invalid request body and with 404 if no file matches;
        access errors come from self._get(), which requires 'ro' access.
        """
        try:
            file_spec = self.request.json_body
            jsonschema.validate(file_spec, FILE_DOWNLOAD_SCHEMA)
        except (ValueError, jsonschema.ValidationError) as e:
            self.abort(400, str(e))
        _id = bson.ObjectId(cid)
        container, _ = self._get(_id, 'ro')
        # The schema is an anyOf: a spec holding 'name' without 'ext' is still valid
        # when 'type', 'kinds' and 'state' are present, so only match by filename
        # when both halves of it were supplied.
        by_name = 'name' in file_spec and 'ext' in file_spec
        for file_info in container.get('files', []):
            if by_name:
                if file_info['name'] == file_spec['name'] and file_info['ext'] == file_spec['ext']:
                    break
            else:
                if file_info['type'] == file_spec['type'] and file_info['kinds'] == file_spec['kinds'] and file_info['state'] == file_spec['state']:
                    break
        else:
            self.abort(404, 'no such file')
        filename = file_info['name'] + file_info['ext']
        filepath = os.path.join(self.app.config['data_path'], str(_id)[-3:] + '/' + str(_id), filename)
        if self.request.method == 'GET':
            self.response.app_iter = open(filepath, 'rb')
            self.response.headers['Content-Length'] = str(file_info['size']) # must be set after setting app_iter
            self.response.headers['Content-Type'] = 'application/octet-stream'
            self.response.headers['Content-Disposition'] = 'attachment; filename=%s' % str(filename)
        else:
            ticket = util.download_ticket('single', filepath, filename, file_info['size'])
            tkt_id = self.app.db.downloads.insert(ticket)
            return {'url': self.uri_for('download', _full=True, ticket=tkt_id)}

    def put_file(self, cid=None):
        """
        Receive a targeted processor or user upload.

        Accepts a multipart/form-data request that contains a 'metadata' field (JSON
        satisfying FILE_UPLOAD_SCHEMA) and one file part per listed file, named after
        the file's name + ext.
        This route is used to add a file to an existing container, not for creating new containers.
        This upload is different from the main PUT route, because this does not update the primary
        metadata, nor does it try to determine where to place the file.  It always gets placed in
        the current container.

        Each file is written to a temporary directory below the data path while its
        SHA-1 is computed, and is handed to util.insert_file() once the digest matches
        the one given in the metadata.

        Aborts with 400 on a wrong content type, invalid metadata, an unsafe file name,
        a missing file part or a digest mismatch, and with whatever status
        util.insert_file() reports if that is not 200.
        """
        # TODO; revise how engine's upload their data to be compatible with the put_attachment fxn
        def receive_stream_and_validate(stream, digest, filename):
            # FIXME pull this out to also be used from core.Core.put()
            # Writes `stream` to `filename` inside the temporary directory in 1 MiB
            # chunks and aborts unless its SHA-1 hex digest equals `digest`.
            hash_ = hashlib.sha1()
            filepath = os.path.join(tempdir_path, filename)
            with open(filepath, 'wb') as fd:
                while True:
                    chunk = stream.read(2**20)
                    if not chunk:
                        break
                    hash_.update(chunk)
                    fd.write(chunk)
            if hash_.hexdigest() != digest:
                self.abort(400, 'Content-MD5 mismatch.')
            return filepath

        if self.request.content_type != 'multipart/form-data':  # do not accept the OTHER sort of multipart
            self.abort(400, 'content-type must be "multipart/form-data"')
        try:
            metadata = json.loads(self.request.get('metadata'))
            jsonschema.validate(metadata, FILE_UPLOAD_SCHEMA)
        except (ValueError, jsonschema.ValidationError) as e:
            self.abort(400, str(e))
        if self.public_request: # processor upload
            _id = None
            # FIXME check that processor is legit
        elif cid is not None:   # targeted user upload
            _id = bson.ObjectId(cid)
            self._get(_id, 'rw')  # access check only
        else:                   # sortable user upload
            # no target container; previously `_id` was left unbound on this path.
            # NOTE(review): assumes util.insert_file() treats None here the same way
            # as for processor uploads -- confirm.
            _id = None
            # FIXME: pre-parse file, reject if unparsable
        data_path = self.app.config['data_path']
        quarantine_path = self.app.config['quarantine_path']
        with tempfile.TemporaryDirectory(prefix='.tmp', dir=data_path) as tempdir_path:
            for file_info in metadata['files']:
                filename = file_info['name'] + file_info['ext']
                # the name comes from the client; never let it point outside the temp dir
                if filename in ('', '.', '..') or os.path.basename(filename) != filename:
                    self.abort(400, 'invalid file name')
                field_storage_obj = self.request.POST.get(filename)
                if getattr(field_storage_obj, 'file', None) is None:
                    self.abort(400, 'no file part named %s' % filename)
                filepath = receive_stream_and_validate(field_storage_obj.file, file_info['sha1'], filename)
                log.info('Received    %s [%s] from %s' % (filename, util.hrsize(file_info['size']), self.request.user_agent)) # FIXME: user_agent or uid
                status, detail = util.insert_file(self.dbc, _id, file_info, filepath, file_info['sha1'], data_path, quarantine_path)
                if status != 200:
                    self.abort(status, detail)

    def put_attachment(self, cid):
        """
        Receive a targeted user upload of an attachment.

        Attachments are different from files, in that they are not 'research ready'.  Attachments
        represent other documents that are generally not usable by the engine; documents like
        consent forms, pen/paper questionnaires, study recruiting materials, etc.

        Internally, attachments are distinguished from files because of what metadata is
        required.  Attachments really only need a 'kinds' and 'type'.  We don't expect iteration over
        an attachment in a way that would require tracking 'state'.

        The multipart/form-data request must hold a 'metadata' file part (JSON list of
        file descriptions), a 'sha' file part (JSON list of {'name', 'sha1'} entries) and
        one file part per described file, named after the file's name + ext.

        Aborts with 400 on a wrong content type, a missing or malformed part, an unsafe
        file name, a file that is not listed in the sha1s, a digest mismatch, or a
        failed insert.  Requires 'rw' access on the container.
        """
        # TODO read self.request.body, using '------WebKitFormBoundary' as divider
        # first line is 'content-disposition' line, extract filename
        # second line is content-type, determine how to write to a file, as bytes or as string
        # third line is just a separator, useless
        def read_json_part(part_name):
            # Returns the decoded JSON content of the file part `part_name`.
            part = self.request.POST.get(part_name)
            if getattr(part, 'file', None) is None:
                self.abort(400, 'missing "%s" part' % part_name)
            try:
                return json.loads(part.file.read())
            except ValueError as e:
                self.abort(400, str(e))

        if self.request.content_type != 'multipart/form-data':
            self.abort(400, 'content-type must be "multipart/form-data"')
        # TODO: metadata validation
        _id = bson.ObjectId(cid)
        self._get(_id, 'rw')  # access check only
        data_path = self.app.config['data_path']
        quarantine_path = self.app.config['quarantine_path']
        metadata = read_json_part('metadata')
        # expected digest per file name; the first entry wins on duplicate names
        expected_sha1 = {}
        for entry in read_json_part('sha'):
            expected_sha1.setdefault(entry.get('name'), entry.get('sha1'))
        with tempfile.TemporaryDirectory(prefix='.tmp', dir=data_path) as tempdir_path:
            for finfo in metadata:
                fname = (finfo.get('name') or '') + (finfo.get('ext') or '')  # finfo['ext'] will always be empty
                # the name comes from the client; never let it point outside the temp dir
                if fname in ('', '.', '..') or os.path.basename(fname) != fname:
                    self.abort(400, 'invalid file name')
                if fname not in expected_sha1:
                    self.abort(400, '%s is not listed in the sha1s' % fname)
                fpart = self.request.POST.get(fname)
                if getattr(fpart, 'file', None) is None:
                    self.abort(400, 'no file part named %s' % fname)
                fobj = fpart.file
                fhash = hashlib.sha1()
                filepath = os.path.join(tempdir_path, fname)
                with open(filepath, 'wb') as fd:
                    while True:
                        chunk = fobj.read(2**20)
                        if not chunk:
                            break
                        fhash.update(chunk)
                        fd.write(chunk)
                sha1 = expected_sha1[fname]
                if fhash.hexdigest() != sha1:
                    self.abort(400, 'Content-MD5 mismatch %s vs %s' % (fhash.hexdigest(), sha1))
                finfo['sha1'] = sha1
                status, detail = util.insert_file(self.dbc, _id, finfo, filepath, sha1, data_path, quarantine_path, flavor='attachment')
                if status != 200:
                    self.abort(400, 'upload failed')

    def get_tile(self, cid):
        """
        Fetch info about a tiled tiff, or retrieve a specific tile.

        Looks up the file of container `cid` whose 'kinds' is ['montage'] and whose
        'ext' is '.tiff'.  Without all of the request parameters 'z', 'x' and 'y' the
        result of util.get_info() for that file is returned; with them, the tile
        returned by util.get_tile() is written to the response as 'image/png'
        (nothing is written if it is empty).

        Aborts with 404 if the container has no such file and with 400 if 'z', 'x'
        or 'y' is not an integer.

        NOTE(review): unlike the other handlers here, this one does not go through
        self._get(), so no permission check is performed -- confirm this is intended.
        """
        _id = bson.ObjectId(cid)
        # Both conditions must hold for the SAME array element, and the positional
        # projection needs a single condition on the array: hence $elemMatch.
        montage_info = self.dbc.find_one(
            {
                '_id': _id,
                'files': {'$elemMatch': {'kinds': ['montage'], 'ext': '.tiff'}},
            },
            ['files.$'],
        )
        if not montage_info:
            self.abort(404, 'montage tiff not found')
        montage = montage_info['files'][0]
        fn = montage['name'] + montage['ext']
        # built from str(_id), like the paths in the other handlers
        fp = os.path.join(self.app.config['data_path'], str(_id)[-3:], str(_id), fn)
        z = self.request.get('z')
        x = self.request.get('x')
        y = self.request.get('y')
        if not (z and x and y):
            return util.get_info(fp)
        try:
            z, x, y = int(z), int(x), int(y)
        except ValueError:
            self.abort(400, 'z, x and y must be integers')
        self.response.content_type = 'image/png'
        tile = util.get_tile(fp, z, x, y)
        if tile:
            self.response.write(tile)

    def get_attachment(self, cid):
        """
        Download one attachment, or create a download ticket for it.

        The attachment is selected by the request parameter 'name', which must equal
        the name + ext of one of the container's attachments.  On GET the content is
        streamed as the response; on any other method a download ticket is stored
        and a dict holding the download 'url' is returned.

        Aborts with 404 if the container has no such attachment.  Requires 'ro'
        access on the container.
        """
        fname = self.request.get('name')
        _id = bson.ObjectId(cid)
        container, _ = self._get(_id, 'ro')
        # a container without any attachments simply has no match
        for a_info in container.get('attachments') or []:
            if (a_info['name'] + a_info['ext']) == fname:
                break
        else:
            self.abort(404, 'no such file')
        fpath = os.path.join(self.app.config['data_path'], str(_id)[-3:] + '/' + str(_id), fname)
        if self.request.method == 'GET':
            self.response.app_iter = open(fpath, 'rb')
            self.response.headers['Content-Length'] = str(a_info['size']) # must be set after setting app_iter
            self.response.headers['Content-Type'] = 'application/octet-stream'
        else:
            ticket = util.download_ticket('single', fpath, fname, a_info['size'])
            tkt_id = self.app.db.downloads.insert(ticket)
            return {'url': self.uri_for('download', _full=True, ticket=tkt_id)}

    def delete_attachment(self, cid):
        """
        Delete one attachment: its database entry and its file on disk.

        The attachment is selected by the request parameter 'name', which must equal
        the name + ext of one of the container's attachments.

        Aborts with 404 if the container has no such attachment.  Requires 'rw'
        access on the container.  A missing database entry or file is only logged.
        """
        fname = self.request.get('name')
        _id = bson.ObjectId(cid)
        container, _ = self._get(_id, 'rw')
        # a container without any attachments simply has no match
        for a_info in container.get('attachments') or []:
            if (a_info['name'] + a_info['ext']) == fname:
                break
        else:
            self.abort(404, 'no such file')
        fpath = os.path.join(self.app.config['data_path'], str(_id)[-3:] + '/' + str(_id), fname)
        # Identify the entry the same way it was found above, by its separate 'name'
        # and 'ext' fields; matching 'name' against name + ext misses every
        # attachment whose 'ext' is not empty.
        entry = {'name': a_info['name'], 'ext': a_info['ext']}
        success = self.dbc.update(
            {'_id': _id, 'attachments': {'$elemMatch': entry}},
            {'$pull': {'attachments': entry}},
        )
        if not success['updatedExisting']:
            log.info('could not remove database entry.')
        if os.path.exists(fpath):
            os.remove(fpath)
            log.info('removed file %s' % fpath)
        else:
            log.info('could not remove file, file %s does not exist' % fpath)