Skip to content
Snippets Groups Projects
Commit 22e1ae28 authored by Kevin S. Hahn's avatar Kevin S. Hahn
Browse files

reduce duplicate code in targetted uploads

parent ba9f429b
No related branches found
No related tags found
No related merge requests found
...@@ -207,18 +207,25 @@ class Container(base.RequestHandler): ...@@ -207,18 +207,25 @@ class Container(base.RequestHandler):
tkt_id = self.app.db.downloads.insert(ticket) tkt_id = self.app.db.downloads.insert(ticket)
return {'url': self.uri_for('download', fn=filename, _full=True, ticket=tkt_id)} return {'url': self.uri_for('download', fn=filename, _full=True, ticket=tkt_id)}
def put_file(self, cid=None): def _put(self, cid=None, flavor='file'):
""" """
Receive a targeted processor or user upload. Receive a targeted processor or user upload for an attachment or file.
This PUT route is used to add a file to an existing container, not for creating new containers.
This upload is different from the main PUT route, because this does not update the main container
metadata, nor does it try to parse the file to determine sorting information. The uploaded file(s)
will always get uploaded to the specificied container.
Accepts a multipart request that contains json in first part, and data in second part. Accepts a multipart request that contains the following form fields:
This POST route is used to add a file to an existing container, not for creating new containers. - 'metadata': list of dicts, each dict contains metadata for a file
This upload is different from the main PUT route, because this does not update the primary - filename: file object
metadata, nor does it try to determine where to place the file. It always gets placed in - 'sha': list of dicts, each dict contains 'name' and 'sha1'.
the current container.
""" """
# TODO; revise how engine's upload their data to be compatible with the put_attachment fxn # TODO read self.request.body, using '------WebKitFormBoundary' as divider
# first line is 'content-disposition' line, extract filename
# second line is content-type, determine how to write to a file, as bytes or as string
# third linedata_path = self.app.config['data_path'], just a separator, useless
def receive_stream_and_validate(stream, digest, filename): def receive_stream_and_validate(stream, digest, filename):
# FIXME pull this out to also be used from core.Core.put() and also replace the duplicated code below # FIXME pull this out to also be used from core.Core.put() and also replace the duplicated code below
hash_ = hashlib.sha1() hash_ = hashlib.sha1()
...@@ -231,61 +238,6 @@ class Container(base.RequestHandler): ...@@ -231,61 +238,6 @@ class Container(base.RequestHandler):
self.abort(400, 'Content-MD5 mismatch.') self.abort(400, 'Content-MD5 mismatch.')
return filepath return filepath
if cid is None: # sortable user upload
pass
else: # targeted upload
pass
if self.request.content_type != 'multipart/form-data': # do not accept the OTHER sort of multipart
self.abort(400, 'content-type must be "multipart/form-data"')
try:
metadata = json.loads(self.request.get('metadata'))
jsonschema.validate(metadata, FILE_UPLOAD_SCHEMA)
except (ValueError, jsonschema.ValidationError) as e:
self.abort(400, str(e))
if self.public_request: # processor upload
_id = None
# FIXME check that processor is legit
elif cid is not None: # targeted user upload
_id = bson.ObjectId(cid)
container, _ = self._get(_id, 'rw')
else: # sortable user upload
pass
# FIXME: pre-parse file, reject if unparsable
data_path = self.app.config['data_path']
quarantine_path = self.app.config['quarantine_path']
with tempfile.TemporaryDirectory(prefix='.tmp', dir=data_path) as tempdir_path:
for file_info in metadata['files']:
hash_ = hashlib.sha1()
filename = file_info['name'] + file_info['ext']
filepath = os.path.join(tempdir_path, filename)
field_storage_obj = self.request.POST.get(filename)
with open(filepath, 'wb') as fd:
for chunk in iter(lambda: field_storage_obj.file.read(2**20), ''):
hash_.update(chunk)
fd.write(chunk)
if hash_.hexdigest() != file_info['sha1']:
self.abort(400, 'Content-MD5 mismatch.')
log.info('Received %s [%s] from %s' % (filename, util.hrsize(file_info['size']), self.request.user_agent)) # FIXME: user_agent or uid
status, detail = util.insert_file(self.dbc, _id, file_info, filepath, file_info['sha1'], data_path, quarantine_path)
if status != 200:
self.abort(status, detail)
def put_attachment(self, cid):
"""
Recieve a targetted user upload of an attachment.
Attachments are different from files, in that they are not 'research ready'. Attachments
represent other documents that are generally not useable by the engine; documents like
consent forms, pen/paper questionnaires, study recruiting materials, etc.
Internally, attachments are distinguished from files because of what metadata is
required. Attachments really only need a 'kinds' and 'type'. We don't expect iteration over
an attachment in a way that would require tracking 'state'.
"""
# TODO read self.request.body, using '------WebKitFormBoundary' as divider
# first line is 'content-disposition' line, extract filename
# second line is content-type, determine how to write to a file, as bytes or as string
# third linedata_path = self.app.config['data_path'], just a separator, useless
if self.request.content_type != 'multipart/form-data': if self.request.content_type != 'multipart/form-data':
self.abort(400, 'content-type must be "multipart/form-data"') self.abort(400, 'content-type must be "multipart/form-data"')
# TODO: metadata validation # TODO: metadata validation
...@@ -318,13 +270,21 @@ class Container(base.RequestHandler): ...@@ -318,13 +270,21 @@ class Container(base.RequestHandler):
self.abort(400, 'Content-MD5 mismatch %s vs %s' % (fhash.hexdigest(), s.get('sha1'))) self.abort(400, 'Content-MD5 mismatch %s vs %s' % (fhash.hexdigest(), s.get('sha1')))
else: else:
finfo['sha1'] = s.get('sha1') finfo['sha1'] = s.get('sha1')
status, detail = util.insert_file(self.dbc, _id, finfo, filepath, s.get('sha1'), data_path, quarantine_path, flavor='attachment') status, detail = util.insert_file(self.dbc, _id, finfo, filepath, s.get('sha1'), data_path, quarantine_path, flavor=flavor)
if status != 200: if status != 200:
self.abort(400, 'upload failed') self.abort(400, 'upload failed')
break break
else: else:
self.abort(400, '%s is not listed in the sha1s' % fname) self.abort(400, '%s is not listed in the sha1s' % fname)
def put_file(self, cid=None):
"""Receive a targeted upload of a dataset file."""
self._put(cid, flavor='file')
def put_attachment(self, cid):
"""Recieve a targetted upload of an attachment file."""
self._put(cid, flavor='attachment')
def get_tile(self, cid): def get_tile(self, cid):
"""fetch info about a tiled tiff, or retrieve a specific tile.""" """fetch info about a tiled tiff, or retrieve a specific tile."""
_id = bson.ObjectId(cid) _id = bson.ObjectId(cid)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment