# @author: Gunnar Schaefer, Kevin S. Hahn import logging log = logging.getLogger('nimsapi') import os import re import copy import shutil import difflib import hashlib import tarfile import datetime import markdown import nimsdata import base import tempdir as tempfile def hrsize(size): if size < 1000: return '%d%s' % (size, 'B') for suffix in 'KMGTPEZY': size /= 1024. if size < 10.: return '%.1f%s' % (size, suffix) if size < 1000.: return '%.0f%s' % (size, suffix) return '%.0f%s' % (size, 'Y') def sort_file(db, filepath, digest, store_path): filename = os.path.basename(filepath) try: log.info('Parsing %s' % filename) dataset = nimsdata.parse(filepath) except nimsdata.NIMSDataError: return 415, 'Can\'t parse %s' % filename else: log.info('Sorting %s' % filename) # TODO: bail, if any required params are not set acquisition_spec, acquisition_id = update_db(db, dataset) acquisition_path = os.path.join(store_path, acquisition_id[-3:] + '/' + acquisition_id) if not os.path.exists(acquisition_path): os.makedirs(acquisition_path) file_spec = copy.deepcopy(acquisition_spec) file_spec['files'] = {'$elemMatch': {'type': dataset.nims_file_type, 'state': dataset.nims_file_state, 'kinds': dataset.nims_file_kinds}} file_info = dict( name=dataset.nims_file_name, ext=dataset.nims_file_ext, size=os.path.getsize(filepath), sha1=digest, #hash=dataset.nims_hash, TODO: datasets should be able to hash themselves (but not here) type=dataset.nims_file_type, kinds=dataset.nims_file_kinds, state=dataset.nims_file_state, ) success = db.acquisitions.update(file_spec, {'$set': {'files.$': file_info}}) if not success['updatedExisting']: db.acquisitions.update(acquisition_spec, {'$push': {'files': file_info}}) shutil.move(filepath, acquisition_path + '/' + dataset.nims_file_name + dataset.nims_file_ext) log.debug('Done %s' % filename) return 200, 'Success' def update_db(db, dataset): existing_group_ids = [g['_id'] for g in db.groups.find(None, ['_id'])] group_id_matches = difflib.get_close_matches(dataset.nims_group_id, existing_group_ids, cutoff=0.8) if len(group_id_matches) == 1: group_id = group_id_matches[0] project_name = dataset.nims_project or 'untitled' else: group_id = 'unknown' project_name = dataset.nims_group_id + ('/' + dataset.nims_project if dataset.nims_project else '') group = db.groups.find_one({'_id': group_id}) project_spec = {'gid': group['_id'], 'name': project_name} project = db.projects.find_and_modify( project_spec, {'$setOnInsert': dict(group=group.get('name'), permissions=group['roles'], files=[])}, upsert=True, new=True, ) # project timestamp and timezone are inherited from latest acquisition if dataset.nims_metadata_status and project.get('timestamp', dataset.nims_timestamp - datetime.timedelta(1)) < dataset.nims_timestamp: db.projects.update(project_spec, {'$set': dict(timestamp=dataset.nims_timestamp, timezone=dataset.nims_timezone)}) session_spec = {'uid': dataset.nims_session_id} session = db.sessions.find_and_modify( session_spec, { '$setOnInsert': dict(project=project['_id'], permissions=project['permissions'], files=[]), '$set': entity_metadata(dataset, dataset.session_properties, session_spec), # session_spec ensures non-empty $set '$addToSet': {'domains': dataset.nims_file_domain}, }, upsert=True, new=True, ) # session timestamp and timezone are inherited from earliest acquisition if dataset.nims_metadata_status and session.get('timestamp', dataset.nims_timestamp + datetime.timedelta(1)) > dataset.nims_timestamp: db.sessions.update(session_spec, {'$set': dict(timestamp=dataset.nims_timestamp, timezone=dataset.nims_timezone)}) acquisition_spec = {'uid': dataset.nims_acquisition_id} acquisition = db.acquisitions.find_and_modify( acquisition_spec, { '$setOnInsert': dict(session=session['_id'], permissions=session['permissions'], files=[]), '$set': entity_metadata(dataset, dataset.acquisition_properties, acquisition_spec), # acquisition_spec ensures non-empty $set '$addToSet': {'types': {'$each': [{'domain': dataset.nims_file_domain, 'kind': kind} for kind in dataset.nims_file_kinds]}}, }, upsert=True, new=True, ) return acquisition_spec, str(acquisition['_id']) def entity_metadata(dataset, properties, metadata={}, parent_key=''): metadata = copy.deepcopy(metadata) if dataset.nims_metadata_status is not None: parent_key = parent_key and parent_key + '.' for key, attributes in properties.iteritems(): if attributes['type'] == 'object': metadata.update(entity_metadata(dataset, attributes['properties'], parent_key=key)) else: value = getattr(dataset, attributes['field']) if 'field' in attributes else None if value or value == 0: # drop Nones and empty iterables metadata[parent_key + key] = value return metadata class Core(base.RequestHandler): """/nimsapi """ def head(self): """Return 200 OK.""" self.response.set_status(200) def get(self): """Return API documentation""" resources = """ Resource | Description :--------------------------------------------------------|:----------------------- nimsapi/login | user login [(nimsapi/sites)] | local and remote sites [(nimsapi/roles)] | user roles nimsapi/upload | upload nimsapi/download | download [(nimsapi/log)] | log messages [(nimsapi/search)] | search [(nimsapi/users)] | list of users [(nimsapi/users/count)] | count of users [(nimsapi/users/schema)] | schema for single user nimsapi/users/*<uid>* | details for user *<uid>* [(nimsapi/groups)] | list of groups [(nimsapi/groups/count)] | count of groups [(nimsapi/groups/schema)] | schema for single group nimsapi/groups/*<gid>* | details for group *<gid>* [(nimsapi/projects)] | list of projects [(nimsapi/projects/count)] | count of projects [(nimsapi/projects/schema)] | schema for single project nimsapi/projects/*<pid>* | details for project *<pid>* nimsapi/projects/*<pid>*/sessions | list sessions for project *<pid>* [(nimsapi/sessions/count)] | count of sessions [(nimsapi/sessions/schema)] | schema for single session nimsapi/sessions/*<sid>* | details for session *<sid>* nimsapi/sessions/*<sid>*/move | move session *<sid>* to a different project nimsapi/sessions/*<sid>*/acquisitions | list acquisitions for session *<sid>* [(nimsapi/acquisitions/count)] | count of acquisitions [(nimsapi/acquisitions/schema)] | schema for single acquisition nimsapi/acquisitions/*<aid>* | details for acquisition *<aid>* [(nimsapi/collections)] | list of collections [(nimsapi/collections/count)] | count of collections [(nimsapi/collections/schema)] | schema for single collection nimsapi/collections/*<cid>* | details for collection *<cid>* nimsapi/collections/*<cid>*/sessions | list of sessions for collection *<cid>* nimsapi/collections/*<cid>*/acquisitions?session=*<sid>* | list of acquisitions for collection *<cid>*, optionally restricted to session *<sid>* """ if self.debug: resources = re.sub(r'\[\((.*)\)\]', r'[\1](\1?user=%s)' % self.uid, resources) else: resources = re.sub(r'\[\((.*)\)\]', r'[\1](\1)', resources) resources = resources.replace('<', '<').replace('>', '>').strip() self.response.headers['Content-Type'] = 'text/html; charset=utf-8' self.response.write('<html>\n') self.response.write('<head>\n') self.response.write('<title>NIMSAPI</title>\n') self.response.write('<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">\n') self.response.write('<style type="text/css">\n') self.response.write('table {width:0%; border-width:1px; padding: 0;border-collapse: collapse;}\n') self.response.write('table tr {border-top: 1px solid #b8b8b8; background-color: white; margin: 0; padding: 0;}\n') self.response.write('table tr:nth-child(2n) {background-color: #f8f8f8;}\n') self.response.write('table thead tr :last-child {width:100%;}\n') self.response.write('table tr th {font-weight: bold; border: 1px solid #b8b8b8; background-color: #cdcdcd; margin: 0; padding: 6px 13px;}\n') self.response.write('table tr th {font-weight: bold; border: 1px solid #b8b8b8; background-color: #cdcdcd; margin: 0; padding: 6px 13px;}\n') self.response.write('table tr td {border: 1px solid #b8b8b8; margin: 0; padding: 6px 13px;}\n') self.response.write('table tr th :first-child, table tr td :first-child {margin-top: 0;}\n') self.response.write('table tr th :last-child, table tr td :last-child {margin-bottom: 0;}\n') self.response.write('</style>\n') self.response.write('</head>\n') self.response.write('<body style="min-width:900px">\n') if self.debug and not self.request.get('user', None): self.response.write('<form name="username" action="" method="get">\n') self.response.write('Username: <input type="text" name="user">\n') self.response.write('<input type="submit" value="Generate Custom Links">\n') self.response.write('</form>\n') self.response.write(markdown.markdown(resources, ['extra'])) self.response.write('</body>\n') self.response.write('</html>\n') def put(self): # TODO add security: either authenticated user or machine-to-machine CRAM if 'Content-MD5' not in self.request.headers: self.abort(400, 'Request must contain a valid "Content-MD5" header.') filename = self.request.get('filename', 'upload') with tempfile.TemporaryDirectory(prefix='.tmp', dir=self.app.config['store_path']) as tempdir_path: hash_ = hashlib.sha1() filepath = os.path.join(tempdir_path, filename) with open(filepath, 'wb') as fd: for chunk in iter(lambda: self.request.body_file.read(2**20), ''): hash_.update(chunk) fd.write(chunk) if hash_.hexdigest() != self.request.headers['Content-MD5']: self.abort(400, 'Content-MD5 mismatch.') if not tarfile.is_tarfile(filepath): self.abort(415, 'Only tar files are accepted.') log.info('Received %s [%s] from %s' % (filename, hrsize(self.request.content_length), self.request.user_agent)) status, detail = sort_file(self.app.db, filepath, hash_.hexdigest(), self.app.config['store_path']) if status != 200: self.abort(status, detail) def download(self): if self.request.method == 'OPTIONS': return self.options() paths = [] symlinks = [] for js_id in self.request.get('id', allow_multiple=True): type_, _id = js_id.split('_') _idpaths, _idsymlinks = resource_types[type_].download_info(_id) paths += _idpaths symlinks += _idsymlinks def login(self): """Return details for the current User.""" if self.request.method == 'OPTIONS': return self.options() log.debug(self.uid + ' has logged in') return self.app.db.users.find_and_modify({'_id': self.uid}, {'$inc': {'logins': 1}}, fields=['firstname', 'lastname', 'superuser']) def sites(self): """Return local and remote sites.""" if self.request.method == 'OPTIONS': return self.options() if self.request.get('all').lower() in ('1', 'true'): remotes = list(self.app.db.remotes.find(None, ['name'])) else: remotes = (self.app.db.users.find_one({'_id': self.uid}, ['remotes']) or {}).get('remotes', []) return dict(local={'_id': self.app.config['site_id'], 'name': self.app.config['site_name']}, remotes=remotes) def roles(self): """Return the list of user roles.""" if self.request.method == 'OPTIONS': return self.options() return base.ROLES def log(self): """Return logs.""" if self.request.method == 'OPTIONS': return self.options() try: logs = open(self.app.config['log_path']).readlines() except IOError as e: if 'Permission denied' in e: body_template = '${explanation}<br /><br />${detail}<br /><br />${comment}' comment = 'To fix permissions, run the following command: chmod o+r ' + self.app.config['log_path'] self.abort(500, detail=str(e), comment=comment, body_template=body_template) else: # file does not exist self.abort(500, 'log_path variable misconfigured or not set') try: n = int(self.request.get('n', 10000)) except: self.abort(400, 'n must be an integer') return [line.strip() for line in reversed(logs) if re.match('[-:0-9 ]{18} +nimsapi:(?!.*[/a-z]*/log )', line)][:n] search_schema = { 'title': 'Search', 'type': 'array', 'items': [ { 'title': 'Session', 'type': 'array', 'items': [ { 'title': 'Date', 'type': 'date', 'field': 'session.date', }, { 'title': 'Subject', 'type': 'array', 'items': [ { 'title': 'Name', 'type': 'array', 'items': [ { 'title': 'First', 'type': 'string', 'field': 'session.subject.firstname', }, { 'title': 'Last', 'type': 'string', 'field': 'session.subject.lastname', }, ], }, { 'title': 'Date of Birth', 'type': 'date', 'field': 'session.subject.dob', }, { 'title': 'Sex', 'type': 'string', 'enum': ['male', 'female'], 'field': 'session.subject.sex', }, ], }, ], }, { 'title': 'MR', 'type': 'array', 'items': [ { 'title': 'Scan Type', 'type': 'string', 'enum': ['anatomical', 'fMRI', 'DTI'], 'field': 'acquisition.type', }, { 'title': 'Echo Time', 'type': 'number', 'field': 'acquisition.echo_time', }, { 'title': 'Size', 'type': 'array', 'items': [ { 'title': 'X', 'type': 'integer', 'field': 'acquisition.size.x', }, { 'title': 'Y', 'type': 'integer', 'field': 'acquisition.size.y', }, ], }, ], }, { 'title': 'EEG', 'type': 'array', 'items': [ { 'title': 'Electrode Count', 'type': 'integer', 'field': 'acquisition.electrode_count', }, ], }, ], } def search(self): """Search.""" if self.request.method == 'OPTIONS': return self.options() elif self.request.method == 'GET': return self.search_schema else: pass