From a678f4a2b9bf57ff5f8bf15bdd998972d5a8d38e Mon Sep 17 00:00:00 2001 From: Renzo Frigato <rfrigato@stanford.edu> Date: Mon, 29 Feb 2016 17:09:42 -0800 Subject: [PATCH] use schemas as they are served from the API --- api/api.py | 10 ++- api/config.py | 58 +++++++++++++ api/download.py | 3 +- api/handlers/containerhandler.py | 7 +- api/handlers/grouphandler.py | 13 ++- api/handlers/listhandler.py | 8 +- api/handlers/schemahandler.py | 21 +++++ api/handlers/userhandler.py | 13 ++- api/root.py | 3 + api/upload.py | 6 +- api/util.py | 7 ++ api/validators.py | 134 +++++++++++++------------------ 12 files changed, 187 insertions(+), 96 deletions(-) create mode 100644 api/handlers/schemahandler.py diff --git a/api/api.py b/api/api.py index 11589df1..9f029a00 100644 --- a/api/api.py +++ b/api/api.py @@ -17,6 +17,7 @@ from handlers import grouphandler from handlers import containerhandler from handlers import collectionshandler from handlers import searchhandler +from handlers import schemahandler log = config.log @@ -56,12 +57,14 @@ routing_regexes = { # any character allowed except '/'' 'tag_re': '[^/]{3,24}', # filename regex - # length between 3 and 60 characters # any character allowed except '/' 'filename_re': '[^/]+', # note id regex # hexadecimal string exactly of length 24 - 'note_id_re': '[0-9a-f]{24}' + 'note_id_re': '[0-9a-f]{24}', + # schema regex + # example: schema_path/schema.json + 'schema_re': '[^/.]{3,60}/[^/.]{3,60}\.json' } def _format(route): @@ -134,7 +137,8 @@ routes = [ webapp2.Route(_format(r'/api/<par_cont_name:groups>/<par_id:{group_id_re}>/<cont_name:projects>'), containerhandler.ContainerHandler, name='cont_sublist_groups', handler_method='get_all', methods=['GET']), webapp2.Route(_format(r'/api/<par_cont_name:{cont_name_re}>/<par_id:{cid_re}>/<cont_name:{cont_name_re}>'), containerhandler.ContainerHandler, name='cont_sublist', handler_method='get_all', methods=['GET']), webapp2.Route(_format(r'/api/search'), searchhandler.SearchHandler, name='es_proxy', methods=['GET']), - webapp2.Route(_format(r'/api/search/<cont_name:{cont_name_re}>'), searchhandler.SearchHandler, name='es_proxy', methods=['GET']), + webapp2.Route(_format(r'/api/search/<cont_name:{cont_name_re}>'), searchhandler.SearchHandler, name='es_proxy_1', methods=['GET']), + webapp2.Route(_format(r'/api/schemas/<schema:{schema_re}>'), schemahandler.SchemaHandler, name='schemas', methods=['GET']), ] diff --git a/api/config.py b/api/config.py index 6ca252b2..ad3c92e3 100644 --- a/api/config.py +++ b/api/config.py @@ -1,5 +1,6 @@ import os import copy +import glob import logging import pymongo import datetime @@ -46,6 +47,7 @@ DEFAULT_CONFIG = { 'db_connect_timeout': '2000', 'db_server_selection_timeout': '3000', 'data_path': os.path.join(os.path.dirname(__file__), '../persistent/data'), + 'schema_path': 'api/schemas', 'elasticsearch_host': 'localhost:9200', } } @@ -87,6 +89,62 @@ log.debug(str(db)) es = elasticsearch.Elasticsearch([__config['persistent']['elasticsearch_host']]) +# validate the lists of json schemas +schema_path = __config['persistent']['schema_path'] + +expected_mongo_schemas = set([ + 'acquisition.json', + 'collection.json', + 'container.json', + 'file.json', + 'group.json', + 'note.json', + 'permission.json', + 'project.json', + 'session.json', + 'subject.json', + 'user.json', + 'avatars.json', + 'tag.json' +]) +expected_input_schemas = set([ + 'acquisition.json', + 'collection.json', + 'container.json', + 'file.json', + 'group.json', + 'note.json', + 'packfile.json', + 'permission.json', + 
'project.json', + 'session.json', + 'subject.json', + 'user.json', + 'avatars.json', + 'download.json', + 'tag.json', + 'enginemetadata.json', + 'uploader.json', + 'reaper.json' +]) +mongo_schemas = set() +input_schemas = set() +# validate and cache schemas at start time +for schema_filepath in glob.glob(schema_path + '/mongo/*.json'): + schema_file = os.path.basename(schema_filepath) + mongo_schemas.add(schema_file) + with open(schema_filepath, 'rU') as f: + pass + +assert mongo_schemas == expected_mongo_schemas, '{} is different from {}'.format(mongo_schemas, expected_mongo_schemas) + +for schema_filepath in glob.glob(schema_path + '/input/*.json'): + schema_file = os.path.basename(schema_filepath) + input_schemas.add(schema_file) + with open(schema_filepath, 'rU') as f: + pass + +assert input_schemas == expected_input_schemas, '{} is different from {}'.format(input_schemas, expected_input_schemas) def initialize_db(): log.info('Initializing database, creating indexes') diff --git a/api/download.py b/api/download.py index 6dcb7750..6955c742 100644 --- a/api/download.py +++ b/api/download.py @@ -198,7 +198,8 @@ class Download(base.RequestHandler): config.db.projects.update_one({'_id': project_id}, {'$inc': {'counter': 1}}) else: req_spec = self.request.json_body - validator = validators.payload_from_schema_file(self, 'download.json') + payload_schema_uri = util.schema_uri(self, 'input', 'download.json') + validator = validators.from_schema_path(payload_schema_uri) validator(req_spec, 'POST') log.debug(json.dumps(req_spec, sort_keys=True, indent=4, separators=(',', ': '))) return self._preflight_archivestream(req_spec) diff --git a/api/handlers/containerhandler.py b/api/handlers/containerhandler.py index 5112daee..5c789f78 100644 --- a/api/handlers/containerhandler.py +++ b/api/handlers/containerhandler.py @@ -306,8 +306,10 @@ class ContainerHandler(base.RequestHandler): def _get_validators(self): - mongo_validator = validators.mongo_from_schema_file(self.config.get('storage_schema_file')) - payload_validator = validators.payload_from_schema_file(self.config.get('payload_schema_file')) + mongo_schema_uri = util.schema_uri(self, 'mongo', self.config.get('storage_schema_file')) + mongo_validator = validators.decorator_from_schema_path(mongo_schema_uri) + payload_schema_uri = util.schema_uri(self, 'input', self.config.get('payload_schema_file')) + payload_validator = validators.from_schema_path(payload_schema_uri) return mongo_validator, payload_validator def _get_parent_container(self, payload): @@ -329,7 +331,6 @@ class ContainerHandler(base.RequestHandler): log.debug(parent_container) return parent_container, parent_id_property - def _get_container(self, _id): try: container = self.storage.get_container(_id) diff --git a/api/handlers/grouphandler.py b/api/handlers/grouphandler.py index 62344c1c..8e709e72 100644 --- a/api/handlers/grouphandler.py +++ b/api/handlers/grouphandler.py @@ -1,6 +1,7 @@ import datetime from .. import base +from .. import util from .. import config from .. import debuginfo from .. 
import validators
@@ -58,8 +59,10 @@ class GroupHandler(base.RequestHandler):
             self.abort(404, 'no such Group: ' + _id)
         permchecker = groupauth.default(self, group)
         payload = self.request.json_body
-        mongo_validator = validators.mongo_from_schema_file('group.json')
-        payload_validator = validators.payload_from_schema_file('group.json')
+        mongo_schema_uri = util.schema_uri(self, 'mongo', 'group.json')
+        mongo_validator = validators.decorator_from_schema_path(mongo_schema_uri)
+        payload_schema_uri = util.schema_uri(self, 'input', 'group.json')
+        payload_validator = validators.from_schema_path(payload_schema_uri)
         payload_validator(payload, 'PUT')
         result = mongo_validator(permchecker(self.storage.exec_op))('PUT', _id=_id, payload=payload)
         if result.modified_count == 1:
@@ -71,8 +74,10 @@ class GroupHandler(base.RequestHandler):
         self._init_storage()
         permchecker = groupauth.default(self, None)
         payload = self.request.json_body
-        mongo_validator = validators.mongo_from_schema_file('group.json')
-        payload_validator = validators.payload_from_schema_file('group.json')
+        mongo_schema_uri = util.schema_uri(self, 'mongo', 'group.json')
+        mongo_validator = validators.decorator_from_schema_path(mongo_schema_uri)
+        payload_schema_uri = util.schema_uri(self, 'input', 'group.json')
+        payload_validator = validators.from_schema_path(payload_schema_uri)
         payload_validator(payload, 'POST')
         payload['created'] = payload['modified'] = datetime.datetime.utcnow()
         payload['roles'] = [{'_id': self.uid, 'access': 'admin', 'site': self.user_site}] if self.uid else []
diff --git a/api/handlers/listhandler.py b/api/handlers/listhandler.py
index 9ac1181c..b9086002 100644
--- a/api/handlers/listhandler.py
+++ b/api/handlers/listhandler.py
@@ -183,9 +183,11 @@ class ListHandler(base.RequestHandler):
             permchecker = permchecker(self, container)
         else:
             self.abort(404, 'Element {} not found in container {}'.format(_id, storage.cont_name))
-        mongo_validator = validators.mongo_from_schema_file(config.get('storage_schema_file'))
-        input_validator = validators.payload_from_schema_file(config.get('input_schema_file'))
-        keycheck = validators.key_check(config.get('storage_schema_file'))
+        mongo_schema_uri = util.schema_uri(self, 'mongo', config.get('storage_schema_file'))
+        mongo_validator = validators.decorator_from_schema_path(mongo_schema_uri)
+        input_schema_uri = util.schema_uri(self, 'input', config.get('input_schema_file'))
+        input_validator = validators.from_schema_path(input_schema_uri)
+        keycheck = validators.key_check(mongo_schema_uri)

         return container, permchecker, storage, mongo_validator, input_validator, keycheck

diff --git a/api/handlers/schemahandler.py b/api/handlers/schemahandler.py
new file mode 100644
index 00000000..18eb4a00
--- /dev/null
+++ b/api/handlers/schemahandler.py
@@ -0,0 +1,21 @@
+import os
+import json
+import datetime
+
+from .. import base
+from .. import config
+
+log = config.log
+
+class SchemaHandler(base.RequestHandler):
+
+    def __init__(self, request=None, response=None):
+        super(SchemaHandler, self).__init__(request, response)
+
+    def get(self, schema, **kwargs):
+        schema_path = os.path.join(config.get_item('persistent', 'schema_path'), schema)
+        try:
+            with open(schema_path, 'rU') as f:
+                return json.load(f)
+        except IOError as e:
+            self.abort(404, str(e))
diff --git a/api/handlers/userhandler.py b/api/handlers/userhandler.py
index 42b6e65e..ca36fe0d 100644
--- a/api/handlers/userhandler.py
+++ b/api/handlers/userhandler.py
@@ -3,6 +3,7 @@ import datetime
 import requests

 from ..
import base +from .. import util from .. import config from .. import validators from ..auth import userauth, always_ok, ROLES @@ -64,8 +65,10 @@ class UserHandler(base.RequestHandler): user = self._get_user(_id) permchecker = userauth.default(self, user) payload = self.request.json_body - mongo_validator = validators.mongo_from_schema_file('user.json') - payload_validator = validators.payload_from_schema_file('user.json') + mongo_schema_uri = util.schema_uri(self, 'mongo', 'user.json') + mongo_validator = validators.decorator_from_schema_path(mongo_schema_uri) + payload_schema_uri = util.schema_uri(self, 'input', 'user.json') + payload_validator = validators.from_schema_path(payload_schema_uri) payload_validator(payload, 'PUT') payload['modified'] = datetime.datetime.utcnow() result = mongo_validator(permchecker(self.storage.exec_op))('PUT', _id=_id, payload=payload) @@ -78,8 +81,10 @@ class UserHandler(base.RequestHandler): self._init_storage() permchecker = userauth.default(self) payload = self.request.json_body - mongo_validator = validators.mongo_from_schema_file('user.json') - payload_validator = validators.payload_from_schema_file('user.json') + mongo_schema_uri = util.schema_uri(self, 'mongo', 'user.json') + mongo_validator = validators.decorator_from_schema_path(mongo_schema_uri) + payload_schema_uri = util.schema_uri(self, 'input', 'user.json') + payload_validator = validators.from_schema_path(payload_schema_uri) payload_validator(payload, 'POST') payload['created'] = payload['modified'] = datetime.datetime.utcnow() payload['root'] = payload.get('root', False) diff --git a/api/root.py b/api/root.py index c98063e5..cbcc9b66 100644 --- a/api/root.py +++ b/api/root.py @@ -2,6 +2,9 @@ import re import markdown from . import base +from . import config + +log = config.log class Root(base.RequestHandler): diff --git a/api/upload.py b/api/upload.py index f52cfc04..74e584b3 100644 --- a/api/upload.py +++ b/api/upload.py @@ -170,7 +170,8 @@ class Upload(base.RequestHandler): self.abort(400, str(e)) if not file_store.metadata: self.abort(400, 'metadata is missing') - metadata_validator = validators.payload_from_schema_file('uploader.json') + payload_schema_uri = util.schema_uri(self, 'input', 'uploader.json') + metadata_validator = validators.from_schema_path(payload_schema_uri) metadata_validator(file_store.metadata, 'POST') try: target_containers = reaperutil.create_root_to_leaf_hierarchy(file_store.metadata, file_store.files) @@ -216,7 +217,8 @@ class Upload(base.RequestHandler): self.abort(400, str(e)) if not file_store.metadata: self.abort(400, 'metadata is missing') - metadata_validator = validators.payload_from_schema_file('enginemetadata.json') + payload_schema_uri = util.schema_uri(self, 'input', 'enginemetadata.json') + metadata_validator = validators.from_schema_path(payload_schema_uri) metadata_validator(file_store.metadata, 'POST') file_infos = file_store.metadata['acquisition'].pop('files', []) now = datetime.datetime.utcnow() diff --git a/api/util.py b/api/util.py index f553ba1f..1d3f927a 100644 --- a/api/util.py +++ b/api/util.py @@ -136,6 +136,13 @@ def send_json_http_exception(response, message, code): response.headers['Content-Type'] = 'application/json; charset=utf-8' response.write(content) +def schema_uri(handler, type_, schema_name): + return handler.uri_for( + 'schemas', + schema=type_ + '/' + schema_name, + _full=True + ) + class Enum(baseEnum.Enum): # Enum strings are prefixed by their class: "Category.classifier". 
# This overrides that behaviour and removes the prefix. diff --git a/api/validators.py b/api/validators.py index d73f56b3..aaf223c4 100644 --- a/api/validators.py +++ b/api/validators.py @@ -1,7 +1,10 @@ import os +import re import copy -import glob +import json +import requests import jsonschema +from jsonschema.compat import urlopen, urlsplit from . import config @@ -13,91 +16,69 @@ class InputValidationException(Exception): class DBValidationException(Exception): pass -# following https://github.com/Julian/jsonschema/issues/98 -# json schema files are expected to be in the schemas folder relative to this module -schema_path = os.path.abspath(os.path.dirname(__file__)) - -resolver_input = jsonschema.RefResolver('file://' + schema_path + '/schemas/input/', None) -resolver_mongo = jsonschema.RefResolver('file://' + schema_path + '/schemas/mongo/', None) - -expected_mongo_schemas = set([ - 'acquisition.json', - 'collection.json', - 'container.json', - 'file.json', - 'group.json', - 'note.json', - 'permission.json', - 'project.json', - 'session.json', - 'subject.json', - 'user.json', - 'avatars.json', - 'tag.json' -]) -expected_input_schemas = set([ - 'acquisition.json', - 'collection.json', - 'container.json', - 'file.json', - 'group.json', - 'note.json', - 'permission.json', - 'project.json', - 'session.json', - 'subject.json', - 'user.json', - 'avatars.json', - 'download.json', - 'tag.json', - 'enginemetadata.json', - 'packfile.json', - 'uploader.json', - 'reaper.json' -]) -mongo_schemas = set() -input_schemas = set() -# validate and cache schemas at start time -for schema_filepath in glob.glob(schema_path + '/schemas/mongo/*.json'): - schema_file = os.path.basename(schema_filepath) - mongo_schemas.add(schema_file) - resolver_mongo.resolve(schema_file) - -assert mongo_schemas == expected_mongo_schemas, '{} is different from {}'.format(mongo_schemas, expected_mongo_schemas) - -for schema_filepath in glob.glob(schema_path + '/schemas/input/*.json'): - schema_file = os.path.basename(schema_filepath) - input_schemas.add(schema_file) - resolver_input.resolve(schema_file) - -assert input_schemas == expected_input_schemas, '{} is different from {}'.format(input_schemas, expected_input_schemas) - -def validate_data(data, schema_name, verb, optional=False): +def validate_data(data, schema_url, verb, optional=False): """ Convenience method to validate a JSON schema against some action. If optional is set, validate_data won't complain about null data. 
""" + raise NotImplementedError('this needs to be adapted, using the new schema endpoint') if optional and data is None: return - validator = payload_from_schema_file(schema_name) + validator = from_schema_path(schema_url) validator(data, verb) def _validate_json(json_data, schema, resolver): jsonschema.validate(json_data, schema, resolver=resolver) - #jsonschema.Draft4Validator(schema, resolver=resolver).validate(json_data) + +class RefResolver(jsonschema.RefResolver): + + def resolve_remote(self, uri): + """override default resolve_remote + to allow testing then there is no ssl certificate + """ + scheme = urlsplit(uri).scheme + + if scheme in self.handlers: + result = self.handlers[scheme](uri) + elif ( + scheme in [u"http", u"https"] and + requests and + getattr(requests.Response, "json", None) is not None + ): + # Requests has support for detecting the correct encoding of + # json over http + if callable(requests.Response.json): + result = requests.get(uri, verify=False).json() + else: + result = requests.get(uri, verify=False).json + else: + # Otherwise, pass off to urllib and assume utf-8 + result = json.loads(urlopen(uri).read().decode("utf-8")) + + if self.cache_remote: + self.store[uri] = result + return result + +# We store the resolvers for each base_uri we use, so that we reuse the schemas cached by the resolvers. +resolvers = {} +def _resolve_schema(schema_url): + base_uri, schema_name = re.match('(.*/)(.*)', schema_url).groups() + if not resolvers.get(base_uri): + resolvers[base_uri] = RefResolver(base_uri, None) + return resolvers[base_uri].resolve(schema_name)[1], resolvers[base_uri] def no_op(g, *args): return g -def mongo_from_schema_file(schema_file): - if schema_file is None: +def decorator_from_schema_path(schema_url): + if schema_url is None: return no_op - schema = resolver_mongo.resolve(schema_file)[1] + schema, resolver = _resolve_schema(schema_url) def g(exec_op): - def mongo_val(method, **kwargs): + def validator(method, **kwargs): payload = kwargs['payload'] log.debug(payload) if method == 'PUT' and schema.get('required'): @@ -107,17 +88,18 @@ def mongo_from_schema_file(schema_file): _schema = schema if method in ['POST', 'PUT']: try: - _validate_json(payload, _schema, resolver_mongo) + _validate_json(payload, _schema, resolver) except jsonschema.ValidationError as e: raise DBValidationException(str(e)) return exec_op(method, **kwargs) - return mongo_val + return validator return g -def payload_from_schema_file(schema_file): - if schema_file is None: +def from_schema_path(schema_url): + if schema_url is None: return no_op - schema = resolver_input.resolve(schema_file)[1] + # split the url in base_uri and schema_name + schema, resolver = _resolve_schema(schema_url) def g(payload, method): if method == 'PUT' and schema.get('required'): _schema = copy.copy(schema) @@ -126,12 +108,12 @@ def payload_from_schema_file(schema_file): _schema = schema if method in ['POST', 'PUT']: try: - _validate_json(payload, _schema, resolver_input) + _validate_json(payload, _schema, resolver) except jsonschema.ValidationError as e: raise InputValidationException(str(e)) return g -def key_check(schema_file): +def key_check(schema_url): """ for sublists of mongo container there is no automatic key check when creating, updating or deleting an object. We are adding a custom array field to the json schemas ("key_fields"). @@ -146,9 +128,9 @@ def key_check(schema_file): 2. a GET will retrieve a single item 3. 
a DELETE (most importantly) will delete a single item """ - if schema_file is None: + if schema_url is None: return no_op - schema = resolver_mongo.resolve(schema_file)[1] + schema, _ = _resolve_schema(schema_url) log.debug(schema) if schema.get('key_fields') is None: return no_op -- GitLab
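
Usage sketch (not part of the patch): the snippet below shows how a payload check against the schemas served by the new /api/schemas endpoint fits together, mirroring what util.schema_uri() and validators.from_schema_path() do above. The base URL, schema name, and payload are illustrative assumptions, and verify=False mirrors the patched RefResolver.resolve_remote(), which tolerates self-signed certificates in test deployments.

import re
import jsonschema
import requests

# Hypothetical deployment URL; inside the API this value would come from
# util.schema_uri(handler, 'input', 'group.json').
schema_url = 'https://localhost:8443/api/schemas/input/group.json'

# Split the url into its base uri and schema name, as validators._resolve_schema() does.
base_uri, schema_name = re.match('(.*/)(.*)', schema_url).groups()

# Fetch the schema over HTTP; verify=False matches the RefResolver override in this patch.
schema = requests.get(schema_url, verify=False).json()

# A resolver anchored at the base uri lets "$ref" entries inside the schema
# resolve against the same /api/schemas endpoint.
resolver = jsonschema.RefResolver(base_uri, schema)

payload = {'_id': 'example_group', 'name': 'Example Group'}  # illustrative payload
try:
    jsonschema.validate(payload, schema, resolver=resolver)
except jsonschema.ValidationError as e:
    print('payload rejected: {}'.format(e.message))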