diff --git a/api/config.py b/api/config.py index 186ed608ae7c2181ccf619807dfaaaa92e798a6a..8bc5375bc230a1112b911c3f0091d79aa07b978c 100644 --- a/api/config.py +++ b/api/config.py @@ -171,7 +171,7 @@ expected_input_schemas = set([ 'project.json', 'project-template.json', 'project-update.json', - 'rule-add.json', + 'rule-new.json', 'rule-update.json', 'session.json', 'session-update.json', diff --git a/api/jobs/handlers.py b/api/jobs/handlers.py index 52d797f09d75b1bf5897f8dc0cf31e490e8b8554..478599dbbf9ab2c3570e99ba81ca24156a1455d7 100644 --- a/api/jobs/handlers.py +++ b/api/jobs/handlers.py @@ -175,7 +175,7 @@ class RulesHandler(base.RequestHandler): doc = self.request.json - validate_data(doc, 'rule-add.json', 'input', 'POST', optional=True) + validate_data(doc, 'rule-new.json', 'input', 'POST', optional=True) try: get_gear_by_name(doc['alg']) except APINotFoundException: diff --git a/api/jobs/rules.py b/api/jobs/rules.py index 3e6e3758547f6aa5a2665fd52f2221d94cd9bf37..f55d0bfc197b12fd8a2071060dd9c652cedbdc8d 100644 --- a/api/jobs/rules.py +++ b/api/jobs/rules.py @@ -1,4 +1,5 @@ import fnmatch +import re from .. import config from ..types import Origin @@ -58,53 +59,53 @@ def get_base_rules(): def _log_file_key_error(file_, container, error): log.warning('file ' + file_.get('name', '?') + ' in container ' + str(container.get('_id', '?')) + ' ' + error) -def eval_match(match_type, match_param, file_, container): +def eval_match(match_type, match_param, file_, container, regex=False): """ Given a match entry, return if the match succeeded. """ - def lower(x): - return x.lower() - + def match(value): + if regex: + return re.match(match_param, value, flags=re.IGNORECASE) is not None + elif match_type == 'file.name': + return fnmatch.fnmatch(value.lower(), match_param.lower()) + else: + return match_param.lower() == value.lower() # Match the file's type if match_type == 'file.type': file_type = file_.get('type') if file_type: - return file_type.lower() == match_param.lower() + return match(file_type) else: _log_file_key_error(file_, container, 'has no type') return False # Match a shell glob for the file name elif match_type == 'file.name': - return fnmatch.fnmatch(file_['name'].lower(), match_param.lower()) + return match(file_['name']) # Match any of the file's measurements elif match_type == 'file.measurements': - try: - if match_param: - return match_param.lower() in map(lower, file_.get('measurements', [])) - else: - return False - except KeyError: - _log_file_key_error(file_, container, 'has no measurements key') + if match_param: + return any(match(value) for value in file_.get('measurements', [])) + else: return False # Match the container having any file (including this one) with this type elif match_type == 'container.has-type': for c_file in container['files']: c_file_type = c_file.get('type') - if c_file_type and match_param.lower() == c_file_type.lower(): + if c_file_type and match(c_file_type): return True return False # Match the container having any file (including this one) with this measurement elif match_type == 'container.has-measurement': - for c_file in container['files']: - if match_param: - if match_param.lower() in map(lower, c_file.get('measurements', [])): + if match_param: + for c_file in container['files']: + if any(match(value) for value in c_file.get('measurements', [])): return True return False @@ -121,7 +122,7 @@ def eval_rule(rule, file_, container): has_match = False for match in rule.get('any', []): - if eval_match(match['type'], match['value'], file_, container): + if eval_match(match['type'], match['value'], file_, container, regex=match.get('regex')): has_match = True break @@ -131,7 +132,7 @@ def eval_rule(rule, file_, container): # Are there matches in the 'all' set? for match in rule.get('all', []): - if not eval_match(match['type'], match['value'], file_, container): + if not eval_match(match['type'], match['value'], file_, container, regex=match.get('regex')): return False return True diff --git a/raml/resources/projects.raml b/raml/resources/projects.raml index af9aafcc672ddccc2372b6410ea1e15a2eee563e..faadd8166df073d938620d46487cf29c5b6179d0 100644 --- a/raml/resources/projects.raml +++ b/raml/resources/projects.raml @@ -81,6 +81,23 @@ post: get: responses: 200: + body: + application/json: + schema: !include ../schemas/output/rule-list.json + post: + body: + application/json: + schema: !include ../schemas/input/rule-new.json + /{RuleId}: + uriParameters: + RuleId: + type: string + required: true + put: + body: + application/json: + schema: !include ../schemas/input/rule-update.json + /analyses: type: analyses-list /{AnalysisId}: diff --git a/raml/schemas/definitions/rule.json b/raml/schemas/definitions/rule.json index 64f8553f1db87b5d912b131206db6873fdf05184..20ce0b428a4ee13c2713d2276ac8aeb6bb1c101e 100644 --- a/raml/schemas/definitions/rule.json +++ b/raml/schemas/definitions/rule.json @@ -1,22 +1,51 @@ { "$schema": "http://json-schema.org/draft-04/schema#", - "type": "object", - "properties": { - "alg": { - "type": "string" - }, - "all":{ - "type":"array", - "items":{ - "type":"array", - "items":{ - "type":"string" - } + "definitions": { + "rule-items": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "file.type", + "file.name", + "file.measurements", + "container.has-type", + "container.has-measurement" + ] + }, + "value": { "type": "string" }, + "regex": { "type": "boolean" } + }, + "required": [ "type", "value" ], + "additionalProperties": false + } + }, + + "rule-input": { + "type": "object", + "properties": { + "_id": { "type": "string" }, + "project_id": { "type": "string" }, + "alg": { "type": "string" }, + "name": { "type": "string" }, + "any": { "$ref": "#/definitions/rule-items" }, + "all": { "$ref": "#/definitions/rule-items" } + }, + "additionalProperties": false + }, + + "rule-output": { + "type": "object", + "properties": { + "_id": { "type": "string" }, + "alg": { "type": "string" }, + "name": { "type": "string" }, + "any": { "$ref": "#/definitions/rule-items" }, + "all": { "$ref": "#/definitions/rule-items" } + } } - } - }, - "required": [ - "alg", - "all" - ] + } } diff --git a/raml/schemas/input/rule-add.json b/raml/schemas/input/rule-add.json deleted file mode 100644 index bf41dde0b862c13dad873c79eed9ad69ba1d0715..0000000000000000000000000000000000000000 --- a/raml/schemas/input/rule-add.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-04/schema#", - "title": "rule", - "type": "object", - "properties": { - "alg": { - "type": "string" - }, - "name": { - "type": "string" - }, - "any": { - "type": "array", - "items": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "file.type", - "file.name", - "file.measurements", - "container.has-type", - "container.has-measurement" - ] - }, - "value": { - "type": "string" - } - } - } - }, - "all": { - "type": "array", - "items": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "file.type", - "file.name", - "file.measurements", - "container.has-type", - "container.has-measurement" - ] - }, - "value": { - "type": "string" - } - } - } - } - }, - "required": [ - "alg", - "all", - "any", - "name" - ], - "additionalProperties": false -} diff --git a/raml/schemas/input/rule-new.json b/raml/schemas/input/rule-new.json new file mode 100644 index 0000000000000000000000000000000000000000..c456eaec4c2662f9ad060cc397b46b682f63c64c --- /dev/null +++ b/raml/schemas/input/rule-new.json @@ -0,0 +1,7 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "Rule", + "type": "object", + "allOf": [{"$ref": "../definitions/rule.json#/definitions/rule-input"}], + "required": ["alg", "name", "any", "all"] +} diff --git a/raml/schemas/input/rule-update.json b/raml/schemas/input/rule-update.json index 2db5a226703f5b91169c3d274ef60529fbb6c755..31ffc6d6023a7d57fc4c77f5a31bc98628a8241c 100644 --- a/raml/schemas/input/rule-update.json +++ b/raml/schemas/input/rule-update.json @@ -1,62 +1,6 @@ { "$schema": "http://json-schema.org/draft-04/schema#", - "title": "rule", + "title": "Rule", "type": "object", - "properties": { - "_id": { - "type": "string" - }, - "project_id": { - "type": "string" - }, - "alg": { - "type": "string" - }, - "name": { - "type": "string" - }, - "any": { - "type": "array", - "items": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "file.type", - "file.name", - "file.measurements", - "container.has-type", - "container.has-measurement" - ] - }, - "value": { - "type": "string" - } - } - } - }, - "all": { - "type": "array", - "items": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "file.type", - "file.name", - "file.measurements", - "container.has-type", - "container.has-measurement" - ] - }, - "value": { - "type": "string" - } - } - } - } - }, - "additionalProperties": false + "allOf": [{"$ref": "../definitions/rule.json#/definitions/rule-input"}] } diff --git a/raml/schemas/output/rule-list.json b/raml/schemas/output/rule-list.json new file mode 100644 index 0000000000000000000000000000000000000000..fffc98fe82dd5e0c5e78f4b26643c2169555cc4d --- /dev/null +++ b/raml/schemas/output/rule-list.json @@ -0,0 +1,5 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "array", + "items": { "$ref": "../definitions/rule.json#/definitions/rule-output" } +} diff --git a/raml/schemas/output/rules-list.json b/raml/schemas/output/rules-list.json deleted file mode 100644 index 22c31bcbf81f61a8469309c85c56a088e79cf464..0000000000000000000000000000000000000000 --- a/raml/schemas/output/rules-list.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-04/schema#", - "type": "array", - "items": { - "$ref":"../definitions/rule.json#" - } -} diff --git a/tests/integration_tests/python/test_rules.py b/tests/integration_tests/python/test_rules.py index cecbb0ec53d74bd1bda923a509cb59d5e3d1b8a4..b517d25f6273c46de6b8830fd1f1c8c5056233f9 100644 --- a/tests/integration_tests/python/test_rules.py +++ b/tests/integration_tests/python/test_rules.py @@ -108,8 +108,6 @@ def test_site_rules(randstr, data_builder, as_admin, as_user, as_public): assert r.status_code == 404 - - def test_site_rules_copied_to_new_projects(randstr, data_builder, file_form, as_admin, as_root): gear_1_name = randstr() gear_1 = data_builder.create_gear(gear={'name': gear_1_name, 'version': '0.0.1'}) @@ -333,7 +331,7 @@ def test_rules(randstr, data_builder, file_form, as_root, as_admin, with_user, a assert r.ok - # add valid container.has-<something> project rule w/ non-existent gear + # add valid container.has-<something> project rule # NOTE this is a legacy rule r = as_admin.post('/projects/' + project + '/rules', json={ 'alg': gear_name, @@ -391,4 +389,29 @@ def test_rules(randstr, data_builder, file_form, as_root, as_admin, with_user, a r = as_admin.delete('/projects/' + project + '/rules/' + rule2) assert r.ok + # add regex rule + # NOTE this is a legacy rule + r = as_admin.post('/projects/' + project + '/rules', json={ + 'alg': gear_name, + 'name': 'file-measurement-regex', + 'any': [], + 'all': [ + {'type': 'file.name', 'value': 'test\d+\.(csv|txt)', 'regex': True}, + ] + }) + assert r.ok + rule3 = r.json()['_id'] + + # upload file matching regex rule + r = as_admin.post('/projects/' + project + '/files', files=file_form('test999.txt')) + assert r.ok + + # test that job was created via regex rule + gear_jobs = [job for job in api_db.jobs.find({'gear_id': gear_2})] + assert len(gear_jobs) == 3 + + # delete rule + r = as_admin.delete('/projects/' + project + '/rules/' + rule3) + assert r.ok + # TODO add and test 'new-style' rules diff --git a/tests/unit_tests/python/test_rules.py b/tests/unit_tests/python/test_rules.py index ca32b7fce8a4122d0d7704a20159e29f3258482a..8dca055329460134391b585d56f882ccd1067822 100644 --- a/tests/unit_tests/python/test_rules.py +++ b/tests/unit_tests/python/test_rules.py @@ -1,25 +1,27 @@ - import pytest + from api.jobs import rules # Statefully holds onto some construction args and can return tuples to unroll for calling rules.eval_match. # Might indicate a need for a match tuple in rules.py. class rulePart: # Hold onto some param state - def __init__(self, match_type=None, match_param=None, file_=None, container=None): + def __init__(self, match_type=None, match_param=None, file_=None, container=None, regex=None): self.match_type = match_type self.match_param = match_param self.file_ = file_ self.container = container + self.regex = regex # Return params as tuple, optionally with some modifications - def gen(self, match_type=None, match_param=None, file_=None, container=None): + def gen(self, match_type=None, match_param=None, file_=None, container=None, regex=None): return ( - match_type if match_type else self.match_type, - match_param if match_param else self.match_param, - file_ if file_ else self.file_, - container if container else self.container, + match_type if match_type is not None else self.match_type, + match_param if match_param is not None else self.match_param, + file_ if file_ is not None else self.file_, + container if container is not None else self.container, + regex if regex is not None else self.regex, ) # DISCUSS: this basically asserts that the log helper doesn't throw, which is of non-zero but questionable value. @@ -44,6 +46,17 @@ def test_eval_match_file_type(): result = rules.eval_match(*args) assert result == False +def test_eval_match_file_type_regex(): + part = rulePart(match_type='file.type', file_={'type': 'dicom'}, regex=True) + + args = part.gen(match_param='DiC[o]{1}M$') + result = rules.eval_match(*args) + assert result == True + + args = part.gen(match_param='DiC[o]{2}M$') + result = rules.eval_match(*args) + assert result == False + def test_eval_match_file_name_match_exact(): part = rulePart(match_type='file.name', match_param='file.txt') @@ -66,6 +79,17 @@ def test_eval_match_file_name_match_relative(): result = rules.eval_match(*args) assert result == False +def test_eval_match_file_name_match_regex(): + part = rulePart(match_type='file.name', file_={'name': 'test.dicom.zip'}, regex=True) + + args = part.gen(match_param='.*DiC[o]{1}M\.zip$') + result = rules.eval_match(*args) + assert result == True + + args = part.gen(match_param='.*DiC[o]{2}M\.zip$') + result = rules.eval_match(*args) + assert result == False + def test_eval_match_file_measurements(): part = rulePart(match_type='file.measurements', file_={'measurements': ['a', 'diffusion', 'b'] }) @@ -77,8 +101,19 @@ def test_eval_match_file_measurements(): result = rules.eval_match(*args) assert result == False - # Check match returns false without raising when not given a file.type - args = part.gen(file_={'a': 'b'}, match_param='', container={'a': 'b'}) + # Check match returns false without raising when not given a file.measurement + args = part.gen(match_param='', file_={'a': 'b'}, container={'a': 'b'}) + result = rules.eval_match(*args) + assert result == False + +def test_eval_match_file_measurements_regex(): + part = rulePart(match_type='file.measurements', file_={'measurements': ['diffusion']}, regex=True) + + args = part.gen(match_param='test|diffusion') + result = rules.eval_match(*args) + assert result == True + + args = part.gen(match_param='test|foo') result = rules.eval_match(*args) assert result == False @@ -96,6 +131,20 @@ def test_eval_match_container_has_type(): result = rules.eval_match(*args) assert result == False +def test_eval_match_container_has_type_regex(): + part = rulePart(match_type='container.has-type', regex=True, container={'files': [ + {'type': 'diffusion'}, + {'type': 'other'}, + ]}) + + args = part.gen(match_param='test|diffusion') + result = rules.eval_match(*args) + assert result == True + + args = part.gen(match_param='test|foo') + result = rules.eval_match(*args) + assert result == False + def test_eval_match_unknown_type(): with pytest.raises(Exception): rules.eval_match('does-not-exist', None, None, None)