Skip to content
Snippets Groups Projects
Commit f67eb1bb authored by Ambrus Simon's avatar Ambrus Simon
Browse files

Add rule regex matching

parent a263571c
No related branches found
No related tags found
No related merge requests found
import fnmatch
import re
from .. import config
from ..types import Origin
......@@ -58,53 +59,53 @@ def get_base_rules():
def _log_file_key_error(file_, container, error):
log.warning('file ' + file_.get('name', '?') + ' in container ' + str(container.get('_id', '?')) + ' ' + error)
def eval_match(match_type, match_param, file_, container):
def eval_match(match_type, match_param, file_, container, regex=False):
"""
Given a match entry, return if the match succeeded.
"""
def lower(x):
return x.lower()
def match(value):
if regex:
return re.match(match_param, value, flags=re.IGNORECASE) is not None
elif match_type == 'file.name':
return fnmatch.fnmatch(value.lower(), match_param.lower())
else:
return match_param.lower() == value.lower()
# Match the file's type
if match_type == 'file.type':
file_type = file_.get('type')
if file_type:
return file_type.lower() == match_param.lower()
return match(file_type)
else:
_log_file_key_error(file_, container, 'has no type')
return False
# Match a shell glob for the file name
elif match_type == 'file.name':
return fnmatch.fnmatch(file_['name'].lower(), match_param.lower())
return match(file_['name'])
# Match any of the file's measurements
elif match_type == 'file.measurements':
try:
if match_param:
return match_param.lower() in map(lower, file_.get('measurements', []))
else:
return False
except KeyError:
_log_file_key_error(file_, container, 'has no measurements key')
if match_param:
return any(match(value) for value in file_.get('measurements', []))
else:
return False
# Match the container having any file (including this one) with this type
elif match_type == 'container.has-type':
for c_file in container['files']:
c_file_type = c_file.get('type')
if c_file_type and match_param.lower() == c_file_type.lower():
if c_file_type and match(c_file_type):
return True
return False
# Match the container having any file (including this one) with this measurement
elif match_type == 'container.has-measurement':
for c_file in container['files']:
if match_param:
if match_param.lower() in map(lower, c_file.get('measurements', [])):
if match_param:
for c_file in container['files']:
if any(match(value) for value in c_file.get('measurements', [])):
return True
return False
......@@ -121,7 +122,7 @@ def eval_rule(rule, file_, container):
has_match = False
for match in rule.get('any', []):
if eval_match(match['type'], match['value'], file_, container):
if eval_match(match['type'], match['value'], file_, container, regex=match.get('regex')):
has_match = True
break
......@@ -131,7 +132,7 @@ def eval_rule(rule, file_, container):
# Are there matches in the 'all' set?
for match in rule.get('all', []):
if not eval_match(match['type'], match['value'], file_, container):
if not eval_match(match['type'], match['value'], file_, container, regex=match.get('regex')):
return False
return True
......
......@@ -16,7 +16,8 @@
"container.has-measurement"
]
},
"value": { "type": "string" }
"value": { "type": "string" },
"regex": { "type": "boolean" }
},
"required": [ "type", "value" ],
"additionalProperties": false
......
......@@ -108,8 +108,6 @@ def test_site_rules(randstr, data_builder, as_admin, as_user, as_public):
assert r.status_code == 404
def test_site_rules_copied_to_new_projects(randstr, data_builder, file_form, as_admin, as_root):
gear_1_name = randstr()
gear_1 = data_builder.create_gear(gear={'name': gear_1_name, 'version': '0.0.1'})
......@@ -333,7 +331,7 @@ def test_rules(randstr, data_builder, file_form, as_root, as_admin, with_user, a
assert r.ok
# add valid container.has-<something> project rule w/ non-existent gear
# add valid container.has-<something> project rule
# NOTE this is a legacy rule
r = as_admin.post('/projects/' + project + '/rules', json={
'alg': gear_name,
......@@ -391,4 +389,29 @@ def test_rules(randstr, data_builder, file_form, as_root, as_admin, with_user, a
r = as_admin.delete('/projects/' + project + '/rules/' + rule2)
assert r.ok
# add regex rule
# NOTE this is a legacy rule
r = as_admin.post('/projects/' + project + '/rules', json={
'alg': gear_name,
'name': 'file-measurement-regex',
'any': [],
'all': [
{'type': 'file.name', 'value': 'test\d+\.(csv|txt)', 'regex': True},
]
})
assert r.ok
rule3 = r.json()['_id']
# upload file matching regex rule
r = as_admin.post('/projects/' + project + '/files', files=file_form('test999.txt'))
assert r.ok
# test that job was created via regex rule
gear_jobs = [job for job in api_db.jobs.find({'gear_id': gear_2})]
assert len(gear_jobs) == 3
# delete rule
r = as_admin.delete('/projects/' + project + '/rules/' + rule3)
assert r.ok
# TODO add and test 'new-style' rules
import pytest
from api.jobs import rules
# Statefully holds onto some construction args and can return tuples to unroll for calling rules.eval_match.
# Might indicate a need for a match tuple in rules.py.
class rulePart:
# Hold onto some param state
def __init__(self, match_type=None, match_param=None, file_=None, container=None):
def __init__(self, match_type=None, match_param=None, file_=None, container=None, regex=None):
self.match_type = match_type
self.match_param = match_param
self.file_ = file_
self.container = container
self.regex = regex
# Return params as tuple, optionally with some modifications
def gen(self, match_type=None, match_param=None, file_=None, container=None):
def gen(self, match_type=None, match_param=None, file_=None, container=None, regex=None):
return (
match_type if match_type else self.match_type,
match_param if match_param else self.match_param,
file_ if file_ else self.file_,
container if container else self.container,
match_type if match_type is not None else self.match_type,
match_param if match_param is not None else self.match_param,
file_ if file_ is not None else self.file_,
container if container is not None else self.container,
regex if regex is not None else self.regex,
)
# DISCUSS: this basically asserts that the log helper doesn't throw, which is of non-zero but questionable value.
......@@ -44,6 +46,17 @@ def test_eval_match_file_type():
result = rules.eval_match(*args)
assert result == False
def test_eval_match_file_type_regex():
part = rulePart(match_type='file.type', file_={'type': 'dicom'}, regex=True)
args = part.gen(match_param='DiC[o]{1}M$')
result = rules.eval_match(*args)
assert result == True
args = part.gen(match_param='DiC[o]{2}M$')
result = rules.eval_match(*args)
assert result == False
def test_eval_match_file_name_match_exact():
part = rulePart(match_type='file.name', match_param='file.txt')
......@@ -66,6 +79,17 @@ def test_eval_match_file_name_match_relative():
result = rules.eval_match(*args)
assert result == False
def test_eval_match_file_name_match_regex():
part = rulePart(match_type='file.name', file_={'name': 'test.dicom.zip'}, regex=True)
args = part.gen(match_param='.*DiC[o]{1}M\.zip$')
result = rules.eval_match(*args)
assert result == True
args = part.gen(match_param='.*DiC[o]{2}M\.zip$')
result = rules.eval_match(*args)
assert result == False
def test_eval_match_file_measurements():
part = rulePart(match_type='file.measurements', file_={'measurements': ['a', 'diffusion', 'b'] })
......@@ -77,8 +101,19 @@ def test_eval_match_file_measurements():
result = rules.eval_match(*args)
assert result == False
# Check match returns false without raising when not given a file.type
args = part.gen(file_={'a': 'b'}, match_param='', container={'a': 'b'})
# Check match returns false without raising when not given a file.measurement
args = part.gen(match_param='', file_={'a': 'b'}, container={'a': 'b'})
result = rules.eval_match(*args)
assert result == False
def test_eval_match_file_measurements_regex():
part = rulePart(match_type='file.measurements', file_={'measurements': ['diffusion']}, regex=True)
args = part.gen(match_param='test|diffusion')
result = rules.eval_match(*args)
assert result == True
args = part.gen(match_param='test|foo')
result = rules.eval_match(*args)
assert result == False
......@@ -96,6 +131,20 @@ def test_eval_match_container_has_type():
result = rules.eval_match(*args)
assert result == False
def test_eval_match_container_has_type_regex():
part = rulePart(match_type='container.has-type', regex=True, container={'files': [
{'type': 'diffusion'},
{'type': 'other'},
]})
args = part.gen(match_param='test|diffusion')
result = rules.eval_match(*args)
assert result == True
args = part.gen(match_param='test|foo')
result = rules.eval_match(*args)
assert result == False
def test_eval_match_unknown_type():
with pytest.raises(Exception):
rules.eval_match('does-not-exist', None, None, None)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment