From 4a7754dd9eb88b9e3192d870cb7d56dc06281395 Mon Sep 17 00:00:00 2001
From: Renzo Frigato <rfrigato@stanford.edu>
Date: Tue, 8 Mar 2016 15:29:17 -0800
Subject: [PATCH] add advanced datatree search query

---
 api/api.py                    |   3 +-
 api/handlers/searchhandler.py | 105 +++++++++++++++++++++++++++++++++-
 2 files changed, 106 insertions(+), 2 deletions(-)

diff --git a/api/api.py b/api/api.py
index 9f029a00..091febb9 100644
--- a/api/api.py
+++ b/api/api.py
@@ -137,7 +137,8 @@ routes = [
     webapp2.Route(_format(r'/api/<par_cont_name:groups>/<par_id:{group_id_re}>/<cont_name:projects>'),          containerhandler.ContainerHandler, name='cont_sublist_groups', handler_method='get_all', methods=['GET']),
     webapp2.Route(_format(r'/api/<par_cont_name:{cont_name_re}>/<par_id:{cid_re}>/<cont_name:{cont_name_re}>'), containerhandler.ContainerHandler, name='cont_sublist', handler_method='get_all', methods=['GET']),
     webapp2.Route(_format(r'/api/search'),                                            searchhandler.SearchHandler, name='es_proxy', methods=['GET']),
-    webapp2.Route(_format(r'/api/search/<cont_name:{cont_name_re}>'),                 searchhandler.SearchHandler, name='es_proxy_1', methods=['GET']),
+    webapp2.Route(_format(r'/api/search/files'),                                      searchhandler.SearchHandler, handler_method='get_datatree', name='es_data', methods=['GET']),
+    webapp2.Route(_format(r'/api/search/<cont_name:{cont_name_re}>'),                 searchhandler.SearchHandler, name='es_proxy', methods=['GET']),
     webapp2.Route(_format(r'/api/schemas/<schema:{schema_re}>'),                      schemahandler.SchemaHandler, name='schemas', methods=['GET']),
 ]
 
diff --git a/api/handlers/searchhandler.py b/api/handlers/searchhandler.py
index 6d109c4a..c92b72be 100644
--- a/api/handlers/searchhandler.py
+++ b/api/handlers/searchhandler.py
@@ -1,3 +1,4 @@
+import bson
 import datetime
 import elasticsearch
 
@@ -6,8 +7,77 @@ from .. import config
 
 log = config.log
 
+parent_container = {
+    'acquisitions': 'sessions',
+    'sessions': 'projects'
+}
+
+def _filter_body_by_type(body, doc_type):
+    query = {
+        'query': {
+            'filtered': {
+                'query': body,
+                'filter': {
+                    'type': {
+                        'value': doc_type
+                    }
+                }
+            }
+        },
+        'min_score': 0.5
+    }
+    return query
 
 class SearchHandler(base.RequestHandler):
+    """This class allows to proxy queries to elasticsearch
+    The get method just wraps the body in a convenient elasticsearch query.
+    The get_datatree (for the special doc_type 'files') for each result build the datatree
+    with the containers in their hierarchy.
+    output example:
+    [
+        {
+          "mimetype": "application/zip",
+          "hash": "v0-sha384-8607a3c17008ff24d0cb9e1ccd60f5c7bcc1810b8c1dc9ee0f14ee91b7b1f897b78fcb035ff0135520a58bebfcdbd78b",
+          "name": "8613_6_1_t1.zip",
+          "project": {
+            "group": "scitran",
+            "created": "2016-03-08T22:46:01.941000+00:00",
+            "modified": "2016-03-08T22:46:33.030000+00:00",
+            "label": "Neuroscience",
+            "_id": "56df5629b13d67a9cbfca1ea",
+            "public": false
+          },
+          "session": {
+            "group": "scitran",
+            "created": "2016-03-08T22:46:16.221000+00:00",
+            "modified": "2016-03-08T22:46:18.822000+00:00",
+            "label": "1.2.840.113619.6.353.50113891957665820485497041858168751557",
+            "project": "56df5629b13d67a9cbfca1ea",
+            "_id": "56df5638b13d67a9cbfca1f7",
+            "public": false,
+            "subject": {
+              "code": "ex8613"
+            }
+          },
+          "container_name": "acquisitions",
+          "type": "dicom",
+          "acquisition": {
+            "created": "2016-03-08T22:46:17.164000",
+            "timestamp": "2015-01-07T17:38:09",
+            "modified": "2016-03-08T22:46:17.164000",
+            "label": "T1_high-res_inplane_Ret_knk",
+            "instrument": "MRI",
+            "session": "56df5638b13d67a9cbfca1f7",
+            "measurement": "anatomical",
+            "timezone": "America/Los_Angeles",
+            "_id": "56df5639b13d67a9cbfca1f9",
+            "public": false
+          },
+          "size": 3216386
+        },
+        ...
+    ]
+    """
 
     def __init__(self, request=None, response=None):
         super(SearchHandler, self).__init__(request, response)
@@ -17,8 +87,41 @@ class SearchHandler(base.RequestHandler):
             self.abort(403, 'search is available only for authenticated users')
         size = self.get_param('size')
         body = self.request.json_body
+        query = _filter_body_by_type(body, cont_name)
         try:
-            results = config.es.search(index='scitran', doc_type=cont_name, body=body, _source=['_id'], size=size or 10)
+            results = config.es.search(index='scitran', body=body, _source=['_id'], size=size or 10)
         except elasticsearch.exceptions.ConnectionError as e:
             self.abort(503, 'elasticsearch is not available')
         return results['hits']['hits']
+
+    def get_datatree(self, **kwargs):
+        if self.public_request:
+            self.abort(403, 'search is available only for authenticated users')
+        size = self.get_param('size')
+        body = self.request.json_body
+        query = _filter_body_by_type(body, 'files')
+        try:
+            es_results = config.es.search(index='scitran', body=query, size=size or 10)
+            ## elastic search results are wrapped in subkey ['hits']['hits']
+            es_results = es_results['hits']['hits']
+            results = []
+            for result in es_results:
+                # extract the source of the result
+                result = result['_source']
+                # add to the result the container hierarchy references
+                container = result.pop('container')
+                container.pop('permissions')
+                cont_name = result['container_name']
+                result[cont_name[:-1]] = container
+                while parent_container.get(cont_name):
+                    parent_cont_name = parent_container[cont_name]
+                    parent_id = bson.objectid.ObjectId(container[parent_cont_name[:-1]])
+                    container = config.db[parent_cont_name].find_one({'_id': parent_id})
+                    container.pop('permissions')
+                    result[parent_cont_name[:-1]] = container
+                    cont_name = parent_cont_name
+                results.append(result)
+        except elasticsearch.exceptions.ConnectionError as e:
+            self.abort(503, 'elasticsearch is not available')
+        return results
+
-- 
GitLab