Updates to jsonld context for the DCC schemas.

[htsworkflow.git] / htsworkflow / submission / encoded.py
diff --git a/htsworkflow/submission/encoded.py b/htsworkflow/submission/encoded.py

index 8eec0c0ef60cd44e0123cfc8b7d28d1f8625c50d..04aff707166e20956d18e69add164b7e3c989f99 100644 (file)
--- a/htsworkflow/submission/encoded.py
+++ b/htsworkflow/submission/encoded.py
@@ -10,9 +10,8 @@ import logging
  import json
  import jsonschema
  import requests
-from requests.utils import urlparse, urlunparse
  import types
-from urlparse import urljoin
+from urlparse import urljoin, urlparse, urlunparse
  
  LOGGER = logging.getLogger(__name__)
  
@@ -21,13 +20,17 @@ ENCODED_CONTEXT = {
      # provide common defaults.
      None: {
          # terms in multiple encoded objects
+        'award': { '@type': '@id' },
+        'dataset': {'@type': '@id'},
          'description': 'rdf:description',
+        'documents': { '@type': '@id' },
          'experiment': {'@type': '@id'},
          'href': { '@type': '@id' },
          'lab': { '@type': '@id' },
          'library': {'@type': '@id' },
          'pi': { '@type': '@id' },
          'platform': { '@type': '@id' },
+        'replicates': { '@type': '@id' },
          'submitted_by': { '@type': '@id' },
          'url': { '@type': '@id' },
      },
@@ -40,9 +43,8 @@ ENCODED_CONTEXT = {
      },
      'experiment': {
          "assay_term_id": { "@type": "@id" },
-    },
-    'file': {
-        'dataset': {'@type': '@id'},
+        "files": { "@type": "@id" },
+        "original_files": { "@type": "@id"},
      },
      # I tried to use the JSON-LD mapping capabilities to convert the lab
      # contact information into a vcard record, but the encoded model
@@ -54,17 +56,13 @@ ENCODED_CONTEXT = {
      #    "state": "vcard:region",
      #    "country": "vcard:country"
      #},
-    'human_donor': {
-        'award': { '@type': '@id' },
-    },
      'library': {
-        'award': { '@type': '@id' },
          'nucleic_acid_term_id': { '@type': '@id' }
      }
  }
  
  #FIXME: this needs to be initialized from rdfns
-_encoded_namespaces = {
+ENCODED_NAMESPACES = {
      # JSON-LD lets you define namespaces so you can used the shorted url syntax.
      # (instead of http://www.w3.org/2000/01/rdf-schema#label you can do
      # rdfs:label)
@@ -82,19 +80,20 @@ _encoded_namespaces = {
      # OBI: available from http://svn.code.sf.net/p/obi/code/releases/2012-07-01/merged/merged-obi-comments.owl
      'SO': 'http://purl.obolibrary.org/obo/SO_', # Sequence ontology
      # SO: available from http://www.berkeleybop.org/ontologies/so.owl
+    # NTR: New Term Request space for DCC to implement new ontology terms
  
  }
-ENCODED_CONTEXT[None].update(_encoded_namespaces)
+
  ENCODED_SCHEMA_ROOT='/profiles/'
  
  class ENCODED:
      '''Programatic access encoded, the software powering ENCODE3's submit site.
      '''
-    def __init__(self, server, context=None):
+    def __init__(self, server, contexts=None):
          self.server = server
          self.username = None
          self.password = None
-        self.context = context if context else ENCODED_CONTEXT
+        self.contexts = contexts if contexts else ENCODED_CONTEXT
          self.schemas = {}
  
      def get_auth(self):
@@ -109,7 +108,7 @@ class ENCODED:
              self.username = authenticators[0]
              self.password = authenticators[2]
  
-    def add_jsonld_context(self, tree, contexts, base):
+    def add_jsonld_context(self, tree, default_base):
          """Add contexts to various objects in the tree.
  
          tree is a json tree returned from the DCC's encoded database.
@@ -118,11 +117,10 @@ class ENCODED:
          base, if supplied allows setting the base url that relative
              urls will be resolved against.
          """
-        tree['@context'] = contexts[None]
-        tree['@context']['@base'] = base
-        self.add_jsonld_child_context(tree, contexts)
+        self.add_jsonld_child_context(tree, default_base)
+        self.add_jsonld_namespaces(tree['@context'])
  
-    def add_jsonld_child_context(self, obj, contexts):
+    def add_jsonld_child_context(self, obj, default_base):
          '''Add JSON-LD context to the encoded JSON.
  
          This is recursive because some of the IDs were relative URLs
@@ -136,24 +134,41 @@ class ENCODED:
          if isinstance(obj, collections.Sequence):
              # how should I update lists?
              for v in obj:
-                self.add_jsonld_child_context(v, contexts)
+                self.add_jsonld_child_context(v, default_base)
              return
  
          if isinstance(obj, collections.Mapping):
              for v in obj.values():
-                self.add_jsonld_child_context(v, contexts)
+                self.add_jsonld_child_context(v, default_base)
  
          # we have an object. attach a context to it.
          if self._is_encoded_object(obj):
-            default_base = contexts[None]['@base']
-            context = {'@base': urljoin(default_base, obj['@id']),
-                       '@vocab': self.get_schema_url(obj)}
-            for t in obj['@type']:
-                if t in contexts:
-                    context.update(contexts[t])
+            context = self.create_jsonld_context(obj, default_base)
              if len(context) > 0:
                  obj.setdefault('@context', {}).update(context)
  
+    def add_jsonld_namespaces(self, context):
+        '''Add shortcut namespaces to a context
+
+        Only needs to be run on the top-most context
+        '''
+        context.update(ENCODED_NAMESPACES)
+
+    def create_jsonld_context(self, obj, default_base):
+        '''Synthesize the context for an encoded type
+
+        self.contexts[None] = default context attributes added to any type
+        self.contexts[type] = context attributes for this type.
+        '''
+        context = {'@base': urljoin(default_base, obj['@id']),
+                    '@vocab': self.get_schema_url(obj)}
+        # add in defaults
+        context.update(self.contexts[None])
+        for t in obj['@type']:
+            if t in self.contexts:
+                context.update(self.contexts[t])
+        return context
+
      def get_json(self, obj_id, **kwargs):
          '''GET an ENCODE object as JSON and return as dict
  
@@ -188,7 +203,7 @@ class ENCODED:
          '''
          url = self.prepare_url(obj_id)
          json = self.get_json(obj_id, **kwargs)
-        self.add_jsonld_context(json, self.context, url)
+        self.add_jsonld_context(json, url)
          return json
  
      def get_object_type(self, obj):
@@ -201,7 +216,7 @@ class ENCODED:
      def get_schema_url(self, obj):
          obj_type = self.get_object_type(obj)
          if obj_type:
-            return self.prepare_url(ENCODED_SCHEMA_ROOT + obj_type + '.json')
+            return self.prepare_url(ENCODED_SCHEMA_ROOT + obj_type + '.json') + '#'
  
      def _is_encoded_object(self, obj):
          '''Test to see if an object is a JSON-LD object
@@ -255,12 +270,34 @@ class ENCODED:
          # clean up potentially messy urls
          url = urlparse(request_url)._asdict()
          if not url['scheme']:
-            url['scheme'] = 'http'
+            url['scheme'] = 'https'
          if not url['netloc']:
              url['netloc'] = self.server
          url = urlunparse(url.values())
          return url
  
+    def search_jsonld(self, term, **kwargs):
+        '''Send a search request to ENCODED
+        '''
+        url = self.prepare_url('/search/')
+        result = self.get_json(url, searchTerm=term, **kwargs)
+        self.convert_search_to_jsonld(result)
+        return result
+
+    def convert_search_to_jsonld(self, result):
+        '''Add the JSON-LD context to a search result
+
+        Also remove hard to handle nested attributes
+          e.g. remove object.term when we have no id
+        '''
+        graph = result['@graph']
+        for i, obj in enumerate(graph):
+            # suppress nested attributes
+            graph[i] = {k: v for k, v in obj.items() if '.' not in k}
+
+        self.add_jsonld_context(result, self.prepare_url(result['@id']))
+        return result
+
      def validate(self, obj):
          obj_type = self.get_object_type(obj)
          schema_url = self.get_schema_url(obj)