X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=blobdiff_plain;f=htsworkflow%2Fsubmission%2Fencoded.py;h=1228d387f015a39a9da12c8ad5ca296d218c68de;hp=8eec0c0ef60cd44e0123cfc8b7d28d1f8625c50d;hb=dd49302bb6f8b2503cc8c9b7151ed852858e10a1;hpb=c5e0925e23fef42c2ffa7d6ba4fa24d79e3f87aa diff --git a/htsworkflow/submission/encoded.py b/htsworkflow/submission/encoded.py index 8eec0c0..1228d38 100644 --- a/htsworkflow/submission/encoded.py +++ b/htsworkflow/submission/encoded.py @@ -64,7 +64,7 @@ ENCODED_CONTEXT = { } #FIXME: this needs to be initialized from rdfns -_encoded_namespaces = { +ENCODED_NAMESPACES = { # JSON-LD lets you define namespaces so you can used the shorted url syntax. # (instead of http://www.w3.org/2000/01/rdf-schema#label you can do # rdfs:label) @@ -84,17 +84,17 @@ _encoded_namespaces = { # SO: available from http://www.berkeleybop.org/ontologies/so.owl } -ENCODED_CONTEXT[None].update(_encoded_namespaces) + ENCODED_SCHEMA_ROOT='/profiles/' class ENCODED: '''Programatic access encoded, the software powering ENCODE3's submit site. ''' - def __init__(self, server, context=None): + def __init__(self, server, contexts=None): self.server = server self.username = None self.password = None - self.context = context if context else ENCODED_CONTEXT + self.contexts = contexts if contexts else ENCODED_CONTEXT self.schemas = {} def get_auth(self): @@ -109,7 +109,7 @@ class ENCODED: self.username = authenticators[0] self.password = authenticators[2] - def add_jsonld_context(self, tree, contexts, base): + def add_jsonld_context(self, tree, default_base): """Add contexts to various objects in the tree. tree is a json tree returned from the DCC's encoded database. @@ -118,11 +118,10 @@ class ENCODED: base, if supplied allows setting the base url that relative urls will be resolved against. """ - tree['@context'] = contexts[None] - tree['@context']['@base'] = base - self.add_jsonld_child_context(tree, contexts) + self.add_jsonld_child_context(tree, default_base) + self.add_jsonld_namespaces(tree['@context']) - def add_jsonld_child_context(self, obj, contexts): + def add_jsonld_child_context(self, obj, default_base): '''Add JSON-LD context to the encoded JSON. This is recursive because some of the IDs were relative URLs @@ -136,24 +135,41 @@ class ENCODED: if isinstance(obj, collections.Sequence): # how should I update lists? for v in obj: - self.add_jsonld_child_context(v, contexts) + self.add_jsonld_child_context(v, default_base) return if isinstance(obj, collections.Mapping): for v in obj.values(): - self.add_jsonld_child_context(v, contexts) + self.add_jsonld_child_context(v, default_base) # we have an object. attach a context to it. if self._is_encoded_object(obj): - default_base = contexts[None]['@base'] - context = {'@base': urljoin(default_base, obj['@id']), - '@vocab': self.get_schema_url(obj)} - for t in obj['@type']: - if t in contexts: - context.update(contexts[t]) + context = self.create_jsonld_context(obj, default_base) if len(context) > 0: obj.setdefault('@context', {}).update(context) + def add_jsonld_namespaces(self, context): + '''Add shortcut namespaces to a context + + Only needs to be run on the top-most context + ''' + context.update(ENCODED_NAMESPACES) + + def create_jsonld_context(self, obj, default_base): + '''Synthesize the context for a encoded type + + self.contexts[None] = default context attributes added to any type + self.contexts[type] = context attributes for this type. + ''' + context = {'@base': urljoin(default_base, obj['@id']), + '@vocab': self.get_schema_url(obj)} + # add in defaults + context.update(self.contexts[None]) + for t in obj['@type']: + if t in self.contexts: + context.update(self.contexts[t]) + return context + def get_json(self, obj_id, **kwargs): '''GET an ENCODE object as JSON and return as dict @@ -188,7 +204,7 @@ class ENCODED: ''' url = self.prepare_url(obj_id) json = self.get_json(obj_id, **kwargs) - self.add_jsonld_context(json, self.context, url) + self.add_jsonld_context(json, url) return json def get_object_type(self, obj): @@ -201,7 +217,7 @@ class ENCODED: def get_schema_url(self, obj): obj_type = self.get_object_type(obj) if obj_type: - return self.prepare_url(ENCODED_SCHEMA_ROOT + obj_type + '.json') + return self.prepare_url(ENCODED_SCHEMA_ROOT + obj_type + '.json') + '#' def _is_encoded_object(self, obj): '''Test to see if an object is a JSON-LD object @@ -261,6 +277,28 @@ class ENCODED: url = urlunparse(url.values()) return url + def search_jsonld(self, term, **kwargs): + '''Send search request to ENCODED + ''' + url = self.prepare_url('/search/') + result = self.get_json(url, searchTerm=term, **kwargs) + self.convert_search_to_jsonld(result) + return result + + def convert_search_to_jsonld(self, result): + '''Add the context to search result + + Also remove hard to handle nested attributes + e.g. remove object.term when we have no id + ''' + graph = result['@graph'] + for i, obj in enumerate(graph): + # suppress nested attributes + graph[i] = {k: v for k, v in obj.items() if '.' not in k} + + self.add_jsonld_context(result, self.prepare_url(result['@id'])) + return result + def validate(self, obj): obj_type = self.get_object_type(obj) schema_url = self.get_schema_url(obj)