1 """Interface with encoded software for ENCODE3 data submission & warehouse
3 This allows retrieving blocks
7 from __future__ import print_function
13 from requests.utils import urlparse, urlunparse
15 from urlparse import urljoin
17 LOGGER = logging.getLogger(__name__)
20 # The None context will get added to the root of the tree and will
21 # provide common defaults.
23 # terms in multiple encoded objects
24 'description': 'rdf:description',
25 'experiment': {'@type': '@id'},
26 'href': { '@type': '@id' },
27 'lab': { '@type': '@id' },
28 'library': {'@type': '@id' },
29 'pi': { '@type': '@id' },
30 'platform': { '@type': '@id' },
31 'submitted_by': { '@type': '@id' },
32 'url': { '@type': '@id' },
34 # Identify and markup contained classes.
35 # e.g. in the tree there was a sub-dictionary named 'biosample'
# That dictionary had a term 'biosample_term_id', which is the
37 # term that should be used as the @id.
39 'biosample_term_id': { '@type': '@id' },
42 "assay_term_id": { "@type": "@id" },
45 'dataset': {'@type': '@id'},
47 # I tried to use the JSON-LD mapping capabilities to convert the lab
48 # contact information into a vcard record, but the encoded model
49 # didn't lend itself well to the vcard schema
51 # "address1": "vcard:street-address",
52 # "address2": "vcard:street-address",
53 # "city": "vcard:locality",
54 # "state": "vcard:region",
55 # "country": "vcard:country"
58 'award': { '@type': '@id' },
61 'award': { '@type': '@id' },
62 'nucleic_acid_term_id': { '@type': '@id' }
66 #FIXME: this needs to be initialized from rdfns
ENCODED_NAMESPACES = {
    # JSON-LD lets you define namespaces so you can use the shortened url
    # syntax (instead of http://www.w3.org/2000/01/rdf-schema#label you can
    # write rdfs:label).
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "owl": "http://www.w3.org/2002/07/owl#",
    # The scheme was previously misspelled "htp://", which produced
    # IRIs that do not match the Dublin Core element set.
    "dc": "http://purl.org/dc/elements/1.1/",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "vcard": "http://www.w3.org/2006/vcard/ns#",

    # for some namespaces I made a best guess for the ontology root.
    "EFO": "http://www.ebi.ac.uk/efo/",  # EFO ontology
    "OBO": "http://purl.obolibrary.org/obo/",  # OBO ontology
    "OBI": "http://purl.obolibrary.org/obo/OBI_",  # Ontology for Biomedical Investigations
    # OBI: available from http://svn.code.sf.net/p/obi/code/releases/2012-07-01/merged/merged-obi-comments.owl
    'SO': 'http://purl.obolibrary.org/obo/SO_',  # Sequence ontology
    # SO: available from http://www.berkeleybop.org/ontologies/so.owl
}
88 ENCODED_SCHEMA_ROOT='/profiles/'
91 '''Programatic access encoded, the software powering ENCODE3's submit site.
93 def __init__(self, server, contexts=None):
97 self.contexts = contexts if contexts else ENCODED_CONTEXT
101 return (self.username, self.password)
102 auth = property(get_auth)
104 def load_netrc(self):
106 session = netrc.netrc()
107 authenticators = session.authenticators(self.server)
109 self.username = authenticators[0]
110 self.password = authenticators[2]
def add_jsonld_context(self, tree, default_base):
    """Add contexts to various objects in the tree.

    tree is a json tree returned from the DCC's encoded database.
    It is annotated in place; nothing is returned.
    default_base is handed to add_jsonld_child_context, which uses it
    as the base url that relative urls will be resolved against.

    After the per-object contexts have been attached, the shortcut
    namespaces are merged into the '@context' found at the root of
    the tree.
    """
    # first pass: let the recursive helper decorate the tree
    self.add_jsonld_child_context(tree, default_base)

    # second pass: only the top-most context gets the namespace prefixes
    root_context = tree['@context']
    self.add_jsonld_namespaces(root_context)
124 def add_jsonld_child_context(self, obj, default_base):
125 '''Add JSON-LD context to the encoded JSON.
127 This is recursive because some of the IDs were relative URLs
128 and I needed a way to properly compute a the correct base URL.
130 # pretend strings aren't iterable
131 if type(obj) in types.StringTypes:
134 # recurse on container types
135 if isinstance(obj, collections.Sequence):
136 # how should I update lists?
138 self.add_jsonld_child_context(v, default_base)
141 if isinstance(obj, collections.Mapping):
142 for v in obj.values():
143 self.add_jsonld_child_context(v, default_base)
145 # we have an object. attach a context to it.
146 if self._is_encoded_object(obj):
147 context = self.create_jsonld_context(obj, default_base)
149 obj.setdefault('@context', {}).update(context)
def add_jsonld_namespaces(self, context):
    '''Add shortcut namespaces to a context

    Every prefix defined in ENCODED_NAMESPACES is copied into context,
    replacing any entry already stored under the same prefix.
    The context is modified in place.

    Only needs to be run on the top-most context
    '''
    for prefix, namespace_url in ENCODED_NAMESPACES.items():
        context[prefix] = namespace_url
158 def create_jsonld_context(self, obj, default_base):
159 '''Synthesize the context for a encoded type
161 self.contexts[None] = default context attributes added to any type
162 self.contexts[type] = context attributes for this type.
164 context = {'@base': urljoin(default_base, obj['@id']),
165 '@vocab': self.get_schema_url(obj)}
167 context.update(self.contexts[None])
168 for t in obj['@type']:
169 if t in self.contexts:
170 context.update(self.contexts[t])
def get_json(self, obj_id, **kwargs):
    '''GET an ENCODE object as JSON and return as dict

    Uses prepare_url to allow url short-cuts
    if no keyword arguments are specified it will default to adding limit=all
    Alternative keyword arguments can be passed in and will be sent to the
    host as query parameters.

    Keywords described for the host:
      limit - (integer or 'all') how many records to return, all for all of them
      embed - (bool) if true expands linking ids into their associated object.
      format - text/html or application/json

    Returns the decoded JSON body of the response.
    For a 4xx/5xx response, raise_for_status raises requests' HTTPError.
    '''
    if not kwargs:
        kwargs['limit'] = 'all'

    url = self.prepare_url(obj_id)
    LOGGER.info('requesting url: {}'.format(url))

    headers = {'content-type': 'application/json'}
    # Credentials must never end up in log files, so only the
    # username is reported; the password is deliberately omitted.
    LOGGER.debug('username: %s', self.username)
    response = requests.get(url, auth=self.auth, headers=headers, params=kwargs)
    if response.status_code != requests.codes.ok:
        LOGGER.error("Error http status: {}".format(response.status_code))
        response.raise_for_status()
    return response.json()
200 def get_jsonld(self, obj_id, **kwargs):
201 '''Get ENCODE object as JSONLD annotated with classses contexts
203 see get_json for documentation about what keywords can be passed.
205 url = self.prepare_url(obj_id)
206 json = self.get_json(obj_id, **kwargs)
207 self.add_jsonld_context(json, url)
210 def get_object_type(self, obj):
211 """Return type for a encoded object
213 obj_type = obj.get('@type')
214 if obj_type and isinstance(obj_type, collections.Sequence):
217 def get_schema_url(self, obj):
218 obj_type = self.get_object_type(obj)
220 return self.prepare_url(ENCODED_SCHEMA_ROOT + obj_type + '.json') + '#'
222 def _is_encoded_object(self, obj):
223 '''Test to see if an object is a JSON-LD object
225 Some of the nested dictionaries lack the @id or @type
226 information necessary to convert them.
228 if not isinstance(obj, collections.Iterable):
231 if '@id' in obj and '@type' in obj:
def patch_json(self, obj_id, changes):
    """Given a dictionary of changes push them as a HTTP patch request

    obj_id is expanded with prepare_url, and changes is serialized
    with json.dumps and sent as the request body.

    Returns the decoded JSON body of the response.
    For a 4xx/5xx response, raise_for_status raises requests' HTTPError.
    """
    url = self.prepare_url(obj_id)
    payload = json.dumps(changes)
    # The body is a JSON document, so say so; without this header the
    # request goes out with no content type at all.
    headers = {'content-type': 'application/json'}
    response = requests.patch(url, auth=self.auth, headers=headers,
                              data=payload)
    if response.status_code != requests.codes.ok:
        LOGGER.error("Error http status: {}".format(response.status_code))
        response.raise_for_status()
    return response.json()
def put_json(self, obj_id, new_object):
    """Send new_object to the server as the body of a HTTP put request

    obj_id is expanded with prepare_url, and new_object is serialized
    with json.dumps and sent as the request body.

    Any status other than 201 (created) is logged as an error.
    For a 4xx/5xx response, raise_for_status raises requests' HTTPError.
    Otherwise the decoded JSON body of the response is returned.
    """
    url = self.prepare_url(obj_id)
    payload = json.dumps(new_object)
    # The body is a JSON document, so say so; without this header the
    # request goes out with no content type at all.
    headers = {'content-type': 'application/json'}
    response = requests.put(url, auth=self.auth, headers=headers,
                            data=payload)
    if response.status_code != requests.codes.created:
        LOGGER.error("Error http status: {}".format(response.status_code))
        response.raise_for_status()
    return response.json()
256 def prepare_url(self, request_url):
257 '''This attempts to provide some convienence for accessing a URL
259 Given a url fragment it will default to :
261 * requests to self.server
263 This allows fairly flexible urls. e.g.
265 prepare_url('/experiments/ENCSR000AEG')
266 prepare_url('submit.encodedcc.org/experiments/ENCSR000AEG')
267 prepare_url('http://submit.encodedcc.org/experiments/ENCSR000AEG?limit=all')
269 should all return the same url
271 # clean up potentially messy urls
272 url = urlparse(request_url)._asdict()
273 if not url['scheme']:
274 url['scheme'] = 'http'
275 if not url['netloc']:
276 url['netloc'] = self.server
277 url = urlunparse(url.values())
280 def search_jsonld(self, term, **kwargs):
281 '''Send search request to ENCODED
283 url = self.prepare_url('/search/')
284 result = self.get_json(url, searchTerm=term, **kwargs)
285 self.convert_search_to_jsonld(result)
288 def convert_search_to_jsonld(self, result):
289 '''Add the context to search result
291 Also remove hard to handle nested attributes
292 e.g. remove object.term when we have no id
294 graph = result['@graph']
295 for i, obj in enumerate(graph):
296 # suppress nested attributes
297 graph[i] = {k: v for k, v in obj.items() if '.' not in k}
299 self.add_jsonld_context(result, self.prepare_url(result['@id']))
302 def validate(self, obj):
303 obj_type = self.get_object_type(obj)
304 schema_url = self.get_schema_url(obj)
306 raise ValueError("Unable to construct schema url")
308 schema = self.schemas.setdefault(obj_type, self.get_json(schema_url))
312 jsonschema.validate(hidden, schema)
315 if __name__ == '__main__':
317 from htsworkflow.util.rdfhelp import get_model, dump_model
318 from htsworkflow.util.rdfjsonld import load_into_model
319 from pprint import pprint
321 logging.basicConfig(level=logging.DEBUG)
322 encoded = ENCODED('test.encodedcc.org')
324 body = encoded.get_jsonld('/experiments/ENCSR000AEC/')
326 load_into_model(model, body)