1 """Interface with encoded software for ENCODE3 data submission & warehouse
3 This allows retrieving blocks
7 from __future__ import print_function
14 from urlparse import urljoin, urlparse, urlunparse
16 LOGGER = logging.getLogger(__name__)
19 # The None context will get added to the root of the tree and will
20 # provide common defaults.
22 # terms in multiple encoded objects
23 'description': 'rdf:description',
24 'experiment': {'@type': '@id'},
25 'href': { '@type': '@id' },
26 'lab': { '@type': '@id' },
27 'library': {'@type': '@id' },
28 'pi': { '@type': '@id' },
29 'platform': { '@type': '@id' },
30 'submitted_by': { '@type': '@id' },
31 'url': { '@type': '@id' },
33 # Identify and markup contained classes.
# e.g. in the tree there was a sub-dictionary named 'biosample'.
# That dictionary had a term 'biosample_term_id', which is the
# term that should be used as the @id.
38 'biosample_term_id': { '@type': '@id' },
41 "assay_term_id": { "@type": "@id" },
44 'dataset': {'@type': '@id'},
46 # I tried to use the JSON-LD mapping capabilities to convert the lab
47 # contact information into a vcard record, but the encoded model
48 # didn't lend itself well to the vcard schema
50 # "address1": "vcard:street-address",
51 # "address2": "vcard:street-address",
52 # "city": "vcard:locality",
53 # "state": "vcard:region",
54 # "country": "vcard:country"
57 'award': { '@type': '@id' },
60 'award': { '@type': '@id' },
61 'nucleic_acid_term_id': { '@type': '@id' }
65 #FIXME: this needs to be initialized from rdfns
ENCODED_NAMESPACES = {
    # JSON-LD lets you define namespaces so you can use the shortened url
    # syntax (instead of http://www.w3.org/2000/01/rdf-schema#label you can
    # write rdfs:label).
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "owl": "http://www.w3.org/2002/07/owl#",
    # Dublin Core elements; the scheme was previously misspelled "htp://",
    # which made every dc: term expand to an unusable IRI.
    "dc": "http://purl.org/dc/elements/1.1/",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "vcard": "http://www.w3.org/2006/vcard/ns#",

    # for some namespaces I made a best guess for the ontology root.
    "EFO": "http://www.ebi.ac.uk/efo/",  # EFO ontology
    "OBO": "http://purl.obolibrary.org/obo/",  # OBO ontology
    # Ontology for Biomedical Investigations
    "OBI": "http://purl.obolibrary.org/obo/OBI_",
    # OBI: available from http://svn.code.sf.net/p/obi/code/releases/2012-07-01/merged/merged-obi-comments.owl
    'SO': 'http://purl.obolibrary.org/obo/SO_',  # Sequence ontology
    # SO: available from http://www.berkeleybop.org/ontologies/so.owl
}
# Path prefix that get_schema_url joins with an object type and '.json'
# to build the url of that type's schema.
ENCODED_SCHEMA_ROOT = '/profiles/'
'''Programmatic access to encoded, the software powering ENCODE3's submit site.
92 def __init__(self, server, contexts=None):
96 self.contexts = contexts if contexts else ENCODED_CONTEXT
100 return (self.username, self.password)
101 auth = property(get_auth)
103 def load_netrc(self):
105 session = netrc.netrc()
106 authenticators = session.authenticators(self.server)
108 self.username = authenticators[0]
109 self.password = authenticators[2]
def add_jsonld_context(self, tree, default_base):
    """Annotate a JSON tree from encoded with JSON-LD contexts.

    tree is a json tree returned from the DCC's encoded database.
    It is updated in place; nothing is returned.
    default_base is the base url that relative urls in the tree
    will be resolved against.

    The tree must end up with a top-level '@context' entry, because
    the namespace shortcuts are added to that context.
    """
    # First give every encoded object in the tree its own context ...
    self.add_jsonld_child_context(tree, default_base)
    # ... then put the shared namespace prefixes on the top-most one.
    root_context = tree['@context']
    self.add_jsonld_namespaces(root_context)
123 def add_jsonld_child_context(self, obj, default_base):
124 '''Add JSON-LD context to the encoded JSON.
This is recursive because some of the IDs were relative URLs
and I needed a way to properly compute the correct base URL.
129 # pretend strings aren't iterable
130 if type(obj) in types.StringTypes:
133 # recurse on container types
134 if isinstance(obj, collections.Sequence):
135 # how should I update lists?
137 self.add_jsonld_child_context(v, default_base)
140 if isinstance(obj, collections.Mapping):
141 for v in obj.values():
142 self.add_jsonld_child_context(v, default_base)
144 # we have an object. attach a context to it.
145 if self._is_encoded_object(obj):
146 context = self.create_jsonld_context(obj, default_base)
148 obj.setdefault('@context', {}).update(context)
def add_jsonld_namespaces(self, context):
    '''Copy the shortcut namespace prefixes into a context.

    context is modified in place; an existing entry with the same
    prefix is overwritten.
    Only needs to be run on the top-most context.
    '''
    for prefix, uri in ENCODED_NAMESPACES.items():
        context[prefix] = uri
def create_jsonld_context(self, obj, default_base):
    '''Synthesize the context for an encoded type.

    self.contexts[None] = default context attributes added to any type
    self.contexts[type] = context attributes for this type.

    obj must carry '@id' and '@type'; '@type' is iterated, so it is
    expected to be a list of type names.
    default_base is the url that obj's '@id' is resolved against.

    Returns a new dict with '@base' and '@vocab' plus the default
    attributes and the attributes of every type of obj that has an
    entry in self.contexts. Later types override earlier ones.
    '''
    context = {
        '@base': urljoin(default_base, obj['@id']),
        '@vocab': self.get_schema_url(obj),
    }
    # A caller-supplied contexts table may have no None (defaults) entry.
    context.update(self.contexts.get(None, {}))
    for type_name in obj['@type']:
        if type_name in self.contexts:
            context.update(self.contexts[type_name])
    # The caller merges this into the object's '@context'.
    return context
def get_json(self, obj_id, **kwargs):
    '''GET an ENCODE object as JSON and return it as a dict.

    obj_id goes through prepare_url, so url short-cuts are allowed.
    If no keyword arguments are specified, limit=all is added.
    Any keyword arguments passed in are sent to the host as query
    parameters, for example:

      limit - (integer or 'all') how many records to return, all for all of them
      embed - (bool) if true expands linking ids into their associated object.
      format - text/html or application/json

    Returns the decoded JSON body of the response.
    A status other than 200 is logged as an error, and
    raise_for_status() is then called on the response.
    '''
    if not kwargs:
        kwargs['limit'] = 'all'

    url = self.prepare_url(obj_id)
    LOGGER.info('requesting url: {}'.format(url))

    headers = {'content-type': 'application/json'}
    # Log who we authenticate as, but never write the password to a log.
    LOGGER.debug('username: %s', self.username)
    response = requests.get(url, auth=self.auth, headers=headers, params=kwargs)
    if response.status_code != requests.codes.ok:
        LOGGER.error("Error http status: {}".format(response.status_code))
        response.raise_for_status()
    return response.json()
def get_jsonld(self, obj_id, **kwargs):
    '''Get an ENCODE object as JSON-LD annotated with class contexts.

    See get_json for documentation about what keywords can be passed.

    Returns the tree fetched by get_json after add_jsonld_context has
    added contexts to it, using the prepared url of obj_id as the base.
    '''
    url = self.prepare_url(obj_id)
    # Named "tree" rather than "json" so the json module is not shadowed.
    tree = self.get_json(obj_id, **kwargs)
    self.add_jsonld_context(tree, url)
    return tree
209 def get_object_type(self, obj):
210 """Return type for a encoded object
212 obj_type = obj.get('@type')
213 if obj_type and isinstance(obj_type, collections.Sequence):
216 def get_schema_url(self, obj):
217 obj_type = self.get_object_type(obj)
219 return self.prepare_url(ENCODED_SCHEMA_ROOT + obj_type + '.json') + '#'
221 def _is_encoded_object(self, obj):
222 '''Test to see if an object is a JSON-LD object
224 Some of the nested dictionaries lack the @id or @type
225 information necessary to convert them.
227 if not isinstance(obj, collections.Iterable):
230 if '@id' in obj and '@type' in obj:
def patch_json(self, obj_id, changes):
    """Push a dictionary of changes to the server as an HTTP PATCH request.

    obj_id goes through prepare_url, so url short-cuts are allowed.
    changes is serialized with json.dumps and sent as the request body.

    Returns the decoded JSON body of the response.
    A status other than 200 is logged as an error, and
    raise_for_status() is then called on the response.
    """
    url = self.prepare_url(obj_id)
    payload = json.dumps(changes)
    # The body is JSON, so label it as JSON, the same way get_json
    # labels its requests.
    headers = {'content-type': 'application/json'}
    response = requests.patch(url, auth=self.auth, headers=headers, data=payload)
    if response.status_code != requests.codes.ok:
        LOGGER.error("Error http status: {}".format(response.status_code))
        response.raise_for_status()
    return response.json()
def put_json(self, obj_id, new_object):
    """Send new_object to the server as an HTTP PUT request.

    obj_id goes through prepare_url, so url short-cuts are allowed.
    new_object is serialized with json.dumps and sent as the request body.

    Returns the decoded JSON body of the response.
    A status other than 201 (created) is logged as an error, and
    raise_for_status() is then called on the response.
    """
    url = self.prepare_url(obj_id)
    payload = json.dumps(new_object)
    # The body is JSON, so label it as JSON, the same way get_json
    # labels its requests.
    headers = {'content-type': 'application/json'}
    response = requests.put(url, auth=self.auth, headers=headers, data=payload)
    if response.status_code != requests.codes.created:
        LOGGER.error("Error http status: {}".format(response.status_code))
        response.raise_for_status()
    return response.json()
def prepare_url(self, request_url):
    '''Provide some convenience for accessing a URL.

    Given a url fragment it will default to:

      * the http scheme
      * requests to self.server

    This allows fairly flexible urls. e.g. with self.server set to
    submit.encodedcc.org

      prepare_url('/experiments/ENCSR000AEG')
      prepare_url('submit.encodedcc.org/experiments/ENCSR000AEG')
      prepare_url('http://submit.encodedcc.org/experiments/ENCSR000AEG')

    should all return the same url. Any params, query string or
    fragment in request_url is kept.

    Returns the completed url as a string.
    '''
    # clean up potentially messy urls
    parts = urlparse(request_url)
    scheme = parts.scheme or 'http'
    netloc = parts.netloc
    path = parts.path
    if not netloc:
        netloc = self.server
        if not parts.scheme:
            # urlparse only recognises a host introduced by '//', so a
            # scheme-less 'host/path' arrives entirely in path. Strip a
            # leading copy of our own server name so it is not doubled.
            head, slash, rest = path.partition('/')
            if head == self.server:
                path = slash + rest
    # _replace keeps the field order; no dependence on dict value order.
    return urlunparse(parts._replace(scheme=scheme, netloc=netloc, path=path))
def search_jsonld(self, term, **kwargs):
    '''Send a search request to ENCODED.

    term is sent as the searchTerm query parameter; any other keyword
    arguments are passed through to get_json.

    Returns the search result after convert_search_to_jsonld has been
    applied to it.
    '''
    url = self.prepare_url('/search/')
    result = self.get_json(url, searchTerm=term, **kwargs)
    self.convert_search_to_jsonld(result)
    # Hand the converted result back so the search is usable by callers.
    return result
def convert_search_to_jsonld(self, result):
    '''Add the context to a search result, in place.

    Hard to handle nested attributes are removed first: every key
    containing a '.' is dropped from each entry of result['@graph']
    (e.g. object.term when we have no id).
    '''
    records = result['@graph']
    for position in range(len(records)):
        entry = records[position]
        # keep only the plain (un-dotted) attributes
        records[position] = dict(
            (key, value) for key, value in entry.items() if '.' not in key
        )

    base_url = self.prepare_url(result['@id'])
    self.add_jsonld_context(result, base_url)
301 def validate(self, obj):
302 obj_type = self.get_object_type(obj)
303 schema_url = self.get_schema_url(obj)
305 raise ValueError("Unable to construct schema url")
307 schema = self.schemas.setdefault(obj_type, self.get_json(schema_url))
311 jsonschema.validate(hidden, schema)
314 if __name__ == '__main__':
316 from htsworkflow.util.rdfhelp import get_model, dump_model
317 from htsworkflow.util.rdfjsonld import load_into_model
318 from pprint import pprint
320 logging.basicConfig(level=logging.DEBUG)
321 encoded = ENCODED('test.encodedcc.org')
323 body = encoded.get_jsonld('/experiments/ENCSR000AEC/')
325 load_into_model(model, body)