1 """Interface with encoded software for ENCODE3 data submission & warehouse
3 This allows retrieving blocks
7 from __future__ import print_function
14 from urlparse import urljoin, urlparse, urlunparse
16 LOGGER = logging.getLogger(__name__)
19 # The None context will get added to the root of the tree and will
20 # provide common defaults.
22 # terms in multiple encoded objects
23 'award': { '@type': '@id' },
24 'dataset': {'@type': '@id'},
25 'description': 'rdf:description',
26 'documents': { '@type': '@id' },
27 'experiment': {'@type': '@id'},
28 'href': { '@type': '@id' },
29 'lab': { '@type': '@id' },
30 'library': {'@type': '@id' },
31 'pi': { '@type': '@id' },
32 'platform': { '@type': '@id' },
33 'replicates': { '@type': '@id' },
34 'submitted_by': { '@type': '@id' },
35 'url': { '@type': '@id' },
37 # Identify and markup contained classes.
38 # e.g. in the tree there was a sub-dictionary named 'biosample'
# That dictionary had a term 'biosample_term_id', which is the
40 # term that should be used as the @id.
42 'biosample_term_id': { '@type': '@id' },
45 "assay_term_id": { "@type": "@id" },
46 "files": { "@type": "@id" },
47 "original_files": { "@type": "@id"},
49 # I tried to use the JSON-LD mapping capabilities to convert the lab
50 # contact information into a vcard record, but the encoded model
51 # didn't lend itself well to the vcard schema
53 # "address1": "vcard:street-address",
54 # "address2": "vcard:street-address",
55 # "city": "vcard:locality",
56 # "state": "vcard:region",
57 # "country": "vcard:country"
60 'nucleic_acid_term_id': { '@type': '@id' }
64 #FIXME: this needs to be initialized from rdfns
ENCODED_NAMESPACES = {
    # JSON-LD lets you define namespaces so you can use the shortened URL
    # syntax (instead of http://www.w3.org/2000/01/rdf-schema#label you
    # can write rdfs:label).
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "owl": "http://www.w3.org/2002/07/owl#",
    # was "htp://..." which made every dc: term expand to an invalid IRI
    "dc": "http://purl.org/dc/elements/1.1/",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "vcard": "http://www.w3.org/2006/vcard/ns#",

    # for some namespaces I made a best guess for the ontology root.
    "EFO": "http://www.ebi.ac.uk/efo/",  # EFO ontology
    "OBO": "http://purl.obolibrary.org/obo/",  # OBO ontology
    "OBI": "http://purl.obolibrary.org/obo/OBI_",  # Ontology for Biomedical Investigations
    # OBI: available from http://svn.code.sf.net/p/obi/code/releases/2012-07-01/merged/merged-obi-comments.owl
    'SO': 'http://purl.obolibrary.org/obo/SO_',  # Sequence ontology
    # SO: available from http://www.berkeleybop.org/ontologies/so.owl
    # NTR: New Term Request space for DCC to implement new ontology terms
}
# Server-relative path prefix that is joined with an object type and
# '.json' when building a schema url.
ENCODED_SCHEMA_ROOT = '/profiles/'
'''Programmatic access to encoded, the software powering ENCODE3's submit site.
92 def __init__(self, server, contexts=None):
96 self.contexts = contexts if contexts else ENCODED_CONTEXT
97 self.json_headers = {'Content-Type': 'application/json'}
101 return (self.username, self.password)
102 auth = property(get_auth)
104 def load_netrc(self):
106 session = netrc.netrc()
107 authenticators = session.authenticators(self.server)
109 self.username = authenticators[0]
110 self.password = authenticators[2]
def add_jsonld_context(self, tree, default_base):
    """Annotate a whole encoded JSON tree with JSON-LD contexts.

    tree is a json tree returned from the DCC's encoded database; it is
    modified in place.
    default_base is the url that relative ids in the tree are resolved
    against.

    Per-object contexts are attached first, then the namespace
    shortcuts are merged into the context of the root object only.
    """
    self.add_jsonld_child_context(tree, default_base)
    # the root must have received a context above for this lookup to work
    root_context = tree['@context']
    self.add_jsonld_namespaces(root_context)
def add_jsonld_child_context(self, obj, default_base):
    '''Recursively attach JSON-LD contexts to the encoded JSON in obj.

    The walk is recursive because some of the ids are relative urls and
    each object needs its own correctly computed base url.

    obj is any node of the decoded JSON tree; matching dictionaries are
    updated in place.
    default_base is the url relative ids are resolved against.
    '''
    # strings are sequences too, but there is nothing inside to annotate
    if type(obj) in types.StringTypes:
        return

    # list-like containers: visit every member, the container itself
    # never receives a context
    if isinstance(obj, collections.Sequence):
        for member in obj:
            self.add_jsonld_child_context(member, default_base)
        return

    # dictionaries: descend into the values before looking at the
    # dictionary itself
    if isinstance(obj, collections.Mapping):
        for member in obj.values():
            self.add_jsonld_child_context(member, default_base)

    # only objects carrying both @id and @type get a context
    if not self._is_encoded_object(obj):
        return
    context = self.create_jsonld_context(obj, default_base)
    if context:
        obj.setdefault('@context', {}).update(context)
def add_jsonld_namespaces(self, context):
    '''Merge the namespace shortcuts into a context dictionary.

    context is updated in place with every entry of ENCODED_NAMESPACES.
    This only needs to be done for the top-most context of a tree.
    '''
    context.update(ENCODED_NAMESPACES)
def create_jsonld_context(self, obj, default_base):
    '''Build the JSON-LD context dictionary for one encoded object.

    self.contexts[None] holds attributes added for every type
    self.contexts[type] holds attributes specific to that type

    @base is obj['@id'] resolved against default_base and @vocab is the
    schema url of the object. Defaults are applied first, so entries
    for the types listed in obj['@type'] override them, in list order.
    '''
    context = {
        '@base': urljoin(default_base, obj['@id']),
        '@vocab': self.get_schema_url(obj),
    }

    context.update(self.contexts[None])
    known_types = (name for name in obj['@type'] if name in self.contexts)
    for name in known_types:
        context.update(self.contexts[name])
    return context
def get_json(self, obj_id, **kwargs):
    '''GET an ENCODE object as JSON and return it as a dict.

    Uses prepare_url to allow url short-cuts.
    If no keyword arguments are specified it defaults to adding
    limit=all. Any keyword arguments that are passed in are sent to the
    host as query parameters instead.

    Known keywords:
      limit - (integer or 'all') how many records to return, all for all of them
      embed - (bool) if true expands linking ids into their associated object.
      format - text/html or application/json

    Returns the decoded JSON body of the response.
    An error status is logged and then handed to
    response.raise_for_status(), which raises for 4xx/5xx replies.
    '''
    if not kwargs:
        kwargs['limit'] = 'all'

    url = self.prepare_url(obj_id)
    LOGGER.info('requesting url: %s', url)

    # Only the username is logged: the password must never end up in
    # log files, not even at debug level.
    LOGGER.debug('username: %s', self.username)
    response = requests.get(url, auth=self.auth, headers=self.json_headers, params=kwargs)
    if response.status_code != requests.codes.ok:
        LOGGER.error("Error http status: %s", response.status_code)
        response.raise_for_status()
    return response.json()
def get_jsonld(self, obj_id, **kwargs):
    '''Get an ENCODE object as JSON-LD annotated with class contexts.

    See get_json for documentation about what keywords can be passed.

    The prepared url of obj_id is used as the base url when the
    contexts are added to the fetched tree.
    '''
    base_url = self.prepare_url(obj_id)
    tree = self.get_json(obj_id, **kwargs)
    self.add_jsonld_context(tree, base_url)
    return tree
210 def get_object_type(self, obj):
211 """Return type for a encoded object
213 obj_type = obj.get('@type')
214 if obj_type and isinstance(obj_type, collections.Sequence):
217 def get_schema_url(self, obj):
218 obj_type = self.get_object_type(obj)
220 return self.prepare_url(ENCODED_SCHEMA_ROOT + obj_type + '.json') + '#'
def _is_encoded_object(self, obj):
    '''Report whether obj can be treated as a JSON-LD object.

    Some of the nested dictionaries lack the @id or @type information
    necessary to convert them, so both keys must be present.
    Anything that is not iterable is rejected before the key test.
    '''
    if not isinstance(obj, collections.Iterable):
        return False

    return '@id' in obj and '@type' in obj
def patch_json(self, obj_id, changes):
    """Push a dictionary of changes to the server as an HTTP PATCH request.

    obj_id is passed through prepare_url to find the target.
    changes is serialized with json.dumps and sent as the request body.

    Returns the decoded JSON body of the response. Any status other
    than OK is logged together with the response text and handed to
    response.raise_for_status().
    """
    target = self.prepare_url(obj_id)
    body = json.dumps(changes)
    response = requests.patch(
        target,
        auth=self.auth,
        headers=self.json_headers,
        data=body)
    if response.status_code == requests.codes.ok:
        return response.json()

    LOGGER.error("Error http status: {}".format(response.status_code))
    LOGGER.error("Response: %s", response.text)
    response.raise_for_status()
    return response.json()
def put_json(self, obj_id, new_object):
    """Send new_object to the server with an HTTP PUT request.

    obj_id is passed through prepare_url to find the target.
    new_object is serialized with json.dumps and sent as the request body.

    Returns the decoded JSON body of the response. Any status other
    than CREATED is logged and handed to response.raise_for_status().
    """
    target = self.prepare_url(obj_id)
    body = json.dumps(new_object)
    response = requests.put(
        target,
        auth=self.auth,
        headers=self.json_headers,
        data=body)
    if response.status_code == requests.codes.created:
        return response.json()

    LOGGER.error("Error http status: {}".format(response.status_code))
    response.raise_for_status()
    return response.json()
def prepare_url(self, request_url):
    '''Fill in the missing parts of a url so it can be requested.

    Given a url fragment it will default to:

      * the https scheme
      * requests to self.server

    This allows fairly flexible urls. e.g.

      prepare_url('/experiments/ENCSR000AEG')
      prepare_url('https://<self.server>/experiments/ENCSR000AEG')

    return the same url. Parts that are already present, including any
    query string, are kept untouched.

    NOTE(review): a host written without a scheme or a leading '//' is
    parsed by urlparse as part of the path, not as the host.
    '''
    # clean up potentially messy urls
    parts = urlparse(request_url)
    scheme = parts.scheme or 'https'
    netloc = parts.netloc or self.server
    return urlunparse(
        (scheme, netloc, parts.path, parts.params, parts.query, parts.fragment))
def search_jsonld(self, term, **kwargs):
    '''Send a search request to ENCODED and return it as JSON-LD.

    term is sent as the searchTerm query parameter; any other keyword
    arguments are handed to get_json unchanged.
    The result is passed through convert_search_to_jsonld before it is
    returned.
    '''
    search_url = self.prepare_url('/search/')
    found = self.get_json(search_url, searchTerm=term, **kwargs)
    self.convert_search_to_jsonld(found)
    return found
def convert_search_to_jsonld(self, result):
    '''Add the JSON-LD context to a search result.

    Hard to handle nested attributes are removed as well: every key of
    an object in result['@graph'] that contains a '.' is dropped
    (e.g. object.term when we have no id).

    result is modified in place and also returned.
    '''
    graph = result['@graph']
    # rebuild each entry without its dotted keys, keeping the same list
    graph[:] = [
        {key: value for key, value in entry.items() if '.' not in key}
        for entry in graph
    ]

    base_url = self.prepare_url(result['@id'])
    self.add_jsonld_context(result, base_url)
    return result
303 def validate(self, obj):
304 obj_type = self.get_object_type(obj)
305 schema_url = self.get_schema_url(obj)
307 raise ValueError("Unable to construct schema url")
309 schema = self.schemas.setdefault(obj_type, self.get_json(schema_url))
313 jsonschema.validate(hidden, schema)
316 if __name__ == '__main__':
318 from htsworkflow.util.rdfhelp import get_model, dump_model
319 from htsworkflow.util.rdfjsonld import load_into_model
320 from pprint import pprint
322 logging.basicConfig(level=logging.DEBUG)
323 encoded = ENCODED('test.encodedcc.org')
325 body = encoded.get_jsonld('/experiments/ENCSR000AEC/')
327 load_into_model(model, body)