1 """Interface with encoded software for ENCODE3 data submission & warehouse
3 This allows retrieving blocks
7 from __future__ import print_function
13 from requests.utils import urlparse, urlunparse
15 from urlparse import urljoin
17 LOGGER = logging.getLogger(__name__)
20 # The None context will get added to the root of the tree and will
21 # provide common defaults.
23 # terms in multiple encoded objects
24 'description': 'rdf:description',
25 'experiment': {'@type': '@id'},
26 'href': { '@type': '@id' },
27 'lab': { '@type': '@id' },
28 'library': {'@type': '@id' },
29 'pi': { '@type': '@id' },
30 'platform': { '@type': '@id' },
31 'submitted_by': { '@type': '@id' },
32 'url': { '@type': '@id' },
34 # Identify and markup contained classes.
35 # e.g. in the tree there was a sub-dictionary named 'biosample'
36 # That dictionary had a term 'biosample_term_id, which is the
37 # term that should be used as the @id.
39 'biosample_term_id': { '@type': '@id' },
42 "assay_term_id": { "@type": "@id" },
45 'dataset': {'@type': '@id'},
47 # I tried to use the JSON-LD mapping capabilities to convert the lab
48 # contact information into a vcard record, but the encoded model
49 # didn't lend itself well to the vcard schema
51 # "address1": "vcard:street-address",
52 # "address2": "vcard:street-address",
53 # "city": "vcard:locality",
54 # "state": "vcard:region",
55 # "country": "vcard:country"
58 'award': { '@type': '@id' },
61 'award': { '@type': '@id' },
62 'nucleic_acid_term_id': { '@type': '@id' }
66 #FIXME: this needs to be initialized from rdfns
_encoded_namespaces = {
    # JSON-LD lets you define namespaces so you can use the shortened URL
    # syntax (instead of http://www.w3.org/2000/01/rdf-schema#label you can
    # write rdfs:label).
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "owl": "http://www.w3.org/2002/07/owl#",
    # was misspelled "htp://", which produced an unusable namespace IRI
    "dc": "http://purl.org/dc/elements/1.1/",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "vcard": "http://www.w3.org/2006/vcard/ns#",

    # for some namespaces I made a best guess for the ontology root.
    "EFO": "http://www.ebi.ac.uk/efo/",  # EFO ontology
    "OBO": "http://purl.obolibrary.org/obo/",  # OBO ontology
    "OBI": "http://purl.obolibrary.org/obo/OBI_",  # Ontology for Biomedical Investigations
    # OBI: available from http://svn.code.sf.net/p/obi/code/releases/2012-07-01/merged/merged-obi-comments.owl
    'SO': 'http://purl.obolibrary.org/obo/SO_',  # Sequence ontology
    # SO: available from http://www.berkeleybop.org/ontologies/so.owl
}
87 ENCODED_CONTEXT[None].update(_encoded_namespaces)
88 ENCODED_SCHEMA_ROOT='/profiles/'
91 '''Programatic access encoded, the software powering ENCODE3's submit site.
93 def __init__(self, server, context=None):
97 self.context = context if context else ENCODED_CONTEXT
101 return (self.username, self.password)
102 auth = property(get_auth)
def load_netrc(self):
    """Load the credentials for self.server from the user's netrc file.

    Sets self.username and self.password from the netrc entry matching
    self.server.  When the netrc file has no matching entry the current
    values are left untouched.
    """
    import netrc

    session = netrc.netrc()
    authenticators = session.authenticators(self.server)
    # authenticators() returns None when no entry matches the host;
    # indexing it unguarded would raise TypeError.
    if authenticators is not None:
        # the tuple is (login, account, password)
        self.username = authenticators[0]
        self.password = authenticators[2]
def add_jsonld_context(self, tree, contexts, base):
    """Add contexts to various objects in the tree.

    tree is a json tree returned from the DCC's encoded database.
    contexts is a dictionary of dictionaries containing contexts
        for the various possible encoded classes.
    base, if supplied allows setting the base url that relative
        urls will be resolved against.

    The tree is modified in place.  Note that contexts[None] itself
    becomes the root '@context' and receives the '@base' entry.
    """
    root_context = contexts[None]
    tree['@context'] = root_context
    # Deliberately stored on the shared root context (not a copy):
    # add_jsonld_child_context reads contexts[None]['@base'] back.
    root_context['@base'] = base
    self.add_jsonld_child_context(tree, contexts)
def add_jsonld_child_context(self, obj, contexts):
    '''Add JSON-LD context to the encoded JSON.

    This is recursive because some of the IDs were relative URLs
    and I needed a way to properly compute the correct base URL.

    obj is the (sub)tree to annotate; it is modified in place.
    contexts maps encoded type names to JSON-LD context dictionaries;
    contexts[None]['@base'] must already be set.
    '''
    # The container ABCs and the string base type live in different
    # places on Python 2 and Python 3; resolve them once per call.
    try:
        from collections.abc import Mapping, Sequence
    except ImportError:  # Python 2
        from collections import Mapping, Sequence
    try:
        string_types = (basestring,)
    except NameError:  # Python 3 has no basestring
        string_types = (str, bytes)

    def annotate(node):
        # pretend strings aren't iterable: a string is a sequence of
        # strings, so descending into one would never terminate.
        if isinstance(node, string_types):
            return

        # recurse on container types
        if isinstance(node, Sequence):
            for child in node:
                annotate(child)
            return

        if isinstance(node, Mapping):
            # Children are visited before this node gets its own context,
            # so a freshly attached '@context' is never walked and nested
            # objects see the root '@base' before the root is updated.
            for child in node.values():
                annotate(child)

        # we have an object. attach a context to it.
        if self._is_encoded_object(node):
            default_base = contexts[None]['@base']
            context = {'@base': urljoin(default_base, node['@id']),
                       '@vocab': self.get_schema_url(node)}
            for type_name in node['@type']:
                if type_name in contexts:
                    context.update(contexts[type_name])
            node.setdefault('@context', {}).update(context)

    annotate(obj)
def get_json(self, obj_id, **kwargs):
    '''GET an ENCODE object as JSON and return as dict

    Uses prepare_url to allow url short-cuts
    if no keyword arguments are specified it will default to adding limit=all
    Alternative keyword arguments can be passed in and will be sent to the host.

    Keywords mentioned for the host:
      limit - (integer or 'all') how many records to return, all for all of them
      embed - (bool) if true expands linking ids into their associated object.
      format - text/html or application/json

    Returns the decoded JSON body.  Raises requests.HTTPError (through
    raise_for_status) when the server answers with an error status.
    '''
    if not kwargs:
        kwargs['limit'] = 'all'

    url = self.prepare_url(obj_id)
    LOGGER.info('requesting url: %s', url)

    headers = {'content-type': 'application/json'}
    # Only the user name is logged: writing the password to the log
    # would leak the credential to anyone who can read debug output.
    LOGGER.debug('username: %s', self.username)
    response = requests.get(url, auth=self.auth, headers=headers, params=kwargs)
    if response.status_code != requests.codes.ok:
        LOGGER.error("Error http status: %s", response.status_code)
        response.raise_for_status()
    return response.json()
def get_jsonld(self, obj_id, **kwargs):
    '''Get ENCODE object as JSON-LD annotated with class contexts

    see get_json for documentation about what keywords can be passed.

    Returns the tree fetched by get_json after add_jsonld_context has
    annotated it in place, using the prepared url as the base.
    '''
    url = self.prepare_url(obj_id)
    # named "tree" so the local does not shadow the json module
    tree = self.get_json(obj_id, **kwargs)
    self.add_jsonld_context(tree, self.context, url)
    return tree
def get_object_type(self, obj):
    """Return type for an encoded object

    Looks at obj['@type'].  A non-empty list (or other sequence) yields
    its first element, a plain string is returned unchanged, and anything
    else (missing, empty, or not a sequence) yields None.
    """
    try:
        from collections.abc import Sequence
    except ImportError:  # Python 2
        from collections import Sequence
    try:
        string_types = (basestring,)
    except NameError:  # Python 3 has no basestring
        string_types = (str,)

    obj_type = obj.get('@type')
    if not obj_type:
        return None
    # A string is itself a Sequence; indexing it would return only its
    # first character instead of the type name.
    if isinstance(obj_type, string_types):
        return obj_type
    if isinstance(obj_type, Sequence):
        return obj_type[0]
    return None
def get_schema_url(self, obj):
    """Return the url of the schema describing obj, or None.

    The url is built from ENCODED_SCHEMA_ROOT, the type reported by
    get_object_type and a '.json' suffix, then passed through prepare_url.
    None is returned when the object has no usable type.
    """
    obj_type = self.get_object_type(obj)
    if not obj_type:
        # nothing to build a url from; concatenating None would raise
        return None
    return self.prepare_url(ENCODED_SCHEMA_ROOT + obj_type + '.json')
def _is_encoded_object(self, obj):
    '''Test to see if an object is a JSON-LD object

    Some of the nested dictionaries lack the @id or @type
    information necessary to convert them.

    Returns True only when both '@id' and '@type' are found in obj.
    '''
    try:
        return '@id' in obj and '@type' in obj
    except TypeError:
        # numbers, None and other values that do not support "in"
        return False
def patch_json(self, obj_id, changes):
    """Given a dictionary of changes push them as a HTTP patch request

    obj_id is expanded with prepare_url; changes is serialized with
    json.dumps and sent as the request body.

    Returns the decoded JSON response.  Raises requests.HTTPError
    (through raise_for_status) when the server answers with an error
    status.
    """
    url = self.prepare_url(obj_id)
    payload = json.dumps(changes)
    # The body is JSON, so label it as such (get_json already sends this
    # header); a bare string body carries no content type on its own.
    headers = {'content-type': 'application/json'}
    response = requests.patch(url, auth=self.auth, headers=headers,
                              data=payload)
    if response.status_code != requests.codes.ok:
        LOGGER.error("Error http status: %s", response.status_code)
        response.raise_for_status()
    return response.json()
def put_json(self, obj_id, new_object):
    """Send new_object to the server with a HTTP put request

    obj_id is expanded with prepare_url; new_object is serialized with
    json.dumps and sent as the request body.

    Returns the decoded JSON response.  Any status other than
    201 Created is logged as an error, and error statuses raise
    requests.HTTPError through raise_for_status.
    """
    url = self.prepare_url(obj_id)
    payload = json.dumps(new_object)
    # The body is JSON, so label it as such (get_json already sends this
    # header); a bare string body carries no content type on its own.
    headers = {'content-type': 'application/json'}
    response = requests.put(url, auth=self.auth, headers=headers,
                            data=payload)
    if response.status_code != requests.codes.created:
        LOGGER.error("Error http status: %s", response.status_code)
        response.raise_for_status()
    return response.json()
def prepare_url(self, request_url):
    '''This attempts to provide some convenience for accessing a URL

    Given a url fragment it will default to :
    * requests over http
    * requests to self.server

    This allows fairly flexible urls. e.g. with self.server set to
    submit.encodedcc.org

    prepare_url('/experiments/ENCSR000AEG')
    prepare_url('submit.encodedcc.org/experiments/ENCSR000AEG')
    prepare_url('http://submit.encodedcc.org/experiments/ENCSR000AEG')

    all return the same url.  Query strings and explicitly given
    schemes or hosts are passed through unchanged.
    '''
    # clean up potentially messy urls
    parts = urlparse(request_url)

    if not parts.scheme and not parts.netloc:
        # Without a scheme the parser files a leading host name under the
        # path.  Only our own server name is recognised as a host; any
        # other first segment stays an ordinary relative path.
        host, slash, rest = parts.path.partition('/')
        if host and host == self.server:
            parts = parts._replace(netloc=host, path=slash + rest)

    if not parts.scheme:
        parts = parts._replace(scheme='http')
    if not parts.netloc:
        parts = parts._replace(netloc=self.server)
    return urlunparse(parts)
264 def validate(self, obj):
265 obj_type = self.get_object_type(obj)
266 schema_url = self.get_schema_url(obj)
268 raise ValueError("Unable to construct schema url")
270 schema = self.schemas.setdefault(obj_type, self.get_json(schema_url))
274 jsonschema.validate(hidden, schema)
277 if __name__ == '__main__':
279 from htsworkflow.util.rdfhelp import get_model, dump_model
280 from htsworkflow.util.rdfjsonld import load_into_model
281 from pprint import pprint
283 logging.basicConfig(level=logging.DEBUG)
284 encoded = ENCODED('test.encodedcc.org')
286 body = encoded.get_jsonld('/experiments/ENCSR000AEC/')
288 load_into_model(model, body)