1 """Interface with encoded software for ENCODE3 data submission & warehouse
3 This allows retrieving blocks
7 from __future__ import print_function
13 from requests.utils import urlparse, urlunparse
15 from urlparse import urljoin
17 LOGGER = logging.getLogger(__name__)
20 # The None context will get added to the root of the tree and will
21 # provide common defaults.
23 # terms in multiple encoded objects
24 'description': 'rdf:description',
25 'experiment': {'@type': '@id'},
26 'href': { '@type': '@id' },
27 'lab': { '@type': '@id' },
28 'library': {'@type': '@id' },
29 'pi': { '@type': '@id' },
30 'platform': { '@type': '@id' },
31 'submitted_by': { '@type': '@id' },
32 'url': { '@type': '@id' },
34 # Identify and markup contained classes.
35 # e.g. in the tree there was a sub-dictionary named 'biosample'
36 # That dictionary had a term 'biosample_term_id', which is the
37 # term that should be used as the @id.
39 'biosample_term_id': { '@type': '@id' },
42 "assay_term_id": { "@type": "@id" },
45 'dataset': {'@type': '@id'},
47 # I tried to use the JSON-LD mapping capabilities to convert the lab
48 # contact information into a vcard record, but the encoded model
49 # didn't lend itself well to the vcard schema
51 # "address1": "vcard:street-address",
52 # "address2": "vcard:street-address",
53 # "city": "vcard:locality",
54 # "state": "vcard:region",
55 # "country": "vcard:country"
58 'award': { '@type': '@id' },
61 'award': { '@type': '@id' },
62 'nucleic_acid_term_id': { '@type': '@id' }
66 #FIXME: this needs to be initialized from rdfns
ENCODED_NAMESPACES = {
    # JSON-LD lets you define namespaces so you can use the shortened URL
    # syntax (e.g. rdfs:label instead of
    # http://www.w3.org/2000/01/rdf-schema#label).
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "owl": "http://www.w3.org/2002/07/owl#",
    # Dublin Core elements; the scheme was previously misspelled "htp://".
    "dc": "http://purl.org/dc/elements/1.1/",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "vcard": "http://www.w3.org/2006/vcard/ns#",

    # for some namespaces I made a best guess for the ontology root.
    "EFO": "http://www.ebi.ac.uk/efo/",  # EFO ontology
    "OBO": "http://purl.obolibrary.org/obo/",  # OBO ontology
    "OBI": "http://purl.obolibrary.org/obo/OBI_",  # Ontology for Biomedical Investigations
    # OBI: available from http://svn.code.sf.net/p/obi/code/releases/2012-07-01/merged/merged-obi-comments.owl
    'SO': 'http://purl.obolibrary.org/obo/SO_',  # Sequence ontology
    # SO: available from http://www.berkeleybop.org/ontologies/so.owl
}
88 ENCODED_SCHEMA_ROOT='/profiles/'
91 '''Programatic access encoded, the software powering ENCODE3's submit site.
93 def __init__(self, server, contexts=None):
97 self.contexts = contexts if contexts else ENCODED_CONTEXT
101 return (self.username, self.password)
102 auth = property(get_auth)
def load_netrc(self):
    """Load the login and password for ``self.server`` from the netrc file.

    The default netrc file is parsed and the entry for ``self.server`` is
    looked up.  When an entry is found, ``self.username`` and
    ``self.password`` are set from its login and password fields.  When
    there is no matching entry the credentials are left untouched
    instead of failing on a ``None`` lookup result.
    """
    import netrc  # only needed by this method

    session = netrc.netrc()
    authenticators = session.authenticators(self.server)
    # authenticators() returns None when nothing matches this host, so
    # only unpack the (login, account, password) tuple when it exists.
    if authenticators:
        self.username = authenticators[0]
        self.password = authenticators[2]
def add_jsonld_context(self, tree, contexts, base):
    """Add contexts to various objects in the tree.

    tree is a json tree returned from the DCC's encoded database.
    contexts is a dictionary of dictionaries containing contexts
        for the various possible encoded classes.
    base sets the base url that relative urls will be resolved against.

    The root of the tree receives a copy of the default context
    (``contexts[None]``) with ``@base`` set to ``base``; the rest of the
    tree is then handed to add_jsonld_child_context.
    """
    # Copy the default context before adding '@base'.  Writing into
    # contexts[None] directly would leave this request's base url in the
    # shared defaults, where it would be merged into every other context
    # built from them.
    root_context = dict(contexts[None])
    root_context['@base'] = base
    tree['@context'] = root_context
    self.add_jsonld_child_context(tree, contexts)
125 def add_jsonld_child_context(self, obj, contexts):
126 '''Add JSON-LD context to the encoded JSON.
128 This is recursive because some of the IDs were relative URLs
129 and I needed a way to properly compute a the correct base URL.
131 # pretend strings aren't iterable
132 if type(obj) in types.StringTypes:
135 # recurse on container types
136 if isinstance(obj, collections.Sequence):
137 # how should I update lists?
139 self.add_jsonld_child_context(v, default_base)
142 if isinstance(obj, collections.Mapping):
143 for v in obj.values():
144 self.add_jsonld_child_context(v, default_base)
146 # we have an object. attach a context to it.
147 if self._is_encoded_object(obj):
148 context = self.create_jsonld_context(obj, default_base)
150 obj.setdefault('@context', {}).update(context)
def add_jsonld_namespaces(self, context):
    '''Merge the shortcut namespace prefixes into a context.

    Every prefix defined in ENCODED_NAMESPACES is written into
    ``context`` in place, replacing any entry with the same key.
    Only needs to be run on the top-most context.
    '''
    for prefix, uri in ENCODED_NAMESPACES.items():
        context[prefix] = uri
def create_jsonld_context(self, obj, default_base):
    '''Synthesize the context for a encoded type

    self.contexts[None] = default context attributes added to any type
    self.contexts[type] = context attributes for this type.

    obj must carry '@id' and '@type' entries.  The result starts with
    '@base' (obj's '@id' resolved against default_base) and '@vocab'
    (the schema url for obj), is then updated with the default
    attributes, and finally with the attributes registered for each of
    the object's types, in the order the types are listed.

    Returns the newly built context dictionary.
    '''
    context = {'@base': urljoin(default_base, obj['@id']),
               '@vocab': self.get_schema_url(obj)}

    # Later updates win: type-specific attributes override the defaults.
    context.update(self.contexts[None])
    for t in obj['@type']:
        if t in self.contexts:
            context.update(self.contexts[t])

    # Callers merge the result into the object's '@context', so the
    # context has to be handed back.
    return context
def get_json(self, obj_id, **kwargs):
    '''GET an ENCODE object as JSON and return as dict

    Uses prepare_url to allow url short-cuts
    if no keyword arguments are specified it will default to adding limit=all
    Alternative keyword arguments can be passed in and will be sent to the host.

    Known keywords are:
      limit - (integer or 'all') how many records to return, all for all of them
      embed - (bool) if true expands linking ids into their associated object.
      format - text/html or application/json

    Returns the decoded JSON body of the response.
    Raises the requests HTTP error from raise_for_status() when the
    server answers with a 4xx/5xx status.
    '''
    # limit=all is only a default; do not clobber caller-supplied
    # query parameters.
    if not kwargs:
        kwargs['limit'] = 'all'

    url = self.prepare_url(obj_id)
    LOGGER.info('requesting url: {}'.format(url))

    # do the request
    headers = {'content-type': 'application/json'}
    # Never write the password to the log, even at debug level.
    LOGGER.debug('username: %s', self.username)
    response = requests.get(url, auth=self.auth, headers=headers, params=kwargs)
    if response.status_code != requests.codes.ok:
        LOGGER.error("Error http status: {}".format(response.status_code))
        response.raise_for_status()
    return response.json()
def get_jsonld(self, obj_id, **kwargs):
    '''Get ENCODE object as JSONLD annotated with classes contexts

    see get_json for documentation about what keywords can be passed.

    The object is fetched with get_json, then add_jsonld_context is
    applied to it using self.contexts and the prepared url of obj_id
    as the base url.

    Returns the annotated tree.
    '''
    url = self.prepare_url(obj_id)
    # Named 'tree' rather than 'json' so the json module is not shadowed.
    tree = self.get_json(obj_id, **kwargs)
    # The constructor stores the mapping as self.contexts (plural).
    self.add_jsonld_context(tree, self.contexts, url)
    return tree
211 def get_object_type(self, obj):
212 """Return type for a encoded object
214 obj_type = obj.get('@type')
215 if obj_type and isinstance(obj_type, collections.Sequence):
def get_schema_url(self, obj):
    """Return the url of the schema describing obj.

    The url is ENCODED_SCHEMA_ROOT + <object type> + '.json', passed
    through prepare_url.  Returns None when get_object_type yields no
    type for obj, so callers can detect that no schema url could be
    constructed.
    """
    obj_type = self.get_object_type(obj)
    # Without a type there is nothing to concatenate a url from.
    if not obj_type:
        return None
    return self.prepare_url(ENCODED_SCHEMA_ROOT + obj_type + '.json')
223 def _is_encoded_object(self, obj):
224 '''Test to see if an object is a JSON-LD object
226 Some of the nested dictionaries lack the @id or @type
227 information necessary to convert them.
229 if not isinstance(obj, collections.Iterable):
232 if '@id' in obj and '@type' in obj:
def patch_json(self, obj_id, changes):
    """Given a dictionary of changes push them as a HTTP patch request

    obj_id is turned into a full url with prepare_url; changes is
    serialized with json.dumps and sent as the request body.

    Returns the decoded JSON body of the response.
    Raises the requests HTTP error from raise_for_status() when the
    server answers with a 4xx/5xx status.
    """
    url = self.prepare_url(obj_id)
    payload = json.dumps(changes)
    # The body is JSON, so label it as such; get_json sends the same
    # header.
    headers = {'content-type': 'application/json'}
    response = requests.patch(url, auth=self.auth, headers=headers,
                              data=payload)
    if response.status_code != requests.codes.ok:
        LOGGER.error("Error http status: {}".format(response.status_code))
        response.raise_for_status()
    return response.json()
def put_json(self, obj_id, new_object):
    """Replace the object at obj_id with new_object via a HTTP put request

    obj_id is turned into a full url with prepare_url; new_object is
    serialized with json.dumps and sent as the request body.

    Returns the decoded JSON body of the response.
    Raises the requests HTTP error from raise_for_status() when the
    server answers with a 4xx/5xx status.
    """
    url = self.prepare_url(obj_id)
    payload = json.dumps(new_object)
    # The body is JSON, so label it as such; get_json sends the same
    # header.
    headers = {'content-type': 'application/json'}
    response = requests.put(url, auth=self.auth, headers=headers,
                            data=payload)
    # A PUT may answer 201 (created) or 200 (existing resource replaced);
    # neither is an error worth logging.
    if response.status_code not in (requests.codes.ok,
                                    requests.codes.created):
        LOGGER.error("Error http status: {}".format(response.status_code))
        response.raise_for_status()
    return response.json()
def prepare_url(self, request_url):
    '''This attempts to provide some convenience for accessing a URL

    Given a url fragment it will default to :
    * the http scheme
    * requests to self.server

    This allows fairly flexible urls. e.g.

    prepare_url('/experiments/ENCSR000AEG')
    prepare_url('//submit.encodedcc.org/experiments/ENCSR000AEG')
    prepare_url('http://submit.encodedcc.org/experiments/ENCSR000AEG')

    all return 'http://<host>/experiments/ENCSR000AEG', where <host> is
    self.server for the first form.  Components that are already
    present (including a query string) are passed through unchanged.

    NOTE(review): a scheme-less 'host/path' string is parsed by urlparse
    as a plain path, so its host part is not recognised as a server --
    confirm whether callers rely on that form.

    Returns the completed url as a string.
    '''
    # clean up potentially messy urls
    parts = urlparse(request_url)
    if not parts.scheme:
        parts = parts._replace(scheme='http')
    if not parts.netloc:
        parts = parts._replace(netloc=self.server)
    return urlunparse(parts)
281 def validate(self, obj):
282 obj_type = self.get_object_type(obj)
283 schema_url = self.get_schema_url(obj)
285 raise ValueError("Unable to construct schema url")
287 schema = self.schemas.setdefault(obj_type, self.get_json(schema_url))
291 jsonschema.validate(hidden, schema)
294 if __name__ == '__main__':
296 from htsworkflow.util.rdfhelp import get_model, dump_model
297 from htsworkflow.util.rdfjsonld import load_into_model
298 from pprint import pprint
300 logging.basicConfig(level=logging.DEBUG)
301 encoded = ENCODED('test.encodedcc.org')
303 body = encoded.get_jsonld('/experiments/ENCSR000AEC/')
305 load_into_model(model, body)