Utility class for accessing ENCODE3's submit / data warehouse server.

author Diane Trout <diane@ghic.org>

Wed, 15 Jan 2014 01:31:01 +0000 (17:31 -0800)

committer Diane Trout <diane@ghic.org>

Wed, 15 Jan 2014 01:31:01 +0000 (17:31 -0800)
author Diane Trout <diane@ghic.org>
Wed, 15 Jan 2014 01:31:01 +0000 (17:31 -0800)
committer Diane Trout <diane@ghic.org>
Wed, 15 Jan 2014 01:31:01 +0000 (17:31 -0800)
diff --git a/htsworkflow/submission/encoded.py b/htsworkflow/submission/encoded.py

new file mode 100644 (file)

index 0000000..8eec0c0
--- /dev/null
+++ b/htsworkflow/submission/encoded.py
@@ -0,0 +1,289 @@
+"""Interface with encoded software for ENCODE3 data submission & warehouse
+
+This allows retrieving objects from an encoded server as JSON or JSON-LD.
+"""
+
+
+from __future__ import print_function
+import collections
+import logging
+import json
+import jsonschema
+import requests
+from requests.utils import urlparse, urlunparse
+import types
+from urlparse import urljoin
+
+LOGGER = logging.getLogger(__name__)
+
+ENCODED_CONTEXT = {
+    # The None context will get added to the root of the tree and will
+    # provide common defaults.
+    None: {
+        # terms in multiple encoded objects
+        'description': 'rdf:description',
+        'experiment': {'@type': '@id'},
+        'href': { '@type': '@id' },
+        'lab': { '@type': '@id' },
+        'library': {'@type': '@id' },
+        'pi': { '@type': '@id' },
+        'platform': { '@type': '@id' },
+        'submitted_by': { '@type': '@id' },
+        'url': { '@type': '@id' },
+    },
+    # Identify and markup contained classes.
+    # e.g. in the tree there was a sub-dictionary named 'biosample'
+    # That dictionary had a term 'biosample_term_id', which is the
+    # term that should be used as the @id.
+    'biosample': {
+        'biosample_term_id': { '@type': '@id' },
+    },
+    'experiment': {
+        "assay_term_id": { "@type": "@id" },
+    },
+    'file': {
+        'dataset': {'@type': '@id'},
+    },
+    # I tried to use the JSON-LD mapping capabilities to convert the lab
+    # contact information into a vcard record, but the encoded model
+    # didn't lend itself well to the vcard schema
+    #'lab': {
+    #    "address1": "vcard:street-address",
+    #    "address2": "vcard:street-address",
+    #    "city": "vcard:locality",
+    #    "state": "vcard:region",
+    #    "country": "vcard:country"
+    #},
+    'human_donor': {
+        'award': { '@type': '@id' },
+    },
+    'library': {
+        'award': { '@type': '@id' },
+        'nucleic_acid_term_id': { '@type': '@id' }
+    }
+}
+
+#FIXME: this needs to be initialized from rdfns
+_encoded_namespaces = {
+    # JSON-LD lets you define namespaces so you can use the shortened url syntax.
+    # (instead of http://www.w3.org/2000/01/rdf-schema#label you can do
+    # rdfs:label)
+    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+    "owl": "http://www.w3.org/2002/07/owl#",
+    "dc": "http://purl.org/dc/elements/1.1/",
+    "xsd": "http://www.w3.org/2001/XMLSchema#",
+    "vcard": "http://www.w3.org/2006/vcard/ns#",
+
+    # for some namespaces I made a best guess for the ontology root.
+    "EFO": "http://www.ebi.ac.uk/efo/", # EFO ontology
+    "OBO": "http://purl.obolibrary.org/obo/", # OBO ontology
+    "OBI": "http://purl.obolibrary.org/obo/OBI_", # Ontology for Biomedical Investigations
+    # OBI: available from http://svn.code.sf.net/p/obi/code/releases/2012-07-01/merged/merged-obi-comments.owl
+    'SO': 'http://purl.obolibrary.org/obo/SO_', # Sequence ontology
+    # SO: available from http://www.berkeleybop.org/ontologies/so.owl
+
+}
+ENCODED_CONTEXT[None].update(_encoded_namespaces)
+ENCODED_SCHEMA_ROOT='/profiles/'
+
+class ENCODED:
+    '''Programmatic access to encoded, the software powering ENCODE3's submit site.
+    '''
+    def __init__(self, server, context=None):
+        self.server = server
+        self.username = None
+        self.password = None
+        self.context = context if context else ENCODED_CONTEXT
+        self.schemas = {}
+
+    def get_auth(self):
+        return (self.username, self.password)
+    auth = property(get_auth)
+
+    def load_netrc(self):
+        import netrc
+        session = netrc.netrc()
+        authenticators = session.authenticators(self.server)
+        if authenticators:
+            self.username = authenticators[0]
+            self.password = authenticators[2]
+
+    def add_jsonld_context(self, tree, contexts, base):
+        """Add contexts to various objects in the tree.
+
+        tree is a json tree returned from the DCC's encoded database.
+        contexts is a dictionary of dictionaries containing contexts
+                for the various possible encoded classes.
+        base sets the base url that relative
+            urls will be resolved against.
+        """
+        tree['@context'] = contexts[None]
+        tree['@context']['@base'] = base
+        self.add_jsonld_child_context(tree, contexts)
+
+    def add_jsonld_child_context(self, obj, contexts):
+        '''Add JSON-LD context to the encoded JSON.
+
+        This is recursive because some of the IDs were relative URLs
+        and I needed a way to properly compute the correct base URL.
+        '''
+        # pretend strings aren't iterable
+        if type(obj) in types.StringTypes:
+            return
+
+        # recurse on container types
+        if isinstance(obj, collections.Sequence):
+            # how should I update lists?
+            for v in obj:
+                self.add_jsonld_child_context(v, contexts)
+            return
+
+        if isinstance(obj, collections.Mapping):
+            for v in obj.values():
+                self.add_jsonld_child_context(v, contexts)
+
+        # we have an object. attach a context to it.
+        if self._is_encoded_object(obj):
+            default_base = contexts[None]['@base']
+            context = {'@base': urljoin(default_base, obj['@id']),
+                       '@vocab': self.get_schema_url(obj)}
+            for t in obj['@type']:
+                if t in contexts:
+                    context.update(contexts[t])
+            if len(context) > 0:
+                obj.setdefault('@context', {}).update(context)
+
+    def get_json(self, obj_id, **kwargs):
+        '''GET an ENCODE object as JSON and return as dict
+
+        Uses prepare_url to allow url short-cuts.
+        If no keyword arguments are specified it will default to adding limit=all.
+        Alternative keyword arguments can be passed in and will be sent to the host.
+
+        Known keywords are:
+          limit - (integer or 'all') how many records to return, all for all of them
+          embed - (bool) if true expands linking ids into their associated object.
+          format - text/html or application/json
+        '''
+        if len(kwargs) == 0:
+            kwargs['limit'] = 'all'
+
+        url = self.prepare_url(obj_id)
+        LOGGER.info('requesting url: {}'.format(url))
+
+        # do the request
+        headers = {'content-type': 'application/json'}
+        LOGGER.debug('username: %s, password: %s', self.username, self.password)
+        response = requests.get(url, auth=self.auth, headers=headers, params=kwargs)
+        if not response.status_code == requests.codes.ok:
+            LOGGER.error("Error http status: {}".format(response.status_code))
+            response.raise_for_status()
+        return response.json()
+
+    def get_jsonld(self, obj_id, **kwargs):
+        '''Get ENCODE object as JSON-LD annotated with class contexts
+
+        see get_json for documentation about what keywords can be passed.
+        '''
+        url = self.prepare_url(obj_id)
+        json = self.get_json(obj_id, **kwargs)
+        self.add_jsonld_context(json, self.context, url)
+        return json
+
+    def get_object_type(self, obj):
+        """Return type for an encoded object
+        """
+        obj_type = obj.get('@type')
+        if obj_type and isinstance(obj_type, collections.Sequence):
+            return obj_type[0]
+
+    def get_schema_url(self, obj):
+        obj_type = self.get_object_type(obj)
+        if obj_type:
+            return self.prepare_url(ENCODED_SCHEMA_ROOT + obj_type + '.json')
+
+    def _is_encoded_object(self, obj):
+        '''Test to see if an object is a JSON-LD object
+
+        Some of the nested dictionaries lack the @id or @type
+        information necessary to convert them.
+        '''
+        if not isinstance(obj, collections.Iterable):
+            return False
+
+        if '@id' in obj and '@type' in obj:
+            return True
+        return False
+
+
+    def patch_json(self, obj_id, changes):
+        """Given a dictionary of changes push them as an HTTP PATCH request
+        """
+        url = self.prepare_url(obj_id)
+        payload = json.dumps(changes)
+        response = requests.patch(url, auth=self.auth, data=payload)
+        if response.status_code != requests.codes.ok:
+            LOGGER.error("Error http status: {}".format(response.status_code))
+            response.raise_for_status()
+        return response.json()
+
+    def put_json(self, obj_id, new_object):
+        url = self.prepare_url(obj_id)
+        payload = json.dumps(new_object)
+        response = requests.put(url, auth=self.auth, data=payload)
+        if response.status_code != requests.codes.created:
+            LOGGER.error("Error http status: {}".format(response.status_code))
+            response.raise_for_status()
+        return response.json()
+
+    def prepare_url(self, request_url):
+        '''This attempts to provide some convenience for accessing a URL
+
+        Given a url fragment it will default to:
+        * requests over http
+        * requests to self.server
+
+        This allows fairly flexible urls. e.g.
+
+        prepare_url('/experiments/ENCSR000AEG')
+        prepare_url('submit.encodedcc.org/experiments/ENCSR000AEG')
+        prepare_url('http://submit.encodedcc.org/experiments/ENCSR000AEG?limit=all')
+
+        should all return the same url
+        '''
+        # clean up potentially messy urls
+        url = urlparse(request_url)._asdict()
+        if not url['scheme']:
+            url['scheme'] = 'http'
+        if not url['netloc']:
+            url['netloc'] = self.server
+        url = urlunparse(url.values())
+        return url
+
+    def validate(self, obj):
+        obj_type = self.get_object_type(obj)
+        schema_url = self.get_schema_url(obj)
+        if not schema_url:
+            raise ValueError("Unable to construct schema url")
+
+        schema = self.schemas.setdefault(obj_type, self.get_json(schema_url))
+        hidden = obj.copy()
+        del hidden['@id']
+        del hidden['@type']
+        jsonschema.validate(hidden, schema)
+
+
+if __name__ == '__main__':
+    # try it
+    from htsworkflow.util.rdfhelp import get_model, dump_model
+    from htsworkflow.util.rdfjsonld import load_into_model
+    from pprint import pprint
+    model = get_model()
+    logging.basicConfig(level=logging.DEBUG)
+    encoded = ENCODED('test.encodedcc.org')
+    encoded.load_netrc()
+    body = encoded.get_jsonld('/experiments/ENCSR000AEC/')
+    pprint(body)
+    load_into_model(model, body)
+    #dump_model(model)
diff --git a/htsworkflow/submission/test/library.json b/htsworkflow/submission/test/library.json

new file mode 100644 (file)

index 0000000..f694e10
--- /dev/null
+++ b/htsworkflow/submission/test/library.json
@@ -0,0 +1,359 @@
+{
+    "properties": {
+        "accession": {
+            "comment": "Only admins are allowed to set or update this value.",
+            "accessionType": "LB",
+            "description": "A unique identifier to be used to reference the object.",
+            "permission": "import_items",
+            "serverDefault": "accession",
+            "format": "accession",
+            "title": "Accession",
+            "type": "string"
+        },
+        "alternate_accessions": {
+            "description": "Accessions previously assigned to objects that have been merged with this object.",
+            "title": "Alternate accessions",
+            "default": [],
+            "items": {
+                "comment": "Only admins are allowed to set or update this value.",
+                "format": "accession",
+                "type": "string",
+                "description": "An accession previously assigned to an object that has been merged with this object.",
+                "title": "Alternate Accession"
+            },
+            "permission": "import_items",
+            "type": "array"
+        },
+        "lot_id": {
+            "type": "string",
+            "description": "The lot identifier provided by the originating lab or vendor.",
+            "title": "Lot ID"
+        },
+        "aliases": {
+            "default": [],
+            "items": {
+                "comment": "Current convention is colon separated lab name and lab identifier. (e.g. john-doe:42).",
+                "pattern": "^\\S+:\\S+",
+                "type": "string",
+                "description": "A lab specific identifier to reference an object.",
+                "title": "Lab alias"
+            },
+            "type": "array",
+            "description": "Lab specific identifiers to reference an object.",
+            "title": "Lab aliases"
+        },
+        "submitted_by": {
+            "comment": "Do not submit, value is assigned by the server. The user that created the object.",
+            "linkTo": "user",
+            "title": "Submitted by",
+            "serverDefault": "userid",
+            "permission": "import_items",
+            "type": "string"
+        },
+        "documents": {
+            "default": [],
+            "items": {
+                "comment": "See document.json for available identifiers.",
+                "type": "string",
+                "description": "A document that describe the preparation of the library. ",
+                "linkTo": "document",
+                "title": "Protocol document"
+            },
+            "type": "array",
+            "description": "Documents that describe the preparation of the library.",
+            "title": "Protocol documents"
+        },
+        "fragmentation_date": {
+            "comment": "Date can be submitted in as YYYY-MM-DD or YYYY-MM-DDTHH:MM:SSTZD (TZD is the time zone designator; use Z to express time in UTC or for time expressed in local time add a time zone offset from UTC +HH:MM or -HH:MM).",
+            "type": "string",
+            "anyOf": [
+                {
+                    "format": "date-time"
+                },
+                {
+                    "format": "date"
+                }
+            ],
+            "description": "The date that the nucleic acid was fragmented.",
+            "title": "Fragmentation date"
+        },
+        "uuid": {
+            "format": "uuid",
+            "serverDefault": "uuid4",
+            "title": "UUID",
+            "requestMethod": "POST",
+            "permission": "import_items",
+            "type": "string"
+        },
+        "strand_specificity": {
+            "default": false,
+            "type": "boolean",
+            "description": "The preparation of the library using a strand-specific protocol.",
+            "title": "Strand specificity"
+        },
+        "fragmentation_method": {
+            "description": "A short description or reference of the nucleic acid fragmentation protocol used in library preparation, if applicable.",
+            "title": "Fragmentation method",
+            "default": "see document",
+            "format": "semi-controlled",
+            "XXXenum": [
+                "sonication",
+                "see document",
+                "covaris shearing",
+                "chemical (part of Illumina TruSeq mRNA Kit)",
+                "Illumina/Nextera tagmentation",
+                "bioruptor twin",
+                "n/a"
+            ],
+            "type": "string"
+        },
+        "schema_version": {
+            "comment": "Do not submit, value is assigned by the server. The version of the JSON schema that the server uses to validate the object. Schema version indicates generation of schema used to save version to to enable upgrade steps to work. Individual schemas should set the default.",
+            "pattern": "^\\d+(\\.\\d+)*$",
+            "default": "2",
+            "type": "string",
+            "requestMethod": []
+        },
+        "lysis_method": {
+            "description": "A short description or reference of the cell lysis protocol used in library preparation, if applicable",
+            "title": "Lysis method",
+            "default": "see document",
+            "format": "semi-controlled",
+            "XXXenum": [
+                "miRNeasy Mini kit (QIAGEN cat#:217004)",
+                "Trizol (LifeTech cat#: 15596-018)",
+                "Ambion mirVana",
+                "Qiagen #74204",
+                "QIAGEN DNeasy Blood & Tissue Kit",
+                "see document",
+                "n/a"
+            ],
+            "type": "string"
+        },
+        "source": {
+            "comment": "See source.json for available identifiers.",
+            "title": "Source",
+            "type": "string",
+            "description": "The originating lab or vendor.",
+            "linkTo": "source"
+        },
+        "biosample": {
+            "comment": "See biosample.json for available identifiers.",
+            "title": "Biosample",
+            "type": "string",
+            "description": "The biosample that nucleic acid was isolated from to generate the library.",
+            "linkTo": "biosample"
+        },
+        "extraction_method": {
+            "description": "A short description or reference of the nucleic acid extraction protocol used in library preparation, if applicable.",
+            "title": "Extraction method",
+            "default": "see document",
+            "format": "semi-controlled",
+            "XXXenum": [
+                "miRNeasy Mini kit (QIAGEN cat#:217004)",
+                "Trizol (LifeTech cat#: 15596-018)",
+                "Ambion mirVana",
+                "Qiagen #74204",
+                "QIAGEN DNeasy Blood & Tissue Kit",
+                "see document",
+                "n/a"
+            ],
+            "type": "string"
+        },
+        "library_size_selection_method": {
+            "description": "A short description or reference of the size selection protocol used in library preparation, if applicable.",
+            "title": "Size selection method",
+            "default": "see document",
+            "format": "semi-controlled",
+            "XXXenum": [
+                "gel",
+                "see document",
+                "SPRI beads"
+            ],
+            "type": "string"
+        },
+        "status": {
+            "default": "CURRENT",
+            "enum": [
+                "CURRENT",
+                "DELETED"
+            ],
+            "type": "string",
+            "title": "Status"
+        },
+        "nucleic_acid_term_name": {
+            "enum": [
+                "DNA",
+                "RNA",
+                "polyadenylated mRNA",
+                "miRNA"
+            ],
+            "type": "string",
+            "description": "SO (Sequence Ontology) term best matching the nucleic acid isolated to generate the library (e.g. 'RNA' for a total RNA library, even if that library is subsequently reverse transcribed for DNA sequencing.)",
+            "title": "Nucleic acid term"
+        },
+        "treatments": {
+            "default": [],
+            "items": {
+                "comment": "See treatment.json for available identifiers.",
+                "type": "string",
+                "linkTo": "treatment",
+                "title": "Treatment"
+            },
+            "type": "array",
+            "title": "Treatments"
+        },
+        "award": {
+            "comment": "See award.json for list of available identifiers.",
+            "title": "Grant",
+            "type": "string",
+            "description": "Grant associated with the submission.",
+            "linkTo": "award"
+        },
+        "depleted_in_term_name": {
+            "default": [],
+            "items": {
+                "enum": [
+                    "rRNA",
+                    "polyadenylated mRNA",
+                    "capped mRNA"
+                ],
+                "type": "string",
+                "description": "SO (Sequence Ontology) term best matching the nucleic acid that was diminished from the library.",
+                "title": "Depleted in term"
+            },
+            "type": "array"
+        },
+        "paired_ended": {
+            "default": false,
+            "XXXnote": "Is this redundant to the field found in replicate.json",
+            "type": "boolean",
+            "description": "Whether or not the library was prepared with paired ends",
+            "title": "Paired ended"
+        },
+        "lab": {
+            "comment": "See lab.json for list of available identifiers.",
+            "title": "Lab",
+            "type": "string",
+            "description": "Lab associated with the submission.",
+            "linkTo": "lab"
+        },
+        "depleted_in_term_id": {
+            "default": [],
+            "items": {
+                "comment": "Based on the choice in depleted_in_term_name use the following guide: rRNA - SO:0000252,  polyadenylated mRNA - SO:0000871 or capped mRNA - SO:0000862",
+                "enum": [
+                    "SO:0000252",
+                    "SO:0000871",
+                    "SO:0000862"
+                ],
+                "type": "string",
+                "description": "SO (Sequence Ontology) identifier best matching the nucleic acid that was diminished from the library.",
+                "title": "Depleted in ID"
+            },
+            "type": "array"
+        },
+        "product_id": {
+            "type": "string",
+            "description": "The product identifier provided by the originating lab or vendor.",
+            "title": "Product ID"
+        },
+        "size_range": {
+            "pattern": "(^[0-9]+-[0-9]+$|^[<>][0-9]+$)",
+            "type": "string",
+            "description": "The measured size range of the purified nucleic acid, in kD.",
+            "title": "Size range"
+        },
+        "notes": {
+            "title": "Notes",
+            "type": "string",
+            "description": "Additional information.",
+            "permission": "import_items"
+        },
+        "nucleic_acid_term_id": {
+            "comment": "Based on the choice in nucleic_acid_term_name use the following guide: DNA - SO:0000352, RNA - SO:0000356,  polyadenylated mRNA - SO:0000871 or miRNA - SO:0000276",
+            "enum": [
+                "SO:0000352",
+                "SO:0000356",
+                "SO:0000871",
+                "SO:0000276"
+            ],
+            "type": "string",
+            "description": "SO (Sequence Ontology) identifier best matching the nucleic acid isolated to generate the library (e.g. 'SO:0000356' for a total RNA library, even if that library is subsequently reverse transcribed for DNA sequencing.)",
+            "title": "Nucleic acid ID"
+        },
+        "nucleic_acid_starting_quantity": {
+            "pattern": "[0-9]+",
+            "type": "string",
+            "description": "The starting amount of nucleic acid before selection and purification.",
+            "title": "Nucleic acid starting quantity"
+        },
+        "date_created": {
+            "comment": "Do not submit, value is assigned by the server. The date the object is created.",
+            "title": "Date created",
+            "serverDefault": "now",
+            "permission": "import_items",
+            "anyOf": [
+                {
+                    "format": "date-time"
+                },
+                {
+                    "format": "date"
+                }
+            ],
+            "type": "string"
+        }
+    },
+    "description": "Schema for submitting a nucleic acid library.",
+    "title": "Library",
+    "required": [
+        "award",
+        "lab",
+        "nucleic_acid_term_id"
+    ],
+    "mixinProperties": [
+        {
+            "$ref": "mixins.json#/schema_version"
+        },
+        {
+            "$ref": "mixins.json#/uuid"
+        },
+        {
+            "$ref": "mixins.json#/accession"
+        },
+        {
+            "$ref": "mixins.json#/aliases"
+        },
+        {
+            "$ref": "mixins.json#/attribution"
+        },
+        {
+            "$ref": "mixins.json#/standard_status"
+        },
+        {
+            "$ref": "mixins.json#/submitted"
+        },
+        {
+            "$ref": "mixins.json#/source"
+        },
+        {
+            "$ref": "mixins.json#/product_id"
+        },
+        {
+            "$ref": "mixins.json#/lot_id"
+        },
+        {
+            "$ref": "mixins.json#/notes"
+        }
+    ],
+    "XXXcomment": "is source required?",
+    "identifyingProperties": [
+        "uuid",
+        "accession",
+        "aliases"
+    ],
+    "additionalProperties": false,
+    "$schema": "http://json-schema.org/draft-04/schema#",
+    "type": "object",
+    "id": "/profiles/library.json"
+}
diff --git a/htsworkflow/submission/test/test_encoded.py b/htsworkflow/submission/test/test_encoded.py

new file mode 100644 (file)

index 0000000..e5b2707
--- /dev/null
+++ b/htsworkflow/submission/test/test_encoded.py
@@ -0,0 +1,110 @@
+import json
+import os
+from pprint import pprint
+from unittest2 import TestCase, TestSuite, defaultTestLoader, skip
+
+from htsworkflow.submission.encoded import ENCODED
+
+class TestEncoded(TestCase):
+    def test_prepare_url(self):
+        encode = ENCODED('test.encodedcc.edu')
+
+        tests = [
+            ('/experiments', 'http://test.encodedcc.edu/experiments'),
+            ('/experiments/ENCLB045ZZZ',
+             'http://test.encodedcc.edu/experiments/ENCLB045ZZZ'),
+            ('http://submit.encodedcc.edu/experiments/ENCLB045ZZZ',
+             'http://submit.encodedcc.edu/experiments/ENCLB045ZZZ'),
+        ]
+        for url, result in tests:
+            self.assertEqual(encode.prepare_url(url), result)
+
+    def test_validate(self):
+        """Test validation
+        """
+        schema_file = os.path.join(os.path.dirname(__file__), 'library.json')
+        schema = json.loads(open(schema_file, 'r').read())
+
+        obj = {u'@id': u'/libraries/ENCLB045ZZZ/',
+               u'@type': [u'library', u'item'],
+               u'accession': u'ENCLB045ZZZ',
+               u'aliases': [],
+               u'alternate_accessions': [],
+               u'award': u'/awards/U54HG006998/',
+               u'biosample': u'/biosamples/ENCBS089RNA/',
+               u'date_created': u'2014-01-14T19:44:51.061770+00:00',
+               u'depleted_in_term_id': [],
+               u'depleted_in_term_name': [],
+               u'documents': [],
+               u'extraction_method': u'Ambion mirVana',
+               u'fragmentation_method': u'Illumina/Nextera tagmentation',
+               u'lab': u'/labs/barbara-wold/',
+               u'library_size_selection_method': u'SPRI beads',
+               u'lysis_method': u'Ambion mirVana',
+               u'nucleic_acid_term_id': u'SO:0000871',
+               u'nucleic_acid_term_name': u'polyadenylated mRNA',
+               u'paired_ended': False,
+               u'schema_version': u'2',
+               u'size_range': u'>200',
+               u'status': u'CURRENT',
+               u'strand_specificity': False,
+               u'submitted_by': u'/users/0e3dde9b-aaf9-42dd-87f7-975a85072ed2/',
+               u'treatments': [],
+               u'uuid': u'42c46028-708f-4347-a3df-2c82dfb021c4'}
+        encode = ENCODED('submit.encodedcc.org')
+        encode.schemas[u'library'] = schema
+        encode.validate(obj)
+        self.assertTrue('@id' in obj)
+
+    def test_add_context(self):
+        """Checking to make sure nested @base and @vocab urls are set correctly
+        """
+        obj = {
+            "nucleic_acid_term_name": "RNA",
+            "accession": "ENCLB044ZZZ",
+            "@id": "/libraries/ENCLB044ZZZ/",
+            "schema_version": "1",
+            "@type": [
+                "library",
+                "item"
+            ],
+            "lysis_method": "Ambion mirVana",
+            "nucleic_acid_term_id": "SO:0000356",
+            "biosample": {
+                "biosample_term_name": "GM12878",
+                "description": "B-lymphocyte, lymphoblastoid, International HapMap Project - CEPH/Utah - European Caucasion, Epstein-Barr Virus",
+                "accession": "ENCBS090RNA",
+                "date_created": "2013-10-29T21:15:29.144260+00:00",
+                "@id": "/biosamples/ENCBS090RNA/",
+                "aliases": [
+                "brenton-graveley:GM12878-2",
+                "thomas-gingeras:191WC"
+                ],
+                "organism": "/organisms/human/",
+                "@type": [
+                "biosample",
+                "item"
+                ]
+            },
+        }
+
+        encode = ENCODED('test.encodedcc.org')
+        bio_base = encode.prepare_url(obj['biosample']['@id'])
+
+        url = encode.prepare_url('/libraries/ENCLB044ZZZ/?format=json&embed=False')
+        schema_url = encode.get_schema_url(obj)
+        encode.add_jsonld_context(obj, encode.context, url)
+
+        self.assertEqual(obj['biosample']['@context']['@base'], bio_base)
+        self.assertEqual(obj['@context']['@vocab'], schema_url)
+
+
+def suite():
+    suite = TestSuite()
+    suite.addTests(
+        defaultTestLoader.loadTestsFromTestCase(TestEncoded))
+    return suite
+
+if __name__ == "__main__":
+    from unittest2 import main
+    main(defaultTest='suite')
diff --git a/setup.py b/setup.py

index f63f1e10b587b4fe7001f6ecc4962780ba2efd70..f37b3addaf734b2c22fcaf3aa73c655cc426ca23 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -37,6 +37,7 @@ setup(
                        'httplib2',
                        'keyring',
                        'PyLD',
+                      'requests',
                        # This dependency is redland librdf, which doesn't have a public egg
                        #'librdf >= 1.0.14',
      ],
author	Diane Trout <diane@ghic.org>
	Wed, 15 Jan 2014 01:31:01 +0000 (17:31 -0800)
committer	Diane Trout <diane@ghic.org>
	Wed, 15 Jan 2014 01:31:01 +0000 (17:31 -0800)
htsworkflow/submission/encoded.py	[new file with mode: 0644]	patch \| blob
htsworkflow/submission/test/library.json	[new file with mode: 0644]	patch \| blob
htsworkflow/submission/test/test_encoded.py	[new file with mode: 0644]	patch \| blob
setup.py		patch \| blob \| history