Merge branch 'master' of mus.cacr.caltech.edu:htsworkflow
authorDiane Trout <diane@caltech.edu>
Wed, 22 Jan 2014 22:14:26 +0000 (14:14 -0800)
committerDiane Trout <diane@caltech.edu>
Wed, 22 Jan 2014 22:14:26 +0000 (14:14 -0800)
23 files changed:
.gitignore
htsworkflow/frontend/experiments/experiments.py
htsworkflow/frontend/experiments/fixtures/test_flowcells.json
htsworkflow/frontend/experiments/models.py
htsworkflow/frontend/experiments/test_experiments.py
htsworkflow/frontend/inventory/test_inventory.py
htsworkflow/frontend/samples/test_samples.py
htsworkflow/pipelines/sequences.py
htsworkflow/settings.py
htsworkflow/submission/condorfastq.py
htsworkflow/submission/encoded.py [new file with mode: 0644]
htsworkflow/submission/geo.py
htsworkflow/submission/submission.py
htsworkflow/submission/test/library.json [new file with mode: 0644]
htsworkflow/submission/test/test_condorfastq.py
htsworkflow/submission/test/test_encoded.py [new file with mode: 0644]
htsworkflow/submission/trackhub_submission.py
htsworkflow/util/rdfhelp.py
htsworkflow/util/rdfjsonld.py [new file with mode: 0644]
htsworkflow/util/test/test_rdfhelp.py
htsworkflow/util/test/test_rdfjsonld.py [new file with mode: 0644]
setup.py
test/test_copier.py

diff --git a/.gitignore b/.gitignore
index 3eb65bb2d0094085311b677055db9b2a331daa01..6b1d69947426e7180d43aae6ed10ba7ca127cbff 100644
@@ -2,10 +2,14 @@
 *.py[co]
 .coverage
 *,cover
+dist
+docs/build/
 *.egg-info
+htsworkflow.kdev4
+.kateproject
+.kateproject.d/
+.kdev4/
 .noseids
+RELEASE-VERSION
 .ropeproject
 .tox
-dist
-RELEASE-VERSION
-docs/build/
diff --git a/htsworkflow/frontend/experiments/experiments.py b/htsworkflow/frontend/experiments/experiments.py
index f24d13d5b8dc3e830b2b3c1548251886ec7f1299..9493765dcb02cf31528c295be5dd5bd5389165cf 100644
@@ -14,6 +14,7 @@ from django.core.exceptions import ObjectDoesNotExist
 from django.core.mail import send_mail, mail_admins
 from django.http import HttpResponse, Http404
 from django.conf import settings
+from django.utils import timezone
 
 from htsworkflow.frontend.auth import require_api_key
 from htsworkflow.frontend.experiments.models import \
@@ -179,7 +180,7 @@ def updStatus(request):
       rec.run_status = UpdatedStatus
 
       #if there's a message update that too
-      mytimestamp = datetime.now().__str__()
+      mytimestamp = timezone.now().__str__()
       mytimestamp = re.sub(pattern=":[^:]*$",repl="",string=mytimestamp)
       if request.REQUEST.has_key('msg'):
         rec.run_note += ", "+request.REQUEST['msg']+" ("+mytimestamp+")"
@@ -325,7 +326,7 @@ def estimateFlowcellTimeRemaining(flowcell):
     estimate_mid = estimateFlowcellDuration(flowcell)
 
     # offset for how long we've been running
-    running_time = datetime.now() - flowcell.run_date
+    running_time = timezone.now() - flowcell.run_date
     estimate_mid -= running_time
 
     return estimate_mid
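
The hunks above swap naive datetime.now() calls for django.utils.timezone.now(). With USE_TZ enabled (see the settings.py change below), model fields such as flowcell.run_date hold timezone-aware datetimes, and subtracting a naive datetime from an aware one raises TypeError. A minimal sketch of the difference, assuming a configured Django settings module:

    from datetime import datetime
    from django.utils import timezone

    naive = datetime.now()   # no tzinfo attached
    aware = timezone.now()   # tzinfo set when USE_TZ = True
    # aware - naive raises TypeError; aware - aware arithmetic is safe
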
diff --git a/htsworkflow/frontend/experiments/fixtures/test_flowcells.json b/htsworkflow/frontend/experiments/fixtures/test_flowcells.json
index d84bf17709c02311ade3120c37ec824d01574551..a89d2c463a3a9de889eb892172050eb1b6ba54f7 100644
@@ -7,12 +7,12 @@
        "is_active": true,
        "is_superuser": false,
        "is_staff": false,
-       "last_login": "2009-01-01 00:00:01",
+       "last_login": "2009-01-01T00:00:01-0800",
        "groups": [],
        "user_permissions": [],
        "password": "sha1$foo$5e4eefec1144a04becfb7da79244f07c487fc345",
        "email": "",
-       "date_joined": "2009-01-01 00:01:01"
+       "date_joined": "2009-01-01T00:01:01-0800"
        }
    },
    {"pk": 5, "model": "samples.htsuser",
        "is_active": true,
        "is_superuser": false,
        "is_staff": true,
-       "last_login": "2009-01-01 00:00:01",
+       "last_login": "2009-01-01T00:00:01-0800",
        "groups": [],
        "user_permissions": [],
        "password": "sha1$foo$5e4eefec1144a04becfb7da79244f07c487fc345",
        "email": "",
-       "date_joined": "2009-01-01 00:01:01"
+       "date_joined": "2009-01-01T00:01:01-0800"
        }
    },
    {"pk": 6, "model": "samples.htsuser",
        "is_active": true,
        "is_superuser": true,
        "is_staff": true,
-       "last_login": "2009-01-01 00:00:01",
+       "last_login": "2009-01-01T00:00:01-0800",
        "groups": [],
        "user_permissions": [],
        "password": "sha1$foo$5e4eefec1144a04becfb7da79244f07c487fc345",
        "email": "",
-       "date_joined": "2009-01-01 00:01:01"
+       "date_joined": "2009-01-01T00:01:01-0800"
        }
    },
    {"pk": 7, "model": "samples.htsuser",
  {"pk": 153, "model": "experiments.flowcell",
   "fields": {
       "paired_end": true,
-      "run_date": "2007-09-27 22:12:13",
+      "run_date": "2007-09-27T22:12:13-0800",
       "read_length": 36,
       "notes": "",
       "advanced_run": false,
    {"pk": 152, "model": "experiments.flowcell",
    "fields": {
        "paired_end": false,
-       "run_date": "2009-09-10 18:30:15",
+       "run_date": "2009-09-10T18:30:15-0800",
        "read_length": 38,
        "notes": "328461 4897273 RGT-0248815\r\n328479 4897265 RGT-0249274\r\n330421 4822845 SR-0005496",
        "advanced_run": false,
   {"pk": 151, "model": "experiments.flowcell",
    "fields": {
        "paired_end": false,
-       "run_date": "2009-09-08 15:39:28",
+       "run_date": "2009-09-08T15:39:28-0800",
        "read_length": 38,
        "notes": "Rebuild of broken flowcell\r\n\r\n328461 4820241 RGT-0215719\r\n328479 4897265 RGT-0249510\r\n330421 4822845 SR-0005402\r\n",
        "advanced_run": false,
  {"pk": 200, "model": "experiments.flowcell",
   "fields": {
       "paired_end": true,
-      "run_date": "2007-09-27 22:12:13",
+      "run_date": "2007-09-27T22:12:13-0800",
       "read_length": 36,
       "notes": "",
       "advanced_run": false,
diff --git a/htsworkflow/frontend/experiments/models.py b/htsworkflow/frontend/experiments/models.py
index e3771cbde31be4f654fd44dc5b043ed847144257..ce2f6b7dea877106ba81e154451be6b1ffee9786 100644
@@ -9,6 +9,7 @@ import uuid
 from django.conf import settings
 from django.core.exceptions import ObjectDoesNotExist
 from django.core import urlresolvers
+from django.utils import timezone
 from django.db import models
 from django.db.models.signals import post_init, pre_save
 
@@ -16,6 +17,8 @@ from htsworkflow.frontend.samples.models import Library
 from htsworkflow.util.conversion import parse_flowcell_id
 from htsworkflow.pipelines import runfolder
 
+import pytz
+
 LOGGER = logging.getLogger(__name__)
 default_pM = 5
 try:
@@ -219,7 +222,7 @@ class FlowCell(models.Model):
 
     def import_data_run(self, relative_pathname, run_xml_name, force=False):
         """Given a result directory import files"""
-        now = datetime.datetime.now()
+        now = timezone.now()
         run_dir = get_absolute_pathname(relative_pathname)
         run_xml_path = os.path.join(run_dir, run_xml_name)
 
@@ -243,7 +246,8 @@ class FlowCell(models.Model):
             run.runfolder_name = run_xml_data.runfolder_name
             run.cycle_start = run_xml_data.image_analysis.start
             run.cycle_stop = run_xml_data.image_analysis.stop
-            run.run_start_time = run_xml_data.image_analysis.date
+            naive_run_start_time = datetime.datetime.fromordinal(run_xml_data.image_analysis.date.toordinal())
+            run.run_start_time = pytz.timezone(settings.TIME_ZONE).localize(naive_run_start_time)
             run.image_software = run_xml_data.image_analysis.software
             run.image_version = run_xml_data.image_analysis.version
             run.basecall_software = run_xml_data.bustard.software
@@ -253,7 +257,7 @@ class FlowCell(models.Model):
                 run.alignment_software = run_xml_data.gerald.software
                 run.alignment_version = run_xml_data.gerald.version
 
-            run.last_update_time = datetime.datetime.now()
+            run.last_update_time = timezone.now()
             run.save()
 
             run.update_result_files()
@@ -356,7 +360,7 @@ class DataRun(models.Model):
 
                     self.datafile_set.add(newfile)
 
-        self.last_update_time = datetime.datetime.now()
+        self.last_update_time = timezone.now()
 
     def lane_files(self):
         lanes = {}
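
The localize() call above is the recommended pytz idiom for attaching a zone to a naive datetime: passing tzinfo=pytz.timezone(...) straight to the datetime constructor can silently pick up a pre-1900 LMT offset, while localize() applies the offset actually in effect on that date. A short sketch using the TIME_ZONE value set in settings.py below:

    import datetime
    import pytz

    tz = pytz.timezone('America/Los_Angeles')
    naive = datetime.datetime(2009, 9, 10, 18, 30, 15)
    aware = tz.localize(naive)   # -07:00 (PDT) for that date, not the LMT offset
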
diff --git a/htsworkflow/frontend/experiments/test_experiments.py b/htsworkflow/frontend/experiments/test_experiments.py
index 5878d726d7ee1d268382230a56a235aef7ca29e6..11214b1163ad4fb2a4b6a7de1879cc8095847a97 100644
@@ -29,16 +29,6 @@ LANE_SET = range(1,9)
 NSMAP = {'libns':'http://jumpgate.caltech.edu/wiki/LibraryOntology#'}
 
 from django.db import connection
-OLD_DB_NAME = settings.DATABASE_NAME
-VERBOSITY = 0
-def setUpModule():
-    setup_test_environment()
-    settings.DEBUG = False
-    connection.creation.create_test_db(VERBOSITY)
-
-def tearDownModule():
-    connection.creation.destroy_test_db(OLD_DB_NAME, VERBOSITY)
-    teardown_test_environment()
 
 class ClusterStationTestCases(TestCase):
     fixtures = ['test_flowcells.json']
@@ -687,16 +677,6 @@ class TestSequencer(TestCase):
         errmsgs = list(inference.run_validation())
         self.assertEqual(len(errmsgs), 0)
 
-
-OLD_DB = settings.DATABASES['default']['NAME']
-def setUpModule():
-    setup_test_environment()
-    connection.creation.create_test_db()
-
-def tearDownModule():
-    connection.creation.destroy_test_db(OLD_DB)
-    teardown_test_environment()
-
 def suite():
     from unittest2 import TestSuite, defaultTestLoader
     suite = TestSuite()
diff --git a/htsworkflow/frontend/inventory/test_inventory.py b/htsworkflow/frontend/inventory/test_inventory.py
index 86d37b7cbe7e740468f04df9380ae8fa82c98b35..d7c2336b0866848ea49bc090e1cfbafbdbbc1fd4 100644
@@ -113,15 +113,6 @@ class InventoryTestCase(TestCase):
         flowcells = [ str(x.uri) for x in targets]
         return flowcells
 
-OLD_DB = settings.DATABASES['default']['NAME']
-def setUpModule():
-    setup_test_environment()
-    connection.creation.create_test_db()
-
-def tearDownModule():
-    connection.creation.destroy_test_db(OLD_DB)
-    teardown_test_environment()
-
 def suite():
     from unittest2 import TestSuite, defaultTestLoader
     suite = TestSuite()
diff --git a/htsworkflow/frontend/samples/test_samples.py b/htsworkflow/frontend/samples/test_samples.py
index f0844e55cce7fbf3b33fbdfbf03b9d9264b3e13a..594f2813e6519988b42abccf0c070a81c99c16f8 100644
@@ -332,15 +332,6 @@ def get_rdf_memory_model():
     model = RDF.Model(storage)
     return model
 
-OLD_DB = settings.DATABASES['default']['NAME']
-def setUpModule():
-    setup_test_environment()
-    connection.creation.create_test_db()
-
-def tearDownModule():
-    connection.creation.destroy_test_db(OLD_DB)
-    teardown_test_environment()
-
 def suite():
     from unittest2 import TestSuite, defaultTestLoader
     suite = TestSuite()
diff --git a/htsworkflow/pipelines/sequences.py b/htsworkflow/pipelines/sequences.py
index 87212dddbb0acda92894cf49fec9937b7f2d9e21..2aba7099915b95fe0f2097e4445c42dd9b805941 100644
@@ -12,7 +12,7 @@ from urlparse import urljoin, urlparse
 import RDF
 from htsworkflow.util.rdfhelp import libraryOntology as libNS
 from htsworkflow.util.rdfhelp import toTypedNode, fromTypedNode, rdfNS, \
-     stripNamespace, dump_model, simplify_uri
+     strip_namespace, dump_model, simplify_uri
 
 LOGGER = logging.getLogger(__name__)
 
@@ -222,7 +222,7 @@ class SequenceFile(object):
             raise KeyError(u"%s not found" % (unicode(seq_id),))
 
         seq_type_node = model.get_target(seq_id, libNS['file_type'])
-        seq_type = stripNamespace(libNS, seq_type_node)
+        seq_type = strip_namespace(libNS, seq_type_node)
 
         path = urlparse(str(seq_id.uri)).path
         flowcellNode = get_one(seq_id, libNS['flowcell'])
diff --git a/htsworkflow/settings.py b/htsworkflow/settings.py
index b44a5884eda96acb4b9687b630f8197acd40e396..ba2137ae56d35bf61bea11559ea73b8b7f94aff0 100644
@@ -94,6 +94,7 @@ USE_L10N = True
 
 USE_TZ = True
 
+TIME_ZONE = 'America/Los_Angeles'
 
 # Static files (CSS, JavaScript, Images)
 # https://docs.djangoproject.com/en/1.6/howto/static-files/
@@ -105,6 +106,10 @@ STATIC_URL = '/static/'
 # Update this in settings_local to point to your flowcell result directory
 RESULT_HOME_DIR = os.path.join(BASE_DIR, 'test', 'results')
 
+# Configure who sends email and who should get BCCs of announcements
+NOTIFICATION_SENDER = "noreply@example.com"
+NOTIFICATION_BCC = []
+
 try:
     # allow local customizations
     from settings_local import *
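
Because of the trailing import above, a deployment can override any of these defaults from a settings_local.py module on the Python path; a hypothetical example (all values are placeholders):

    # settings_local.py
    NOTIFICATION_SENDER = "sequencer@example.edu"
    NOTIFICATION_BCC = ["admins@example.edu"]
    RESULT_HOME_DIR = "/data/flowcells"
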
diff --git a/htsworkflow/submission/condorfastq.py b/htsworkflow/submission/condorfastq.py
index 17e463351282b7b8091f922760e8b1fd69a2c934..ffbd8e0704356f4798d2dfb3790734bdcc782c9c 100644
@@ -16,7 +16,7 @@ from htsworkflow.pipelines import desplit_fastq
 from htsworkflow.submission.fastqname import FastqName
 from htsworkflow.util.rdfhelp import get_model, dump_model, load_into_model, \
      fromTypedNode, \
-     stripNamespace
+     strip_namespace
 from htsworkflow.util.rdfns import *
 from htsworkflow.util.conversion import parse_flowcell_id
 
@@ -347,7 +347,7 @@ class SequenceResult(object):
     ispaired = property(_get_ispaired)
 
     def _get_filetype(self):
-        return stripNamespace(libraryOntology, self._filetype)
+        return strip_namespace(libraryOntology, self._filetype)
     filetype = property(_get_filetype)
 
     def _get_path(self):
diff --git a/htsworkflow/submission/encoded.py b/htsworkflow/submission/encoded.py
new file mode 100644 (file)
index 0000000..1228d38
--- /dev/null
@@ -0,0 +1,327 @@
+"""Interface with encoded, the software for ENCODE3 data submission & warehouse
+
+This allows retrieving JSON objects from the encoded server and
+annotating them with JSON-LD contexts.
+"""
+
+
+from __future__ import print_function
+import collections
+import logging
+import json
+import jsonschema
+import requests
+from requests.utils import urlparse, urlunparse
+import types
+from urlparse import urljoin
+
+LOGGER = logging.getLogger(__name__)
+
+ENCODED_CONTEXT = {
+    # The None context will get added to the root of the tree and will
+    # provide common defaults.
+    None: {
+        # terms in multiple encoded objects
+        'description': 'rdf:description',
+        'experiment': {'@type': '@id'},
+        'href': { '@type': '@id' },
+        'lab': { '@type': '@id' },
+        'library': {'@type': '@id' },
+        'pi': { '@type': '@id' },
+        'platform': { '@type': '@id' },
+        'submitted_by': { '@type': '@id' },
+        'url': { '@type': '@id' },
+    },
+    # Identify and mark up contained classes.
+    # e.g. if the tree contains a sub-dictionary named 'biosample',
+    # that dictionary has a term 'biosample_term_id', which is the
+    # term that should be used as the @id.
+    'biosample': {
+        'biosample_term_id': { '@type': '@id' },
+    },
+    'experiment': {
+        "assay_term_id": { "@type": "@id" },
+    },
+    'file': {
+        'dataset': {'@type': '@id'},
+    },
+    # I tried to use the JSON-LD mapping capabilities to convert the lab
+    # contact information into a vcard record, but the encoded model
+    # didn't lend itself well to the vcard schema
+    #'lab': {
+    #    "address1": "vcard:street-address",
+    #    "address2": "vcard:street-address",
+    #    "city": "vcard:locality",
+    #    "state": "vcard:region",
+    #    "country": "vcard:country"
+    #},
+    'human_donor': {
+        'award': { '@type': '@id' },
+    },
+    'library': {
+        'award': { '@type': '@id' },
+        'nucleic_acid_term_id': { '@type': '@id' }
+    }
+}
+
+#FIXME: this needs to be initialized from rdfns
+ENCODED_NAMESPACES = {
+    # JSON-LD lets you define namespaces so you can use the shortened url syntax.
+    # (instead of http://www.w3.org/2000/01/rdf-schema#label you can do
+    # rdfs:label)
+    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+    "owl": "http://www.w3.org/2002/07/owl#",
+    "dc": "http://purl.org/dc/elements/1.1/",
+    "xsd": "http://www.w3.org/2001/XMLSchema#",
+    "vcard": "http://www.w3.org/2006/vcard/ns#",
+
+    # for some namespaces I made a best guess for the ontology root.
+    "EFO": "http://www.ebi.ac.uk/efo/", # EFO ontology
+    "OBO": "http://purl.obolibrary.org/obo/", # OBO ontology
+    "OBI": "http://purl.obolibrary.org/obo/OBI_", # Ontology for Biomedical Investigations
+    # OBI: available from http://svn.code.sf.net/p/obi/code/releases/2012-07-01/merged/merged-obi-comments.owl
+    'SO': 'http://purl.obolibrary.org/obo/SO_', # Sequence ontology
+    # SO: available from http://www.berkeleybop.org/ontologies/so.owl
+
+}
+
+ENCODED_SCHEMA_ROOT = '/profiles/'
+
+class ENCODED:
+    '''Programmatic access to encoded, the software powering ENCODE3's submission site.
+    '''
+    def __init__(self, server, contexts=None):
+        self.server = server
+        self.username = None
+        self.password = None
+        self.contexts = contexts if contexts else ENCODED_CONTEXT
+        self.schemas = {}
+
+    def get_auth(self):
+        return (self.username, self.password)
+    auth = property(get_auth)
+
+    def load_netrc(self):
+        import netrc
+        session = netrc.netrc()
+        authenticators = session.authenticators(self.server)
+        if authenticators:
+            self.username = authenticators[0]
+            self.password = authenticators[2]
+
+    def add_jsonld_context(self, tree, default_base):
+        """Add contexts to various objects in the tree.
+
+        tree is a json tree returned from the DCC's encoded database.
+        self.contexts is a dictionary of dictionaries containing contexts
+                for the various possible encoded classes.
+        default_base, if supplied, sets the base url that relative
+            urls will be resolved against.
+        """
+        self.add_jsonld_child_context(tree, default_base)
+        self.add_jsonld_namespaces(tree['@context'])
+
+    def add_jsonld_child_context(self, obj, default_base):
+        '''Add JSON-LD context to the encoded JSON.
+
+        This is recursive because some of the IDs were relative URLs
+        and I needed a way to properly compute the correct base URL.
+        '''
+        # pretend strings aren't iterable
+        if type(obj) in types.StringTypes:
+            return
+
+        # recurse on container types
+        if isinstance(obj, collections.Sequence):
+            # how should I update lists?
+            for v in obj:
+                self.add_jsonld_child_context(v, default_base)
+            return
+
+        if isinstance(obj, collections.Mapping):
+            for v in obj.values():
+                self.add_jsonld_child_context(v, default_base)
+
+        # we have an object. attach a context to it.
+        if self._is_encoded_object(obj):
+            context = self.create_jsonld_context(obj, default_base)
+            if len(context) > 0:
+                obj.setdefault('@context', {}).update(context)
+
+    def add_jsonld_namespaces(self, context):
+        '''Add shortcut namespaces to a context
+
+        Only needs to be run on the top-most context
+        '''
+        context.update(ENCODED_NAMESPACES)
+
+    def create_jsonld_context(self, obj, default_base):
+        '''Synthesize the context for an encoded type
+
+        self.contexts[None] = default context attributes added to any type
+        self.contexts[type] = context attributes for this type.
+        '''
+        context = {'@base': urljoin(default_base, obj['@id']),
+                    '@vocab': self.get_schema_url(obj)}
+        # add in defaults
+        context.update(self.contexts[None])
+        for t in obj['@type']:
+            if t in self.contexts:
+                context.update(self.contexts[t])
+        return context
+
+    def get_json(self, obj_id, **kwargs):
+        '''GET an ENCODE object as JSON and return as dict
+
+        Uses prepare_url to allow url short-cuts.
+        If no keyword arguments are specified it will default to adding limit=all.
+        Alternative keyword arguments can be passed in and will be sent to the host.
+
+        Known keywords are:
+          limit - (integer or 'all') how many records to return, all for all of them
+          embed - (bool) if true expands linking ids into their associated object.
+          format - text/html or application/json
+        '''
+        if len(kwargs) == 0:
+            kwargs['limit'] = 'all'
+
+        url = self.prepare_url(obj_id)
+        LOGGER.info('requesting url: {}'.format(url))
+
+        # do the request
+        headers = {'content-type': 'application/json'}
+        LOGGER.debug('username: %s, password: %s', self.username, self.password)
+        response = requests.get(url, auth=self.auth, headers=headers, params=kwargs)
+        if not response.status_code == requests.codes.ok:
+            LOGGER.error("Error http status: {}".format(response.status_code))
+            response.raise_for_status()
+        return response.json()
+
+    def get_jsonld(self, obj_id, **kwargs):
+        '''Get an ENCODE object as JSON-LD annotated with class contexts
+
+        see get_json for documentation about what keywords can be passed.
+        '''
+        url = self.prepare_url(obj_id)
+        json = self.get_json(obj_id, **kwargs)
+        self.add_jsonld_context(json, url)
+        return json
+
+    def get_object_type(self, obj):
+        """Return type for an encoded object
+        """
+        obj_type = obj.get('@type')
+        if obj_type and isinstance(obj_type, collections.Sequence):
+            return obj_type[0]
+
+    def get_schema_url(self, obj):
+        obj_type = self.get_object_type(obj)
+        if obj_type:
+            return self.prepare_url(ENCODED_SCHEMA_ROOT + obj_type + '.json') + '#'
+
+    def _is_encoded_object(self, obj):
+        '''Test to see if an object is a JSON-LD object
+
+        Some of the nested dictionaries lack the @id or @type
+        information necessary to convert them.
+        '''
+        if not isinstance(obj, collections.Iterable):
+            return False
+
+        if '@id' in obj and '@type' in obj:
+            return True
+        return False
+
+
+    def patch_json(self, obj_id, changes):
+        """Given a dictionary of changes, push them as an HTTP PATCH request
+        """
+        url = self.prepare_url(obj_id)
+        payload = json.dumps(changes)
+        response = requests.patch(url, auth=self.auth, data=payload)
+        if response.status_code != requests.codes.ok:
+            LOGGER.error("Error http status: {}".format(response.status_code))
+            response.raise_for_status()
+        return response.json()
+
+    def put_json(self, obj_id, new_object):
+        url = self.prepare_url(obj_id)
+        payload = json.dumps(new_object)
+        response = requests.put(url, auth=self.auth, data=payload)
+        if response.status_code != requests.codes.created:
+            LOGGER.error("Error http status: {}".format(response.status_code))
+            response.raise_for_status()
+        return response.json()
+
+    def prepare_url(self, request_url):
+        '''This attempts to provide some convenience for accessing a URL
+
+        Given a url fragment it will default to:
+        * requests over http
+        * requests to self.server
+
+        This allows fairly flexible urls. e.g.
+
+        prepare_url('/experiments/ENCSR000AEG')
+        prepare_url('submit.encodedcc.org/experiments/ENCSR000AEG')
+        prepare_url('http://submit.encodedcc.org/experiments/ENCSR000AEG?limit=all')
+
+        should all return the same url
+        '''
+        # clean up potentially messy urls
+        url = urlparse(request_url)._asdict()
+        if not url['scheme']:
+            url['scheme'] = 'http'
+        if not url['netloc']:
+            url['netloc'] = self.server
+        url = urlunparse(url.values())
+        return url
+
+    def search_jsonld(self, term, **kwargs):
+        '''Send search request to ENCODED
+        '''
+        url = self.prepare_url('/search/')
+        result = self.get_json(url, searchTerm=term, **kwargs)
+        self.convert_search_to_jsonld(result)
+        return result
+
+    def convert_search_to_jsonld(self, result):
+        '''Add the context to search result
+
+        Also remove hard-to-handle nested attributes,
+          e.g. remove object.term when we have no id
+        '''
+        graph = result['@graph']
+        for i, obj in enumerate(graph):
+            # suppress nested attributes
+            graph[i] = {k: v for k, v in obj.items() if '.' not in k}
+
+        self.add_jsonld_context(result, self.prepare_url(result['@id']))
+        return result
+
+    def validate(self, obj):
+        obj_type = self.get_object_type(obj)
+        schema_url = self.get_schema_url(obj)
+        if not schema_url:
+            raise ValueError("Unable to construct schema url")
+
+        schema = self.schemas.setdefault(obj_type, self.get_json(schema_url))
+        hidden = obj.copy()
+        del hidden['@id']
+        del hidden['@type']
+        jsonschema.validate(hidden, schema)
+
+
+if __name__ == '__main__':
+    # try it
+    from htsworkflow.util.rdfhelp import get_model, dump_model
+    from htsworkflow.util.rdfjsonld import load_into_model
+    from pprint import pprint
+    model = get_model()
+    logging.basicConfig(level=logging.DEBUG)
+    encoded = ENCODED('test.encodedcc.org')
+    encoded.load_netrc()
+    body = encoded.get_jsonld('/experiments/ENCSR000AEC/')
+    pprint(body)
+    load_into_model(model, body)
+    #dump_model(model)
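
A minimal usage sketch for the new ENCODED class (the host name and library identifier are illustrative; credentials are read from ~/.netrc as in load_netrc above):

    from htsworkflow.submission.encoded import ENCODED

    encoded = ENCODED('test.encodedcc.org')    # bare host; prepare_url fills in http://
    encoded.load_netrc()
    library = encoded.get_jsonld('/libraries/ENCLB045ZZZ/')  # dict with @context attached
    encoded.validate(library)                  # jsonschema check against /profiles/library.json
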
diff --git a/htsworkflow/submission/geo.py b/htsworkflow/submission/geo.py
index ef8d9457218a76ce53f0872f44efafd6d478dea1..1d98bd5c9550e36408b3ba4faed02ee8d28cedee 100644
@@ -8,7 +8,7 @@ from htsworkflow.submission.submission import Submission
 from htsworkflow.util.rdfhelp import \
      fromTypedNode, \
      geoSoftNS, \
-     stripNamespace, \
+     strip_namespace, \
      submissionOntology
 
 from django.conf import settings
@@ -207,7 +207,7 @@ class GEOSubmission(Submission):
     def query_to_soft_dictionary(self, results, heading):
         attributes = []
         for r in results:
-            name = stripNamespace(geoSoftNS, r['name'])
+            name = strip_namespace(geoSoftNS, r['name'])
             if name is not None:
                 if name.lower() == heading.lower():
                     name = '^' + name
diff --git a/htsworkflow/submission/submission.py b/htsworkflow/submission/submission.py
index c5a3f34b76c20aaae69cdb254b92e47190697752..897053fbc148477c108d232635958db45488dfc5 100644
@@ -11,7 +11,7 @@ from htsworkflow.util.rdfhelp import \
      dump_model, \
      fromTypedNode, \
      get_model, \
-     stripNamespace, \
+     strip_namespace, \
      toTypedNode
 from htsworkflow.util.rdfns import *
 from htsworkflow.util.hashfile import make_md5sum
@@ -133,7 +133,7 @@ class Submission(object):
             RDF.Statement(fileNode,
                           libraryOntology['library'],
                           libNode))
-                          
+
         LOGGER.debug("Done.")
 
     def make_file_node(self, pathname, submissionNode):
@@ -175,7 +175,7 @@ class Submission(object):
         except ValueError:
             # currently its just ignore it if the fastq name parser fails
             return
-        
+
         terms = [('flowcell', libraryOntology['flowcell_id']),
                  ('lib_id', libraryOntology['library_id']),
                  ('lane', libraryOntology['lane_number']),
@@ -186,7 +186,7 @@ class Submission(object):
             if value is not None:
                 s = RDF.Statement(fileNode, model_term, toTypedNode(value))
                 self.model.append(s)
-                
+
     def add_label(self, file_type, file_node, lib_node):
         """Add rdfs:label to a file node
         """
@@ -390,7 +390,7 @@ def list_submissions(model):
     query = RDF.SPARQLQuery(query_body)
     rdfstream = query.execute(model)
     for row in rdfstream:
-        s = stripNamespace(submissionLog, row['submission'])
+        s = strip_namespace(submissionLog, row['submission'])
         if s[-1] in ['#', '/', '?']:
             s = s[:-1]
         yield s
diff --git a/htsworkflow/submission/test/library.json b/htsworkflow/submission/test/library.json
new file mode 100644 (file)
index 0000000..f694e10
--- /dev/null
@@ -0,0 +1,359 @@
+{
+    "properties": {
+        "accession": {
+            "comment": "Only admins are allowed to set or update this value.",
+            "accessionType": "LB",
+            "description": "A unique identifier to be used to reference the object.",
+            "permission": "import_items",
+            "serverDefault": "accession",
+            "format": "accession",
+            "title": "Accession",
+            "type": "string"
+        },
+        "alternate_accessions": {
+            "description": "Accessions previously assigned to objects that have been merged with this object.",
+            "title": "Alternate accessions",
+            "default": [],
+            "items": {
+                "comment": "Only admins are allowed to set or update this value.",
+                "format": "accession",
+                "type": "string",
+                "description": "An accession previously assigned to an object that has been merged with this object.",
+                "title": "Alternate Accession"
+            },
+            "permission": "import_items",
+            "type": "array"
+        },
+        "lot_id": {
+            "type": "string",
+            "description": "The lot identifier provided by the originating lab or vendor.",
+            "title": "Lot ID"
+        },
+        "aliases": {
+            "default": [],
+            "items": {
+                "comment": "Current convention is colon separated lab name and lab identifier. (e.g. john-doe:42).",
+                "pattern": "^\\S+:\\S+",
+                "type": "string",
+                "description": "A lab specific identifier to reference an object.",
+                "title": "Lab alias"
+            },
+            "type": "array",
+            "description": "Lab specific identifiers to reference an object.",
+            "title": "Lab aliases"
+        },
+        "submitted_by": {
+            "comment": "Do not submit, value is assigned by the server. The user that created the object.",
+            "linkTo": "user",
+            "title": "Submitted by",
+            "serverDefault": "userid",
+            "permission": "import_items",
+            "type": "string"
+        },
+        "documents": {
+            "default": [],
+            "items": {
+                "comment": "See document.json for available identifiers.",
+                "type": "string",
+                "description": "A document that describe the preparation of the library. ",
+                "linkTo": "document",
+                "title": "Protocol document"
+            },
+            "type": "array",
+            "description": "Documents that describe the preparation of the library.",
+            "title": "Protocol documents"
+        },
+        "fragmentation_date": {
+            "comment": "Date can be submitted in as YYYY-MM-DD or YYYY-MM-DDTHH:MM:SSTZD (TZD is the time zone designator; use Z to express time in UTC or for time expressed in local time add a time zone offset from UTC +HH:MM or -HH:MM).",
+            "type": "string",
+            "anyOf": [
+                {
+                    "format": "date-time"
+                },
+                {
+                    "format": "date"
+                }
+            ],
+            "description": "The date that the nucleic acid was fragmented.",
+            "title": "Fragmentation date"
+        },
+        "uuid": {
+            "format": "uuid",
+            "serverDefault": "uuid4",
+            "title": "UUID",
+            "requestMethod": "POST",
+            "permission": "import_items",
+            "type": "string"
+        },
+        "strand_specificity": {
+            "default": false,
+            "type": "boolean",
+            "description": "The preparation of the library using a strand-specific protocol.",
+            "title": "Strand specificity"
+        },
+        "fragmentation_method": {
+            "description": "A short description or reference of the nucleic acid fragmentation protocol used in library preparation, if applicable.",
+            "title": "Fragmentation method",
+            "default": "see document",
+            "format": "semi-controlled",
+            "XXXenum": [
+                "sonication",
+                "see document",
+                "covaris shearing",
+                "chemical (part of Illumina TruSeq mRNA Kit)",
+                "Illumina/Nextera tagmentation",
+                "bioruptor twin",
+                "n/a"
+            ],
+            "type": "string"
+        },
+        "schema_version": {
+            "comment": "Do not submit, value is assigned by the server. The version of the JSON schema that the server uses to validate the object. Schema version indicates generation of schema used to save version to to enable upgrade steps to work. Individual schemas should set the default.",
+            "pattern": "^\\d+(\\.\\d+)*$",
+            "default": "2",
+            "type": "string",
+            "requestMethod": []
+        },
+        "lysis_method": {
+            "description": "A short description or reference of the cell lysis protocol used in library preparation, if applicable",
+            "title": "Lysis method",
+            "default": "see document",
+            "format": "semi-controlled",
+            "XXXenum": [
+                "miRNeasy Mini kit (QIAGEN cat#:217004)",
+                "Trizol (LifeTech cat#: 15596-018)",
+                "Ambion mirVana",
+                "Qiagen #74204",
+                "QIAGEN DNeasy Blood & Tissue Kit",
+                "see document",
+                "n/a"
+            ],
+            "type": "string"
+        },
+        "source": {
+            "comment": "See source.json for available identifiers.",
+            "title": "Source",
+            "type": "string",
+            "description": "The originating lab or vendor.",
+            "linkTo": "source"
+        },
+        "biosample": {
+            "comment": "See biosample.json for available identifiers.",
+            "title": "Biosample",
+            "type": "string",
+            "description": "The biosample that nucleic acid was isolated from to generate the library.",
+            "linkTo": "biosample"
+        },
+        "extraction_method": {
+            "description": "A short description or reference of the nucleic acid extraction protocol used in library preparation, if applicable.",
+            "title": "Extraction method",
+            "default": "see document",
+            "format": "semi-controlled",
+            "XXXenum": [
+                "miRNeasy Mini kit (QIAGEN cat#:217004)",
+                "Trizol (LifeTech cat#: 15596-018)",
+                "Ambion mirVana",
+                "Qiagen #74204",
+                "QIAGEN DNeasy Blood & Tissue Kit",
+                "see document",
+                "n/a"
+            ],
+            "type": "string"
+        },
+        "library_size_selection_method": {
+            "description": "A short description or reference of the size selection protocol used in library preparation, if applicable.",
+            "title": "Size selection method",
+            "default": "see document",
+            "format": "semi-controlled",
+            "XXXenum": [
+                "gel",
+                "see document",
+                "SPRI beads"
+            ],
+            "type": "string"
+        },
+        "status": {
+            "default": "CURRENT",
+            "enum": [
+                "CURRENT",
+                "DELETED"
+            ],
+            "type": "string",
+            "title": "Status"
+        },
+        "nucleic_acid_term_name": {
+            "enum": [
+                "DNA",
+                "RNA",
+                "polyadenylated mRNA",
+                "miRNA"
+            ],
+            "type": "string",
+            "description": "SO (Sequence Ontology) term best matching the nucleic acid isolated to generate the library (e.g. 'RNA' for a total RNA library, even if that library is subsequently reverse transcribed for DNA sequencing.)",
+            "title": "Nucleic acid term"
+        },
+        "treatments": {
+            "default": [],
+            "items": {
+                "comment": "See treatment.json for available identifiers.",
+                "type": "string",
+                "linkTo": "treatment",
+                "title": "Treatment"
+            },
+            "type": "array",
+            "title": "Treatments"
+        },
+        "award": {
+            "comment": "See award.json for list of available identifiers.",
+            "title": "Grant",
+            "type": "string",
+            "description": "Grant associated with the submission.",
+            "linkTo": "award"
+        },
+        "depleted_in_term_name": {
+            "default": [],
+            "items": {
+                "enum": [
+                    "rRNA",
+                    "polyadenylated mRNA",
+                    "capped mRNA"
+                ],
+                "type": "string",
+                "description": "SO (Sequence Ontology) term best matching the nucleic acid that was diminished from the library.",
+                "title": "Depleted in term"
+            },
+            "type": "array"
+        },
+        "paired_ended": {
+            "default": false,
+            "XXXnote": "Is this redundant to the field found in replicate.json",
+            "type": "boolean",
+            "description": "Whether or not the library was prepared with paired ends",
+            "title": "Paired ended"
+        },
+        "lab": {
+            "comment": "See lab.json for list of available identifiers.",
+            "title": "Lab",
+            "type": "string",
+            "description": "Lab associated with the submission.",
+            "linkTo": "lab"
+        },
+        "depleted_in_term_id": {
+            "default": [],
+            "items": {
+                "comment": "Based on the choice in depleted_in_term_name use the following guide: rRNA - SO:0000252,  polyadenylated mRNA - SO:0000871 or capped mRNA - SO:0000862",
+                "enum": [
+                    "SO:0000252",
+                    "SO:0000871",
+                    "SO:0000862"
+                ],
+                "type": "string",
+                "description": "SO (Sequence Ontology) identifier best matching the nucleic acid that was diminished from the library.",
+                "title": "Depleted in ID"
+            },
+            "type": "array"
+        },
+        "product_id": {
+            "type": "string",
+            "description": "The product identifier provided by the originating lab or vendor.",
+            "title": "Product ID"
+        },
+        "size_range": {
+            "pattern": "(^[0-9]+-[0-9]+$|^[<>][0-9]+$)",
+            "type": "string",
+            "description": "The measured size range of the purified nucleic acid, in kD.",
+            "title": "Size range"
+        },
+        "notes": {
+            "title": "Notes",
+            "type": "string",
+            "description": "Additional information.",
+            "permission": "import_items"
+        },
+        "nucleic_acid_term_id": {
+            "comment": "Based on the choice in nucleic_acid_term_name use the following guide: DNA - SO:0000352, RNA - SO:0000356,  polyadenylated mRNA - SO:0000871 or miRNA - SO:0000276",
+            "enum": [
+                "SO:0000352",
+                "SO:0000356",
+                "SO:0000871",
+                "SO:0000276"
+            ],
+            "type": "string",
+            "description": "SO (Sequence Ontology) identifier best matching the nucleic acid isolated to generate the library (e.g. 'SO:0000356' for a total RNA library, even if that library is subsequently reverse transcribed for DNA sequencing.)",
+            "title": "Nucleic acid ID"
+        },
+        "nucleic_acid_starting_quantity": {
+            "pattern": "[0-9]+",
+            "type": "string",
+            "description": "The starting amount of nucleic acid before selection and purification.",
+            "title": "Nucleic acid starting quantity"
+        },
+        "date_created": {
+            "comment": "Do not submit, value is assigned by the server. The date the object is created.",
+            "title": "Date created",
+            "serverDefault": "now",
+            "permission": "import_items",
+            "anyOf": [
+                {
+                    "format": "date-time"
+                },
+                {
+                    "format": "date"
+                }
+            ],
+            "type": "string"
+        }
+    },
+    "description": "Schema for submitting a nucleic acid library.",
+    "title": "Library",
+    "required": [
+        "award",
+        "lab",
+        "nucleic_acid_term_id"
+    ],
+    "mixinProperties": [
+        {
+            "$ref": "mixins.json#/schema_version"
+        },
+        {
+            "$ref": "mixins.json#/uuid"
+        },
+        {
+            "$ref": "mixins.json#/accession"
+        },
+        {
+            "$ref": "mixins.json#/aliases"
+        },
+        {
+            "$ref": "mixins.json#/attribution"
+        },
+        {
+            "$ref": "mixins.json#/standard_status"
+        },
+        {
+            "$ref": "mixins.json#/submitted"
+        },
+        {
+            "$ref": "mixins.json#/source"
+        },
+        {
+            "$ref": "mixins.json#/product_id"
+        },
+        {
+            "$ref": "mixins.json#/lot_id"
+        },
+        {
+            "$ref": "mixins.json#/notes"
+        }
+    ],
+    "XXXcomment": "is source required?",
+    "identifyingProperties": [
+        "uuid",
+        "accession",
+        "aliases"
+    ],
+    "additionalProperties": false,
+    "$schema": "http://json-schema.org/draft-04/schema#",
+    "type": "object",
+    "id": "/profiles/library.json"
+}
diff --git a/htsworkflow/submission/test/test_condorfastq.py b/htsworkflow/submission/test/test_condorfastq.py
index 09d68083c1ec50052c450a71e9d9e24356573cf7..1e1c2d9bc56677cdef09b5b2efca48d296e5fdb3 100644
@@ -680,16 +680,6 @@ class TestCondorFastq(TestCase):
             self.assertTrue('12345_C02F9ACXX_c202_l3_r2.fastq' in arguments[3])
 
 
-OLD_DB = settings.DATABASES['default']['NAME']
-def setUpModule():
-    setup_test_environment()
-    connection.creation.create_test_db()
-
-def tearDownModule():
-    connection.creation.destroy_test_db(OLD_DB)
-    teardown_test_environment()
-
-
 def suite():
     from unittest2 import TestSuite, defaultTestLoader
     suite = TestSuite()
diff --git a/htsworkflow/submission/test/test_encoded.py b/htsworkflow/submission/test/test_encoded.py
new file mode 100644 (file)
index 0000000..675d944
--- /dev/null
@@ -0,0 +1,189 @@
+import json
+import os
+from pprint import pprint
+from unittest2 import TestCase, TestSuite, defaultTestLoader, skip
+
+from htsworkflow.submission.encoded import (ENCODED,
+     ENCODED_CONTEXT,
+     ENCODED_NAMESPACES
+)
+
+class TestEncoded(TestCase):
+    def test_prepare_url(self):
+        encode = ENCODED('test.encodedcc.edu')
+
+        tests = [
+            ('/experiments', 'http://test.encodedcc.edu/experiments'),
+            ('/experiments/ENCLB045ZZZ',
+             'http://test.encodedcc.edu/experiments/ENCLB045ZZZ'),
+            ('http://submit.encodedcc.edu/experiments/ENCLB045ZZZ',
+             'http://submit.encodedcc.edu/experiments/ENCLB045ZZZ'),
+        ]
+        for url, result in tests:
+            self.assertEqual(encode.prepare_url(url), result)
+
+    def test_validate(self):
+        """Test validation
+        """
+        schema_file = os.path.join(os.path.dirname(__file__), 'library.json')
+        schema = json.loads(open(schema_file, 'r').read())
+
+        obj = {u'@id': u'/libraries/ENCLB045ZZZ/',
+               u'@type': [u'library', u'item'],
+               u'accession': u'ENCLB045ZZZ',
+               u'aliases': [],
+               u'alternate_accessions': [],
+               u'award': u'/awards/U54HG006998/',
+               u'biosample': u'/biosamples/ENCBS089RNA/',
+               u'date_created': u'2014-01-14T19:44:51.061770+00:00',
+               u'depleted_in_term_id': [],
+               u'depleted_in_term_name': [],
+               u'documents': [],
+               u'extraction_method': u'Ambion mirVana',
+               u'fragmentation_method': u'Illumina/Nextera tagmentation',
+               u'lab': u'/labs/barbara-wold/',
+               u'library_size_selection_method': u'SPRI beads',
+               u'lysis_method': u'Ambion mirVana',
+               u'nucleic_acid_term_id': u'SO:0000871',
+               u'nucleic_acid_term_name': u'polyadenylated mRNA',
+               u'paired_ended': False,
+               u'schema_version': u'2',
+               u'size_range': u'>200',
+               u'status': u'CURRENT',
+               u'strand_specificity': False,
+               u'submitted_by': u'/users/0e3dde9b-aaf9-42dd-87f7-975a85072ed2/',
+               u'treatments': [],
+               u'uuid': u'42c46028-708f-4347-a3df-2c82dfb021c4'}
+        encode = ENCODED('submit.encodedcc.org')
+        encode.schemas[u'library'] = schema
+        encode.validate(obj)
+        self.assertTrue('@id' in obj)
+
+    def test_create_context(self):
+        linked_id = {'@type': '@id'}
+        library = { '@id': '/libraries/1234', '@type': ['library', 'item'] }
+
+        encode = ENCODED('test.encodedcc.org')
+        url = encode.prepare_url(library['@id'])
+        context = encode.create_jsonld_context(library, url)
+        self.assertEqual(context['@vocab'], 'http://test.encodedcc.org/profiles/library.json#')
+        self.assertEqual(context['award'], linked_id)
+        self._verify_context(context, 'library')
+        # namespaces not added yet.
+        self.assertRaises(AssertionError, self._verify_namespaces, context)
+        encode.add_jsonld_namespaces(context)
+        self._verify_namespaces(context)
+
+    def test_add_context(self):
+        """Checking to make sure nested @base and @vocab urls are set correctly
+        """
+        obj = {
+            "nucleic_acid_term_name": "RNA",
+            "accession": "ENCLB044ZZZ",
+            "@id": "/libraries/ENCLB044ZZZ/",
+            "schema_version": "1",
+            "@type": [
+                "library",
+                "item"
+            ],
+            "lysis_method": "Ambion mirVana",
+            "nucleic_acid_term_id": "SO:0000356",
+            "biosample": {
+                "biosample_term_name": "GM12878",
+                "description": "B-lymphocyte, lymphoblastoid, International HapMap Project - CEPH/Utah - European Caucasion, Epstein-Barr Virus",
+                "accession": "ENCBS090RNA",
+                "date_created": "2013-10-29T21:15:29.144260+00:00",
+                "@id": "/biosamples/ENCBS090RNA/",
+                "aliases": [
+                "brenton-graveley:GM12878-2",
+                "thomas-gingeras:191WC"
+                ],
+                "organism": "/organisms/human/",
+                "@type": [
+                "biosample",
+                "item"
+                ]
+            },
+        }
+
+        encode = ENCODED('test.encodedcc.org')
+        bio_base = encode.prepare_url(obj['biosample']['@id'])
+
+        url = encode.prepare_url('/libraries/ENCLB044ZZZ/?format=json&embed=False')
+        schema_url = encode.get_schema_url(obj)
+        encode.add_jsonld_context(obj, url)
+
+        self.assertEqual(obj['biosample']['@context']['@base'], bio_base)
+        self.assertEqual(obj['@context']['@vocab'], schema_url)
+        self._verify_context(obj['@context'], 'library')
+        self._verify_namespaces(obj['@context'])
+        self._verify_context(obj['biosample']['@context'], 'biosample')
+        self.assertEqual(obj['@context']['rdf'], 'http://www.w3.org/1999/02/22-rdf-syntax-ns#')
+        self.assertEqual(obj['@context']['OBO'], 'http://purl.obolibrary.org/obo/')
+
+
+    def test_convert_search_to_jsonld(self):
+        example = {'count': {'biosamples': 2},
+                   'portal_title': 'ENCODE',
+                   'title': 'Search',
+                   'notification': 'Success',
+                   'filters': [],
+                   '@id': '/search/?searchTerm=wold',
+                   '@type': ['search'],
+                   'facets': [],
+                    '@graph': [{
+                    u'@id': u'/biosamples/ENCBS125ENC/',
+                    u'@type': [u'biosample', u'item'],
+                    u'accession': u'ENCBS125ENC',
+                    u'award.rfa': u'ENCODE2-Mouse',
+                    u'biosample_term_name': u'myocyte',
+                    u'biosample_type': u'in vitro differentiated cells',
+                    u'characterizations.length': [],
+                    u'constructs.length': [],
+                    u'lab.title': u'Barbara Wold, Caltech',
+                    u'life_stage': u'unknown',
+                    u'organism.name': u'mouse',
+                    u'source.title': u'Barbara Wold',
+                    u'status': u'CURRENT',
+                    u'treatments.length': []},
+                    {u'@id': u'/biosamples/ENCBS126ENC/',
+                    u'@type': [u'biosample', u'item'],
+                    u'accession': u'ENCBS126ENC',
+                    u'award.rfa': u'ENCODE2-Mouse',
+                    u'biosample_term_name': u'myocyte',
+                    u'biosample_type': u'in vitro differentiated cells',
+                    u'characterizations.length': [],
+                    u'constructs.length': [],
+                    u'lab.title': u'Barbara Wold, Caltech',
+                    u'life_stage': u'unknown',
+                    u'organism.name': u'mouse',
+                    u'source.title': u'Barbara Wold',
+                    u'status': u'CURRENT',
+                    u'treatments.length': []},
+                    ]}
+
+        encode = ENCODED('test.encodedcc.org')
+        result = encode.convert_search_to_jsonld(example)
+        for obj in result['@graph']:
+            self.assertNotIn('award.rfa', obj)
+
+    def _verify_context(self, context, obj_type):
+        for context_key in [None, obj_type]:
+            for k in ENCODED_CONTEXT[context_key]:
+                self.assertIn(k, context)
+                self.assertEqual(ENCODED_CONTEXT[context_key][k], context[k])
+
+    def _verify_namespaces(self, context):
+        for k in ENCODED_NAMESPACES:
+            self.assertIn(k, context)
+            self.assertEqual(ENCODED_NAMESPACES[k], context[k])
+
+def suite():
+    suite = TestSuite()
+    suite.addTests(
+        defaultTestLoader.loadTestsFromTestCase(TestEncoded))
+    return suite
+
+if __name__ == "__main__":
+    from unittest2 import main
+    main(defaultTest='suite')
diff --git a/htsworkflow/submission/trackhub_submission.py b/htsworkflow/submission/trackhub_submission.py
index e383175a16884c3d1d24a69042bf916a44aa71c9..3aa4a96ae26fa96c44e0c0f58c67503890fe38dc 100644
@@ -11,7 +11,6 @@ from htsworkflow.submission.submission import Submission
 from htsworkflow.util.rdfhelp import \
      fromTypedNode, \
      geoSoftNS, \
-     stripNamespace, \
      submissionOntology
 from htsworkflow.util.url import parse_ssh_url
 from htsworkflow.util.ucsc import bigWigInfo
@@ -121,9 +120,9 @@ class TrackHubSubmission(Submission):
                 'long_label': str(track_label),
                 'subgroups': track_subgroup,
             }
-            
-            LOGGER.debug('track attributes: %s', pformat(attributes))       
-            newtrack = Track(**attributes)                    
+
+            LOGGER.debug('track attributes: %s', pformat(attributes))
+            newtrack = Track(**attributes)
             view.add_tracks([newtrack])
 
         results = hub.render()
@@ -186,10 +185,10 @@ class TrackHubSubmission(Submission):
                 value = self.sanitize_name(track[k])
                 track_subgroups[k] = value
         return track_subgroups
-    
+
     def make_track_type(self, track):
         """Further annotate tracktype.
-        
+
         bigWig files can have additional information. Add it if we can
         """
         track_type = track['file_type']
diff --git a/htsworkflow/util/rdfhelp.py b/htsworkflow/util/rdfhelp.py
index ac5f6ccd1056a561418cd9b17e155ee936097630..48294416abf1def5a9bc1c3d77b0a4eaa1a607b0 100644
@@ -214,7 +214,7 @@ def simplify_uri(uri):
                 return element
     raise ValueError("Unable to simplify %s" % (uri,))
 
-def stripNamespace(namespace, term):
+def strip_namespace(namespace, term):
     """Remove the namespace portion of a term
 
     returns None if they aren't in common
@@ -232,15 +232,17 @@ def stripNamespace(namespace, term):
     return term_s.replace(namespace._prefix, "")
 
 
-def get_model(model_name=None, directory=None):
+def get_model(model_name=None, directory=None, use_contexts=True):
     if directory is None:
         directory = os.getcwd()
 
+    contexts = 'yes' if use_contexts else 'no'
+
     if model_name is None:
-        storage = RDF.MemoryStorage(options_string="contexts='yes'")
+        storage = RDF.MemoryStorage(options_string="contexts='{}'".format(contexts))
         logger.info("Using RDF Memory model")
     else:
-        options = "contexts='yes',hash-type='bdb',dir='{0}'".format(directory)
+        options = "contexts='{0}',hash-type='bdb',dir='{1}'".format(contexts, directory)
         storage = RDF.HashStorage(model_name,
                       options=options)
         logger.info("Using {0} with options {1}".format(model_name, options))
@@ -281,10 +283,10 @@ def load_into_model(model, parser_name, path, ns=None):
         except RDF.RedlandError, e:
             errmsg = "RDF.RedlandError: {0} {1} tries remaining"
             logger.error(errmsg.format(str(e), retries))
-            
+
     if not succeeded:
         logger.warn("Unable to download %s", url)
-        
+
     for s in statements:
         conditionally_add_statement(model, s, ns)
 
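
For reference, the Redland storage option strings the revised get_model() builds (the model name and directory are illustrative):

    from htsworkflow.util.rdfhelp import get_model

    get_model()                      # memory store, "contexts='yes'"
    get_model(use_contexts=False)    # memory store, "contexts='no'"
    get_model('rdfdb', '/tmp')       # hash store, "contexts='yes',hash-type='bdb',dir='/tmp'"
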
diff --git a/htsworkflow/util/rdfjsonld.py b/htsworkflow/util/rdfjsonld.py
new file mode 100644 (file)
index 0000000..45046a5
--- /dev/null
@@ -0,0 +1,33 @@
+import RDF
+from pyld import jsonld
+
+def load_into_model(model, json_data):
+    '''Given a PyLD dictionary, load its statements into our Redland model
+    '''
+    json_graphs = jsonld.to_rdf(json_data)
+    for graph in json_graphs:
+        for triple in json_graphs[graph]:
+            stmt = triple_to_statement(triple)
+            model.add_statement(stmt) #, graph_context)
+
+def triple_to_statement(triple):
+    '''Convert PyLD triple dictionary to a librdf statement
+    '''
+    s = to_node(triple['subject'])
+    p = to_node(triple['predicate'])
+    o = to_node(triple['object'])
+    return RDF.Statement(s, p, o)
+
+def to_node(item):
+    '''Convert a PyLD node to a Redland node'''
+    nodetype = item['type']
+    value = item['value']
+    datatype = item.get('datatype', None)
+
+    if nodetype == 'blank node':
+        return RDF.Node(blank=value)
+    elif nodetype == 'IRI':
+        return RDF.Node(uri_string=str(value))
+    else:
+        return RDF.Node(literal=unicode(value).encode('utf-8'),
+                        datatype=RDF.Uri(datatype))
diff --git a/htsworkflow/util/test/test_rdfhelp.py b/htsworkflow/util/test/test_rdfhelp.py
index 3f328d8d18b43b8ec5bd80fb5f2f4a70501b67a8..2f141904f830183d309720b4629205a2c9bdf67d 100644
@@ -19,7 +19,7 @@ from htsworkflow.util.rdfhelp import \
      rdfsNS, \
      remove_schemas, \
      toTypedNode, \
-     stripNamespace, \
+     strip_namespace, \
      simplify_uri, \
      sanitize_literal, \
      xsdNS
@@ -121,17 +121,17 @@ try:
 
             term = 'foo'
             node = nsOrg[term]
-            self.assertEqual(stripNamespace(nsOrg, node), term)
-            self.assertEqual(stripNamespace(nsCom, node), None)
-            self.assertEqual(stripNamespace(nsOrg, node.uri), term)
+            self.assertEqual(strip_namespace(nsOrg, node), term)
+            self.assertEqual(strip_namespace(nsCom, node), None)
+            self.assertEqual(strip_namespace(nsOrg, node.uri), term)
 
         def test_strip_namespace_exceptions(self):
             nsOrg = RDF.NS('example.org/example#')
             nsCom = RDF.NS('example.com/example#')
 
             node = toTypedNode('bad')
-            self.assertRaises(ValueError, stripNamespace, nsOrg, node)
-            self.assertRaises(ValueError, stripNamespace, nsOrg, nsOrg)
+            self.assertRaises(ValueError, strip_namespace, nsOrg, node)
+            self.assertRaises(ValueError, strip_namespace, nsOrg, nsOrg)
 
         def test_simplify_uri(self):
             DATA = [('http://asdf.org/foo/bar', 'bar'),
diff --git a/htsworkflow/util/test/test_rdfjsonld.py b/htsworkflow/util/test/test_rdfjsonld.py
new file mode 100644 (file)
index 0000000..8e501ba
--- /dev/null
@@ -0,0 +1,56 @@
+from unittest2 import TestCase, TestSuite, defaultTestLoader, skip
+
+from htsworkflow.util.rdfjsonld import load_into_model, to_node, triple_to_statement
+from htsworkflow.util.rdfhelp import get_model
+
+jstatement = {
+    'object': {'datatype': u'http://www.w3.org/2001/XMLSchema#dateTime',
+                'type': 'literal',
+                'value': '1940-10-09'},
+    'predicate': {'type': 'IRI',
+                    'value': u'http://schema.org/birthDate'},
+    'subject': {'type': 'blank node',
+                'value': '_:a'}
+}
+doc = {
+  "@context": "http://json-ld.org/contexts/person.jsonld",
+  "@id": "http://dbpedia.org/resource/John_Lennon",
+  "name": "John Lennon",
+  "born": "1940-10-09",
+  "spouse": "http://dbpedia.org/resource/Cynthia_Lennon"
+}
+
+class TestJsonLD(TestCase):
+    def test_to_node(self):
+        obj = to_node(jstatement['object'])
+        self.assertTrue(obj.is_literal())
+        self.assertEqual(str(obj), '1940-10-09')
+        pred = to_node(jstatement['predicate'])
+        self.assertTrue(pred.is_resource())
+        self.assertEqual(str(pred.uri), jstatement['predicate']['value'])
+        subj = to_node(jstatement['subject'])
+        self.assertTrue(subj.is_blank())
+
+    def test_to_statement(self):
+        stmt = triple_to_statement(jstatement)
+        self.assertTrue(stmt.object.is_literal())
+        self.assertEqual(str(stmt.object), '1940-10-09')
+        self.assertTrue(stmt.predicate.is_resource())
+        self.assertEqual(str(stmt.predicate.uri), jstatement['predicate']['value'])
+        self.assertTrue(stmt.subject.is_blank())
+
+    def test_load_model(self):
+        model = get_model(use_contexts=False)
+        self.assertEqual(len(model), 0)
+        load_into_model(model, doc)
+        self.assertEqual(len(model), 3)
+
+def suite():
+    suite = TestSuite()
+    suite.addTests(
+        defaultTestLoader.loadTestsFromTestCase(TestJsonLD))
+    return suite
+
+if __name__ == "__main__":
+    from unittest2 import main
+    main(defaultTest='suite')
diff --git a/setup.py b/setup.py
index 2d63df10ef7a35c623d0a0e219cf2c26571a1b6d..f37b3addaf734b2c22fcaf3aa73c655cc426ca23 100644
--- a/setup.py
+++ b/setup.py
@@ -36,6 +36,8 @@ setup(
                       'benderjab >= 0.2',
                       'httplib2',
                       'keyring',
+                      'PyLD',
+                      'requests',
                       # This dependency is redland librdf, which doesn't have a public egg
                       #'librdf >= 1.0.14',
     ],
diff --git a/test/test_copier.py b/test/test_copier.py
index 3e26cc679605f36d45d5f24016a99f959df63b4e..a2dd5d7c0624b99858fd31612bbb5b647f379fc8 100644
@@ -1,9 +1,15 @@
-from unittest2 import TestCase
+from unittest import TestCase, skipIf
 
 from StringIO import StringIO
-from htsworkflow.automation import copier
 from htsworkflow.automation.solexa import is_runfolder
 
+try:
+    from htsworkflow.automation import copier
+    BENDERJAB_UNAVAILABLE = False
+except ImportError as e:
+    BENDERJAB_UNAVAILABLE = True
+
+@skipIf(BENDERJAB_UNAVAILABLE, "Can't test copier daemon without a working benderjab")
 class testCopier(TestCase):
     def test_empty_config(self):
         cfg = StringIO("""[fake]
@@ -11,9 +17,9 @@ something: unrelated
 """)
         bot = copier.CopierBot('fake', configfile=cfg)
         self.failUnlessRaises(RuntimeError, bot.read_config)
-        
+
     def test_full_config(self):
-        cfg = StringIO("""[copier]        
+        cfg = StringIO("""[copier]
 jid: copier@example.fake
 password: badpassword
 authorized_users: user1@example.fake user2@example.fake
@@ -32,7 +38,7 @@ notify_users: user3@example.fake
         self.failUnlessEqual(len(c.authorized_users), 2)
         self.failUnlessEqual(c.authorized_users[0], 'user1@example.fake')
         self.failUnlessEqual(c.authorized_users[1], 'user2@example.fake')
-        self.failUnlessEqual(c.rsync.source_base_list[0], 
+        self.failUnlessEqual(c.rsync.source_base_list[0],
                              'rsync://localhost/tmp/sequencer_source/')
         self.failUnlessEqual(c.rsync.dest_base, '/tmp/sequencer_destination')
         self.failUnlessEqual(len(c.notify_users), 1)
@@ -40,16 +46,16 @@ notify_users: user3@example.fake
         self.failUnlessEqual(c.validate_url('rsync://other/tmp'), None)
         self.failUnlessEqual(c.validate_url('http://localhost/tmp'), None)
         # In the rsync process the URL gets a trailing '/' added to it
-        # But in the bot config its still slash-less. 
+        # But in the bot config its still slash-less.
         # It is debatable when to add the trailing slash.
         self.failUnlessEqual(
-          c.validate_url('rsync://localhost/tmp/sequencer_source'), 
-          'rsync://localhost/tmp/sequencer_source') 
+          c.validate_url('rsync://localhost/tmp/sequencer_source'),
+          'rsync://localhost/tmp/sequencer_source')
         self.failUnlessEqual(
-          c.validate_url('rsync://localhost/tmp/sequencer_source/'), 
+          c.validate_url('rsync://localhost/tmp/sequencer_source/'),
           'rsync://localhost/tmp/sequencer_source/')
         self.failUnlessEqual(
-          c.validate_url('rsync://localhost/tmp/sequencer_source/bleem'), 
+          c.validate_url('rsync://localhost/tmp/sequencer_source/bleem'),
           'rsync://localhost/tmp/sequencer_source/bleem')
         self.failUnlessEqual(
           c.validate_url('rsync://user@server:1234/other_sequencer'),