From: Diane Trout
Date: Wed, 22 Jan 2014 22:14:26 +0000 (-0800)
Subject: Merge branch 'master' of mus.cacr.caltech.edu:htsworkflow
X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=11b3dcf63286b5b07dfd278e0c96a7fb8f8266f2;hp=7f57cb3f65bcaa2c7de359e3e7c2a0d10d44e0d8

Merge branch 'master' of mus.cacr.caltech.edu:htsworkflow
---

diff --git a/.gitignore b/.gitignore
index 3eb65bb..6b1d699 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,10 +2,14 @@
 *.py[co]
 .coverage
 *,cover
+dist
+docs/build/
 *.egg-info
+htsworkflow.kdev4
+.kateproject
+.kateproject.d/
+.kdev4/
 .noseids
+RELEASE-VERSION
 .ropeproject
 .tox
-dist
-RELEASE-VERSION
-docs/build/
diff --git a/htsworkflow/frontend/experiments/experiments.py b/htsworkflow/frontend/experiments/experiments.py
index f24d13d..9493765 100644
--- a/htsworkflow/frontend/experiments/experiments.py
+++ b/htsworkflow/frontend/experiments/experiments.py
@@ -14,6 +14,7 @@ from django.core.exceptions import ObjectDoesNotExist
 from django.core.mail import send_mail, mail_admins
 from django.http import HttpResponse, Http404
 from django.conf import settings
+from django.utils import timezone
 
 from htsworkflow.frontend.auth import require_api_key
 from htsworkflow.frontend.experiments.models import \
@@ -179,7 +180,7 @@ def updStatus(request):
     rec.run_status = UpdatedStatus
 
     #if there's a message update that too
-    mytimestamp = datetime.now().__str__()
+    mytimestamp = timezone.now().__str__()
     mytimestamp = re.sub(pattern=":[^:]*$",repl="",string=mytimestamp)
     if request.REQUEST.has_key('msg'):
         rec.run_note += ", "+request.REQUEST['msg']+" ("+mytimestamp+")"
@@ -325,7 +326,7 @@ def estimateFlowcellTimeRemaining(flowcell):
     estimate_mid = estimateFlowcellDuration(flowcell)
 
     # offset for how long we've been running
-    running_time = datetime.now() - flowcell.run_date
+    running_time = timezone.now() - flowcell.run_date
     estimate_mid -= running_time
 
     return estimate_mid
diff --git a/htsworkflow/frontend/experiments/fixtures/test_flowcells.json b/htsworkflow/frontend/experiments/fixtures/test_flowcells.json
index d84bf17..a89d2c4 100644
--- a/htsworkflow/frontend/experiments/fixtures/test_flowcells.json
+++ b/htsworkflow/frontend/experiments/fixtures/test_flowcells.json
@@ -7,12 +7,12 @@
    "is_active": true,
    "is_superuser": false,
    "is_staff": false,
-   "last_login": "2009-01-01 00:00:01",
+   "last_login": "2009-01-01T00:00:01-0800",
    "groups": [],
    "user_permissions": [],
    "password": "sha1$foo$5e4eefec1144a04becfb7da79244f07c487fc345",
    "email": "",
-   "date_joined": "2009-01-01 00:01:01"
+   "date_joined": "2009-01-01T00:01:01-0800"
    }
 },
 {"pk": 5, "model": "samples.htsuser",
@@ -26,12 +26,12 @@
    "is_active": true,
    "is_superuser": false,
    "is_staff": true,
-   "last_login": "2009-01-01 00:00:01",
+   "last_login": "2009-01-01T00:00:01-0800",
    "groups": [],
    "user_permissions": [],
    "password": "sha1$foo$5e4eefec1144a04becfb7da79244f07c487fc345",
    "email": "",
-   "date_joined": "2009-01-01 00:01:01"
+   "date_joined": "2009-01-01T00:01:01-0800"
    }
 },
 {"pk": 6, "model": "samples.htsuser",
@@ -45,12 +45,12 @@
    "is_active": true,
    "is_superuser": true,
    "is_staff": true,
-   "last_login": "2009-01-01 00:00:01",
+   "last_login": "2009-01-01T00:00:01-0800",
    "groups": [],
    "user_permissions": [],
    "password": "sha1$foo$5e4eefec1144a04becfb7da79244f07c487fc345",
    "email": "",
-   "date_joined": "2009-01-01 00:01:01"
+   "date_joined": "2009-01-01T00:01:01-0800"
    }
 },
 {"pk": 7, "model": "samples.htsuser",
@@ -122,7 +122,7 @@
 {"pk": 153, "model": "experiments.flowcell",
  "fields": {
     "paired_end": true,
-    "run_date": "2007-09-27 22:12:13",
+    "run_date": "2007-09-27T22:12:13-0800",
     "read_length": 36,
     "notes": "",
     "advanced_run": false,
@@ -487,7 +487,7 @@
 {"pk": 152, "model": "experiments.flowcell",
  "fields": {
     "paired_end": false,
-    "run_date": "2009-09-10 18:30:15",
+    "run_date": "2009-09-10T18:30:15-0800",
     "read_length": 38,
     "notes": "328461 4897273 RGT-0248815\r\n328479 4897265 RGT-0249274\r\n330421 4822845 SR-0005496",
     "advanced_run": false,
@@ -811,7 +811,7 @@
 {"pk": 151, "model": "experiments.flowcell",
  "fields": {
     "paired_end": false,
-    "run_date": "2009-09-08 15:39:28",
+    "run_date": "2009-09-08T15:39:28-0800",
     "read_length": 38,
     "notes": "Rebuild of broken flowcell\r\n\r\n328461 4820241 RGT-0215719\r\n328479 4897265 RGT-0249510\r\n330421 4822845 SR-0005402\r\n",
     "advanced_run": false,
@@ -1250,7 +1250,7 @@
 {"pk": 200, "model": "experiments.flowcell",
  "fields": {
     "paired_end": true,
-    "run_date": "2007-09-27 22:12:13",
+    "run_date": "2007-09-27T22:12:13-0800",
     "read_length": 36,
     "notes": "",
     "advanced_run": false,
diff --git a/htsworkflow/frontend/experiments/models.py b/htsworkflow/frontend/experiments/models.py
index e3771cb..ce2f6b7 100644
--- a/htsworkflow/frontend/experiments/models.py
+++ b/htsworkflow/frontend/experiments/models.py
@@ -9,6 +9,7 @@ import uuid
 from django.conf import settings
 from django.core.exceptions import ObjectDoesNotExist
 from django.core import urlresolvers
+from django.utils import timezone
 from django.db import models
 from django.db.models.signals import post_init, pre_save
 
@@ -16,6 +17,8 @@ from htsworkflow.frontend.samples.models import Library
 from htsworkflow.util.conversion import parse_flowcell_id
 from htsworkflow.pipelines import runfolder
 
+import pytz
+
 LOGGER = logging.getLogger(__name__)
 default_pM = 5
 try:
@@ -219,7 +222,7 @@ class FlowCell(models.Model):
 
     def import_data_run(self, relative_pathname, run_xml_name, force=False):
         """Given a result directory import files"""
-        now = datetime.datetime.now()
+        now = timezone.now()
         run_dir = get_absolute_pathname(relative_pathname)
         run_xml_path = os.path.join(run_dir, run_xml_name)
 
@@ -243,7 +246,8 @@ class FlowCell(models.Model):
             run.runfolder_name = run_xml_data.runfolder_name
             run.cycle_start = run_xml_data.image_analysis.start
             run.cycle_stop = run_xml_data.image_analysis.stop
-            run.run_start_time = run_xml_data.image_analysis.date
+            naive_run_start_time = datetime.datetime.fromordinal(run_xml_data.image_analysis.date.toordinal())
+            run.run_start_time = pytz.timezone(settings.TIME_ZONE).localize(naive_run_start_time)
             run.image_software = run_xml_data.image_analysis.software
             run.image_version = run_xml_data.image_analysis.version
             run.basecall_software = run_xml_data.bustard.software
@@ -253,7 +257,7 @@ class FlowCell(models.Model):
             run.alignment_software = run_xml_data.gerald.software
             run.alignment_version = run_xml_data.gerald.version
 
-            run.last_update_time = datetime.datetime.now()
+            run.last_update_time = timezone.now()
             run.save()
 
             run.update_result_files()
@@ -356,7 +360,7 @@ class DataRun(models.Model):
 
                 self.datafile_set.add(newfile)
 
-        self.last_update_time = datetime.datetime.now()
+        self.last_update_time = timezone.now()
 
     def lane_files(self):
         lanes = {}
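
The recurring change above swaps naive datetime.datetime.now() calls for django.utils.timezone.now(). With USE_TZ = True (see htsworkflow/settings.py further down), Django hands back timezone-aware datetimes, and arithmetic mixing aware and naive values fails outright; that is also why import_data_run now localizes the runfolder date with pytz, and why the fixture timestamps above gained explicit -0800 offsets. A minimal standalone sketch of the failure mode being avoided (illustration only, not part of the patch; assumes pytz is installed):

    import datetime
    import pytz

    pacific = pytz.timezone('America/Los_Angeles')
    naive = datetime.datetime(2009, 9, 10, 18, 30, 15)
    aware = pacific.localize(naive)   # what import_data_run now does for run_start_time

    try:
        print(aware - naive)          # e.g. timezone.now() - flowcell.run_date
    except TypeError as e:
        print('cannot mix naive and aware datetimes: %s' % e)
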
diff --git a/htsworkflow/frontend/experiments/test_experiments.py b/htsworkflow/frontend/experiments/test_experiments.py
index 5878d72..11214b1 100644
--- a/htsworkflow/frontend/experiments/test_experiments.py
+++ b/htsworkflow/frontend/experiments/test_experiments.py
@@ -29,16 +29,6 @@ LANE_SET = range(1,9)
 NSMAP = {'libns':'http://jumpgate.caltech.edu/wiki/LibraryOntology#'}
 
 from django.db import connection
-OLD_DB_NAME = settings.DATABASE_NAME
-VERBOSITY = 0
-def setUpModule():
-    setup_test_environment()
-    settings.DEBUG = False
-    connection.creation.create_test_db(VERBOSITY)
-
-def tearDownModule():
-    connection.creation.destroy_test_db(OLD_DB_NAME, VERBOSITY)
-    teardown_test_environment()
 
 class ClusterStationTestCases(TestCase):
     fixtures = ['test_flowcells.json']
@@ -687,16 +677,6 @@ class TestSequencer(TestCase):
         errmsgs = list(inference.run_validation())
         self.assertEqual(len(errmsgs), 0)
 
-
-OLD_DB = settings.DATABASES['default']['NAME']
-def setUpModule():
-    setup_test_environment()
-    connection.creation.create_test_db()
-
-def tearDownModule():
-    connection.creation.destroy_test_db(OLD_DB)
-    teardown_test_environment()
-
 def suite():
     from unittest2 import TestSuite, defaultTestLoader
     suite = TestSuite()
diff --git a/htsworkflow/frontend/inventory/test_inventory.py b/htsworkflow/frontend/inventory/test_inventory.py
index 86d37b7..d7c2336 100644
--- a/htsworkflow/frontend/inventory/test_inventory.py
+++ b/htsworkflow/frontend/inventory/test_inventory.py
@@ -113,15 +113,6 @@ class InventoryTestCase(TestCase):
         flowcells = [ str(x.uri) for x in targets]
         return flowcells
 
-OLD_DB = settings.DATABASES['default']['NAME']
-def setUpModule():
-    setup_test_environment()
-    connection.creation.create_test_db()
-
-def tearDownModule():
-    connection.creation.destroy_test_db(OLD_DB)
-    teardown_test_environment()
-
 def suite():
     from unittest2 import TestSuite, defaultTestLoader
     suite = TestSuite()
diff --git a/htsworkflow/frontend/samples/test_samples.py b/htsworkflow/frontend/samples/test_samples.py
index f0844e5..594f281 100644
--- a/htsworkflow/frontend/samples/test_samples.py
+++ b/htsworkflow/frontend/samples/test_samples.py
@@ -332,15 +332,6 @@ def get_rdf_memory_model():
     model = RDF.Model(storage)
     return model
 
-OLD_DB = settings.DATABASES['default']['NAME']
-def setUpModule():
-    setup_test_environment()
-    connection.creation.create_test_db()
-
-def tearDownModule():
-    connection.creation.destroy_test_db(OLD_DB)
-    teardown_test_environment()
-
 def suite():
     from unittest2 import TestSuite, defaultTestLoader
     suite = TestSuite()
diff --git a/htsworkflow/pipelines/sequences.py b/htsworkflow/pipelines/sequences.py
index 87212dd..2aba709 100644
--- a/htsworkflow/pipelines/sequences.py
+++ b/htsworkflow/pipelines/sequences.py
@@ -12,7 +12,7 @@ from urlparse import urljoin, urlparse
 import RDF
 from htsworkflow.util.rdfhelp import libraryOntology as libNS
 from htsworkflow.util.rdfhelp import toTypedNode, fromTypedNode, rdfNS, \
-     stripNamespace, dump_model, simplify_uri
+     strip_namespace, dump_model, simplify_uri
 
 LOGGER = logging.getLogger(__name__)
 
@@ -222,7 +222,7 @@ class SequenceFile(object):
             raise KeyError(u"%s not found" % (unicode(seq_id),))
 
         seq_type_node = model.get_target(seq_id, libNS['file_type'])
-        seq_type = stripNamespace(libNS, seq_type_node)
+        seq_type = strip_namespace(libNS, seq_type_node)
 
         path = urlparse(str(seq_id.uri)).path
         flowcellNode = get_one(seq_id, libNS['flowcell'])
diff --git a/htsworkflow/settings.py b/htsworkflow/settings.py
index b44a588..ba2137a 100644
--- a/htsworkflow/settings.py
+++ b/htsworkflow/settings.py
@@ -94,6 +94,7 @@ USE_L10N = True
 
 USE_TZ = True
 
+TIME_ZONE='America/Los_Angeles'
 
 # Static files (CSS, JavaScript, Images)
 # https://docs.djangoproject.com/en/1.6/howto/static-files/
@@ -105,6 +106,10 @@ STATIC_URL = '/static/'
 # Update this in settings_local to point to your flowcell result directory
 RESULT_HOME_DIR = os.path.join(BASE_DIR, 'test', 'results')
 
+# configure who is sending email and who should get BCCs of announcements
+NOTIFICATION_SENDER = "noreply@example.com"
+NOTIFICATION_BCC=[]
+
 try:
     # allow local customizations
     from settings_local import *
diff --git a/htsworkflow/submission/condorfastq.py b/htsworkflow/submission/condorfastq.py
index 17e4633..ffbd8e0 100644
--- a/htsworkflow/submission/condorfastq.py
+++ b/htsworkflow/submission/condorfastq.py
@@ -16,7 +16,7 @@ from htsworkflow.pipelines import desplit_fastq
 from htsworkflow.submission.fastqname import FastqName
 from htsworkflow.util.rdfhelp import get_model, dump_model, load_into_model, \
      fromTypedNode, \
-     stripNamespace
+     strip_namespace
 from htsworkflow.util.rdfns import *
 from htsworkflow.util.conversion import parse_flowcell_id
 
@@ -347,7 +347,7 @@ class SequenceResult(object):
     ispaired = property(_get_ispaired)
 
     def _get_filetype(self):
-        return stripNamespace(libraryOntology, self._filetype)
+        return strip_namespace(libraryOntology, self._filetype)
     filetype = property(_get_filetype)
 
     def _get_path(self):
diff --git a/htsworkflow/submission/encoded.py b/htsworkflow/submission/encoded.py
new file mode 100644
index 0000000..1228d38
--- /dev/null
+++ b/htsworkflow/submission/encoded.py
@@ -0,0 +1,327 @@
+"""Interface with encoded software for ENCODE3 data submission & warehouse
+
+This allows retrieving objects from the encoded server as JSON or JSON-LD.
+"""
+
+
+from __future__ import print_function
+import collections
+import logging
+import json
+import jsonschema
+import requests
+from requests.utils import urlparse, urlunparse
+import types
+from urlparse import urljoin
+
+LOGGER = logging.getLogger(__name__)
+
+ENCODED_CONTEXT = {
+    # The None context will get added to the root of the tree and will
+    # provide common defaults.
+    None: {
+        # terms in multiple encoded objects
+        'description': 'rdf:description',
+        'experiment': {'@type': '@id'},
+        'href': { '@type': '@id' },
+        'lab': { '@type': '@id' },
+        'library': {'@type': '@id' },
+        'pi': { '@type': '@id' },
+        'platform': { '@type': '@id' },
+        'submitted_by': { '@type': '@id' },
+        'url': { '@type': '@id' },
+    },
+    # Identify and mark up contained classes.
+    # e.g. in the tree there was a sub-dictionary named 'biosample'.
+    # That dictionary had a term 'biosample_term_id', which is the
+    # term that should be used as the @id.
+    'biosample': {
+        'biosample_term_id': { '@type': '@id' },
+    },
+    'experiment': {
+        "assay_term_id": { "@type": "@id" },
+    },
+    'file': {
+        'dataset': {'@type': '@id'},
+    },
+    # I tried to use the JSON-LD mapping capabilities to convert the lab
+    # contact information into a vcard record, but the encoded model
+    # didn't lend itself well to the vcard schema
+    #'lab': {
+    #    "address1": "vcard:street-address",
+    #    "address2": "vcard:street-address",
+    #    "city": "vcard:locality",
+    #    "state": "vcard:region",
+    #    "country": "vcard:country"
+    #},
+    'human_donor': {
+        'award': { '@type': '@id' },
+    },
+    'library': {
+        'award': { '@type': '@id' },
+        'nucleic_acid_term_id': { '@type': '@id' }
+    }
+}
+
+#FIXME: this needs to be initialized from rdfns
+ENCODED_NAMESPACES = {
+    # JSON-LD lets you define namespaces so you can use the shortened url syntax.
+    # (instead of http://www.w3.org/2000/01/rdf-schema#label you can do
+    # rdfs:label)
+    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+    "owl": "http://www.w3.org/2002/07/owl#",
+    "dc": "http://purl.org/dc/elements/1.1/",
+    "xsd": "http://www.w3.org/2001/XMLSchema#",
+    "vcard": "http://www.w3.org/2006/vcard/ns#",
+
+    # for some namespaces I made a best guess for the ontology root.
+    "EFO": "http://www.ebi.ac.uk/efo/",  # EFO ontology
+    "OBO": "http://purl.obolibrary.org/obo/",  # OBO ontology
+    "OBI": "http://purl.obolibrary.org/obo/OBI_",  # Ontology for Biomedical Investigations
+    # OBI: available from http://svn.code.sf.net/p/obi/code/releases/2012-07-01/merged/merged-obi-comments.owl
+    'SO': 'http://purl.obolibrary.org/obo/SO_',  # Sequence ontology
+    # SO: available from http://www.berkeleybop.org/ontologies/so.owl
+
+}
+
+ENCODED_SCHEMA_ROOT='/profiles/'
+
+class ENCODED:
+    '''Programmatic access to encoded, the software powering the ENCODE3 submit site.
+    '''
+    def __init__(self, server, contexts=None):
+        self.server = server
+        self.username = None
+        self.password = None
+        self.contexts = contexts if contexts else ENCODED_CONTEXT
+        self.schemas = {}
+
+    def get_auth(self):
+        return (self.username, self.password)
+    auth = property(get_auth)
+
+    def load_netrc(self):
+        import netrc
+        session = netrc.netrc()
+        authenticators = session.authenticators(self.server)
+        if authenticators:
+            self.username = authenticators[0]
+            self.password = authenticators[2]
+
+    def add_jsonld_context(self, tree, default_base):
+        """Add contexts to various objects in the tree.
+
+        tree is a json tree returned from the DCC's encoded database.
+        contexts is a dictionary of dictionaries containing contexts
+        for the various possible encoded classes.
+        base, if supplied, allows setting the base url that relative
+        urls will be resolved against.
+        """
+        self.add_jsonld_child_context(tree, default_base)
+        self.add_jsonld_namespaces(tree['@context'])
+
+    def add_jsonld_child_context(self, obj, default_base):
+        '''Add JSON-LD context to the encoded JSON.
+
+        This is recursive because some of the IDs were relative URLs
+        and I needed a way to properly compute the correct base URL.
+        '''
+        # pretend strings aren't iterable
+        if type(obj) in types.StringTypes:
+            return
+
+        # recurse on container types
+        if isinstance(obj, collections.Sequence):
+            # how should I update lists?
+            for v in obj:
+                self.add_jsonld_child_context(v, default_base)
+            return
+
+        if isinstance(obj, collections.Mapping):
+            for v in obj.values():
+                self.add_jsonld_child_context(v, default_base)
+
+        # we have an object. attach a context to it.
+        if self._is_encoded_object(obj):
+            context = self.create_jsonld_context(obj, default_base)
+            if len(context) > 0:
+                obj.setdefault('@context', {}).update(context)
+
+    def add_jsonld_namespaces(self, context):
+        '''Add shortcut namespaces to a context
+
+        Only needs to be run on the top-most context
+        '''
+        context.update(ENCODED_NAMESPACES)
+
+    def create_jsonld_context(self, obj, default_base):
+        '''Synthesize the context for an encoded type
+
+        self.contexts[None] = default context attributes added to any type
+        self.contexts[type] = context attributes for this type.
+        '''
+        context = {'@base': urljoin(default_base, obj['@id']),
+                   '@vocab': self.get_schema_url(obj)}
+        # add in defaults
+        context.update(self.contexts[None])
+        for t in obj['@type']:
+            if t in self.contexts:
+                context.update(self.contexts[t])
+        return context
+
+    def get_json(self, obj_id, **kwargs):
+        '''GET an ENCODE object as JSON and return as dict
+
+        Uses prepare_url to allow url short-cuts.
+        If no keyword arguments are specified it will default to adding limit=all.
+        Alternative keyword arguments can be passed in and will be sent to the host.
+
+        Known keywords are:
+          limit - (integer or 'all') how many records to return, all for all of them
+          embed - (bool) if true expands linking ids into their associated object.
+          format - text/html or application/json
+        '''
+        if len(kwargs) == 0:
+            kwargs['limit'] = 'all'
+
+        url = self.prepare_url(obj_id)
+        LOGGER.info('requesting url: {}'.format(url))
+
+        # do the request
+        headers = {'content-type': 'application/json'}
+        LOGGER.debug('username: %s, password: %s', self.username, self.password)
+        response = requests.get(url, auth=self.auth, headers=headers, params=kwargs)
+        if not response.status_code == requests.codes.ok:
+            LOGGER.error("Error http status: {}".format(response.status_code))
+            response.raise_for_status()
+        return response.json()
+
+    def get_jsonld(self, obj_id, **kwargs):
+        '''Get ENCODE object as JSONLD annotated with class contexts
+
+        see get_json for documentation about what keywords can be passed.
+        '''
+        url = self.prepare_url(obj_id)
+        json = self.get_json(obj_id, **kwargs)
+        self.add_jsonld_context(json, url)
+        return json
+
+    def get_object_type(self, obj):
+        """Return type for an encoded object
+        """
+        obj_type = obj.get('@type')
+        if obj_type and isinstance(obj_type, collections.Sequence):
+            return obj_type[0]
+
+    def get_schema_url(self, obj):
+        obj_type = self.get_object_type(obj)
+        if obj_type:
+            return self.prepare_url(ENCODED_SCHEMA_ROOT + obj_type + '.json') + '#'
+
+    def _is_encoded_object(self, obj):
+        '''Test to see if an object is a JSON-LD object
+
+        Some of the nested dictionaries lack the @id or @type
+        information necessary to convert them.
+        '''
+        if not isinstance(obj, collections.Iterable):
+            return False
+
+        if '@id' in obj and '@type' in obj:
+            return True
+        return False
+
+
+    def patch_json(self, obj_id, changes):
+        """Given a dictionary of changes push them as an HTTP PATCH request
+        """
+        url = self.prepare_url(obj_id)
+        payload = json.dumps(changes)
+        response = requests.patch(url, auth=self.auth, data=payload)
+        if response.status_code != requests.codes.ok:
+            LOGGER.error("Error http status: {}".format(response.status_code))
+            response.raise_for_status()
+        return response.json()
+
+    def put_json(self, obj_id, new_object):
+        url = self.prepare_url(obj_id)
+        payload = json.dumps(new_object)
+        response = requests.put(url, auth=self.auth, data=payload)
+        if response.status_code != requests.codes.created:
+            LOGGER.error("Error http status: {}".format(response.status_code))
+            response.raise_for_status()
+        return response.json()
+
+    def prepare_url(self, request_url):
+        '''This attempts to provide some convenience for accessing a URL
+
+        Given a url fragment it will default to:
+          * requests over http
+          * requests to self.server
+
+        This allows fairly flexible urls. e.g.
+
+          prepare_url('/experiments/ENCSR000AEG')
+          prepare_url('submit.encodedcc.org/experiments/ENCSR000AEG')
+          prepare_url('http://submit.encodedcc.org/experiments/ENCSR000AEG?limit=all')
+
+        should all return the same url
+        '''
+        # clean up potentially messy urls
+        url = urlparse(request_url)._asdict()
+        if not url['scheme']:
+            url['scheme'] = 'http'
+        if not url['netloc']:
+            url['netloc'] = self.server
+        url = urlunparse(url.values())
+        return url
+
+    def search_jsonld(self, term, **kwargs):
+        '''Send search request to ENCODED
+        '''
+        url = self.prepare_url('/search/')
+        result = self.get_json(url, searchTerm=term, **kwargs)
+        self.convert_search_to_jsonld(result)
+        return result
+
+    def convert_search_to_jsonld(self, result):
+        '''Add the context to a search result
+
+        Also remove hard to handle nested attributes
+        e.g. remove object.term when we have no id
+        '''
+        graph = result['@graph']
+        for i, obj in enumerate(graph):
+            # suppress nested attributes
+            graph[i] = {k: v for k, v in obj.items() if '.' not in k}
+
+        self.add_jsonld_context(result, self.prepare_url(result['@id']))
+        return result
+
+    def validate(self, obj):
+        obj_type = self.get_object_type(obj)
+        schema_url = self.get_schema_url(obj)
+        if not schema_url:
+            raise ValueError("Unable to construct schema url")
+
+        schema = self.schemas.setdefault(obj_type, self.get_json(schema_url))
+        hidden = obj.copy()
+        del hidden['@id']
+        del hidden['@type']
+        jsonschema.validate(hidden, schema)
+
+
+if __name__ == '__main__':
+    # try it
+    from htsworkflow.util.rdfhelp import get_model, dump_model
+    from htsworkflow.util.rdfjsonld import load_into_model
+    from pprint import pprint
+    model = get_model()
+    logging.basicConfig(level=logging.DEBUG)
+    encoded = ENCODED('test.encodedcc.org')
+    encoded.load_netrc()
+    body = encoded.get_jsonld('/experiments/ENCSR000AEC/')
+    pprint(body)
+    load_into_model(model, body)
+    #dump_model(model)
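
The new htsworkflow/submission/encoded.py above is exercised by its own __main__ block; condensed here into a usage sketch (the server name and experiment accession are the example values from that block, and running it requires network access plus matching credentials in ~/.netrc):

    from htsworkflow.submission.encoded import ENCODED

    encoded = ENCODED('test.encodedcc.org')
    encoded.load_netrc()              # username/password read from ~/.netrc

    # plain JSON from the DCC
    body = encoded.get_json('/experiments/ENCSR000AEC/')

    # the same object decorated with @context blocks, ready to be
    # converted to RDF with htsworkflow.util.rdfjsonld (added below)
    jsonld = encoded.get_jsonld('/experiments/ENCSR000AEC/')
    print(jsonld['@context']['@vocab'])
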
diff --git a/htsworkflow/submission/geo.py b/htsworkflow/submission/geo.py
index ef8d945..1d98bd5 100644
--- a/htsworkflow/submission/geo.py
+++ b/htsworkflow/submission/geo.py
@@ -8,7 +8,7 @@ from htsworkflow.submission.submission import Submission
 from htsworkflow.util.rdfhelp import \
      fromTypedNode, \
      geoSoftNS, \
-     stripNamespace, \
+     strip_namespace, \
      submissionOntology
 
 from django.conf import settings
@@ -207,7 +207,7 @@ class GEOSubmission(Submission):
     def query_to_soft_dictionary(self, results, heading):
         attributes = []
         for r in results:
-            name = stripNamespace(geoSoftNS, r['name'])
+            name = strip_namespace(geoSoftNS, r['name'])
             if name is not None:
                 if name.lower() == heading.lower():
                     name = '^' + name
diff --git a/htsworkflow/submission/submission.py b/htsworkflow/submission/submission.py
index c5a3f34..897053f 100644
--- a/htsworkflow/submission/submission.py
+++ b/htsworkflow/submission/submission.py
@@ -11,7 +11,7 @@ from htsworkflow.util.rdfhelp import \
      dump_model, \
      fromTypedNode, \
      get_model, \
-     stripNamespace, \
+     strip_namespace, \
      toTypedNode
 from htsworkflow.util.rdfns import *
 from htsworkflow.util.hashfile import make_md5sum
@@ -133,7 +133,7 @@ class Submission(object):
                 RDF.Statement(fileNode,
                               libraryOntology['library'],
                               libNode))
-
+        LOGGER.debug("Done.")
 
     def make_file_node(self, pathname, submissionNode):
@@ -175,7 +175,7 @@ class Submission(object):
         except ValueError:
             # currently its just ignore it if the fastq name parser fails
             return
-
+
         terms = [('flowcell', libraryOntology['flowcell_id']),
                  ('lib_id', libraryOntology['library_id']),
                  ('lane', libraryOntology['lane_number']),
@@ -186,7 +186,7 @@ class Submission(object):
             if value is not None:
                 s = RDF.Statement(fileNode, model_term, toTypedNode(value))
                 self.model.append(s)
-
+
     def add_label(self, file_type, file_node, lib_node):
         """Add rdfs:label to a file node
         """
@@ -390,7 +390,7 @@ def list_submissions(model):
     query = RDF.SPARQLQuery(query_body)
     rdfstream = query.execute(model)
     for row in rdfstream:
-        s = stripNamespace(submissionLog, row['submission'])
+        s = strip_namespace(submissionLog, row['submission'])
         if s[-1] in ['#', '/', '?']:
             s = s[:-1]
         yield s
diff --git a/htsworkflow/submission/test/library.json b/htsworkflow/submission/test/library.json
new file mode 100644
index 0000000..f694e10
--- /dev/null
+++ b/htsworkflow/submission/test/library.json
@@ -0,0 +1,359 @@
+{
+    "properties": {
+        "accession": {
+            "comment": "Only admins are allowed to set or update this value.",
+            "accessionType": "LB",
+            "description": "A unique identifier to be used to reference the object.",
+            "permission": "import_items",
+            "serverDefault": "accession",
+            "format": "accession",
+            "title": "Accession",
+            "type": "string"
+        },
+        "alternate_accessions": {
+            "description": "Accessions previously assigned to objects that have been merged with this object.",
+            "title": "Alternate accessions",
+            "default": [],
+            "items": {
+                "comment": "Only admins are allowed to set or update this value.",
+                "format": "accession",
+                "type": "string",
+                "description": "An accession previously assigned to an object that has been merged with this object.",
+                "title": "Alternate Accession"
+            },
+            "permission": "import_items",
+            "type": "array"
+        },
+        "lot_id": {
+            "type": "string",
+            "description": "The lot identifier provided by the originating lab or vendor.",
+            "title": "Lot ID"
+        },
+        "aliases": {
+            "default": [],
+            "items": {
+                "comment": "Current convention is colon separated lab name and lab identifier. (e.g. john-doe:42).",
+                "pattern": "^\\S+:\\S+",
+                "type": "string",
+                "description": "A lab specific identifier to reference an object.",
+                "title": "Lab alias"
+            },
+            "type": "array",
+            "description": "Lab specific identifiers to reference an object.",
+            "title": "Lab aliases"
+        },
+        "submitted_by": {
+            "comment": "Do not submit, value is assigned by the server. The user that created the object.",
+            "linkTo": "user",
+            "title": "Submitted by",
+            "serverDefault": "userid",
+            "permission": "import_items",
+            "type": "string"
+        },
+        "documents": {
+            "default": [],
+            "items": {
+                "comment": "See document.json for available identifiers.",
+                "type": "string",
+                "description": "A document that describe the preparation of the library.",
+                "linkTo": "document",
+                "title": "Protocol document"
+            },
+            "type": "array",
+            "description": "Documents that describe the preparation of the library.",
+            "title": "Protocol documents"
+        },
+        "fragmentation_date": {
+            "comment": "Date can be submitted in as YYYY-MM-DD or YYYY-MM-DDTHH:MM:SSTZD (TZD is the time zone designator; use Z to express time in UTC or for time expressed in local time add a time zone offset from UTC +HH:MM or -HH:MM).",
+            "type": "string",
+            "anyOf": [
+                {
+                    "format": "date-time"
+                },
+                {
+                    "format": "date"
+                }
+            ],
+            "description": "The date that the nucleic acid was fragmented.",
+            "title": "Fragmentation date"
+        },
+        "uuid": {
+            "format": "uuid",
+            "serverDefault": "uuid4",
+            "title": "UUID",
+            "requestMethod": "POST",
+            "permission": "import_items",
+            "type": "string"
+        },
+        "strand_specificity": {
+            "default": false,
+            "type": "boolean",
+            "description": "The preparation of the library using a strand-specific protocol.",
+            "title": "Strand specificity"
+        },
+        "fragmentation_method": {
+            "description": "A short description or reference of the nucleic acid fragmentation protocol used in library preparation, if applicable.",
+            "title": "Fragmentation method",
+            "default": "see document",
+            "format": "semi-controlled",
+            "XXXenum": [
+                "sonication",
+                "see document",
+                "covaris shearing",
+                "chemical (part of Illumina TruSeq mRNA Kit)",
+                "Illumina/Nextera tagmentation",
+                "bioruptor twin",
+                "n/a"
+            ],
+            "type": "string"
+        },
+        "schema_version": {
+            "comment": "Do not submit, value is assigned by the server. The version of the JSON schema that the server uses to validate the object. Schema version indicates generation of schema used to save version to to enable upgrade steps to work. Individual schemas should set the default.",
+            "pattern": "^\\d+(\\.\\d+)*$",
+            "default": "2",
+            "type": "string",
+            "requestMethod": []
+        },
+        "lysis_method": {
+            "description": "A short description or reference of the cell lysis protocol used in library preparation, if applicable",
+            "title": "Lysis method",
+            "default": "see document",
+            "format": "semi-controlled",
+            "XXXenum": [
+                "miRNeasy Mini kit (QIAGEN cat#:217004)",
+                "Trizol (LifeTech cat#: 15596-018)",
+                "Ambion mirVana",
+                "Qiagen #74204",
+                "QIAGEN DNeasy Blood & Tissue Kit",
+                "see document",
+                "n/a"
+            ],
+            "type": "string"
+        },
+        "source": {
+            "comment": "See source.json for available identifiers.",
+            "title": "Source",
+            "type": "string",
+            "description": "The originating lab or vendor.",
+            "linkTo": "source"
+        },
+        "biosample": {
+            "comment": "See biosample.json for available identifiers.",
+            "title": "Biosample",
+            "type": "string",
+            "description": "The biosample that nucleic acid was isolated from to generate the library.",
+            "linkTo": "biosample"
+        },
+        "extraction_method": {
+            "description": "A short description or reference of the nucleic acid extraction protocol used in library preparation, if applicable.",
+            "title": "Extraction method",
+            "default": "see document",
+            "format": "semi-controlled",
+            "XXXenum": [
+                "miRNeasy Mini kit (QIAGEN cat#:217004)",
+                "Trizol (LifeTech cat#: 15596-018)",
+                "Ambion mirVana",
+                "Qiagen #74204",
+                "QIAGEN DNeasy Blood & Tissue Kit",
+                "see document",
+                "n/a"
+            ],
+            "type": "string"
+        },
+        "library_size_selection_method": {
+            "description": "A short description or reference of the size selection protocol used in library preparation, if applicable.",
+            "title": "Size selection method",
+            "default": "see document",
+            "format": "semi-controlled",
+            "XXXenum": [
+                "gel",
+                "see document",
+                "SPRI beads"
+            ],
+            "type": "string"
+        },
+        "status": {
+            "default": "CURRENT",
+            "enum": [
+                "CURRENT",
+                "DELETED"
+            ],
+            "type": "string",
+            "title": "Status"
+        },
+        "nucleic_acid_term_name": {
+            "enum": [
+                "DNA",
+                "RNA",
+                "polyadenylated mRNA",
+                "miRNA"
+            ],
+            "type": "string",
+            "description": "SO (Sequence Ontology) term best matching the nucleic acid isolated to generate the library (e.g. 'RNA' for a total RNA library, even if that library is subsequently reverse transcribed for DNA sequencing.)",
+            "title": "Nucleic acid term"
+        },
+        "treatments": {
+            "default": [],
+            "items": {
+                "comment": "See treatment.json for available identifiers.",
+                "type": "string",
+                "linkTo": "treatment",
+                "title": "Treatment"
+            },
+            "type": "array",
+            "title": "Treatments"
+        },
+        "award": {
+            "comment": "See award.json for list of available identifiers.",
+            "title": "Grant",
+            "type": "string",
+            "description": "Grant associated with the submission.",
+            "linkTo": "award"
+        },
+        "depleted_in_term_name": {
+            "default": [],
+            "items": {
+                "enum": [
+                    "rRNA",
+                    "polyadenylated mRNA",
+                    "capped mRNA"
+                ],
+                "type": "string",
+                "description": "SO (Sequence Ontology) term best matching the nucleic acid that was diminished from the library.",
+                "title": "Depleted in term"
+            },
+            "type": "array"
+        },
+        "paired_ended": {
+            "default": false,
+            "XXXnote": "Is this redundant to the field found in replicate.json",
+            "type": "boolean",
+            "description": "Whether or not the library was prepared with paired ends",
+            "title": "Paired ended"
+        },
+        "lab": {
+            "comment": "See lab.json for list of available identifiers.",
+            "title": "Lab",
+            "type": "string",
+            "description": "Lab associated with the submission.",
+            "linkTo": "lab"
+        },
+        "depleted_in_term_id": {
+            "default": [],
+            "items": {
+                "comment": "Based on the choice in depleted_in_term_name use the following guide: rRNA - SO:0000252, polyadenylated mRNA - SO:0000871 or capped mRNA - SO:0000862",
+                "enum": [
+                    "SO:0000252",
+                    "SO:0000871",
+                    "SO:0000862"
+                ],
+                "type": "string",
+                "description": "SO (Sequence Ontology) identifier best matching the nucleic acid that was diminished from the library.",
+                "title": "Depleted in ID"
+            },
+            "type": "array"
+        },
+        "product_id": {
+            "type": "string",
+            "description": "The product identifier provided by the originating lab or vendor.",
+            "title": "Product ID"
+        },
+        "size_range": {
+            "pattern": "(^[0-9]+-[0-9]+$|^[<>][0-9]+$)",
+            "type": "string",
+            "description": "The measured size range of the purified nucleic acid, in kD.",
+            "title": "Size range"
+        },
+        "notes": {
+            "title": "Notes",
+            "type": "string",
+            "description": "Additional information.",
+            "permission": "import_items"
+        },
+        "nucleic_acid_term_id": {
+            "comment": "Based on the choice in nucleic_acid_term_name use the following guide: DNA - SO:0000352, RNA - SO:0000356, polyadenylated mRNA - SO:0000871 or miRNA - SO:0000276",
+            "enum": [
+                "SO:0000352",
+                "SO:0000356",
+                "SO:0000871",
+                "SO:0000276"
+            ],
+            "type": "string",
+            "description": "SO (Sequence Ontology) identifier best matching the nucleic acid isolated to generate the library (e.g. 'SO:0000356' for a total RNA library, even if that library is subsequently reverse transcribed for DNA sequencing.)",
+            "title": "Nucleic acid ID"
+        },
+        "nucleic_acid_starting_quantity": {
+            "pattern": "[0-9]+",
+            "type": "string",
+            "description": "The starting amount of nucleic acid before selection and purification.",
+            "title": "Nucleic acid starting quantity"
+        },
+        "date_created": {
+            "comment": "Do not submit, value is assigned by the server. The date the object is created.",
+            "title": "Date created",
+            "serverDefault": "now",
+            "permission": "import_items",
+            "anyOf": [
+                {
+                    "format": "date-time"
+                },
+                {
+                    "format": "date"
+                }
+            ],
+            "type": "string"
+        }
+    },
+    "description": "Schema for submitting a nucleic acid library.",
+    "title": "Library",
+    "required": [
+        "award",
+        "lab",
+        "nucleic_acid_term_id"
+    ],
+    "mixinProperties": [
+        {
+            "$ref": "mixins.json#/schema_version"
+        },
+        {
+            "$ref": "mixins.json#/uuid"
+        },
+        {
+            "$ref": "mixins.json#/accession"
+        },
+        {
+            "$ref": "mixins.json#/aliases"
+        },
+        {
+            "$ref": "mixins.json#/attribution"
+        },
+        {
+            "$ref": "mixins.json#/standard_status"
+        },
+        {
+            "$ref": "mixins.json#/submitted"
+        },
+        {
+            "$ref": "mixins.json#/source"
+        },
+        {
+            "$ref": "mixins.json#/product_id"
+        },
+        {
+            "$ref": "mixins.json#/lot_id"
+        },
+        {
+            "$ref": "mixins.json#/notes"
+        }
+    ],
+    "XXXcomment": "is source required?",
+    "identifyingProperties": [
+        "uuid",
+        "accession",
+        "aliases"
+    ],
+    "additionalProperties": false,
+    "$schema": "http://json-schema.org/draft-04/schema#",
+    "type": "object",
+    "id": "/profiles/library.json"
+}
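
The schema above is a local snapshot of the DCC's /profiles/library.json and drives ENCODED.validate(). A condensed sketch of how it is wired up, following test_validate in test_encoded.py below (the field values come from that test; validation runs offline because the schema is registered by hand):

    import json
    from htsworkflow.submission.encoded import ENCODED

    schema = json.loads(open('htsworkflow/submission/test/library.json').read())

    encode = ENCODED('submit.encodedcc.org')
    encode.schemas[u'library'] = schema   # skip fetching the schema from the server

    obj = {u'@id': u'/libraries/ENCLB045ZZZ/',
           u'@type': [u'library', u'item'],
           u'award': u'/awards/U54HG006998/',
           u'lab': u'/labs/barbara-wold/',
           u'nucleic_acid_term_id': u'SO:0000871'}
    encode.validate(obj)   # raises jsonschema.ValidationError on failure
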
diff --git a/htsworkflow/submission/test/test_condorfastq.py b/htsworkflow/submission/test/test_condorfastq.py
index 09d6808..1e1c2d9 100644
--- a/htsworkflow/submission/test/test_condorfastq.py
+++ b/htsworkflow/submission/test/test_condorfastq.py
@@ -680,16 +680,6 @@ class TestCondorFastq(TestCase):
         self.assertTrue('12345_C02F9ACXX_c202_l3_r2.fastq' in arguments[3])
 
 
-OLD_DB = settings.DATABASES['default']['NAME']
-def setUpModule():
-    setup_test_environment()
-    connection.creation.create_test_db()
-
-def tearDownModule():
-    connection.creation.destroy_test_db(OLD_DB)
-    teardown_test_environment()
-
-
 def suite():
     from unittest2 import TestSuite, defaultTestLoader
     suite = TestSuite()
diff --git a/htsworkflow/submission/test/test_encoded.py b/htsworkflow/submission/test/test_encoded.py
new file mode 100644
index 0000000..675d944
--- /dev/null
+++ b/htsworkflow/submission/test/test_encoded.py
@@ -0,0 +1,189 @@
+import json
+import os
+from pprint import pprint
+from unittest2 import TestCase, TestSuite, defaultTestLoader, skip
+
+from htsworkflow.submission.encoded import (ENCODED,
+                                            ENCODED_CONTEXT,
+                                            ENCODED_NAMESPACES
+)
+
+class TestEncoded(TestCase):
+    def test_prepare_url(self):
+        encode = ENCODED('test.encodedcc.edu')
+
+        tests = [
+            ('/experiments', 'http://test.encodedcc.edu/experiments'),
+            ('/experiments/ENCLB045ZZZ',
+             'http://test.encodedcc.edu/experiments/ENCLB045ZZZ'),
+            ('http://submit.encodedcc.edu/experiments/ENCLB045ZZZ',
+             'http://submit.encodedcc.edu/experiments/ENCLB045ZZZ'),
+        ]
+        for url, result in tests:
+            self.assertEqual(encode.prepare_url(url), result)
+
+    def test_validate(self):
+        """Test validation
+        """
+        schema_file = os.path.join(os.path.dirname(__file__), 'library.json')
+        schema = json.loads(open(schema_file, 'r').read())
+
+        obj = {u'@id': u'/libraries/ENCLB045ZZZ/',
+               u'@type': [u'library', u'item'],
+               u'accession': u'ENCLB045ZZZ',
+               u'aliases': [],
+               u'alternate_accessions': [],
+               u'award': u'/awards/U54HG006998/',
+               u'biosample': u'/biosamples/ENCBS089RNA/',
+               u'date_created': u'2014-01-14T19:44:51.061770+00:00',
+               u'depleted_in_term_id': [],
+               u'depleted_in_term_name': [],
+               u'documents': [],
+               u'extraction_method': u'Ambion mirVana',
+               u'fragmentation_method': u'Illumina/Nextera tagmentation',
+               u'lab': u'/labs/barbara-wold/',
+               u'library_size_selection_method': u'SPRI beads',
+               u'lysis_method': u'Ambion mirVana',
+               u'nucleic_acid_term_id': u'SO:0000871',
+               u'nucleic_acid_term_name': u'polyadenylated mRNA',
+               u'paired_ended': False,
+               u'schema_version': u'2',
+               u'size_range': u'>200',
+               u'status': u'CURRENT',
+               u'strand_specificity': False,
+               u'submitted_by': u'/users/0e3dde9b-aaf9-42dd-87f7-975a85072ed2/',
+               u'treatments': [],
+               u'uuid': u'42c46028-708f-4347-a3df-2c82dfb021c4'}
+        encode = ENCODED('submit.encodedcc.org')
+        encode.schemas[u'library'] = schema
+        encode.validate(obj)
+        self.assertTrue('@id' in obj)
+
+    def test_create_context(self):
+        linked_id = {'@type': '@id'}
+        library = { '@id': '/libraries/1234', '@type': ['library', 'item'] }
+
+        encode = ENCODED('test.encodedcc.org')
+        url = encode.prepare_url(library['@id'])
+        context = encode.create_jsonld_context(library, url)
+        self.assertEqual(context['@vocab'], 'http://test.encodedcc.org/profiles/library.json#')
+        self.assertEqual(context['award'], linked_id )
+        self._verify_context(context, 'library')
+        # namespaces not added yet.
+        self.assertRaises(AssertionError, self._verify_namespaces, context)
+        encode.add_jsonld_namespaces(context)
+        self._verify_namespaces(context)
+
+    def test_add_context(self):
+        """Checking to make sure nested @base and @vocab urls are set correctly
+        """
+        obj = {
+            "nucleic_acid_term_name": "RNA",
+            "accession": "ENCLB044ZZZ",
+            "@id": "/libraries/ENCLB044ZZZ/",
+            "schema_version": "1",
+            "@type": [
+                "library",
+                "item"
+            ],
+            "lysis_method": "Ambion mirVana",
+            "nucleic_acid_term_id": "SO:0000356",
+            "biosample": {
+                "biosample_term_name": "GM12878",
+                "description": "B-lymphocyte, lymphoblastoid, International HapMap Project - CEPH/Utah - European Caucasion, Epstein-Barr Virus",
+                "accession": "ENCBS090RNA",
+                "date_created": "2013-10-29T21:15:29.144260+00:00",
+                "@id": "/biosamples/ENCBS090RNA/",
+                "aliases": [
+                    "brenton-graveley:GM12878-2",
+                    "thomas-gingeras:191WC"
+                ],
+                "organism": "/organisms/human/",
+                "@type": [
+                    "biosample",
+                    "item"
+                ]
+            },
+        }
+
+        encode = ENCODED('test.encodedcc.org')
+        bio_base = encode.prepare_url(obj['biosample']['@id'])
+
+        url = encode.prepare_url('/libraries/ENCLB044ZZZ/?format=json&embed=False')
+        schema_url = encode.get_schema_url(obj)
+        encode.add_jsonld_context(obj, url)
+
+        self.assertEqual(obj['biosample']['@context']['@base'], bio_base)
+        self.assertEqual(obj['@context']['@vocab'], schema_url)
+        self._verify_context(obj['@context'], 'library')
+        self._verify_namespaces(obj['@context'])
+        self._verify_context(obj['biosample']['@context'], 'biosample')
+        self.assertEqual(obj['@context']['rdf'], 'http://www.w3.org/1999/02/22-rdf-syntax-ns#')
+        self.assertEqual(obj['@context']['OBO'], 'http://purl.obolibrary.org/obo/')
+
+
+    def test_convert_search_to_jsonld(self):
+        example = {'count': {'biosamples': 2},
+                   'portal_title': 'ENCODE',
+                   'title': 'Search',
+                   'notification': 'Success',
+                   'filters': [],
+                   '@id': '/search/?searchTerm=wold',
+                   '@type': ['search'],
+                   'facets': [],
+                   '@graph': [{
+                       u'@id': u'/biosamples/ENCBS125ENC/',
+                       u'@type': [u'biosample', u'item'],
+                       u'accession': u'ENCBS125ENC',
+                       u'award.rfa': u'ENCODE2-Mouse',
+                       u'biosample_term_name': u'myocyte',
+                       u'biosample_type': u'in vitro differentiated cells',
+                       u'characterizations.length': [],
+                       u'constructs.length': [],
+                       u'lab.title': u'Barbara Wold, Caltech',
+                       u'life_stage': u'unknown',
+                       u'organism.name': u'mouse',
+                       u'source.title': u'Barbara Wold',
+                       u'status': u'CURRENT',
+                       u'treatments.length': []},
+                      {u'@id': u'/biosamples/ENCBS126ENC/',
+                       u'@type': [u'biosample', u'item'],
+                       u'accession': u'ENCBS126ENC',
+                       u'award.rfa': u'ENCODE2-Mouse',
+                       u'biosample_term_name': u'myocyte',
+                       u'biosample_type': u'in vitro differentiated cells',
+                       u'characterizations.length': [],
+                       u'constructs.length': [],
+                       u'lab.title': u'Barbara Wold, Caltech',
+                       u'life_stage': u'unknown',
+                       u'organism.name': u'mouse',
+                       u'source.title': u'Barbara Wold',
+                       u'status': u'CURRENT',
+                       u'treatments.length': []},
+                   ]}
+
+        encode = ENCODED('test.encodedcc.org')
+        result = encode.convert_search_to_jsonld(example)
+        for obj in result['@graph']:
+            self.assertNotIn('award.rfa', obj)
+
+    def _verify_context(self, context, obj_type):
+        for context_key in [None, obj_type]:
+            for k in ENCODED_CONTEXT[context_key]:
+                self.assertIn(k, context)
+                self.assertEqual(ENCODED_CONTEXT[context_key][k], context[k])
+
+    def _verify_namespaces(self, context):
+        for k in ENCODED_NAMESPACES:
+            self.assertIn(k, context)
+            self.assertEqual(ENCODED_NAMESPACES[k], context[k])
+
+def suite():
+    suite = TestSuite()
+    suite.addTests(
+        defaultTestLoader.loadTestsFromTestCase(TestEncoded))
+    return suite
+
+if __name__ == "__main__":
+    from unittest2 import main
+    main(defaultTest='suite')
diff --git a/htsworkflow/submission/trackhub_submission.py b/htsworkflow/submission/trackhub_submission.py
index e383175..3aa4a96 100644
--- a/htsworkflow/submission/trackhub_submission.py
+++ b/htsworkflow/submission/trackhub_submission.py
@@ -11,7 +11,6 @@ from htsworkflow.submission.submission import Submission
 from htsworkflow.util.rdfhelp import \
      fromTypedNode, \
      geoSoftNS, \
-     stripNamespace, \
      submissionOntology
 from htsworkflow.util.url import parse_ssh_url
 from htsworkflow.util.ucsc import bigWigInfo
@@ -121,9 +120,9 @@ class TrackHubSubmission(Submission):
                 'long_label': str(track_label),
                 'subgroups': track_subgroup,
             }
-
-            LOGGER.debug('track attributes: %s', pformat(attributes))
-            newtrack = Track(**attributes)
+
+            LOGGER.debug('track attributes: %s', pformat(attributes))
+            newtrack = Track(**attributes)
             view.add_tracks([newtrack])
 
         results = hub.render()
@@ -186,10 +185,10 @@ class TrackHubSubmission(Submission):
             value = self.sanitize_name(track[k])
             track_subgroups[k] = value
         return track_subgroups
-
+
     def make_track_type(self, track):
         """Further annotate tracktype.
-
+
         bigWig files can have additional information.
         Add it if we can
         """
         track_type = track['file_type']
diff --git a/htsworkflow/util/rdfhelp.py b/htsworkflow/util/rdfhelp.py
index ac5f6cc..4829441 100644
--- a/htsworkflow/util/rdfhelp.py
+++ b/htsworkflow/util/rdfhelp.py
@@ -214,7 +214,7 @@ def simplify_uri(uri):
             return element
     raise ValueError("Unable to simplify %s" % (uri,))
 
-def stripNamespace(namespace, term):
+def strip_namespace(namespace, term):
     """Remove the namespace portion of a term
 
     returns None if they aren't in common
@@ -232,15 +232,17 @@ def stripNamespace(namespace, term):
     return term_s.replace(namespace._prefix, "")
 
 
-def get_model(model_name=None, directory=None):
+def get_model(model_name=None, directory=None, use_contexts=True):
     if directory is None:
         directory = os.getcwd()
 
+    contexts = 'yes' if use_contexts else 'no'
+
     if model_name is None:
-        storage = RDF.MemoryStorage(options_string="contexts='yes'")
+        storage = RDF.MemoryStorage(options_string="contexts='{}'".format(contexts))
         logger.info("Using RDF Memory model")
     else:
-        options = "contexts='yes',hash-type='bdb',dir='{0}'".format(directory)
+        options = "contexts='{0}',hash-type='bdb',dir='{1}'".format(contexts, directory)
         storage = RDF.HashStorage(model_name, options=options)
         logger.info("Using {0} with options {1}".format(model_name, options))
@@ -281,10 +283,10 @@ def load_into_model(model, parser_name, path, ns=None):
         except RDF.RedlandError, e:
             errmsg = "RDF.RedlandError: {0} {1} tries remaining"
             logger.error(errmsg.format(str(e), retries))
-
+
     if not succeeded:
         logger.warn("Unable to download %s", url)
-
+
     for s in statements:
         conditionally_add_statement(model, s, ns)
diff --git a/htsworkflow/util/rdfjsonld.py b/htsworkflow/util/rdfjsonld.py
new file mode 100644
index 0000000..45046a5
--- /dev/null
+++ b/htsworkflow/util/rdfjsonld.py
@@ -0,0 +1,33 @@
+import RDF
+from pyld import jsonld
+
+def load_into_model(model, json_data):
+    '''Given a PyLD dictionary, load its statements into our Redland model
+    '''
+    json_graphs = jsonld.to_rdf(json_data)
+    for graph in json_graphs:
+        for triple in json_graphs[graph]:
+            stmt = triple_to_statement(triple)
+            model.add_statement(stmt)  #, graph_context)
+
+def triple_to_statement(triple):
+    '''Convert PyLD triple dictionary to a librdf statement
+    '''
+    s = to_node(triple['subject'])
+    p = to_node(triple['predicate'])
+    o = to_node(triple['object'])
+    return RDF.Statement(s, p, o)
+
+def to_node(item):
+    '''Convert a PyLD node to a Redland node'''
+    nodetype = item['type']
+    value = item['value']
+    datatype = item.get('datatype', None)
+
+    if nodetype == 'blank node':
+        return RDF.Node(blank=value)
+    elif nodetype == 'IRI':
+        return RDF.Node(uri_string=str(value))
+    else:
+        return RDF.Node(literal=unicode(value).encode('utf-8'),
+                        datatype=RDF.Uri(datatype))
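
rdfjsonld.py above bridges pyld's triple output into the Redland models used everywhere else in htsworkflow. A sketch mirroring test_load_model in test_rdfjsonld.py below (pyld resolves the remote @context document, so this needs network access):

    from htsworkflow.util.rdfhelp import get_model, dump_model
    from htsworkflow.util.rdfjsonld import load_into_model

    doc = {
        "@context": "http://json-ld.org/contexts/person.jsonld",
        "@id": "http://dbpedia.org/resource/John_Lennon",
        "name": "John Lennon",
        "born": "1940-10-09",
        "spouse": "http://dbpedia.org/resource/Cynthia_Lennon",
    }

    model = get_model(use_contexts=False)   # new rdfhelp option added above
    load_into_model(model, doc)             # one Redland statement per JSON-LD triple
    assert len(model) == 3
    dump_model(model)
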
diff --git a/htsworkflow/util/test/test_rdfhelp.py b/htsworkflow/util/test/test_rdfhelp.py
index 3f328d8..2f14190 100644
--- a/htsworkflow/util/test/test_rdfhelp.py
+++ b/htsworkflow/util/test/test_rdfhelp.py
@@ -19,7 +19,7 @@ from htsworkflow.util.rdfhelp import \
      rdfsNS, \
      remove_schemas, \
      toTypedNode, \
-     stripNamespace, \
+     strip_namespace, \
      simplify_uri, \
      sanitize_literal, \
      xsdNS
@@ -121,17 +121,17 @@ try:
             term = 'foo'
             node = nsOrg[term]
 
-            self.assertEqual(stripNamespace(nsOrg, node), term)
-            self.assertEqual(stripNamespace(nsCom, node), None)
-            self.assertEqual(stripNamespace(nsOrg, node.uri), term)
+            self.assertEqual(strip_namespace(nsOrg, node), term)
+            self.assertEqual(strip_namespace(nsCom, node), None)
+            self.assertEqual(strip_namespace(nsOrg, node.uri), term)
 
         def test_strip_namespace_exceptions(self):
             nsOrg = RDF.NS('example.org/example#')
             nsCom = RDF.NS('example.com/example#')
 
             node = toTypedNode('bad')
-            self.assertRaises(ValueError, stripNamespace, nsOrg, node)
-            self.assertRaises(ValueError, stripNamespace, nsOrg, nsOrg)
+            self.assertRaises(ValueError, strip_namespace, nsOrg, node)
+            self.assertRaises(ValueError, strip_namespace, nsOrg, nsOrg)
 
         def test_simplify_uri(self):
             DATA = [('http://asdf.org/foo/bar', 'bar'),
diff --git a/htsworkflow/util/test/test_rdfjsonld.py b/htsworkflow/util/test/test_rdfjsonld.py
new file mode 100644
index 0000000..8e501ba
--- /dev/null
+++ b/htsworkflow/util/test/test_rdfjsonld.py
@@ -0,0 +1,56 @@
+from unittest2 import TestCase, TestSuite, defaultTestLoader, skip
+
+from htsworkflow.util.rdfjsonld import load_into_model, to_node, triple_to_statement
+from htsworkflow.util.rdfhelp import get_model
+
+jstatement = {
+    'object': {'datatype': u'http://www.w3.org/2001/XMLSchema#dateTime',
+               'type': 'literal',
+               'value': '1940-10-09'},
+    'predicate': {'type': 'IRI',
+                  'value': u'http://schema.org/birthDate'},
+    'subject': {'type': 'blank node',
+                'value': '_:a'}
+}
+doc = {
+    "@context": "http://json-ld.org/contexts/person.jsonld",
+    "@id": "http://dbpedia.org/resource/John_Lennon",
+    "name": "John Lennon",
+    "born": "1940-10-09",
+    "spouse": "http://dbpedia.org/resource/Cynthia_Lennon"
+}
+
+class TestJsonLD(TestCase):
+    def test_to_node(self):
+        obj = to_node(jstatement['object'])
+        self.assertTrue(obj.is_literal())
+        self.assertEqual(str(obj), '1940-10-09')
+        pred = to_node(jstatement['predicate'])
+        self.assertTrue(pred.is_resource())
+        self.assertEqual(str(pred.uri), jstatement['predicate']['value'])
+        subj = to_node(jstatement['subject'])
+        self.assertTrue(subj.is_blank())
+
+    def test_to_statement(self):
+        stmt = triple_to_statement(jstatement)
+        self.assertTrue(stmt.object.is_literal())
+        self.assertEqual(str(stmt.object), '1940-10-09')
+        self.assertTrue(stmt.predicate.is_resource())
+        self.assertEqual(str(stmt.predicate.uri), jstatement['predicate']['value'])
+        self.assertTrue(stmt.subject.is_blank())
+
+    def test_load_model(self):
+        model = get_model(use_contexts=False)
+        self.assertEqual(len(model), 0)
+        load_into_model(model, doc)
+        self.assertEqual(len(model), 3)
+
+def suite():
+    suite = TestSuite()
+    suite.addTests(
+        defaultTestLoader.loadTestsFromTestCase(TestJsonLD))
+    return suite
+
+if __name__ == "__main__":
+    from unittest2 import main
+    main(defaultTest='suite')
diff --git a/setup.py b/setup.py
index 2d63df1..f37b3ad 100644
--- a/setup.py
+++ b/setup.py
@@ -36,6 +36,8 @@ setup(
         'benderjab >= 0.2',
         'httplib2',
         'keyring',
+        'PyLD',
+        'requests',
         # This dependency is redland librdf, which doesn't have a public egg
         #'librdf >= 1.0.14',
     ],
diff --git a/test/test_copier.py b/test/test_copier.py
index 3e26cc6..a2dd5d7 100644
--- a/test/test_copier.py
+++ b/test/test_copier.py
@@ -1,9 +1,15 @@
-from unittest2 import TestCase
+from unittest import TestCase, skipIf
 from StringIO import StringIO
 
-from htsworkflow.automation import copier
 from htsworkflow.automation.solexa import is_runfolder
 
+try:
+    from htsworkflow.automation import copier
+    BENDERJAB_UNAVAILABLE = False
+except ImportError as e:
+    BENDERJAB_UNAVAILABLE = True
+
+@skipIf(BENDERJAB_UNAVAILABLE, "Can't test copier daemon without a working benderjab")
 class testCopier(TestCase):
     def test_empty_config(self):
         cfg = StringIO("""[fake]
@@ -11,9 +17,9 @@ something: unrelated
 """)
         bot = copier.CopierBot('fake', configfile=cfg)
         self.failUnlessRaises(RuntimeError, bot.read_config)
-
+
     def test_full_config(self):
-        cfg = StringIO("""[copier]
+        cfg = StringIO("""[copier]
 jid: copier@example.fake
 password: badpassword
 authorized_users: user1@example.fake user2@example.fake
@@ -32,7 +38,7 @@ notify_users: user3@example.fake
         self.failUnlessEqual(len(c.authorized_users), 2)
         self.failUnlessEqual(c.authorized_users[0], 'user1@example.fake')
         self.failUnlessEqual(c.authorized_users[1], 'user2@example.fake')
-        self.failUnlessEqual(c.rsync.source_base_list[0],
+        self.failUnlessEqual(c.rsync.source_base_list[0],
                              'rsync://localhost/tmp/sequencer_source/')
         self.failUnlessEqual(c.rsync.dest_base, '/tmp/sequencer_destination')
         self.failUnlessEqual(len(c.notify_users), 1)
@@ -40,16 +46,16 @@ notify_users: user3@example.fake
         self.failUnlessEqual(c.validate_url('rsync://other/tmp'), None)
         self.failUnlessEqual(c.validate_url('http://localhost/tmp'), None)
         # In the rsync process the URL gets a trailing '/' added to it
-        # But in the bot config its still slash-less.
+        # But in the bot config it's still slash-less.
         # It is debatable when to add the trailing slash.
         self.failUnlessEqual(
-            c.validate_url('rsync://localhost/tmp/sequencer_source'),
-            'rsync://localhost/tmp/sequencer_source')
+            c.validate_url('rsync://localhost/tmp/sequencer_source'),
+            'rsync://localhost/tmp/sequencer_source')
         self.failUnlessEqual(
-            c.validate_url('rsync://localhost/tmp/sequencer_source/'),
+            c.validate_url('rsync://localhost/tmp/sequencer_source/'),
             'rsync://localhost/tmp/sequencer_source/')
         self.failUnlessEqual(
-            c.validate_url('rsync://localhost/tmp/sequencer_source/bleem'),
+            c.validate_url('rsync://localhost/tmp/sequencer_source/bleem'),
             'rsync://localhost/tmp/sequencer_source/bleem')
         self.failUnlessEqual(
             c.validate_url('rsync://user@server:1234/other_sequencer'),
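
A closing note on prepare_url, since every request in the new encoded module funnels through it (expected values are taken from test_prepare_url in test_encoded.py above): bare paths and host-less urls are completed against the configured server, while fully qualified urls pass through untouched.

    from htsworkflow.submission.encoded import ENCODED

    encode = ENCODED('test.encodedcc.edu')
    assert encode.prepare_url('/experiments') == \
        'http://test.encodedcc.edu/experiments'
    assert encode.prepare_url('http://submit.encodedcc.edu/experiments/ENCLB045ZZZ') == \
        'http://submit.encodedcc.edu/experiments/ENCLB045ZZZ'
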