From: Diane Trout Date: Mon, 21 Oct 2013 20:59:29 +0000 (-0700) Subject: Merge branch 'django1.4' of mus.cacr.caltech.edu:htsworkflow into django1.4 X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=d3805853ee6fde6a1f18dde68d5f4c132d28e9bf;hp=aa47067f5a80eef8eccd50b3c0f478f79cf0caf8 Merge branch 'django1.4' of mus.cacr.caltech.edu:htsworkflow into django1.4 --- diff --git a/encode_submission/encode3.py b/encode_submission/encode3.py index 875d3bd..a77151e 100644 --- a/encode_submission/encode3.py +++ b/encode_submission/encode3.py @@ -77,6 +77,12 @@ def main(cmdline=None): INDENTED.join(submission_names))) elif len(submission_names) == 1: name = submission_names[0] + + if name: + submission_uri = get_submission_uri(name) + logger.info('Submission URI: %s', name) + else: + logger.debug('No name, unable to create submission ur') mapper = None if opts.make_track_hub: @@ -85,8 +91,6 @@ def main(cmdline=None): baseurl=opts.make_track_hub, baseupload=opts.track_hub_upload, host=opts.host) - submission_uri = get_submission_uri(name) - if opts.load_rdf is not None: if submission_uri is None: @@ -117,6 +121,8 @@ def main(cmdline=None): if opts.scan_submission: if name is None: parser.error("Please define a submission name") + if mapper is None: + parser.error("Scan submission needs --make-track-hub=public-url") mapper.scan_submission_dirs(results) if opts.make_track_hub: diff --git a/htsworkflow/submission/submission.py b/htsworkflow/submission/submission.py index 12a5154..6268970 100644 --- a/htsworkflow/submission/submission.py +++ b/htsworkflow/submission/submission.py @@ -21,6 +21,9 @@ from htsworkflow.submission.daf import \ ModelException, \ get_submission_uri +from django.conf import settings +from django.template import Context, Template, loader + LOGGER = logging.getLogger(__name__) class Submission(object): @@ -121,6 +124,7 @@ class Submission(object): fileNode = self.make_file_node(pathname, an_analysis) self.add_md5s(filename, fileNode, analysis_dir) self.add_fastq_metadata(filename, fileNode) + self.add_label(file_type, fileNode, libNode) self.model.add_statement( RDF.Statement(fileNode, rdfNS['type'], @@ -182,6 +186,23 @@ class Submission(object): if value is not None: s = RDF.Statement(fileNode, model_term, toTypedNode(value)) self.model.append(s) + + def add_label(self, file_type, file_node, lib_node): + """Add rdfs:label to a file node + """ + #template_term = libraryOntology['label_template'] + template_term = libraryOntology['label_template'] + label_template = self.model.get_target(file_type, template_term) + if label_template: + template = loader.get_template('submission_view_rdfs_label_metadata.sparql') + context = Context({ + 'library': str(lib_node.uri), + }) + for r in self.execute_query(template, context): + context = Context(r) + label = Template(label_template).render(context) + s = RDF.Statement(file_node, rdfsNS['label'], unicode(label)) + self.model.append(s) def _add_library_details_to_model(self, libNode): # attributes that can have multiple values diff --git a/htsworkflow/submission/trackhub_submission.py b/htsworkflow/submission/trackhub_submission.py index 5003fc0..7a83e87 100644 --- a/htsworkflow/submission/trackhub_submission.py +++ b/htsworkflow/submission/trackhub_submission.py @@ -105,12 +105,17 @@ class TrackHubSubmission(Submission): track_subgroup = self.make_track_subgroups(subgroups, track) + if 'file_label' in track: + track_label = self.sanitize_name(track['file_label']) + else: + track_label = track_name + newtrack = Track( name=track_name, tracktype = str(track['file_type']), url= hub_url + str(track['relative_path']), short_label=str(track['library_id']), - long_label=track_name, + long_label=str(track_label), subgroups=track_subgroup, ) view.add_tracks([newtrack]) @@ -155,12 +160,11 @@ class TrackHubSubmission(Submission): return str(template.render(context)) def make_track_name(self, track): - name = '{}_{}_{}'.format( + return '{}_{}_{}'.format( track['library_id'], track['replicate'], track['output_type'], ) - return name def make_track_subgroups(self, subgroups, track): track_subgroups = {} @@ -257,7 +261,6 @@ class TrackHubSubmission(Submission): return name def get_manifest_metadata(self, analysis_node): - query_template = loader.get_template('trackhub_manifest.sparql') context = Context({ diff --git a/htsworkflow/templates/manifest.txt b/htsworkflow/templates/manifest.txt index adf0554..c81259c 100644 --- a/htsworkflow/templates/manifest.txt +++ b/htsworkflow/templates/manifest.txt @@ -1,2 +1,2 @@ -#file_name format output_type experiment replicate enriched_in ucsc_db{% for r in files %} -{{ r.relative_path }} {{ r.file_format }} {{ r.output_type }} {{ r.dataset_id }} {{ r.replicate }} {{ r.enriched_in }} {{ r.ucsc_db }}{% endfor %} +#file_name format output_type experiment replicate enriched_in ucsc_db replaces replace_reason{% for r in files %} +{{ r.relative_path }} {{ r.file_format }} {{ r.output_type }} {{ r.dataset_id }} {{ r.replicate }} {{ r.enriched_in }} {{ r.ucsc_db }} {{ r.replaces_accession|default_if_none:"" }} {{ r.replace_reason|default_if_none:"" }}{% endfor %} diff --git a/htsworkflow/templates/submission_view_rdfs_label_metadata.sparql b/htsworkflow/templates/submission_view_rdfs_label_metadata.sparql new file mode 100644 index 0000000..0666e62 --- /dev/null +++ b/htsworkflow/templates/submission_view_rdfs_label_metadata.sparql @@ -0,0 +1,10 @@ +PREFIX htsw: +PREFIX encode3: + +select ?cell_line ?assay ?protocol ?lab +where { + optional { <{{ library }}> htsw:cell_line ?cell_line . } + optional { <{{ library }}> encode3:assay ?assay . } + optional { <{{ library }}> encode3:protocol ?protocol. } + optional { <{{ library }}> encode3:lab ?lab. } +} diff --git a/htsworkflow/templates/trackhub_manifest.sparql b/htsworkflow/templates/trackhub_manifest.sparql index 1c83b47..cb5c4fd 100644 --- a/htsworkflow/templates/trackhub_manifest.sparql +++ b/htsworkflow/templates/trackhub_manifest.sparql @@ -6,7 +6,7 @@ PREFIX ncbiTaxon: PREFIX geoSoft: PREFIX cells: -select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db +select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?replaces_accession ?replace_reason WHERE { <{{submission}}> a submissionOntology:submission ; submissionOntology:name ?name ; @@ -17,6 +17,10 @@ WHERE { htswlib:library ?library ; a ?fileClass . + OPTIONAL { ?file encode3:replaces ?replaces_accession ; + encode3:replace_reason ?replace_reason . + } + ?fileClass geoSoft:fileTypeLabel ?file_format ; ucscDaf:output_type ?output_type . diff --git a/htsworkflow/templates/trackhub_samples.sparql b/htsworkflow/templates/trackhub_samples.sparql index 19ce7e1..9515274 100644 --- a/htsworkflow/templates/trackhub_samples.sparql +++ b/htsworkflow/templates/trackhub_samples.sparql @@ -1,3 +1,5 @@ +PREFIX rdf: +PREFIX rdfs: PREFIX htswlib: PREFIX submissionOntology: PREFIX ucscDaf: @@ -6,8 +8,7 @@ PREFIX geoSoft: PREFIX cells: PREFIX encode3: -select distinct ?lab_library_id ?library_id ?filename ?relative_path ?output_type ?file_type ?cell ?replicate ?assay ?rna_type ?protocol - +select distinct ?lab_library_id ?library_id ?filename ?relative_path ?output_type ?file_type ?cell ?replicate ?assay ?rna_type ?protocol ?file_label WHERE { ?trackType geoSoft:fileTypeLabel ?file_type ; ucscDaf:output_type ?output_type . @@ -15,6 +16,7 @@ WHERE { ucscDaf:relative_path ?relative_path ; htswlib:library ?library ; a ?trackType . + OPTIONAL { ?file rdfs:label ?file_label . } OPTIONAL { ?library htswlib:library_id ?lab_library_id } OPTIONAL { ?library encode3:library_id ?library_id } OPTIONAL { ?library htswlib:cell_line ?cell . } diff --git a/htsworkflow/util/hashfile.py b/htsworkflow/util/hashfile.py index 2900e76..af3db76 100644 --- a/htsworkflow/util/hashfile.py +++ b/htsworkflow/util/hashfile.py @@ -40,6 +40,8 @@ def make_md5sum_unix(filename, md5_cache): def parse_md5sum_line(lines, filename): md5sum, md5sum_filename = lines[0].split() + md5sum_filename = os.path.normpath(md5sum_filename) + filename = os.path.normpath(filename) if md5sum_filename != filename: errmsg = "MD5sum and I disagre about filename. {0} != {1}" logger.error(errmsg.format(filename, md5sum_filename)) diff --git a/htsworkflow/util/test/test_ucsc.py b/htsworkflow/util/test/test_ucsc.py index 05a64ba..2b2e976 100644 --- a/htsworkflow/util/test/test_ucsc.py +++ b/htsworkflow/util/test/test_ucsc.py @@ -17,7 +17,7 @@ class TestUCSC(TestCase): self.assertEqual(info.version, 4) self.assertEqual(info.isCompressed, True) # what should i do for byteswapped arch? - self.assertEqual(info.isSwapped, True) + self.assertEqual(info.isSwapped, False) self.assertEqual(info.primaryDataSize, 48) self.assertEqual(info.primaryIndexSize, 6204) self.assertEqual(info.zoomLevels, 2)