Merge branch 'django1.4' of mus.cacr.caltech.edu:htsworkflow into django1.4
author Diane Trout <diane@ghic.org>
Mon, 21 Oct 2013 20:59:29 +0000 (13:59 -0700)
committer Diane Trout <diane@ghic.org>
Mon, 21 Oct 2013 20:59:29 +0000 (13:59 -0700)
encode_submission/encode3.py
htsworkflow/submission/submission.py
htsworkflow/submission/trackhub_submission.py
htsworkflow/templates/manifest.txt
htsworkflow/templates/submission_view_rdfs_label_metadata.sparql [new file with mode: 0644]
htsworkflow/templates/trackhub_manifest.sparql
htsworkflow/templates/trackhub_samples.sparql
htsworkflow/util/hashfile.py
htsworkflow/util/test/test_ucsc.py

index 875d3bdd9d8205df2eaff65b2b48b5462366645a..a77151e1ead7d2a91091a58a4451a01bcd569d72 100644 (file)
@@ -77,6 +77,12 @@ def main(cmdline=None):
             INDENTED.join(submission_names)))
     elif len(submission_names) == 1:
         name = submission_names[0]
+    # Resolve the submission URI here rather than only under --make-track-hub.
+    if name:
+        submission_uri = get_submission_uri(name)
+        logger.info('Submission URI: %s', submission_uri)
+    else:
+        logger.debug('No name, unable to create submission uri')
 
     mapper = None
     if opts.make_track_hub:
@@ -85,8 +91,6 @@ def main(cmdline=None):
                                     baseurl=opts.make_track_hub,
                                     baseupload=opts.track_hub_upload,
                                     host=opts.host)
-        submission_uri = get_submission_uri(name)
-
 
     if opts.load_rdf is not None:
         if submission_uri is None:
@@ -117,6 +121,8 @@ def main(cmdline=None):
     if opts.scan_submission:
         if name is None:
             parser.error("Please define a submission name")
+        if mapper is None:
+            parser.error("Scan submission needs --make-track-hub=public-url")
         mapper.scan_submission_dirs(results)
 
     if opts.make_track_hub:
index 12a5154c33a01bf15d1349040e7bfd2f71f51df8..626897061ea4d9e4c78141a4cba65c4713011507 100644 (file)
@@ -21,6 +21,9 @@ from htsworkflow.submission.daf import \
      ModelException, \
      get_submission_uri
 
+from django.conf import settings
+from django.template import Context, Template, loader
+
 LOGGER = logging.getLogger(__name__)
 
 class Submission(object):
@@ -121,6 +124,7 @@ class Submission(object):
         fileNode = self.make_file_node(pathname, an_analysis)
         self.add_md5s(filename, fileNode, analysis_dir)
         self.add_fastq_metadata(filename, fileNode)
+        self.add_label(file_type, fileNode, libNode)
         self.model.add_statement(
             RDF.Statement(fileNode,
                           rdfNS['type'],
@@ -182,6 +186,23 @@ class Submission(object):
             if value is not None:
                 s = RDF.Statement(fileNode, model_term, toTypedNode(value))
                 self.model.append(s)
+                
+    def add_label(self, file_type, file_node, lib_node):
+        """Attach rdfs:label statement(s) to file_node, rendered from the
+        label_template of file_type with metadata queried for lib_node."""
+        # label_template is optional; file types without one get no label
+        template_term = libraryOntology['label_template']
+        label_template = self.model.get_target(file_type, template_term)
+        if label_template:
+            template = loader.get_template('submission_view_rdfs_label_metadata.sparql')
+            context = Context({
+                'library': str(lib_node.uri),
+                })
+            for r in self.execute_query(template, context):
+                context = Context(r)
+                label = Template(label_template).render(context)
+                s = RDF.Statement(file_node, rdfsNS['label'], unicode(label))
+                self.model.append(s)
 
     def _add_library_details_to_model(self, libNode):
         # attributes that can have multiple values
index 5003fc0fa27772bb542f4d40e15e11a69a97c5db..7a83e8770ee39515ffe90fa223bce3ea19767e47 100644 (file)
@@ -105,12 +105,17 @@ class TrackHubSubmission(Submission):
 
             track_subgroup = self.make_track_subgroups(subgroups, track)
 
+            if 'file_label' in track:
+                track_label = self.sanitize_name(track['file_label'])
+            else:
+                track_label = track_name
+
             newtrack = Track(
                 name=track_name,
                 tracktype = str(track['file_type']),
                 url= hub_url + str(track['relative_path']),
                 short_label=str(track['library_id']),
-                long_label=track_name,
+                long_label=str(track_label),
                 subgroups=track_subgroup,
                 )
             view.add_tracks([newtrack])
@@ -155,12 +160,11 @@ class TrackHubSubmission(Submission):
         return str(template.render(context))
 
     def make_track_name(self, track):
-        name = '{}_{}_{}'.format(
+        return '{}_{}_{}'.format(
             track['library_id'],
             track['replicate'],
             track['output_type'],
         )
-        return name
 
     def make_track_subgroups(self, subgroups, track):
         track_subgroups = {}
@@ -257,7 +261,6 @@ class TrackHubSubmission(Submission):
         return name
 
     def get_manifest_metadata(self, analysis_node):
-
         query_template = loader.get_template('trackhub_manifest.sparql')
 
         context = Context({
index adf055457f53d68f9ead81906614cd4415641c53..c81259cb77949fdffccb321f59e93634fc5e1540 100644 (file)
@@ -1,2 +1,2 @@
-#file_name     format  output_type     experiment      replicate       enriched_in     ucsc_db{% for r in files %}
-{{ r.relative_path }}  {{ r.file_format }}     {{ r.output_type }}     {{ r.dataset_id }}      {{ r.replicate }}       {{ r.enriched_in }}     {{ r.ucsc_db }}{% endfor %}
+#file_name     format  output_type     experiment      replicate       enriched_in     ucsc_db replaces        replace_reason{% for r in files %}
+{{ r.relative_path }}  {{ r.file_format }}     {{ r.output_type }}     {{ r.dataset_id }}      {{ r.replicate }}       {{ r.enriched_in }}     {{ r.ucsc_db }} {{ r.replaces_accession|default_if_none:"" }}   {{ r.replace_reason|default_if_none:"" }}{% endfor %}
diff --git a/htsworkflow/templates/submission_view_rdfs_label_metadata.sparql b/htsworkflow/templates/submission_view_rdfs_label_metadata.sparql
new file mode 100644 (file)
index 0000000..0666e62
--- /dev/null
@@ -0,0 +1,10 @@
+PREFIX htsw: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
+PREFIX encode3: <http://jumpgate.caltech.edu/wiki/Encode3#>
+# Look up optional descriptive metadata for one library (URI supplied as "library").
+select ?cell_line ?assay ?protocol ?lab
+where {
+    optional { <{{ library }}> htsw:cell_line ?cell_line . }
+    optional { <{{ library }}> encode3:assay ?assay . }
+    optional { <{{ library }}> encode3:protocol ?protocol. }
+    optional { <{{ library }}> encode3:lab ?lab. }
+}
index 1c83b4760b2b416553c91ebbc9b29f64bec38943..cb5c4fd5195b87302ddd6236f19ea2b16339f8cf 100644 (file)
@@ -6,7 +6,7 @@ PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
 PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
 PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
 
-select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db
+select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?replaces_accession ?replace_reason
 WHERE {
   <{{submission}}> a submissionOntology:submission ;
                    submissionOntology:name ?name ;
@@ -17,6 +17,10 @@ WHERE {
         htswlib:library ?library ;
         a ?fileClass .
 
+  OPTIONAL { ?file encode3:replaces ?replaces_accession ;
+                   encode3:replace_reason ?replace_reason .
+  }
+
   ?fileClass geoSoft:fileTypeLabel ?file_format ;
              ucscDaf:output_type ?output_type .
   
index 19ce7e1d5d7f32d35b88e961c867095104dbde88..95152747b43a5ed4109e8dd8fb020edb120bfc80 100644 (file)
@@ -1,3 +1,5 @@
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
 PREFIX htswlib: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
 PREFIX submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#>
 PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
@@ -6,8 +8,7 @@ PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
 PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
 PREFIX encode3: <http://jumpgate.caltech.edu/wiki/Encode3#>
 
-select distinct ?lab_library_id ?library_id ?filename ?relative_path ?output_type ?file_type ?cell ?replicate ?assay ?rna_type ?protocol 
-
+select distinct ?lab_library_id ?library_id ?filename ?relative_path ?output_type ?file_type ?cell ?replicate ?assay ?rna_type ?protocol ?file_label
 WHERE {
   ?trackType geoSoft:fileTypeLabel ?file_type ;
              ucscDaf:output_type ?output_type .
@@ -15,6 +16,7 @@ WHERE {
         ucscDaf:relative_path ?relative_path ;
         htswlib:library ?library ;
         a ?trackType .
+  OPTIONAL { ?file rdfs:label ?file_label . }
   OPTIONAL { ?library htswlib:library_id ?lab_library_id }
   OPTIONAL { ?library encode3:library_id ?library_id }
   OPTIONAL { ?library htswlib:cell_line ?cell . }
index 2900e76ae3727f0b2182376f666cda4deaf85bd3..af3db764971623b9c3580684ddef2ea90b63ee0c 100644 (file)
@@ -40,6 +40,8 @@ def make_md5sum_unix(filename, md5_cache):
 
 def parse_md5sum_line(lines, filename):
     md5sum, md5sum_filename = lines[0].split()
+    md5sum_filename = os.path.normpath(md5sum_filename)
+    filename = os.path.normpath(filename)
     if md5sum_filename != filename:
         errmsg = "MD5sum and I disagre about filename. {0} != {1}"
         logger.error(errmsg.format(filename, md5sum_filename))
index 05a64ba994e8311a9a9482864778865d67195035..2b2e9763cb69cc61f43a93ca39bd73e25afa36a3 100644 (file)
@@ -17,7 +17,7 @@ class TestUCSC(TestCase):
         self.assertEqual(info.version, 4)
         self.assertEqual(info.isCompressed, True)
         # what should i do for byteswapped arch?
-        self.assertEqual(info.isSwapped, True)
+        self.assertEqual(info.isSwapped, False)
         self.assertEqual(info.primaryDataSize, 48)
         self.assertEqual(info.primaryIndexSize, 6204)
         self.assertEqual(info.zoomLevels, 2)