Merge branch 'django1.4' of mus.cacr.caltech.edu:htsworkflow into django1.4
authorDiane Trout <diane@caltech.edu>
Thu, 5 Dec 2013 23:06:14 +0000 (15:06 -0800)
committerDiane Trout <diane@caltech.edu>
Thu, 5 Dec 2013 23:06:14 +0000 (15:06 -0800)
htsworkflow/pipelines/ipar.py
htsworkflow/pipelines/runfolder.py
htsworkflow/pipelines/test/test_runfolder_rta180.py
htsworkflow/submission/trackhub_submission.py
htsworkflow/templates/manifest.txt
htsworkflow/templates/trackhub_manifest.sparql
htsworkflow/templates/trackhub_samples.sparql
htsworkflow/util/rdfhelp.py
htsworkflow/util/ucsc.py

index f818b3495fbf2fd2d8f2fba13d698188e7680d3f..8203f5e0f29fda173728e3f504522f6cd579520f 100644 (file)
@@ -75,6 +75,14 @@ class IPAR(object):
         if xml is not None:
             self.set_elements(xml)
 
+    def _get_runfolder_name(self):
+        """Return runfolder name"""
+        if self.tree is None:
+            raise ValueError("Can't query an empty run")
+        runfolder = self.tree.xpath('RunParameters/Runfolder')
+        return runfolder[0].text
+    runfolder_name = property(_get_runfolder_name)
+
     def _get_software(self):
         """Return software name"""
         if self.tree is None:
index da1bbe636bccdf359616c3b69e4a5963a5e32370..669c5f03363dfc8c647730133af71678319d0336 100644 (file)
@@ -129,10 +129,12 @@ class PipelineRun(object):
             return path_fields[-1]
 
     def _get_runfolder_name(self):
-        if self.gerald is None:
-            return None
-        else:
+        if self.gerald:
             return self.gerald.runfolder_name
+        elif hasattr(self.image_analysis, 'runfolder_name'):
+            return self.image_analysis.runfolder_name
+        else:
+            return None
     runfolder_name = property(_get_runfolder_name)
 
     def _get_run_dirname(self):
index 63b4a8df0c7176b8ced6aa1a022a8b22f7c6a7bb..0db7857399cd8f9c391a8b8c58fb0b5023fef0f5 100644 (file)
@@ -25,9 +25,8 @@ def make_runfolder(obj=None):
     temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
 
     flowcell_id = '4286GAAXX'
-    runfolder_dir = os.path.join(
-        temp_dir,
-        '090608_HWI-EAS229_0117_{0}'.format(flowcell_id))
+    runfolder = '090608_HWI-EAS229_0117_{0}'.format(flowcell_id)
+    runfolder_dir = os.path.join(temp_dir, runfolder)
     os.mkdir(runfolder_dir)
 
     data_dir = os.path.join(runfolder_dir, 'Data')
@@ -50,6 +49,7 @@ def make_runfolder(obj=None):
     if obj is not None:
         obj.flowcell_id = flowcell_id
         obj.temp_dir = temp_dir
+        obj.runfolder = runfolder
         obj.runfolder_dir = runfolder_dir
         obj.data_dir = data_dir
         obj.image_analysis_dir = intensities_dir
@@ -262,8 +262,9 @@ class RunfolderTests(TestCase):
 
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
-        name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
+        name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
         self.failUnlessEqual(runs[0].serialization_filename, name)
+        self.assertEqual(runs[0].runfolder_name, '090220_HWI-EAS229_0093_30VR0AAXX')
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
@@ -272,6 +273,7 @@ class RunfolderTests(TestCase):
         name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
         self.failUnlessEqual(runs[0].serialization_filename, name)
 
+
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
index 7a83e8770ee39515ffe90fa223bce3ea19767e47..e383175a16884c3d1d24a69042bf916a44aa71c9 100644 (file)
@@ -1,5 +1,6 @@
 import logging
 import os
+from pprint import pformat
 import string
 import re
 
@@ -13,6 +14,7 @@ from htsworkflow.util.rdfhelp import \
      stripNamespace, \
      submissionOntology
 from htsworkflow.util.url import parse_ssh_url
+from htsworkflow.util.ucsc import bigWigInfo
 
 from django.conf import settings
 from django.template import Context, loader
@@ -104,20 +106,24 @@ class TrackHubSubmission(Submission):
             track_name = self.make_track_name(track)
 
             track_subgroup = self.make_track_subgroups(subgroups, track)
+            track_type = self.make_track_type(track)
 
             if 'file_label' in track:
                 track_label = self.sanitize_name(track['file_label'])
             else:
                 track_label = track_name
 
-            newtrack = Track(
-                name=track_name,
-                tracktype = str(track['file_type']),
-                url= hub_url + str(track['relative_path']),
-                short_label=str(track['library_id']),
-                long_label=str(track_label),
-                subgroups=track_subgroup,
-                )
+            attributes = {
+                'name': track_name,
+                'tracktype': track_type,
+                'url': hub_url + str(track['relative_path']),
+                'short_label': str(track['library_id']),
+                'long_label': str(track_label),
+                'subgroups': track_subgroup,
+            }
+
+            LOGGER.debug('track attributes: %s', pformat(attributes))
+            newtrack = Track(**attributes)
             view.add_tracks([newtrack])
 
         results = hub.render()
@@ -136,13 +142,20 @@ class TrackHubSubmission(Submission):
         """
         current_view_type = str(track['output_type'])
         if not view or current_view_type != view.name:
-            view = ViewTrack(
-                name=current_view_type,
-                view=current_view_type,
-                visibility='squish',
-                short_label=current_view_type,
-                tracktype=str(track['file_type']),
-            )
+            attributes = {
+                'name': current_view_type,
+                'view': current_view_type,
+                'visibility': str(track.get('visibility', 'squish')),
+                'short_label': current_view_type,
+                'tracktype': str(track['file_type'])
+            }
+            maxHeightPixels = track.get('maxHeightPixels')
+            if maxHeightPixels:
+                attributes['maxHeightPixels'] = str(maxHeightPixels)
+            autoScale = track.get('autoScale')
+            if autoScale:
+                attributes['autoScale'] = str(autoScale)
+            view = ViewTrack(**attributes)
             composite.add_view(view)
             view_type = current_view_type
         return view
@@ -173,6 +186,21 @@ class TrackHubSubmission(Submission):
                 value = self.sanitize_name(track[k])
                 track_subgroups[k] = value
         return track_subgroups
+
+    def make_track_type(self, track):
+        """Further annotate tracktype.
+
+        bigWig files can have additional information. Add it if we can
+        """
+        track_type = track['file_type']
+        if track_type.lower() == 'bigwig':
+            # something we can enhance
+            info = bigWigInfo(track['relative_path'])
+            if info.min is not None and info.max is not None:
+                track_type = '{} {} {}'.format(track_type, int(info.min), int(info.max))
+
+        LOGGER.debug("track_type: %s", track_type)
+        return str(track_type)
 
     def add_subgroups(self, composite):
         """Add subgroups to composite track"""
index c81259cb77949fdffccb321f59e93634fc5e1540..21c8a474cdee41d6bd35cb068b8a0818881a784b 100644 (file)
@@ -1,2 +1,3 @@
-#file_name     format  output_type     experiment      replicate       enriched_in     ucsc_db replaces        replace_reason{% for r in files %}
-{{ r.relative_path }}  {{ r.file_format }}     {{ r.output_type }}     {{ r.dataset_id }}      {{ r.replicate }}       {{ r.enriched_in }}     {{ r.ucsc_db }} {{ r.replaces_accession|default_if_none:"" }}   {{ r.replace_reason|default_if_none:"" }}{% endfor %}
+#version 1.7
+#file_name     format  output_type     experiment      replicate       enriched_in     ucsc_db paired_end      technical_replicate{% for r in files %}
+{{ r.relative_path }}  {{ r.file_format }}     {{ r.output_type }}     {{ r.dataset_id }}      {{ r.replicate }}       {{ r.enriched_in }}     {{ r.ucsc_db }} {{ r.paired_end|default_if_none:"n/a" }}        {{ r.technical_replicate|default_if_none:"n/a"}}{% endfor %}
index cb5c4fd5195b87302ddd6236f19ea2b16339f8cf..36e57fa88fd0aa21f5cfbf0492b1e2e77365c57b 100644 (file)
@@ -6,7 +6,7 @@ PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
 PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
 PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
 
-select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?replaces_accession ?replace_reason
+select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?paired_end ?technical_replicate ?replaces ?replace_reason
 WHERE {
   <{{submission}}> a submissionOntology:submission ;
                    submissionOntology:name ?name ;
@@ -23,6 +23,9 @@ WHERE {
 
   ?fileClass geoSoft:fileTypeLabel ?file_format ;
              ucscDaf:output_type ?output_type .
+  OPTIONAL { ?fileClass ucscDaf:paired_end ?paired_end . }
+  OPTIONAL { ?fileClass ucscDaf:technical_replicate ?technical_replicate . }
+
   
   ?library htswlib:replicate ?replicate ;
            ucscDaf:enriched_in ?enriched_in;
index 95152747b43a5ed4109e8dd8fb020edb120bfc80..6259fce15d646aba5864e91d23f22823ce233cac 100644 (file)
@@ -4,14 +4,18 @@ PREFIX htswlib: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
 PREFIX submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#>
 PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
 PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
+PREFIX trackdb: <http://genome.ucsc.edu/goldenPath/help/trackDb/trackDbHub.html#>
 PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
 PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
 PREFIX encode3: <http://jumpgate.caltech.edu/wiki/Encode3#>
 
-select distinct ?lab_library_id ?library_id ?filename ?relative_path ?output_type ?file_type ?cell ?replicate ?assay ?rna_type ?protocol ?file_label
+select distinct ?lab_library_id ?library_id ?filename ?relative_path ?output_type ?file_type ?cell ?replicate ?assay ?rna_type ?protocol ?file_label ?autoScale ?maxHeightPixels ?visibility
 WHERE {
-  ?trackType geoSoft:fileTypeLabel ?file_type ;
+  ?trackType trackdb:type ?file_type ;
              ucscDaf:output_type ?output_type .
+  OPTIONAL { ?trackType trackdb:autoScale ?autoScale . }
+  OPTIONAL { ?trackType trackdb:maxHeightPixels ?maxHeightPixels . }
+  OPTIONAL { ?trackType trackdb:visibility ?visibility . }
   ?file ucscDaf:filename ?filename ;
         ucscDaf:relative_path ?relative_path ;
         htswlib:library ?library ;
index cbe2a9188b091d0cb5f22fa99069345728204ee9..ac5f6ccd1056a561418cd9b17e155ee936097630 100644 (file)
@@ -271,15 +271,20 @@ def load_into_model(model, parser_name, path, ns=None):
 
     statements = []
     retries = 3
+    succeeded = False
     while retries > 0:
         try:
             retries -= 1
             statements = rdf_parser.parse_as_stream(url, ns)
             retries = 0
+            succeeded = True
         except RDF.RedlandError, e:
             errmsg = "RDF.RedlandError: {0} {1} tries remaining"
             logger.error(errmsg.format(str(e), retries))
-
+
+    if not succeeded:
+        logger.warn("Unable to download %s", url)
+
     for s in statements:
         conditionally_add_statement(model, s, ns)
 
index e9ff77ee9cbb80af2b9e906d32987d8a8a7980e2..b96c46aed5c79de3bab81e3bad02b9c0813b1b35 100644 (file)
@@ -3,6 +3,7 @@
 
 import logging
 import os
+import sys
 from subprocess import Popen, PIPE
 
 LOGGER = logging.getLogger(__name__)
@@ -51,17 +52,22 @@ class bigWigInfo:
     def scan_file(self, filename):
         cmd = ['bigWigInfo', 
                filename]
-        p = Popen(cmd, stdout=PIPE)
-        stdout, _ = p.communicate()
-        for line in stdout.split(os.linesep):
-            if len(line) > 0:
-                term, value = line.split(': ')
-                if term in ('isCompressed', 'isSwapped'):
-                    value = parseBoolean(value)
-                else:
-                    value = parseNumber(value)
-                LOGGER.debug('%s: %s', term, str(value))
-                setattr(self, term, value)
+        try:
+            p = Popen(cmd, stdout=PIPE)
+            stdout, _ = p.communicate()
+            for line in stdout.split(os.linesep):
+                if len(line) > 0:
+                    term, value = line.split(': ')
+                    if term in ('isCompressed', 'isSwapped'):
+                        value = parseBoolean(value)
+                    else:
+                        value = parseNumber(value)
+                    LOGGER.debug('%s: %s', term, str(value))
+                    setattr(self, term, value)
+        except OSError as e:
+            LOGGER.error("Exception %s trying to run: %s", str(e), ' '.join(cmd))
+            sys.exit(-1)
+