Merge branch 'django1.4' of mus.cacr.caltech.edu:htsworkflow into django1.4
authorDiane Trout <diane@caltech.edu>
Thu, 5 Dec 2013 23:06:14 +0000 (15:06 -0800)
committerDiane Trout <diane@caltech.edu>
Thu, 5 Dec 2013 23:06:14 +0000 (15:06 -0800)
htsworkflow/pipelines/ipar.py
htsworkflow/pipelines/runfolder.py
htsworkflow/pipelines/test/test_runfolder_rta180.py
htsworkflow/submission/trackhub_submission.py
htsworkflow/templates/manifest.txt
htsworkflow/templates/trackhub_manifest.sparql
htsworkflow/templates/trackhub_samples.sparql
htsworkflow/util/rdfhelp.py
htsworkflow/util/ucsc.py

index f818b3495fbf2fd2d8f2fba13d698188e7680d3f..8203f5e0f29fda173728e3f504522f6cd579520f 100644 (file)
@@ -75,6 +75,14 @@ class IPAR(object):
         if xml is not None:
             self.set_elements(xml)
 
+    def _get_runfolder_name(self):
+        """Return runfolder name"""
+        if self.tree is None:
+            raise ValueError("Can't query an empty run")
+        runfolder = self.tree.xpath('RunParameters/Runfolder')
+        return runfolder[0].text
+    runfolder_name = property(_get_runfolder_name)
+
     def _get_software(self):
         """Return software name"""
         if self.tree is None:
index da1bbe636bccdf359616c3b69e4a5963a5e32370..669c5f03363dfc8c647730133af71678319d0336 100644 (file)
@@ -129,10 +129,12 @@ class PipelineRun(object):
             return path_fields[-1]
 
     def _get_runfolder_name(self):
-        if self.gerald is None:
-            return None
-        else:
+        if self.gerald:
             return self.gerald.runfolder_name
+        elif hasattr(self.image_analysis, 'runfolder_name'):
+            return self.image_analysis.runfolder_name
+        else:
+            return None
     runfolder_name = property(_get_runfolder_name)
 
     def _get_run_dirname(self):
index 63b4a8df0c7176b8ced6aa1a022a8b22f7c6a7bb..0db7857399cd8f9c391a8b8c58fb0b5023fef0f5 100644 (file)
@@ -25,9 +25,8 @@ def make_runfolder(obj=None):
     temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
 
     flowcell_id = '4286GAAXX'
-    runfolder_dir = os.path.join(
-        temp_dir,
-        '090608_HWI-EAS229_0117_{0}'.format(flowcell_id))
+    runfolder = '090608_HWI-EAS229_0117_{0}'.format(flowcell_id)
+    runfolder_dir = os.path.join(temp_dir, runfolder)
     os.mkdir(runfolder_dir)
 
     data_dir = os.path.join(runfolder_dir, 'Data')
@@ -50,6 +49,7 @@ def make_runfolder(obj=None):
     if obj is not None:
         obj.flowcell_id = flowcell_id
         obj.temp_dir = temp_dir
+        obj.runfolder = runfolder
         obj.runfolder_dir = runfolder_dir
         obj.data_dir = data_dir
         obj.image_analysis_dir = intensities_dir
@@ -262,8 +262,9 @@ class RunfolderTests(TestCase):
 
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
-        name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
+        name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
         self.failUnlessEqual(runs[0].serialization_filename, name)
+        self.assertEqual(runs[0].runfolder_name, '090220_HWI-EAS229_0093_30VR0AAXX')
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
@@ -272,6 +273,7 @@ class RunfolderTests(TestCase):
         name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
         self.failUnlessEqual(runs[0].serialization_filename, name)
 
+
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
index 7a83e8770ee39515ffe90fa223bce3ea19767e47..e383175a16884c3d1d24a69042bf916a44aa71c9 100644 (file)
@@ -1,5 +1,6 @@
 import logging
 import os
+from pprint import pformat
 import string
 import re
 
@@ -13,6 +14,7 @@ from htsworkflow.util.rdfhelp import \
      stripNamespace, \
      submissionOntology
 from htsworkflow.util.url import parse_ssh_url
+from htsworkflow.util.ucsc import bigWigInfo
 
 from django.conf import settings
 from django.template import Context, loader
@@ -104,20 +106,24 @@ class TrackHubSubmission(Submission):
             track_name = self.make_track_name(track)
 
             track_subgroup = self.make_track_subgroups(subgroups, track)
+            track_type = self.make_track_type(track)
 
             if 'file_label' in track:
                 track_label = self.sanitize_name(track['file_label'])
             else:
                 track_label = track_name
 
-            newtrack = Track(
-                name=track_name,
-                tracktype = str(track['file_type']),
-                url= hub_url + str(track['relative_path']),
-                short_label=str(track['library_id']),
-                long_label=str(track_label),
-                subgroups=track_subgroup,
-                )
+            attributes = {
+                'name': track_name,
+                'tracktype': track_type,
+                'url': hub_url + str(track['relative_path']),
+                'short_label': str(track['library_id']),
+                'long_label': str(track_label),
+                'subgroups': track_subgroup,
+            }
+
+            LOGGER.debug('track attributes: %s', pformat(attributes))
+            newtrack = Track(**attributes)
             view.add_tracks([newtrack])
 
         results = hub.render()
@@ -136,13 +142,20 @@ class TrackHubSubmission(Submission):
         """
         current_view_type = str(track['output_type'])
         if not view or current_view_type != view.name:
-            view = ViewTrack(
-                name=current_view_type,
-                view=current_view_type,
-                visibility='squish',
-                short_label=current_view_type,
-                tracktype=str(track['file_type']),
-            )
+            attributes = {
+                'name': current_view_type,
+                'view': current_view_type,
+                'visibility': str(track.get('visibility', 'squish')),
+                'short_label': current_view_type,
+                'tracktype': str(track['file_type'])
+            }
+            maxHeightPixels = track.get('maxHeightPixels')
+            if maxHeightPixels:
+                attributes['maxHeightPixels'] = str(maxHeightPixels)
+            autoScale = track.get('autoScale')
+            if autoScale:
+                attributes['autoScale'] = str(autoScale)
+            view = ViewTrack(**attributes)
             composite.add_view(view)
             view_type = current_view_type
         return view
@@ -173,6 +186,21 @@ class TrackHubSubmission(Submission):
                 value = self.sanitize_name(track[k])
                 track_subgroups[k] = value
         return track_subgroups
+
+    def make_track_type(self, track):
+        """Further annotate tracktype.
+
+        bigWig files can have additional information. Add it if we can
+        """
+        track_type = track['file_type']
+        if track_type.lower() == 'bigwig':
+            # something we can enhance
+            info = bigWigInfo(track['relative_path'])
+            if info.min is not None and info.max is not None:
+                track_type = '{} {} {}'.format(track_type, int(info.min), int(info.max))
+
+        LOGGER.debug("track_type: %s", track_type)
+        return str(track_type)
 
     def add_subgroups(self, composite):
         """Add subgroups to composite track"""
index c81259cb77949fdffccb321f59e93634fc5e1540..21c8a474cdee41d6bd35cb068b8a0818881a784b 100644 (file)
@@ -1,2 +1,3 @@
-#file_name     format  output_type     experiment      replicate       enriched_in     ucsc_db replaces        replace_reason{% for r in files %}
-{{ r.relative_path }}  {{ r.file_format }}     {{ r.output_type }}     {{ r.dataset_id }}      {{ r.replicate }}       {{ r.enriched_in }}     {{ r.ucsc_db }} {{ r.replaces_accession|default_if_none:"" }}   {{ r.replace_reason|default_if_none:"" }}{% endfor %}
+#version 1.7
+#file_name     format  output_type     experiment      replicate       enriched_in     ucsc_db paired_end      technical_replicate{% for r in files %}
+{{ r.relative_path }}  {{ r.file_format }}     {{ r.output_type }}     {{ r.dataset_id }}      {{ r.replicate }}       {{ r.enriched_in }}     {{ r.ucsc_db }} {{ r.paired_end|default_if_none:"n/a" }}        {{ r.technical_replicate|default_if_none:"n/a"}}{% endfor %}
index cb5c4fd5195b87302ddd6236f19ea2b16339f8cf..36e57fa88fd0aa21f5cfbf0492b1e2e77365c57b 100644 (file)
@@ -6,7 +6,7 @@ PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
 PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
 PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
 
-select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?replaces_accession ?replace_reason
+select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?paired_end ?technical_replicate ?replaces ?replace_reason
 WHERE {
   <{{submission}}> a submissionOntology:submission ;
                    submissionOntology:name ?name ;
@@ -23,6 +23,9 @@ WHERE {
 
   ?fileClass geoSoft:fileTypeLabel ?file_format ;
              ucscDaf:output_type ?output_type .
+  OPTIONAL { ?fileClass ucscDaf:paired_end ?paired_end . }
+  OPTIONAL { ?fileClass ucscDaf:technical_replicate ?technical_replicate . }
+
   
   ?library htswlib:replicate ?replicate ;
            ucscDaf:enriched_in ?enriched_in;
index 95152747b43a5ed4109e8dd8fb020edb120bfc80..6259fce15d646aba5864e91d23f22823ce233cac 100644 (file)
@@ -4,14 +4,18 @@ PREFIX htswlib: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
 PREFIX submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#>
 PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
 PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
+PREFIX trackdb: <http://genome.ucsc.edu/goldenPath/help/trackDb/trackDbHub.html#>
 PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
 PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
 PREFIX encode3: <http://jumpgate.caltech.edu/wiki/Encode3#>
 
-select distinct ?lab_library_id ?library_id ?filename ?relative_path ?output_type ?file_type ?cell ?replicate ?assay ?rna_type ?protocol ?file_label
+select distinct ?lab_library_id ?library_id ?filename ?relative_path ?output_type ?file_type ?cell ?replicate ?assay ?rna_type ?protocol ?file_label ?autoScale ?maxHeightPixels ?visibility
 WHERE {
-  ?trackType geoSoft:fileTypeLabel ?file_type ;
+  ?trackType trackdb:type ?file_type ;
              ucscDaf:output_type ?output_type .
+  OPTIONAL { ?trackType trackdb:autoScale ?autoScale . }
+  OPTIONAL { ?trackType trackdb:maxHeightPixels ?maxHeightPixels . }
+  OPTIONAL { ?trackType trackdb:visibility ?visibility . }
   ?file ucscDaf:filename ?filename ;
         ucscDaf:relative_path ?relative_path ;
         htswlib:library ?library ;
index cbe2a9188b091d0cb5f22fa99069345728204ee9..ac5f6ccd1056a561418cd9b17e155ee936097630 100644 (file)
@@ -271,15 +271,20 @@ def load_into_model(model, parser_name, path, ns=None):
 
     statements = []
     retries = 3
+    succeeded = False
     while retries > 0:
         try:
             retries -= 1
             statements = rdf_parser.parse_as_stream(url, ns)
             retries = 0
+            succeeded = True
         except RDF.RedlandError, e:
             errmsg = "RDF.RedlandError: {0} {1} tries remaining"
             logger.error(errmsg.format(str(e), retries))
-
+
+    if not succeeded:
+        logger.warn("Unable to download %s", url)
+
     for s in statements:
         conditionally_add_statement(model, s, ns)
 
index e9ff77ee9cbb80af2b9e906d32987d8a8a7980e2..b96c46aed5c79de3bab81e3bad02b9c0813b1b35 100644 (file)
@@ -3,6 +3,7 @@
 
 import logging
 import os
+import sys
 from subprocess import Popen, PIPE
 
 LOGGER = logging.getLogger(__name__)
@@ -51,17 +52,22 @@ class bigWigInfo:
     def scan_file(self, filename):
         cmd = ['bigWigInfo', 
                filename]
-        p = Popen(cmd, stdout=PIPE)
-        stdout, _ = p.communicate()
-        for line in stdout.split(os.linesep):
-            if len(line) > 0:
-                term, value = line.split(': ')
-                if term in ('isCompressed', 'isSwapped'):
-                    value = parseBoolean(value)
-                else:
-                    value = parseNumber(value)
-                LOGGER.debug('%s: %s', term, str(value))
-                setattr(self, term, value)
+        try:
+            p = Popen(cmd, stdout=PIPE)
+            stdout, _ = p.communicate()
+            for line in stdout.split(os.linesep):
+                if len(line) > 0:
+                    term, value = line.split(': ')
+                    if term in ('isCompressed', 'isSwapped'):
+                        value = parseBoolean(value)
+                    else:
+                        value = parseNumber(value)
+                    LOGGER.debug('%s: %s', term, str(value))
+                    setattr(self, term, value)
+        except OSError as e:
+            LOGGER.error("Exception %s trying to run: %s", str(e), ' '.join(cmd))
+            sys.exit(-1)
+