Merge branch 'django1.4' of mus.cacr.caltech.edu:htsworkflow into django1.4

author Diane Trout <diane@ghic.org>

Thu, 5 Dec 2013 23:28:36 +0000 (15:28 -0800)

committer Diane Trout <diane@ghic.org>

Thu, 5 Dec 2013 23:28:36 +0000 (15:28 -0800)
author Diane Trout <diane@ghic.org>
Thu, 5 Dec 2013 23:28:36 +0000 (15:28 -0800)
committer Diane Trout <diane@ghic.org>
Thu, 5 Dec 2013 23:28:36 +0000 (15:28 -0800)
diff --git a/htsworkflow/pipelines/ipar.py b/htsworkflow/pipelines/ipar.py

index f818b3495fbf2fd2d8f2fba13d698188e7680d3f..8203f5e0f29fda173728e3f504522f6cd579520f 100644 (file)
--- a/htsworkflow/pipelines/ipar.py
+++ b/htsworkflow/pipelines/ipar.py
@@ -75,6 +75,14 @@ class IPAR(object):
          if xml is not None:
              self.set_elements(xml)
  
+    def _get_runfolder_name(self):
+        """Return the result of the RunParameters/Runfolder xpath query"""
+        if self.tree is None:
+            raise ValueError("Can't query an empty run")
+        runfolder = self.tree.xpath('RunParameters/Runfolder')
+        return runfolder
+    runfolder_name = property(_get_runfolder_name)
+    
      def _get_software(self):
          """Return software name"""
          if self.tree is None:
diff --git a/htsworkflow/pipelines/runfolder.py b/htsworkflow/pipelines/runfolder.py

index 7c06e217de63d5733e524ae551f06eea40528e11..669c5f03363dfc8c647730133af71678319d0336 100644 (file)
--- a/htsworkflow/pipelines/runfolder.py
+++ b/htsworkflow/pipelines/runfolder.py
@@ -129,10 +129,12 @@ class PipelineRun(object):
              return path_fields[-1]
  
      def _get_runfolder_name(self):
-        if self.gerald is None:
-            return None
-        else:
+        if self.gerald:
              return self.gerald.runfolder_name
+        elif hasattr(self.image_analysis, 'runfolder_name'):
+            return self.image_analysis.runfolder_name
+        else:
+            return None
      runfolder_name = property(_get_runfolder_name)
  
      def _get_run_dirname(self):
@@ -361,12 +363,12 @@ def build_hiseq_runs(image_analysis, runs, datadir, runfolder, flowcell_id):
              p.suffix = suffix
              p.image_analysis = image_analysis
              p.bustard = bustard.bustard(unaligned)
-            assert p.bustard
              if aligned:
                  p.gerald = gerald.gerald(aligned)
              runs.append(p)
-        except IOError, e:
-            LOGGER.error("Ignoring " + str(e))
+        except (IOError, RuntimeError) as e:
+            LOGGER.error("Exception %s", str(e))
+            LOGGER.error("Skipping run in %s", flowcell_id)
      return len(runs) - start
  
  def hiseq_match_aligned_unaligned(aligned, unaligned):
diff --git a/htsworkflow/pipelines/test/test_runfolder_rta180.py b/htsworkflow/pipelines/test/test_runfolder_rta180.py

index 63b4a8df0c7176b8ced6aa1a022a8b22f7c6a7bb..0db7857399cd8f9c391a8b8c58fb0b5023fef0f5 100644 (file)
--- a/htsworkflow/pipelines/test/test_runfolder_rta180.py
+++ b/htsworkflow/pipelines/test/test_runfolder_rta180.py
@@ -25,9 +25,8 @@ def make_runfolder(obj=None):
      temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
  
      flowcell_id = '4286GAAXX'
-    runfolder_dir = os.path.join(
-        temp_dir,
-        '090608_HWI-EAS229_0117_{0}'.format(flowcell_id))
+    runfolder = '090608_HWI-EAS229_0117_{0}'.format(flowcell_id)
+    runfolder_dir = os.path.join(temp_dir, runfolder)
      os.mkdir(runfolder_dir)
  
      data_dir = os.path.join(runfolder_dir, 'Data')
@@ -50,6 +49,7 @@ def make_runfolder(obj=None):
      if obj is not None:
          obj.flowcell_id = flowcell_id
          obj.temp_dir = temp_dir
+        obj.runfolder = runfolder
          obj.runfolder_dir = runfolder_dir
          obj.data_dir = data_dir
          obj.image_analysis_dir = intensities_dir
@@ -262,8 +262,9 @@ class RunfolderTests(TestCase):
  
          # do we get the flowcell id from the filename?
          self.failUnlessEqual(len(runs), 1)
-        name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
+        name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)        
          self.failUnlessEqual(runs[0].serialization_filename, name)
+        self.assertEqual(runs[0].runfolder_name, '090220_HWI-EAS229_0093_30VR0AAXX')
  
          # do we get the flowcell id from the FlowcellId.xml file
          make_flowcell_id(self.runfolder_dir, '207BTAAXY')
@@ -272,6 +273,7 @@ class RunfolderTests(TestCase):
          name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
          self.failUnlessEqual(runs[0].serialization_filename, name)
  
+
          r1 = runs[0]
          xml = r1.get_elements()
          xml_str = ElementTree.tostring(xml)
diff --git a/htsworkflow/templates/manifest.txt b/htsworkflow/templates/manifest.txt

index c81259cb77949fdffccb321f59e93634fc5e1540..21c8a474cdee41d6bd35cb068b8a0818881a784b 100644 (file)
--- a/htsworkflow/templates/manifest.txt
+++ b/htsworkflow/templates/manifest.txt
@@ -1,2 +1,3 @@
-#file_name     format  output_type     experiment      replicate       enriched_in     ucsc_db replaces        replace_reason{% for r in files %}
-{{ r.relative_path }}  {{ r.file_format }}     {{ r.output_type }}     {{ r.dataset_id }}      {{ r.replicate }}       {{ r.enriched_in }}     {{ r.ucsc_db }} {{ r.replaces_accession|default_if_none:"" }}   {{ r.replace_reason|default_if_none:"" }}{% endfor %}
+#version 1.7
+#file_name     format  output_type     experiment      replicate       enriched_in     ucsc_db paired_end      technical_replicate{% for r in files %}
+{{ r.relative_path }}  {{ r.file_format }}     {{ r.output_type }}     {{ r.dataset_id }}      {{ r.replicate }}       {{ r.enriched_in }}     {{ r.ucsc_db }} {{ r.paired_end|default_if_none:"n/a" }}        {{ r.technical_replicate|default_if_none:"n/a"}}{% endfor %}
diff --git a/htsworkflow/templates/trackhub_manifest.sparql b/htsworkflow/templates/trackhub_manifest.sparql

index cb5c4fd5195b87302ddd6236f19ea2b16339f8cf..36e57fa88fd0aa21f5cfbf0492b1e2e77365c57b 100644 (file)
--- a/htsworkflow/templates/trackhub_manifest.sparql
+++ b/htsworkflow/templates/trackhub_manifest.sparql
@@ -6,7 +6,7 @@ PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
  PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
  PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
  
-select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?replaces_accession ?replace_reason
+select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?paired_end ?technical_replicate ?replaces ?replace_reason
  WHERE {
    <{{submission}}> a submissionOntology:submission ;
                     submissionOntology:name ?name ;
@@ -23,6 +23,9 @@ WHERE {
  
    ?fileClass geoSoft:fileTypeLabel ?file_format ;
               ucscDaf:output_type ?output_type .
+  OPTIONAL { ?fileClass ucscDaf:paired_end ?paired_end . }
+  OPTIONAL { ?fileClass ucscDaf:technical_replicate ?technical_replicate . }
+
    
    ?library htswlib:replicate ?replicate ;
             ucscDaf:enriched_in ?enriched_in;
diff --git a/htsworkflow/util/rdfhelp.py b/htsworkflow/util/rdfhelp.py

index cbe2a9188b091d0cb5f22fa99069345728204ee9..ac5f6ccd1056a561418cd9b17e155ee936097630 100644 (file)
--- a/htsworkflow/util/rdfhelp.py
+++ b/htsworkflow/util/rdfhelp.py
@@ -271,15 +271,20 @@ def load_into_model(model, parser_name, path, ns=None):
  
      statements = []
      retries = 3
+    succeeded = False
      while retries > 0:
          try:
              retries -= 1
              statements = rdf_parser.parse_as_stream(url, ns)
              retries = 0
+            succeeded = True
          except RDF.RedlandError, e:
              errmsg = "RDF.RedlandError: {0} {1} tries remaining"
              logger.error(errmsg.format(str(e), retries))
-
+            
+    if not succeeded:
+        logger.warn("Unable to download %s", url)
+        
      for s in statements:
          conditionally_add_statement(model, s, ns)
  
diff --git a/htsworkflow/util/ucsc.py b/htsworkflow/util/ucsc.py

index e9ff77ee9cbb80af2b9e906d32987d8a8a7980e2..b96c46aed5c79de3bab81e3bad02b9c0813b1b35 100644 (file)
--- a/htsworkflow/util/ucsc.py
+++ b/htsworkflow/util/ucsc.py
@@ -3,6 +3,7 @@
  
  import logging
  import os
+import sys
  from subprocess import Popen, PIPE
  
  LOGGER = logging.getLogger(__name__)
@@ -51,17 +52,22 @@ class bigWigInfo:
      def scan_file(self, filename):
          cmd = ['bigWigInfo', 
                 filename]
-        p = Popen(cmd, stdout=PIPE)
-        stdout, _ = p.communicate()
-        for line in stdout.split(os.linesep):
-            if len(line) > 0:
-                term, value = line.split(': ')
-                if term in ('isCompressed', 'isSwapped'):
-                    value = parseBoolean(value)
-                else:
-                    value = parseNumber(value)
-                LOGGER.debug('%s: %s', term, str(value))
-                setattr(self, term, value)
+        try:
+            p = Popen(cmd, stdout=PIPE)
+            stdout, _ = p.communicate()
+            for line in stdout.split(os.linesep):
+                if len(line) > 0:
+                    term, value = line.split(': ')
+                    if term in ('isCompressed', 'isSwapped'):
+                        value = parseBoolean(value)
+                    else:
+                        value = parseNumber(value)
+                    LOGGER.debug('%s: %s', term, str(value))
+                    setattr(self, term, value)
+        except OSError as e:
+            LOGGER.error("Exception %s trying to run: %s", str(e), ' '.join(cmd))
+            sys.exit(-1)
+
author	Diane Trout <diane@ghic.org>
	Thu, 5 Dec 2013 23:28:36 +0000 (15:28 -0800)
committer	Diane Trout <diane@ghic.org>
	Thu, 5 Dec 2013 23:28:36 +0000 (15:28 -0800)
htsworkflow/pipelines/ipar.py		patch \| blob \| history
htsworkflow/pipelines/runfolder.py		patch \| blob \| history
htsworkflow/pipelines/test/test_runfolder_rta180.py		patch \| blob \| history
htsworkflow/templates/manifest.txt		patch \| blob \| history
htsworkflow/templates/trackhub_manifest.sparql		patch \| blob \| history
htsworkflow/util/rdfhelp.py		patch \| blob \| history
htsworkflow/util/ucsc.py		patch \| blob \| history