From: Diane Trout <diane@ghic.org>
Date: Thu, 5 Dec 2013 23:28:36 +0000 (-0800)
Subject: Merge branch 'django1.4' of mus.cacr.caltech.edu:htsworkflow into django1.4
X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=f7abcc50636dc384cb23fff66e0e618f39b879a0;hp=b352bcebfc6c74f84d65a05c8cdbfb55d94d029c

Merge branch 'django1.4' of mus.cacr.caltech.edu:htsworkflow into django1.4
---

diff --git a/htsworkflow/pipelines/ipar.py b/htsworkflow/pipelines/ipar.py
index f818b34..8203f5e 100644
--- a/htsworkflow/pipelines/ipar.py
+++ b/htsworkflow/pipelines/ipar.py
@@ -75,6 +75,14 @@ class IPAR(object):
         if xml is not None:
             self.set_elements(xml)
 
+    def _get_runfolder_name(self):
+        """Return the result of the RunParameters/Runfolder XPath query."""
+        if self.tree is None:
+            raise ValueError("Can't query an empty run")
+        runfolder = self.tree.xpath('RunParameters/Runfolder')
+        return runfolder
+    runfolder_name = property(_get_runfolder_name)
+    
     def _get_software(self):
         """Return software name"""
         if self.tree is None:
diff --git a/htsworkflow/pipelines/runfolder.py b/htsworkflow/pipelines/runfolder.py
index 7c06e21..669c5f0 100644
--- a/htsworkflow/pipelines/runfolder.py
+++ b/htsworkflow/pipelines/runfolder.py
@@ -129,10 +129,12 @@ class PipelineRun(object):
             return path_fields[-1]
 
     def _get_runfolder_name(self):
-        if self.gerald is None:
-            return None
-        else:
+        if self.gerald:
             return self.gerald.runfolder_name
+        elif hasattr(self.image_analysis, 'runfolder_name'):
+            return self.image_analysis.runfolder_name
+        else:
+            return None
     runfolder_name = property(_get_runfolder_name)
 
     def _get_run_dirname(self):
@@ -361,12 +363,12 @@ def build_hiseq_runs(image_analysis, runs, datadir, runfolder, flowcell_id):
             p.suffix = suffix
             p.image_analysis = image_analysis
             p.bustard = bustard.bustard(unaligned)
-            assert p.bustard
             if aligned:
                 p.gerald = gerald.gerald(aligned)
             runs.append(p)
-        except IOError, e:
-            LOGGER.error("Ignoring " + str(e))
+        except (IOError, RuntimeError) as e:
+	    LOGGER.error("Exception %s", str(e))
+            LOGGER.error("Skipping run in %s", flowcell_id)
     return len(runs) - start
 
 def hiseq_match_aligned_unaligned(aligned, unaligned):
diff --git a/htsworkflow/pipelines/test/test_runfolder_rta180.py b/htsworkflow/pipelines/test/test_runfolder_rta180.py
index 63b4a8d..0db7857 100644
--- a/htsworkflow/pipelines/test/test_runfolder_rta180.py
+++ b/htsworkflow/pipelines/test/test_runfolder_rta180.py
@@ -25,9 +25,8 @@ def make_runfolder(obj=None):
     temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
 
     flowcell_id = '4286GAAXX'
-    runfolder_dir = os.path.join(
-        temp_dir,
-        '090608_HWI-EAS229_0117_{0}'.format(flowcell_id))
+    runfolder = '090608_HWI-EAS229_0117_{0}'.format(flowcell_id)
+    runfolder_dir = os.path.join(temp_dir, runfolder)
     os.mkdir(runfolder_dir)
 
     data_dir = os.path.join(runfolder_dir, 'Data')
@@ -50,6 +49,7 @@ def make_runfolder(obj=None):
     if obj is not None:
         obj.flowcell_id = flowcell_id
         obj.temp_dir = temp_dir
+        obj.runfolder = runfolder
         obj.runfolder_dir = runfolder_dir
         obj.data_dir = data_dir
         obj.image_analysis_dir = intensities_dir
@@ -262,8 +262,9 @@ class RunfolderTests(TestCase):
 
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
-        name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
+        name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)        
         self.failUnlessEqual(runs[0].serialization_filename, name)
+        self.assertEqual(runs[0].runfolder_name, '090220_HWI-EAS229_0093_30VR0AAXX')
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
@@ -272,6 +273,7 @@ class RunfolderTests(TestCase):
         name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
         self.failUnlessEqual(runs[0].serialization_filename, name)
 
+
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
diff --git a/htsworkflow/templates/manifest.txt b/htsworkflow/templates/manifest.txt
index c81259c..21c8a47 100644
--- a/htsworkflow/templates/manifest.txt
+++ b/htsworkflow/templates/manifest.txt
@@ -1,2 +1,3 @@
-#file_name	format	output_type	experiment	replicate	enriched_in	ucsc_db	replaces	replace_reason{% for r in files %}
-{{ r.relative_path }}	{{ r.file_format }}	{{ r.output_type }}	{{ r.dataset_id }}	{{ r.replicate }}	{{ r.enriched_in }}	{{ r.ucsc_db }}	{{ r.replaces_accession|default_if_none:"" }}	{{ r.replace_reason|default_if_none:"" }}{% endfor %}
+#version 1.7
+#file_name	format	output_type	experiment	replicate	enriched_in	ucsc_db	paired_end	technical_replicate{% for r in files %}
+{{ r.relative_path }}	{{ r.file_format }}	{{ r.output_type }}	{{ r.dataset_id }}	{{ r.replicate }}	{{ r.enriched_in }}	{{ r.ucsc_db }}	{{ r.paired_end|default_if_none:"n/a" }}	{{ r.technical_replicate|default_if_none:"n/a"}}{% endfor %}
diff --git a/htsworkflow/templates/trackhub_manifest.sparql b/htsworkflow/templates/trackhub_manifest.sparql
index cb5c4fd..36e57fa 100644
--- a/htsworkflow/templates/trackhub_manifest.sparql
+++ b/htsworkflow/templates/trackhub_manifest.sparql
@@ -6,7 +6,7 @@ PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
 PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
 PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
 
-select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?replaces_accession ?replace_reason
+select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?paired_end ?technical_replicate ?replaces ?replace_reason
 WHERE {
   <{{submission}}> a submissionOntology:submission ;
                    submissionOntology:name ?name ;
@@ -23,6 +23,9 @@ WHERE {
 
   ?fileClass geoSoft:fileTypeLabel ?file_format ;
              ucscDaf:output_type ?output_type .
+  OPTIONAL { ?fileClass ucscDaf:paired_end ?paired_end . }
+  OPTIONAL { ?fileClass ucscDaf:technical_replicate ?technical_replicate . }
+
   
   ?library htswlib:replicate ?replicate ;
            ucscDaf:enriched_in ?enriched_in;
diff --git a/htsworkflow/util/rdfhelp.py b/htsworkflow/util/rdfhelp.py
index cbe2a91..ac5f6cc 100644
--- a/htsworkflow/util/rdfhelp.py
+++ b/htsworkflow/util/rdfhelp.py
@@ -271,15 +271,20 @@ def load_into_model(model, parser_name, path, ns=None):
 
     statements = []
     retries = 3
+    succeeded = False
     while retries > 0:
         try:
             retries -= 1
             statements = rdf_parser.parse_as_stream(url, ns)
             retries = 0
+            succeeded = True
         except RDF.RedlandError, e:
             errmsg = "RDF.RedlandError: {0} {1} tries remaining"
             logger.error(errmsg.format(str(e), retries))
-
+            
+    if not succeeded:
+        logger.warn("Unable to download %s", url)
+        
     for s in statements:
         conditionally_add_statement(model, s, ns)
 
diff --git a/htsworkflow/util/ucsc.py b/htsworkflow/util/ucsc.py
index e9ff77e..b96c46a 100644
--- a/htsworkflow/util/ucsc.py
+++ b/htsworkflow/util/ucsc.py
@@ -3,6 +3,7 @@
 
 import logging
 import os
+import sys
 from subprocess import Popen, PIPE
 
 LOGGER = logging.getLogger(__name__)
@@ -51,17 +52,22 @@ class bigWigInfo:
     def scan_file(self, filename):
         cmd = ['bigWigInfo', 
                filename]
-        p = Popen(cmd, stdout=PIPE)
-        stdout, _ = p.communicate()
-        for line in stdout.split(os.linesep):
-            if len(line) > 0:
-                term, value = line.split(': ')
-                if term in ('isCompressed', 'isSwapped'):
-                    value = parseBoolean(value)
-                else:
-                    value = parseNumber(value)
-                LOGGER.debug('%s: %s', term, str(value))
-                setattr(self, term, value)
+        try:
+            p = Popen(cmd, stdout=PIPE)
+            stdout, _ = p.communicate()
+            for line in stdout.split(os.linesep):
+                if len(line) > 0:
+                    term, value = line.split(': ')
+                    if term in ('isCompressed', 'isSwapped'):
+                        value = parseBoolean(value)
+                    else:
+                        value = parseNumber(value)
+                    LOGGER.debug('%s: %s', term, str(value))
+                    setattr(self, term, value)
+        except OSError as e:
+            LOGGER.error("Exception %s trying to run: %s", str(e), ' '.join(cmd))
+            sys.exit(-1)
+