Merge branch 'django1.4' of mus.cacr.caltech.edu:htsworkflow into django1.4
author Diane Trout <diane@ghic.org>
Thu, 5 Dec 2013 23:28:36 +0000 (15:28 -0800)
committer Diane Trout <diane@ghic.org>
Thu, 5 Dec 2013 23:28:36 +0000 (15:28 -0800)
htsworkflow/pipelines/ipar.py
htsworkflow/pipelines/runfolder.py
htsworkflow/pipelines/test/test_runfolder_rta180.py
htsworkflow/templates/manifest.txt
htsworkflow/templates/trackhub_manifest.sparql
htsworkflow/util/rdfhelp.py
htsworkflow/util/ucsc.py

index f818b3495fbf2fd2d8f2fba13d698188e7680d3f..8203f5e0f29fda173728e3f504522f6cd579520f 100644 (file)
@@ -75,6 +75,14 @@ class IPAR(object):
         if xml is not None:
             self.set_elements(xml)
 
+    def _get_runfolder_name(self):
+        """Return the 'RunParameters/Runfolder' XPath result (ValueError if no tree)."""
+        if self.tree is None:
+            raise ValueError("Can't query an empty run")
+        runfolder = self.tree.xpath('RunParameters/Runfolder')
+        return runfolder
+    runfolder_name = property(_get_runfolder_name)
+    
     def _get_software(self):
         """Return software name"""
         if self.tree is None:
index 7c06e217de63d5733e524ae551f06eea40528e11..669c5f03363dfc8c647730133af71678319d0336 100644 (file)
@@ -129,10 +129,12 @@ class PipelineRun(object):
             return path_fields[-1]
 
     def _get_runfolder_name(self):
-        if self.gerald is None:
-            return None
-        else:
+        if self.gerald:
             return self.gerald.runfolder_name
+        elif hasattr(self.image_analysis, 'runfolder_name'):
+            return self.image_analysis.runfolder_name
+        else:
+            return None
     runfolder_name = property(_get_runfolder_name)
 
     def _get_run_dirname(self):
@@ -361,12 +363,12 @@ def build_hiseq_runs(image_analysis, runs, datadir, runfolder, flowcell_id):
             p.suffix = suffix
             p.image_analysis = image_analysis
             p.bustard = bustard.bustard(unaligned)
-            assert p.bustard
             if aligned:
                 p.gerald = gerald.gerald(aligned)
             runs.append(p)
-        except IOError, e:
-            LOGGER.error("Ignoring " + str(e))
+        except (IOError, RuntimeError) as e:
+           LOGGER.error("Exception %s", str(e))
+            LOGGER.error("Skipping run in %s", flowcell_id)
     return len(runs) - start
 
 def hiseq_match_aligned_unaligned(aligned, unaligned):
index 63b4a8df0c7176b8ced6aa1a022a8b22f7c6a7bb..0db7857399cd8f9c391a8b8c58fb0b5023fef0f5 100644 (file)
@@ -25,9 +25,8 @@ def make_runfolder(obj=None):
     temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
 
     flowcell_id = '4286GAAXX'
-    runfolder_dir = os.path.join(
-        temp_dir,
-        '090608_HWI-EAS229_0117_{0}'.format(flowcell_id))
+    runfolder = '090608_HWI-EAS229_0117_{0}'.format(flowcell_id)
+    runfolder_dir = os.path.join(temp_dir, runfolder)
     os.mkdir(runfolder_dir)
 
     data_dir = os.path.join(runfolder_dir, 'Data')
@@ -50,6 +49,7 @@ def make_runfolder(obj=None):
     if obj is not None:
         obj.flowcell_id = flowcell_id
         obj.temp_dir = temp_dir
+        obj.runfolder = runfolder
         obj.runfolder_dir = runfolder_dir
         obj.data_dir = data_dir
         obj.image_analysis_dir = intensities_dir
@@ -262,8 +262,9 @@ class RunfolderTests(TestCase):
 
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
-        name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
+        name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)        
         self.failUnlessEqual(runs[0].serialization_filename, name)
+        self.assertEqual(runs[0].runfolder_name, '090220_HWI-EAS229_0093_30VR0AAXX')
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
@@ -272,6 +273,7 @@ class RunfolderTests(TestCase):
         name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
         self.failUnlessEqual(runs[0].serialization_filename, name)
 
+
         r1 = runs[0]
         xml = r1.get_elements()
         xml_str = ElementTree.tostring(xml)
index c81259cb77949fdffccb321f59e93634fc5e1540..21c8a474cdee41d6bd35cb068b8a0818881a784b 100644 (file)
@@ -1,2 +1,3 @@
-#file_name     format  output_type     experiment      replicate       enriched_in     ucsc_db replaces        replace_reason{% for r in files %}
-{{ r.relative_path }}  {{ r.file_format }}     {{ r.output_type }}     {{ r.dataset_id }}      {{ r.replicate }}       {{ r.enriched_in }}     {{ r.ucsc_db }} {{ r.replaces_accession|default_if_none:"" }}   {{ r.replace_reason|default_if_none:"" }}{% endfor %}
+#version 1.7
+#file_name     format  output_type     experiment      replicate       enriched_in     ucsc_db paired_end      technical_replicate{% for r in files %}
+{{ r.relative_path }}  {{ r.file_format }}     {{ r.output_type }}     {{ r.dataset_id }}      {{ r.replicate }}       {{ r.enriched_in }}     {{ r.ucsc_db }} {{ r.paired_end|default_if_none:"n/a" }}        {{ r.technical_replicate|default_if_none:"n/a"}}{% endfor %}
index cb5c4fd5195b87302ddd6236f19ea2b16339f8cf..36e57fa88fd0aa21f5cfbf0492b1e2e77365c57b 100644 (file)
@@ -6,7 +6,7 @@ PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
 PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
 PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
 
-select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?replaces_accession ?replace_reason
+select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?paired_end ?technical_replicate ?replaces ?replace_reason
 WHERE {
   <{{submission}}> a submissionOntology:submission ;
                    submissionOntology:name ?name ;
@@ -23,6 +23,9 @@ WHERE {
 
   ?fileClass geoSoft:fileTypeLabel ?file_format ;
              ucscDaf:output_type ?output_type .
+  OPTIONAL { ?fileClass ucscDaf:paired_end ?paired_end . }
+  OPTIONAL { ?fileClass ucscDaf:technical_replicate ?technical_replicate . }
+
   
   ?library htswlib:replicate ?replicate ;
            ucscDaf:enriched_in ?enriched_in;
index cbe2a9188b091d0cb5f22fa99069345728204ee9..ac5f6ccd1056a561418cd9b17e155ee936097630 100644 (file)
@@ -271,15 +271,20 @@ def load_into_model(model, parser_name, path, ns=None):
 
     statements = []
     retries = 3
+    succeeded = False
     while retries > 0:
         try:
             retries -= 1
             statements = rdf_parser.parse_as_stream(url, ns)
             retries = 0
+            succeeded = True
         except RDF.RedlandError, e:
             errmsg = "RDF.RedlandError: {0} {1} tries remaining"
             logger.error(errmsg.format(str(e), retries))
-
+            
+    if not succeeded:
+        logger.warn("Unable to download %s", url)
+        
     for s in statements:
         conditionally_add_statement(model, s, ns)
 
index e9ff77ee9cbb80af2b9e906d32987d8a8a7980e2..b96c46aed5c79de3bab81e3bad02b9c0813b1b35 100644 (file)
@@ -3,6 +3,7 @@
 
 import logging
 import os
+import sys
 from subprocess import Popen, PIPE
 
 LOGGER = logging.getLogger(__name__)
@@ -51,17 +52,22 @@ class bigWigInfo:
     def scan_file(self, filename):
         cmd = ['bigWigInfo', 
                filename]
-        p = Popen(cmd, stdout=PIPE)
-        stdout, _ = p.communicate()
-        for line in stdout.split(os.linesep):
-            if len(line) > 0:
-                term, value = line.split(': ')
-                if term in ('isCompressed', 'isSwapped'):
-                    value = parseBoolean(value)
-                else:
-                    value = parseNumber(value)
-                LOGGER.debug('%s: %s', term, str(value))
-                setattr(self, term, value)
+        try:
+            p = Popen(cmd, stdout=PIPE)
+            stdout, _ = p.communicate()
+            for line in stdout.split(os.linesep):
+                if len(line) > 0:
+                    term, value = line.split(': ')
+                    if term in ('isCompressed', 'isSwapped'):
+                        value = parseBoolean(value)
+                    else:
+                        value = parseNumber(value)
+                    LOGGER.debug('%s: %s', term, str(value))
+                    setattr(self, term, value)
+        except OSError as e:
+            LOGGER.error("Exception %s trying to run: %s", str(e), ' '.join(cmd))
+            sys.exit(-1)
+