From: Diane Trout Date: Thu, 5 Dec 2013 23:28:36 +0000 (-0800) Subject: Merge branch 'django1.4' of mus.cacr.caltech.edu:htsworkflow into django1.4 X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=f7abcc50636dc384cb23fff66e0e618f39b879a0;hp=b352bcebfc6c74f84d65a05c8cdbfb55d94d029c Merge branch 'django1.4' of mus.cacr.caltech.edu:htsworkflow into django1.4 --- diff --git a/htsworkflow/pipelines/ipar.py b/htsworkflow/pipelines/ipar.py index f818b34..8203f5e 100644 --- a/htsworkflow/pipelines/ipar.py +++ b/htsworkflow/pipelines/ipar.py @@ -75,6 +75,14 @@ class IPAR(object): if xml is not None: self.set_elements(xml) + def _get_runfolder_name(self): + """Return runfolder name""" + if self.tree is None: + raise ValueError("Can't query an empty run") + runfolder = self.tree.xpath('RunParameters/Runfolder') + return runfolder + runfolder_name = property(_get_runfolder) + def _get_software(self): """Return software name""" if self.tree is None: diff --git a/htsworkflow/pipelines/runfolder.py b/htsworkflow/pipelines/runfolder.py index 7c06e21..669c5f0 100644 --- a/htsworkflow/pipelines/runfolder.py +++ b/htsworkflow/pipelines/runfolder.py @@ -129,10 +129,12 @@ class PipelineRun(object): return path_fields[-1] def _get_runfolder_name(self): - if self.gerald is None: - return None - else: + if self.gerald: return self.gerald.runfolder_name + elif hasattr(self.image_analysis, 'runfolder_name'): + return self.image_analysis.runfolder_name + else: + return None runfolder_name = property(_get_runfolder_name) def _get_run_dirname(self): @@ -361,12 +363,12 @@ def build_hiseq_runs(image_analysis, runs, datadir, runfolder, flowcell_id): p.suffix = suffix p.image_analysis = image_analysis p.bustard = bustard.bustard(unaligned) - assert p.bustard if aligned: p.gerald = gerald.gerald(aligned) runs.append(p) - except IOError, e: - LOGGER.error("Ignoring " + str(e)) + except (IOError, RuntimeError) as e: + LOGGER.error("Exception %s", str(e)) + LOGGER.error("Skipping run in %s", flowcell_id) return len(runs) - start def hiseq_match_aligned_unaligned(aligned, unaligned): diff --git a/htsworkflow/pipelines/test/test_runfolder_rta180.py b/htsworkflow/pipelines/test/test_runfolder_rta180.py index 63b4a8d..0db7857 100644 --- a/htsworkflow/pipelines/test/test_runfolder_rta180.py +++ b/htsworkflow/pipelines/test/test_runfolder_rta180.py @@ -25,9 +25,8 @@ def make_runfolder(obj=None): temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_') flowcell_id = '4286GAAXX' - runfolder_dir = os.path.join( - temp_dir, - '090608_HWI-EAS229_0117_{0}'.format(flowcell_id)) + runfolder = '090608_HWI-EAS229_0117_{0}'.format(flowcell_id) + runfolder_dir = os.path.join(temp_dir, runfolder) os.mkdir(runfolder_dir) data_dir = os.path.join(runfolder_dir, 'Data') @@ -50,6 +49,7 @@ def make_runfolder(obj=None): if obj is not None: obj.flowcell_id = flowcell_id obj.temp_dir = temp_dir + obj.runfolder = runfolder obj.runfolder_dir = runfolder_dir obj.data_dir = data_dir obj.image_analysis_dir = intensities_dir @@ -262,8 +262,9 @@ class RunfolderTests(TestCase): # do we get the flowcell id from the filename? self.failUnlessEqual(len(runs), 1) - name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),) + name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),) self.failUnlessEqual(runs[0].serialization_filename, name) + self.assertEqual(runs[0].runfolder_name, '090220_HWI-EAS229_0093_30VR0AAXX') # do we get the flowcell id from the FlowcellId.xml file make_flowcell_id(self.runfolder_dir, '207BTAAXY') @@ -272,6 +273,7 @@ class RunfolderTests(TestCase): name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),) self.failUnlessEqual(runs[0].serialization_filename, name) + r1 = runs[0] xml = r1.get_elements() xml_str = ElementTree.tostring(xml) diff --git a/htsworkflow/templates/manifest.txt b/htsworkflow/templates/manifest.txt index c81259c..21c8a47 100644 --- a/htsworkflow/templates/manifest.txt +++ b/htsworkflow/templates/manifest.txt @@ -1,2 +1,3 @@ -#file_name format output_type experiment replicate enriched_in ucsc_db replaces replace_reason{% for r in files %} -{{ r.relative_path }} {{ r.file_format }} {{ r.output_type }} {{ r.dataset_id }} {{ r.replicate }} {{ r.enriched_in }} {{ r.ucsc_db }} {{ r.replaces_accession|default_if_none:"" }} {{ r.replace_reason|default_if_none:"" }}{% endfor %} +#version 1.7 +#file_name format output_type experiment replicate enriched_in ucsc_db paired_end technical_replicate{% for r in files %} +{{ r.relative_path }} {{ r.file_format }} {{ r.output_type }} {{ r.dataset_id }} {{ r.replicate }} {{ r.enriched_in }} {{ r.ucsc_db }} {{ r.paired_end|default_if_none:"n/a" }} {{ r.technical_replicate|default_if_none:"n/a"}}{% endfor %} diff --git a/htsworkflow/templates/trackhub_manifest.sparql b/htsworkflow/templates/trackhub_manifest.sparql index cb5c4fd..36e57fa 100644 --- a/htsworkflow/templates/trackhub_manifest.sparql +++ b/htsworkflow/templates/trackhub_manifest.sparql @@ -6,7 +6,7 @@ PREFIX ncbiTaxon: PREFIX geoSoft: PREFIX cells: -select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?replaces_accession ?replace_reason +select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?paired_end ?technical_replicate ?replaces ?replace_reason WHERE { <{{submission}}> a submissionOntology:submission ; submissionOntology:name ?name ; @@ -23,6 +23,9 @@ WHERE { ?fileClass geoSoft:fileTypeLabel ?file_format ; ucscDaf:output_type ?output_type . + OPTIONAL { ?fileClass ucscDaf:paired_end ?paired_end . } + OPTIONAL { ?fileClass ucscDaf:technical_replicate ?technical_replicate . } + ?library htswlib:replicate ?replicate ; ucscDaf:enriched_in ?enriched_in; diff --git a/htsworkflow/util/rdfhelp.py b/htsworkflow/util/rdfhelp.py index cbe2a91..ac5f6cc 100644 --- a/htsworkflow/util/rdfhelp.py +++ b/htsworkflow/util/rdfhelp.py @@ -271,15 +271,20 @@ def load_into_model(model, parser_name, path, ns=None): statements = [] retries = 3 + succeeded = False while retries > 0: try: retries -= 1 statements = rdf_parser.parse_as_stream(url, ns) retries = 0 + succeeded = True except RDF.RedlandError, e: errmsg = "RDF.RedlandError: {0} {1} tries remaining" logger.error(errmsg.format(str(e), retries)) - + + if not succeeded: + logger.warn("Unable to download %s", url) + for s in statements: conditionally_add_statement(model, s, ns) diff --git a/htsworkflow/util/ucsc.py b/htsworkflow/util/ucsc.py index e9ff77e..b96c46a 100644 --- a/htsworkflow/util/ucsc.py +++ b/htsworkflow/util/ucsc.py @@ -3,6 +3,7 @@ import logging import os +import sys from subprocess import Popen, PIPE LOGGER = logging.getLogger(__name__) @@ -51,17 +52,22 @@ class bigWigInfo: def scan_file(self, filename): cmd = ['bigWigInfo', filename] - p = Popen(cmd, stdout=PIPE) - stdout, _ = p.communicate() - for line in stdout.split(os.linesep): - if len(line) > 0: - term, value = line.split(': ') - if term in ('isCompressed', 'isSwapped'): - value = parseBoolean(value) - else: - value = parseNumber(value) - LOGGER.debug('%s: %s', term, str(value)) - setattr(self, term, value) + try: + p = Popen(cmd, stdout=PIPE) + stdout, _ = p.communicate() + for line in stdout.split(os.linesep): + if len(line) > 0: + term, value = line.split(': ') + if term in ('isCompressed', 'isSwapped'): + value = parseBoolean(value) + else: + value = parseNumber(value) + LOGGER.debug('%s: %s', term, str(value)) + setattr(self, term, value) + except OSError as e: + LOGGER.error("Exception %s trying to run: %s", str(e), ' '.join(cmd)) + sys.exit(-1) +