From: Diane Trout Date: Thu, 5 Dec 2013 23:06:14 +0000 (-0800) Subject: Merge branch 'django1.4' of mus.cacr.caltech.edu:htsworkflow into django1.4 X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=86dc57e6227c79cd3f599e3d8bac46a44f961607;hp=a116f737719d577bc5846eea895f6b159ad08430 Merge branch 'django1.4' of mus.cacr.caltech.edu:htsworkflow into django1.4 --- diff --git a/htsworkflow/pipelines/ipar.py b/htsworkflow/pipelines/ipar.py index f818b34..8203f5e 100644 --- a/htsworkflow/pipelines/ipar.py +++ b/htsworkflow/pipelines/ipar.py @@ -75,6 +75,14 @@ class IPAR(object): if xml is not None: self.set_elements(xml) + def _get_runfolder_name(self): + """Return runfolder name""" + if self.tree is None: + raise ValueError("Can't query an empty run") + runfolder = self.tree.xpath('RunParameters/Runfolder') + return runfolder + runfolder_name = property(_get_runfolder) + def _get_software(self): """Return software name""" if self.tree is None: diff --git a/htsworkflow/pipelines/runfolder.py b/htsworkflow/pipelines/runfolder.py index da1bbe6..669c5f0 100644 --- a/htsworkflow/pipelines/runfolder.py +++ b/htsworkflow/pipelines/runfolder.py @@ -129,10 +129,12 @@ class PipelineRun(object): return path_fields[-1] def _get_runfolder_name(self): - if self.gerald is None: - return None - else: + if self.gerald: return self.gerald.runfolder_name + elif hasattr(self.image_analysis, 'runfolder_name'): + return self.image_analysis.runfolder_name + else: + return None runfolder_name = property(_get_runfolder_name) def _get_run_dirname(self): diff --git a/htsworkflow/pipelines/test/test_runfolder_rta180.py b/htsworkflow/pipelines/test/test_runfolder_rta180.py index 63b4a8d..0db7857 100644 --- a/htsworkflow/pipelines/test/test_runfolder_rta180.py +++ b/htsworkflow/pipelines/test/test_runfolder_rta180.py @@ -25,9 +25,8 @@ def make_runfolder(obj=None): temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_') flowcell_id = '4286GAAXX' - runfolder_dir = os.path.join( - temp_dir, - '090608_HWI-EAS229_0117_{0}'.format(flowcell_id)) + runfolder = '090608_HWI-EAS229_0117_{0}'.format(flowcell_id) + runfolder_dir = os.path.join(temp_dir, runfolder) os.mkdir(runfolder_dir) data_dir = os.path.join(runfolder_dir, 'Data') @@ -50,6 +49,7 @@ def make_runfolder(obj=None): if obj is not None: obj.flowcell_id = flowcell_id obj.temp_dir = temp_dir + obj.runfolder = runfolder obj.runfolder_dir = runfolder_dir obj.data_dir = data_dir obj.image_analysis_dir = intensities_dir @@ -262,8 +262,9 @@ class RunfolderTests(TestCase): # do we get the flowcell id from the filename? self.failUnlessEqual(len(runs), 1) - name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),) + name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),) self.failUnlessEqual(runs[0].serialization_filename, name) + self.assertEqual(runs[0].runfolder_name, '090220_HWI-EAS229_0093_30VR0AAXX') # do we get the flowcell id from the FlowcellId.xml file make_flowcell_id(self.runfolder_dir, '207BTAAXY') @@ -272,6 +273,7 @@ class RunfolderTests(TestCase): name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),) self.failUnlessEqual(runs[0].serialization_filename, name) + r1 = runs[0] xml = r1.get_elements() xml_str = ElementTree.tostring(xml) diff --git a/htsworkflow/submission/trackhub_submission.py b/htsworkflow/submission/trackhub_submission.py index 7a83e87..e383175 100644 --- a/htsworkflow/submission/trackhub_submission.py +++ b/htsworkflow/submission/trackhub_submission.py @@ -1,5 +1,6 @@ import logging import os +from pprint import pformat import string import re @@ -13,6 +14,7 @@ from htsworkflow.util.rdfhelp import \ stripNamespace, \ submissionOntology from htsworkflow.util.url import parse_ssh_url +from htsworkflow.util.ucsc import bigWigInfo from django.conf import settings from django.template import Context, loader @@ -104,20 +106,24 @@ class TrackHubSubmission(Submission): track_name = self.make_track_name(track) track_subgroup = self.make_track_subgroups(subgroups, track) + track_type = self.make_track_type(track) if 'file_label' in track: track_label = self.sanitize_name(track['file_label']) else: track_label = track_name - newtrack = Track( - name=track_name, - tracktype = str(track['file_type']), - url= hub_url + str(track['relative_path']), - short_label=str(track['library_id']), - long_label=str(track_label), - subgroups=track_subgroup, - ) + attributes = { + 'name': track_name, + 'tracktype': track_type, + 'url': hub_url + str(track['relative_path']), + 'short_label': str(track['library_id']), + 'long_label': str(track_label), + 'subgroups': track_subgroup, + } + + LOGGER.debug('track attributes: %s', pformat(attributes)) + newtrack = Track(**attributes) view.add_tracks([newtrack]) results = hub.render() @@ -136,13 +142,20 @@ class TrackHubSubmission(Submission): """ current_view_type = str(track['output_type']) if not view or current_view_type != view.name: - view = ViewTrack( - name=current_view_type, - view=current_view_type, - visibility='squish', - short_label=current_view_type, - tracktype=str(track['file_type']), - ) + attributes = { + 'name': current_view_type, + 'view': current_view_type, + 'visibility': str(track.get('visibility', 'squish')), + 'short_label': current_view_type, + 'tracktype': str(track['file_type']) + } + maxHeightPixels = track.get('maxHeightPixels') + if maxHeightPixels: + attributes['maxHeightPixels'] = str(maxHeightPixels) + autoScale = track.get('autoScale') + if autoScale: + attributes['autoScale'] = str(autoScale) + view = ViewTrack(**attributes) composite.add_view(view) view_type = current_view_type return view @@ -173,6 +186,21 @@ class TrackHubSubmission(Submission): value = self.sanitize_name(track[k]) track_subgroups[k] = value return track_subgroups + + def make_track_type(self, track): + """Further annotate tracktype. + + bigWig files can have additional information. Add it if we can + """ + track_type = track['file_type'] + if track_type.lower() == 'bigwig': + # something we can enhance + info = bigWigInfo(track['relative_path']) + if info.min is not None and info.max is not None: + track_type = '{} {} {}'.format(track_type, int(info.min), int(info.max)) + + LOGGER.debug("track_type: %s", track_type) + return str(track_type) def add_subgroups(self, composite): """Add subgroups to composite track""" diff --git a/htsworkflow/templates/manifest.txt b/htsworkflow/templates/manifest.txt index c81259c..21c8a47 100644 --- a/htsworkflow/templates/manifest.txt +++ b/htsworkflow/templates/manifest.txt @@ -1,2 +1,3 @@ -#file_name format output_type experiment replicate enriched_in ucsc_db replaces replace_reason{% for r in files %} -{{ r.relative_path }} {{ r.file_format }} {{ r.output_type }} {{ r.dataset_id }} {{ r.replicate }} {{ r.enriched_in }} {{ r.ucsc_db }} {{ r.replaces_accession|default_if_none:"" }} {{ r.replace_reason|default_if_none:"" }}{% endfor %} +#version 1.7 +#file_name format output_type experiment replicate enriched_in ucsc_db paired_end technical_replicate{% for r in files %} +{{ r.relative_path }} {{ r.file_format }} {{ r.output_type }} {{ r.dataset_id }} {{ r.replicate }} {{ r.enriched_in }} {{ r.ucsc_db }} {{ r.paired_end|default_if_none:"n/a" }} {{ r.technical_replicate|default_if_none:"n/a"}}{% endfor %} diff --git a/htsworkflow/templates/trackhub_manifest.sparql b/htsworkflow/templates/trackhub_manifest.sparql index cb5c4fd..36e57fa 100644 --- a/htsworkflow/templates/trackhub_manifest.sparql +++ b/htsworkflow/templates/trackhub_manifest.sparql @@ -6,7 +6,7 @@ PREFIX ncbiTaxon: PREFIX geoSoft: PREFIX cells: -select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?replaces_accession ?replace_reason +select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?paired_end ?technical_replicate ?replaces ?replace_reason WHERE { <{{submission}}> a submissionOntology:submission ; submissionOntology:name ?name ; @@ -23,6 +23,9 @@ WHERE { ?fileClass geoSoft:fileTypeLabel ?file_format ; ucscDaf:output_type ?output_type . + OPTIONAL { ?fileClass ucscDaf:paired_end ?paired_end . } + OPTIONAL { ?fileClass ucscDaf:technical_replicate ?technical_replicate . } + ?library htswlib:replicate ?replicate ; ucscDaf:enriched_in ?enriched_in; diff --git a/htsworkflow/templates/trackhub_samples.sparql b/htsworkflow/templates/trackhub_samples.sparql index 9515274..6259fce 100644 --- a/htsworkflow/templates/trackhub_samples.sparql +++ b/htsworkflow/templates/trackhub_samples.sparql @@ -4,14 +4,18 @@ PREFIX htswlib: PREFIX submissionOntology: PREFIX ucscDaf: PREFIX ncbiTaxon: +PREFIX trackdb: PREFIX geoSoft: PREFIX cells: PREFIX encode3: -select distinct ?lab_library_id ?library_id ?filename ?relative_path ?output_type ?file_type ?cell ?replicate ?assay ?rna_type ?protocol ?file_label +select distinct ?lab_library_id ?library_id ?filename ?relative_path ?output_type ?file_type ?cell ?replicate ?assay ?rna_type ?protocol ?file_label ?autoScale ?maxHeightPixels ?visibility WHERE { - ?trackType geoSoft:fileTypeLabel ?file_type ; + ?trackType trackdb:type ?file_type ; ucscDaf:output_type ?output_type . + OPTIONAL { ?trackType trackdb:autoScale ?autoScale . } + OPTIONAL { ?trackType trackdb:maxHeightPixels ?maxHeightPixels . } + OPTIONAL { ?trackType trackdb:visibility ?visibility . } ?file ucscDaf:filename ?filename ; ucscDaf:relative_path ?relative_path ; htswlib:library ?library ; diff --git a/htsworkflow/util/rdfhelp.py b/htsworkflow/util/rdfhelp.py index cbe2a91..ac5f6cc 100644 --- a/htsworkflow/util/rdfhelp.py +++ b/htsworkflow/util/rdfhelp.py @@ -271,15 +271,20 @@ def load_into_model(model, parser_name, path, ns=None): statements = [] retries = 3 + succeeded = False while retries > 0: try: retries -= 1 statements = rdf_parser.parse_as_stream(url, ns) retries = 0 + succeeded = True except RDF.RedlandError, e: errmsg = "RDF.RedlandError: {0} {1} tries remaining" logger.error(errmsg.format(str(e), retries)) - + + if not succeeded: + logger.warn("Unable to download %s", url) + for s in statements: conditionally_add_statement(model, s, ns) diff --git a/htsworkflow/util/ucsc.py b/htsworkflow/util/ucsc.py index e9ff77e..b96c46a 100644 --- a/htsworkflow/util/ucsc.py +++ b/htsworkflow/util/ucsc.py @@ -3,6 +3,7 @@ import logging import os +import sys from subprocess import Popen, PIPE LOGGER = logging.getLogger(__name__) @@ -51,17 +52,22 @@ class bigWigInfo: def scan_file(self, filename): cmd = ['bigWigInfo', filename] - p = Popen(cmd, stdout=PIPE) - stdout, _ = p.communicate() - for line in stdout.split(os.linesep): - if len(line) > 0: - term, value = line.split(': ') - if term in ('isCompressed', 'isSwapped'): - value = parseBoolean(value) - else: - value = parseNumber(value) - LOGGER.debug('%s: %s', term, str(value)) - setattr(self, term, value) + try: + p = Popen(cmd, stdout=PIPE) + stdout, _ = p.communicate() + for line in stdout.split(os.linesep): + if len(line) > 0: + term, value = line.split(': ') + if term in ('isCompressed', 'isSwapped'): + value = parseBoolean(value) + else: + value = parseNumber(value) + LOGGER.debug('%s: %s', term, str(value)) + setattr(self, term, value) + except OSError as e: + LOGGER.error("Exception %s trying to run: %s", str(e), ' '.join(cmd)) + sys.exit(-1) +