if xml is not None:
self.set_elements(xml)
+ def _get_runfolder_name(self):
+     """Return the runfolder name recorded in the run parameters.
+
+     Queries ``RunParameters/Runfolder`` on ``self.tree`` and returns the
+     text of the first match, or None when nothing matches.
+
+     Raises ValueError if no tree has been loaded.
+     """
+     if self.tree is None:
+         raise ValueError("Can't query an empty run")
+     # NOTE(review): assumes self.tree is an lxml tree, whose xpath()
+     # returns a list of matching elements rather than a string - confirm.
+     matches = self.tree.xpath('RunParameters/Runfolder')
+     if len(matches) == 0:
+         return None
+     return matches[0].text
+ # The getter defined above is _get_runfolder_name; there is no
+ # _get_runfolder, so referencing it here raised NameError.
+ runfolder_name = property(_get_runfolder_name)
+
def _get_software(self):
"""Return software name"""
if self.tree is None:
return path_fields[-1]
def _get_runfolder_name(self):
+ # Return the runfolder name, or None when no source provides one.
+ # Prefers self.gerald.runfolder_name; otherwise falls back to
+ # self.image_analysis.runfolder_name when that attribute exists.
- if self.gerald is None:
- return None
- else:
+ # Truthiness test: unlike the removed `is None` check, a falsy
+ # (not just None) gerald object also falls through to the fallback.
+ if self.gerald:
return self.gerald.runfolder_name
+ elif hasattr(self.image_analysis, 'runfolder_name'):
+ return self.image_analysis.runfolder_name
+ else:
+ return None
runfolder_name = property(_get_runfolder_name)
def _get_run_dirname(self):
p.suffix = suffix
p.image_analysis = image_analysis
p.bustard = bustard.bustard(unaligned)
- assert p.bustard
if aligned:
p.gerald = gerald.gerald(aligned)
runs.append(p)
- except IOError, e:
- LOGGER.error("Ignoring " + str(e))
+ except (IOError, RuntimeError) as e:
+ LOGGER.error("Exception %s", str(e))
+ LOGGER.error("Skipping run in %s", flowcell_id)
return len(runs) - start
def hiseq_match_aligned_unaligned(aligned, unaligned):
temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
flowcell_id = '4286GAAXX'
- runfolder_dir = os.path.join(
- temp_dir,
- '090608_HWI-EAS229_0117_{0}'.format(flowcell_id))
+ runfolder = '090608_HWI-EAS229_0117_{0}'.format(flowcell_id)
+ runfolder_dir = os.path.join(temp_dir, runfolder)
os.mkdir(runfolder_dir)
data_dir = os.path.join(runfolder_dir, 'Data')
if obj is not None:
obj.flowcell_id = flowcell_id
obj.temp_dir = temp_dir
+ obj.runfolder = runfolder
obj.runfolder_dir = runfolder_dir
obj.data_dir = data_dir
obj.image_analysis_dir = intensities_dir
# do we get the flowcell id from the filename?
self.failUnlessEqual(len(runs), 1)
- name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
+ name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
self.failUnlessEqual(runs[0].serialization_filename, name)
+ self.assertEqual(runs[0].runfolder_name, '090220_HWI-EAS229_0093_30VR0AAXX')
# do we get the flowcell id from the FlowcellId.xml file
make_flowcell_id(self.runfolder_dir, '207BTAAXY')
name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
self.failUnlessEqual(runs[0].serialization_filename, name)
+
r1 = runs[0]
xml = r1.get_elements()
xml_str = ElementTree.tostring(xml)
-#file_name format output_type experiment replicate enriched_in ucsc_db replaces replace_reason{% for r in files %}
-{{ r.relative_path }} {{ r.file_format }} {{ r.output_type }} {{ r.dataset_id }} {{ r.replicate }} {{ r.enriched_in }} {{ r.ucsc_db }} {{ r.replaces_accession|default_if_none:"" }} {{ r.replace_reason|default_if_none:"" }}{% endfor %}
+#version 1.7
+#file_name format output_type experiment replicate enriched_in ucsc_db paired_end technical_replicate{% for r in files %}
+{{ r.relative_path }} {{ r.file_format }} {{ r.output_type }} {{ r.dataset_id }} {{ r.replicate }} {{ r.enriched_in }} {{ r.ucsc_db }} {{ r.paired_end|default_if_none:"n/a" }} {{ r.technical_replicate|default_if_none:"n/a"}}{% endfor %}
PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
-select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?replaces_accession ?replace_reason
+select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?paired_end ?technical_replicate ?replaces ?replace_reason
WHERE {
<{{submission}}> a submissionOntology:submission ;
submissionOntology:name ?name ;
?fileClass geoSoft:fileTypeLabel ?file_format ;
ucscDaf:output_type ?output_type .
+ OPTIONAL { ?fileClass ucscDaf:paired_end ?paired_end . }
+ OPTIONAL { ?fileClass ucscDaf:technical_replicate ?technical_replicate . }
+
?library htswlib:replicate ?replicate ;
ucscDaf:enriched_in ?enriched_in;
statements = []
retries = 3
+ succeeded = False
while retries > 0:
try:
retries -= 1
statements = rdf_parser.parse_as_stream(url, ns)
retries = 0
+ succeeded = True
except RDF.RedlandError, e:
errmsg = "RDF.RedlandError: {0} {1} tries remaining"
logger.error(errmsg.format(str(e), retries))
-
+
+ if not succeeded:
+ logger.warn("Unable to download %s", url)
+
for s in statements:
conditionally_add_statement(model, s, ns)
import logging
import os
+import sys
from subprocess import Popen, PIPE
LOGGER = logging.getLogger(__name__)
def scan_file(self, filename):
+     """Run bigWigInfo on filename and store its output on self.
+
+     Each non-empty "term: value" output line becomes an attribute named
+     term. 'isCompressed' and 'isSwapped' are converted with
+     parseBoolean, everything else with parseNumber.
+
+     Logs an error and exits the process with status -1 if the command
+     cannot be run (OSError).
+     """
cmd = ['bigWigInfo',
filename]
- p = Popen(cmd, stdout=PIPE)
- stdout, _ = p.communicate()
- for line in stdout.split(os.linesep):
- if len(line) > 0:
- term, value = line.split(': ')
- if term in ('isCompressed', 'isSwapped'):
- value = parseBoolean(value)
- else:
- value = parseNumber(value)
- LOGGER.debug('%s: %s', term, str(value))
- setattr(self, term, value)
+     # Only launching and reading the process is guarded; parsing
+     # problems are not OSErrors and should not be reported as such.
+     try:
+         p = Popen(cmd, stdout=PIPE)
+         stdout, _ = p.communicate()
+     except OSError as e:
+         LOGGER.error("Exception %s trying to run: %s", str(e), ' '.join(cmd))
+         sys.exit(-1)
+
+     # splitlines() handles both "\n" and "\r\n" regardless of os.linesep.
+     for line in stdout.splitlines():
+         if len(line) > 0:
+             # maxsplit=1 keeps a value that itself contains ': ' intact
+             # instead of failing the two-name unpack.
+             term, value = line.split(': ', 1)
+             if term in ('isCompressed', 'isSwapped'):
+                 value = parseBoolean(value)
+             else:
+                 value = parseNumber(value)
+             LOGGER.debug('%s: %s', term, str(value))
+             setattr(self, term, value)
+