If an IPAR or firecrest directory is missing some of the important
matrix files, that implies there isn't actually a valid run present.
This patch will then (hopefully) issue a warning and skip that
analysis run.
I also added an option to scripts/runfolder to allow a user to specify
where the extracted results should go.
One questionable thing: in one analysis, some of the lanes were run as
a sequence analysis rather than an eland analysis, so where I expected
every lane to have an eland genome, these lanes do not.
I hope that the code doesn't lose the lane index after serializing and
deserializing, as in that chunk example.
# should I parse this deeper than just stashing the
# contents of the matrix file?
matrix_pathname = os.path.join(pathname, 'Matrix', 's_matrix.txt')
# should I parse this deeper than just stashing the
# contents of the matrix file?
matrix_pathname = os.path.join(pathname, 'Matrix', 's_matrix.txt')
+ if not os.path.exists(matrix_pathname):
+ return None
f.matrix = open(matrix_pathname, 'r').read()
return f
f.matrix = open(matrix_pathname, 'r').read()
return f
if len(container.getchildren()) > LANES_PER_FLOWCELL:
raise RuntimeError('GERALD config.xml file changed')
lanes = [x.tag.split('_')[1] for x in container.getchildren()]
if len(container.getchildren()) > LANES_PER_FLOWCELL:
raise RuntimeError('GERALD config.xml file changed')
lanes = [x.tag.split('_')[1] for x in container.getchildren()]
- index = lanes.index(self._lane_id)
+ try:
+ index = lanes.index(self._lane_id)
+ except ValueError, e:
+ return None
element = container[index]
return element.text
def _get_analysis(self):
element = container[index]
return element.text
def _get_analysis(self):
# contents of the matrix file?
matrix_pathname = os.path.join(pathname, 'Matrix', 's_matrix.txt')
# contents of the matrix file?
matrix_pathname = os.path.join(pathname, 'Matrix', 's_matrix.txt')
+ if not os.path.exists(matrix_pathname):
+ return None
i.matrix = open(matrix_pathname, 'r').read()
# look for parameter xml file
i.matrix = open(matrix_pathname, 'r').read()
# look for parameter xml file
print i.tiles.keys()
print j.tiles.keys()
print j.tiles.items()
print i.tiles.keys()
print j.tiles.keys()
print j.tiles.items()
- print j.file_list()
\ No newline at end of file
for firecrest_pathname in glob(os.path.join(datadir,"*Firecrest*")):
logging.info('Found firecrest in ' + datadir)
image_analysis = firecrest.firecrest(firecrest_pathname)
for firecrest_pathname in glob(os.path.join(datadir,"*Firecrest*")):
logging.info('Found firecrest in ' + datadir)
image_analysis = firecrest.firecrest(firecrest_pathname)
- scan_post_image_analysis(runs, runfolder, image_analysis, firecrest_pathname)
+ if image_analysis is None:
+ logging.warn(
+ "%s is an empty or invalid firecrest directory" % (firecrest_pathname,)
+ )
+ else:
+ scan_post_image_analysis(
+ runs, runfolder, image_analysis, firecrest_pathname
+ )
# scan for IPAR directories
for ipar_pathname in glob(os.path.join(datadir,"IPAR_*")):
logging.info('Found ipar directories in ' + datadir)
image_analysis = ipar.ipar(ipar_pathname)
# scan for IPAR directories
for ipar_pathname in glob(os.path.join(datadir,"IPAR_*")):
logging.info('Found ipar directories in ' + datadir)
image_analysis = ipar.ipar(ipar_pathname)
- scan_post_image_analysis(runs, runfolder, image_analysis, ipar_pathname)
+ if image_analysis is None:
+ logging.warn(
+ "%s is an empty or invalid IPAR directory" %(ipar_pathname,)
+ )
+ else:
+ scan_post_image_analysis(
+ runs, runfolder, image_analysis, ipar_pathname
+ )
that contains the per lane post filter cluster numbers and the mapped
read counts. (The report isn't currently very pretty)
"""
that contains the per lane post filter cluster numbers and the mapped
read counts. (The report isn't currently very pretty)
"""
import logging
import optparse
import os
import logging
import optparse
import os
parser.add_option('--extract-results', action='store_true',
default=False,
help='extract result files out of runfolder into a simpler archive')
parser.add_option('--extract-results', action='store_true',
default=False,
help='extract result files out of runfolder into a simpler archive')
+ parser.add_option('-o', '--output-dir', default=None,
+ help="specify the default output directory for extract results")
+
parser.add_option('--run-xml', dest='run_xml',
default=None,
help='specify a run_<FlowCell>.xml file for summary reports')
parser.add_option('--run-xml', dest='run_xml',
default=None,
help='specify a run_<FlowCell>.xml file for summary reports')
root_log = logging.getLogger()
root_log.setLevel(logging.INFO)
root_log = logging.getLogger()
root_log.setLevel(logging.INFO)
+ logging.info('Starting htsworkflow illumina runfolder processing tool.')
runs = []
if opt.run_xml:
# handle ~ shortcut
opt.run_xml = os.path.expanduser(opt.run_xml)
tree = ElementTree.parse(opt.run_xml).getroot()
runs.append(runfolder.PipelineRun(xml=tree))
runs = []
if opt.run_xml:
# handle ~ shortcut
opt.run_xml = os.path.expanduser(opt.run_xml)
tree = ElementTree.parse(opt.run_xml).getroot()
runs.append(runfolder.PipelineRun(xml=tree))
- for run_dir in args:
- runs.extend(runfolder.get_runs(run_dir))
+ for run_pattern in args:
+ # expand args on our own if needed
+ for run_dir in glob(run_pattern):
+ runs.extend(runfolder.get_runs(run_dir))
if len(runs) > 0:
if opt.summary:
if len(runs) > 0:
if opt.summary:
if opt.archive:
runfolder.extract_run_parameters(runs)
if opt.extract_results:
if opt.archive:
runfolder.extract_run_parameters(runs)
if opt.extract_results:
- runfolder.extract_results(runs)
+ runfolder.extract_results(runs, opt.output_dir)