#!/usr/bin/env python
"""
Runfolder.py can generate an XML file capturing all the 'interesting'
parameters from a finished pipeline run (using the -a option).
The information currently being captured includes:

  * Flowcell ID
  * run dates
  * start/stop cycle numbers
  * Firecrest, bustard, gerald version numbers
  * Eland analysis types, and everything in the eland configuration file.
  * cluster numbers and other values from the Summary.htm
    LaneSpecificParameters table.
  * How many reads mapped to a genome from an eland file

The ELAND "mapped reads" counter will also check for eland squashed files
that were symlinked from another directory. This is so I can track how
many reads landed on the genome of interest and on the spike ins.

Basically my subdirectories look something like:

genomes/hg18
genomes/hg18/chr*.2bpb <- files for hg18 genome
genomes/hg18/chr*.vld
genomes/hg18/VATG.fa.2bp <- symlink to genomes/spikeins
genomes/spikein

runfolder.py can also spit out a simple summary report (-s option)
that contains the per lane post filter cluster numbers and the mapped
read counts. (The report isn't currently very pretty)
"""
import logging
import optparse
import sys

from gaworkflow.pipeline import runfolder
from gaworkflow.pipeline.runfolder import ElementTree


def make_parser():
    """Build the command-line option parser for this script.

    Returns an ``optparse.OptionParser`` that accepts the flags
    -v/--verbose, -s/--summary, -a/--archive and --extract-results
    (all booleans defaulting to False), plus --run-xml, which takes a
    file name and defaults to None. Positional arguments are runfolder
    root directories.
    """
    usage = 'usage: %prog [options] runfolder_root_dir'
    parser = optparse.OptionParser(usage)

    parser.add_option('-v', '--verbose', dest='verbose',
                      action='store_true', default=False,
                      help='turn on verbose mode')
    parser.add_option('-s', '--summary', dest='summary',
                      action='store_true', default=False,
                      help='produce summary report')
    parser.add_option('-a', '--archive', dest='archive',
                      action='store_true', default=False,
                      help='generate run configuration archive')
    # No explicit dest: optparse derives "extract_results" from the
    # long option name.
    parser.add_option('--extract-results', action='store_true',
                      default=False,
                      help='extract result files out of runfolder into '
                           'a simpler archive')
    parser.add_option('--run-xml', dest='run_xml', default=None,
                      help='specify a run_.xml file for summary reports')

    return parser


def main(cmdlist=None):
    """Run the requested actions over every pipeline run that was found.

    cmdlist is the list of command-line arguments to parse; when it is
    None, optparse falls back to ``sys.argv[1:]``.

    Runs are gathered from the --run-xml file (if given) and from each
    positional runfolder directory. When at least one run was gathered,
    the summary report is printed and/or the archive and result
    extraction steps are executed, according to the options.

    Always returns 0, which the entry point passes to ``sys.exit``.
    """
    parser = make_parser()
    opt, args = parser.parse_args(cmdlist)

    logging.basicConfig()
    if opt.verbose:
        logging.getLogger().setLevel(logging.INFO)

    runs = []
    if opt.run_xml:
        # A previously saved run description can stand in for a runfolder.
        tree = ElementTree.parse(opt.run_xml).getroot()
        runs.append(runfolder.PipelineRun(xml=tree))
    for run_dir in args:
        runs.extend(runfolder.get_runs(run_dir))

    if runs:
        if opt.summary:
            # Single-argument parenthesised form prints the same text on
            # Python 2 and is also valid syntax on Python 3.
            print(runfolder.summary_report(runs))
        if opt.archive:
            runfolder.extract_run_parameters(runs)
        if opt.extract_results:
            runfolder.extract_results(runs)

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))