--- /dev/null
+#!/usr/bin/env python
+"""
+runfolder.py can generate an XML file capturing all the 'interesting' parameters from a finished pipeline run (using the -a option). The information currently being captured includes:
+
+ * Flowcell ID
+ * run dates
+ * start/stop cycle numbers
+ * Firecrest, bustard, gerald version numbers
+ * Eland analysis types, and everything in the eland configuration file.
+ * cluster numbers and other values from the Summary.htm
+ LaneSpecificParameters table.
+ * How many reads mapped to a genome from an eland file
+
+The ELAND "mapped reads" counter will also check for eland squashed files
+that were symlinked from another directory. This is so I can track how
+many reads landed on the genome of interest and on the spike ins.
+
+Basically my subdirectories look something like:
+
+genomes/hg18
+genomes/hg18/chr*.2bpb <- files for hg18 genome
+genomes/hg18/chr*.vld
+genomes/hg18/VATG.fa.2bp <- symlink to genomes/spikeins
+genomes/spikein
+
+runfolder.py can also spit out a simple summary report (-s option)
+that contains the per lane post filter cluster numbers and the mapped
+read counts. (The report isn't currently very pretty)
+"""
+import logging
+import optparse
+import sys
+
+from gaworkflow.pipeline import runfolder
+from gaworkflow.pipeline.runfolder import ElementTree
+
+def make_parser():
+ usage = 'usage: %prog [options] runfolder_root_dir'
+ parser = optparse.OptionParser(usage)
+ parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
+ default=False,
+ help='turn on verbose mode')
+ parser.add_option('-s', '--summary', dest='summary', action='store_true',
+ default=False,
+ help='produce summary report')
+ parser.add_option('-a', '--archive', dest='archive', action='store_true',
+ default=False,
+ help='generate run configuration archive')
+ parser.add_option('--extract-results', action='store_true',
+ default=False,
+ help='extract result files out of runfolder into a simpler archive')
+ parser.add_option('--run-xml', dest='run_xml',
+ default=None,
+ help='specify a run_<FlowCell>.xml file for summary reports')
+
+ return parser
+
+def main(cmdlist=None):
+ parser = make_parser()
+ opt, args = parser.parse_args(cmdlist)
+
+ logging.basicConfig()
+ if opt.verbose:
+ root_log = logging.getLogger()
+ root_log.setLevel(logging.INFO)
+
+ runs = []
+ if opt.run_xml:
+ tree = ElementTree.parse(opt.run_xml).getroot()
+ runs.append(runfolder.PipelineRun(xml=tree))
+ for run_dir in args:
+ runs.extend(runfolder.get_runs(run_dir))
+
+ if len(runs) > 0:
+ if opt.summary:
+ print runfolder.summary_report(runs)
+ if opt.archive:
+ runfolder.extract_run_parameters(runs)
+ if opt.extract_results:
+ runfolder.extract_results(runs)
+
+ return 0
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))