Runfolder.py can generate an XML file capturing all the 'interesting' parameters from a finished pipeline run (using the -a option). The information currently being captured includes:
7 * start/stop cycle numbers
8 * Firecrest, bustard, gerald version numbers
9 * Eland analysis types, and everything in the eland configuration file.
10 * cluster numbers and other values from the Summary.htm
11 LaneSpecificParameters table.
12 * How many reads mapped to a genome from an eland file
The ELAND "mapped reads" counter will also check for eland squashed files
that were symlinked from another directory. This is so I can track how
many reads landed on the genome of interest and on the spike-ins.
Basically my subdirectories look something like:
21 genomes/hg18/chr*.2bpb <- files for hg18 genome
23 genomes/hg18/VATG.fa.2bp <- symlink to genomes/spikeins
26 runfolder.py can also spit out a simple summary report (-s option)
27 that contains the per lane post filter cluster numbers and the mapped
28 read counts. (The report isn't currently very pretty)
36 from htsworkflow.pipelines import runfolder
37 from htsworkflow.pipelines.runfolder import ElementTree
def make_parser():
    """Build the command-line parser for the runfolder processing tool.

    The parser accepts general options (-v/--verbose, --dry-run,
    -o/--output-dir, -u/--use-run, --run-xml) and a 'Commands' option
    group (-s/--summary, -a/--archive, --extract-results, -c/--clean).
    Positional arguments are left to the caller to interpret.

    Returns:
        optparse.OptionParser: the fully configured parser.
    """
    usage = 'usage: %prog [options] runfolder_root_dir'
    parser = optparse.OptionParser(usage)

    # General behaviour switches.  Every flag gets an explicit default so
    # callers can rely on a real boolean rather than None.
    parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
                      default=False,
                      help='turn on verbose mode')
    parser.add_option('--dry-run', action='store_true', default=False,
                      help="Don't delete anything (in clean mode)")

    # The actions the tool can perform; several may be combined in one call.
    commands = optparse.OptionGroup(parser, 'Commands')

    commands.add_option('-s', '--summary', dest='summary', action='store_true',
                        default=False,
                        help='produce summary report')
    commands.add_option('-a', '--archive', dest='archive', action='store_true',
                        default=False,
                        help='generate run configuration archive')
    commands.add_option('--extract-results', action='store_true',
                        default=False,
                        help='create run-xml summary, compress the eland result files, and '
                             'copy them and the Summary.htm file into archival directory.')
    commands.add_option('-c', '--clean', action='store_true', default=False,
                        help='Clean runfolder, preparing it for long-term storage')
    parser.add_option_group(commands)

    parser.add_option('-o', '--output-dir', default=None,
                      help="specify the default output directory for extract results")

    parser.add_option('-u', '--use-run', dest='use_run', default=None,
                      help='Specify which run to use instead of autoscanning '
                           'the runfolder. You do this by providing the final '
                           ' GERALD directory, and it assumes the parent '
                           'directories are the bustard and image processing '
                           'directories.')

    parser.add_option('--run-xml', dest='run_xml',
                      default=None,
                      help='specify a run_<FlowCell>.xml file for summary reports')

    return parser
def main(cmdlist=None):
    """Run the runfolder tool for the given command-line arguments.

    Runs are gathered from up to three sources: a saved run XML file
    (--run-xml), an explicitly named run directory (--use-run), and every
    directory matching the positional glob patterns.  Each requested
    command (summary, archive, extract-results, clean) is then applied to
    the collected runs.

    Args:
        cmdlist: list of argument strings, without the program name.  When
            None, optparse falls back to ``sys.argv[1:]``.

    Returns:
        int: always 0, so the result can be handed to ``sys.exit``.
    """
    parser = make_parser()
    opt, args = parser.parse_args(cmdlist)

    # Informational messages are only shown when -v/--verbose is given;
    # otherwise the root logger keeps its default threshold.
    if opt.verbose:
        root_log = logging.getLogger()
        root_log.setLevel(logging.INFO)

    logging.info('Starting htsworkflow illumina runfolder processing tool.')

    runs = []

    # load a previously saved run description, if one was supplied
    if opt.run_xml:
        # handle the ~ shortcut, which the shell may not have expanded
        opt.run_xml = os.path.expanduser(opt.run_xml)
        tree = ElementTree.parse(opt.run_xml).getroot()
        runs.append(runfolder.PipelineRun(xml=tree))

    # look for manually specified run
    if opt.use_run is not None:
        specific_run = runfolder.get_specific_run(opt.use_run)
        if specific_run is not None:
            runs.append(specific_run)
        else:
            logging.warning("Couldn't find a run in %s", opt.use_run)

    # scan runfolders for runs
    for run_pattern in args:
        # expand args on our own if needed
        for run_dir in glob(run_pattern):
            runs.extend(runfolder.get_runs(run_dir))

    # Nothing to work on: tell the user and show the usage text.
    if not runs:
        # single-argument print(...) behaves identically on Python 2 and 3
        print("You need to specify some run folders to process..."+os.linesep)
        parser.print_help()
        return 0

    # Track whether at least one command was requested so we can complain
    # when the user supplied run folders but nothing to do with them.
    command_run = False
    if opt.summary:
        print(runfolder.summary_report(runs))
        command_run = True
    if opt.archive:
        runfolder.extract_run_parameters(runs)
        command_run = True
    if opt.extract_results:
        runfolder.extract_results(runs, opt.output_dir)
        command_run = True
    if opt.clean:
        runfolder.clean_runs(runs, opt.dry_run)
        command_run = True

    if not command_run:
        print("You need to specify a command."+os.linesep)
        parser.print_help()

    return 0
if __name__ == "__main__":
    # Script entry point: pass the command-line arguments (minus the
    # program name) to main() and use its result as the exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)