From: Diane Trout Date: Fri, 28 Mar 2008 22:37:13 +0000 (+0000) Subject: rename the summary report to summary_report to distinguish it from Summary X-Git-Tag: stanford.caltech-merged-database-2009-jan-15~84 X-Git-Url: http://woldlab.caltech.edu/gitweb/?a=commitdiff_plain;h=f33c94adc1d48f8bd8418f55302976aa39d1efe4;p=htsworkflow.git rename the summary report to summary_report to distinguish it from Summary also moved the summarize_mapped_reads from the summary_report function to the top level of the script --- diff --git a/scripts/runfolder.py b/scripts/runfolder.py index e19790e..9e45ea8 100644 --- a/scripts/runfolder.py +++ b/scripts/runfolder.py @@ -541,21 +541,28 @@ def extract_run_parameters(runs): for run in runs: run.save() -def summary(runs): - def summarize_mapped_reads(mapped_reads): - summarized_reads = {} - genome_reads = 0 - genome = 'unknown' - for k, v in mapped_reads.items(): - path, k = os.path.split(k) - if len(path) > 0: - genome = path - genome_reads += v - else: - summarized_reads[k] = summarized_reads.setdefault(k, 0) + v - summarized_reads[genome] = genome_reads - return summarized_reads - +def summarize_mapped_reads(mapped_reads): + """ + Summarize per chromosome reads into a genome count + But handle spike-in/contamination symlinks seperately. + """ + summarized_reads = {} + genome_reads = 0 + genome = 'unknown' + for k, v in mapped_reads.items(): + path, k = os.path.split(k) + if len(path) > 0: + genome = path + genome_reads += v + else: + summarized_reads[k] = summarized_reads.setdefault(k, 0) + v + summarized_reads[genome] = genome_reads + return summarized_reads + +def summary_report(runs): + """ + Summarize cluster numbers and mapped read counts for a runfolder + """ for run in runs: # print a run name? logging.info('Summarizing ' + run.name) @@ -603,7 +610,7 @@ def main(cmdlist=None): for runfolder in args: runs = get_runs(runfolder) if opt.summary: - summary(runs) + summary_report(runs) if opt.archive: extract_run_parameters(runs)