From 0c1f6eecc99275f29ae1870b6f781e2cadc04d51 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Fri, 29 Aug 2008 17:19:52 +0000 Subject: [PATCH] merge in my changes from trunk --- htswdataprod/htswdataprod/runfolder.py | 44 +++++++++++++++++++++----- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/htswdataprod/htswdataprod/runfolder.py b/htswdataprod/htswdataprod/runfolder.py index 7682d7c..65f6191 100644 --- a/htswdataprod/htswdataprod/runfolder.py +++ b/htswdataprod/htswdataprod/runfolder.py @@ -5,6 +5,7 @@ from glob import glob import logging import os import re +import shutil import stat import subprocess import sys @@ -21,8 +22,8 @@ VERSION_RE = "([0-9\.]+)" USER_RE = "([a-zA-Z0-9]+)" LANES_PER_FLOWCELL = 8 -from htswcommon.util.alphanum import alphanum -from htswcommon.util.ethelp import indent, flatten +from gaworkflow.util.alphanum import alphanum +from gaworkflow.util.ethelp import indent, flatten class PipelineRun(object): @@ -141,9 +142,9 @@ def get_runs(runfolder): generate two different PipelineRun objects, that differ in there gerald component. """ - from htswdataprod.illumina import firecrest - from htswdataprod.illumina import bustard - from htswdataprod.illumina import gerald + from gaworkflow.pipeline import firecrest + from gaworkflow.pipeline import bustard + from gaworkflow.pipeline import gerald datadir = os.path.join(runfolder, 'Data') @@ -210,8 +211,13 @@ def summary_report(runs): report.append("Clusters %d +/- %d" % (cluster[0], cluster[1])) report.append("Total Reads: %d" % (result.reads)) mc = result._match_codes - report.append("No Match: %d" % (mc['NM'])) - report.append("QC Failed: %d" % (mc['QC'])) + nm = mc['NM'] + nm_percent = float(nm)/result.reads * 100 + qc = mc['QC'] + qc_percent = float(qc)/result.reads * 100 + + report.append("No Match: %d (%2.2g %%)" % (nm, nm_percent)) + report.append("QC Failed: %d (%2.2g %%)" % (qc, qc_percent)) report.append('Unique (0,1,2 mismatches) %d %d %d' % \ (mc['U0'], mc['U1'], mc['U2'])) report.append('Repeat (0,1,2 mismatches) %d %d %d' % \ @@ -250,6 +256,14 @@ def extract_results(runs, output_base_dir=None): # save run file r.save(cycle_dir) + # Copy Summary.htm + summary_path = os.path.join(r.gerald.pathname, 'Summary.htm') + if os.path.exists(summary_path): + logging.info('Copying %s to %s' % (summary_path, cycle_dir)) + shutil.copy(summary_path, cycle_dir) + else: + logging.info('Summary file %s was not found' % (summary_path,)) + # tar score files score_files = [] for f in os.listdir(g.pathname): @@ -282,4 +296,18 @@ def extract_results(runs, output_base_dir=None): logging.info('Saving to %s' % (dest_name, )) bzip.wait() - +def clean_runs(runs): + """ + Clean up run folders to optimize for compression. + """ + # TODO: implement this. + # rm RunLog*.xml + # rm pipeline_*.txt + # rm gclog.txt + # rm NetCopy.log + # rm nfn.log + # rm Images/L* + # cd Data/C1-*_Firecrest* + # make clean_intermediate + + pass -- 2.30.2