From 0c1f6eecc99275f29ae1870b6f781e2cadc04d51 Mon Sep 17 00:00:00 2001
From: Diane Trout <diane@caltech.edu>
Date: Fri, 29 Aug 2008 17:19:52 +0000
Subject: [PATCH] merge in my changes from trunk

---
 htswdataprod/htswdataprod/runfolder.py | 44 +++++++++++++++++++++-----
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/htswdataprod/htswdataprod/runfolder.py b/htswdataprod/htswdataprod/runfolder.py
index 7682d7c..65f6191 100644
--- a/htswdataprod/htswdataprod/runfolder.py
+++ b/htswdataprod/htswdataprod/runfolder.py
@@ -5,6 +5,7 @@ from glob import glob
 import logging
 import os
 import re
+import shutil
 import stat
 import subprocess
 import sys
@@ -21,8 +22,8 @@ VERSION_RE = "([0-9\.]+)"
 USER_RE = "([a-zA-Z0-9]+)"
 LANES_PER_FLOWCELL = 8
 
-from htswcommon.util.alphanum import alphanum
-from htswcommon.util.ethelp import indent, flatten
+from gaworkflow.util.alphanum import alphanum
+from gaworkflow.util.ethelp import indent, flatten
 
 
 class PipelineRun(object):
@@ -141,9 +142,9 @@ def get_runs(runfolder):
     generate two different PipelineRun objects, that differ
     in there gerald component.
     """
-    from htswdataprod.illumina import firecrest
-    from htswdataprod.illumina import bustard
-    from htswdataprod.illumina import gerald
+    from gaworkflow.pipeline import firecrest
+    from gaworkflow.pipeline import bustard
+    from gaworkflow.pipeline import gerald
 
     datadir = os.path.join(runfolder, 'Data')
 
@@ -210,8 +211,13 @@ def summary_report(runs):
             report.append("Clusters %d +/- %d" % (cluster[0], cluster[1]))
             report.append("Total Reads: %d" % (result.reads))
             mc = result._match_codes
-	    report.append("No Match: %d" % (mc['NM']))
-	    report.append("QC Failed: %d" % (mc['QC']))
+            nm = mc['NM']
+            nm_percent = float(nm)/result.reads  * 100
+            qc = mc['QC']
+            qc_percent = float(qc)/result.reads * 100
+
+	    report.append("No Match: %d (%2.2g %%)" % (nm, nm_percent))
+	    report.append("QC Failed: %d (%2.2g %%)" % (qc, qc_percent))
             report.append('Unique (0,1,2 mismatches) %d %d %d' % \
                           (mc['U0'], mc['U1'], mc['U2']))
             report.append('Repeat (0,1,2 mismatches) %d %d %d' % \
@@ -250,6 +256,14 @@ def extract_results(runs, output_base_dir=None):
       # save run file
       r.save(cycle_dir)
 
+      # Copy Summary.htm
+      summary_path = os.path.join(r.gerald.pathname, 'Summary.htm')
+      if os.path.exists(summary_path):
+          logging.info('Copying %s to %s' % (summary_path, cycle_dir))
+          shutil.copy(summary_path, cycle_dir)
+      else:
+          logging.info('Summary file %s was not found' % (summary_path,))
+
       # tar score files
       score_files = []
       for f in os.listdir(g.pathname):
@@ -282,4 +296,18 @@ def extract_results(runs, output_base_dir=None):
           logging.info('Saving to %s' % (dest_name, ))
           bzip.wait()
 
-      
+def clean_runs(runs):
+    """
+    Clean up run folders to optimize for compression.
+    Placeholder: not implemented yet, so `runs` is accepted but left untouched.
+    """
+    # TODO: implement this. Intended cleanup steps, noted as shell commands:
+    # rm RunLog*.xml
+    # rm pipeline_*.txt
+    # rm gclog.txt
+    # rm NetCopy.log
+    # rm nfn.log
+    # rm Images/L*
+    # cd Data/C1-*_Firecrest*
+    # make clean_intermediate
+    pass
-- 
2.30.2