merge in my changes from trunk
author Diane Trout <diane@caltech.edu>
Fri, 29 Aug 2008 17:19:52 +0000 (17:19 +0000)
committer Diane Trout <diane@caltech.edu>
Fri, 29 Aug 2008 17:19:52 +0000 (17:19 +0000)
htswdataprod/htswdataprod/runfolder.py

index 7682d7c18f1b9ec2bb58a4cafb8746bc8a4c4090..65f6191ec9879ed9f05944b77d269478d51ad22f 100644 (file)
@@ -5,6 +5,7 @@ from glob import glob
 import logging
 import os
 import re
+import shutil
 import stat
 import subprocess
 import sys
@@ -21,8 +22,8 @@ VERSION_RE = "([0-9\.]+)"
 USER_RE = "([a-zA-Z0-9]+)"
 LANES_PER_FLOWCELL = 8
 
-from htswcommon.util.alphanum import alphanum
-from htswcommon.util.ethelp import indent, flatten
+from gaworkflow.util.alphanum import alphanum
+from gaworkflow.util.ethelp import indent, flatten
 
 
 class PipelineRun(object):
@@ -141,9 +142,9 @@ def get_runs(runfolder):
     generate two different PipelineRun objects that differ
     in their gerald component.
     """
-    from htswdataprod.illumina import firecrest
-    from htswdataprod.illumina import bustard
-    from htswdataprod.illumina import gerald
+    from gaworkflow.pipeline import firecrest
+    from gaworkflow.pipeline import bustard
+    from gaworkflow.pipeline import gerald
 
     datadir = os.path.join(runfolder, 'Data')
 
@@ -210,8 +211,13 @@ def summary_report(runs):
             report.append("Clusters %d +/- %d" % (cluster[0], cluster[1]))
             report.append("Total Reads: %d" % (result.reads))
             mc = result._match_codes
-           report.append("No Match: %d" % (mc['NM']))
-           report.append("QC Failed: %d" % (mc['QC']))
+            nm = mc['NM']
+            nm_percent = float(nm)/result.reads * 100
+            qc = mc['QC']
+            qc_percent = float(qc)/result.reads * 100
+
+            report.append("No Match: %d (%2.2g %%)" % (nm, nm_percent))
+            report.append("QC Failed: %d (%2.2g %%)" % (qc, qc_percent))
             report.append('Unique (0,1,2 mismatches) %d %d %d' % \
                           (mc['U0'], mc['U1'], mc['U2']))
             report.append('Repeat (0,1,2 mismatches) %d %d %d' % \
@@ -250,6 +256,14 @@ def extract_results(runs, output_base_dir=None):
       # save run file
       r.save(cycle_dir)
 
+      # Copy Summary.htm
+      summary_path = os.path.join(r.gerald.pathname, 'Summary.htm')
+      if os.path.exists(summary_path):
+          logging.info('Copying %s to %s' % (summary_path, cycle_dir))
+          shutil.copy(summary_path, cycle_dir)
+      else:
+          logging.info('Summary file %s was not found' % (summary_path,))
+
       # tar score files
       score_files = []
       for f in os.listdir(g.pathname):
@@ -282,4 +296,18 @@ def extract_results(runs, output_base_dir=None):
           logging.info('Saving to %s' % (dest_name, ))
           bzip.wait()
 
-      
+def clean_runs(runs):
+    """
+    Clean up run folders to optimize for compression.
+    """
+    # TODO: implement this.
+    # rm RunLog*.xml
+    # rm pipeline_*.txt
+    # rm gclog.txt
+    # rm NetCopy.log
+    # rm nfn.log
+    # rm Images/L*
+    # cd Data/C1-*_Firecrest*
+    # make clean_intermediate
+
+    pass
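
For reference, a minimal sketch of what the cleanup listed in the TODO comments might look like. It is not the committed implementation: the helper name clean_runfolder and its plain run-folder path argument are assumptions (the stub above takes run objects), and the file patterns and the "make clean_intermediate" step are taken directly from the comments in the diff.

import glob
import logging
import os
import shutil
import subprocess

def clean_runfolder(runfolder):
    """Hypothetical helper: remove the logs and image data listed in the
    TODO comments so the run folder compresses better."""
    patterns = ['RunLog*.xml', 'pipeline_*.txt', 'gclog.txt',
                'NetCopy.log', 'nfn.log', os.path.join('Images', 'L*')]
    for pattern in patterns:
        for path in glob.glob(os.path.join(runfolder, pattern)):
            logging.info('Removing %s' % (path,))
            if os.path.isdir(path):
                shutil.rmtree(path)
            else:
                os.unlink(path)
    # equivalent of "cd Data/C1-*_Firecrest*; make clean_intermediate"
    for firecrest_dir in glob.glob(os.path.join(runfolder, 'Data',
                                                'C1-*_Firecrest*')):
        logging.info('Running make clean_intermediate in %s' % (firecrest_dir,))
        subprocess.check_call(['make', 'clean_intermediate'], cwd=firecrest_dir)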