merge in my changes from trunk
author Diane Trout <diane@caltech.edu>
Fri, 29 Aug 2008 17:19:52 +0000 (17:19 +0000)
committer Diane Trout <diane@caltech.edu>
Fri, 29 Aug 2008 17:19:52 +0000 (17:19 +0000)
htswdataprod/htswdataprod/runfolder.py

index 7682d7c18f1b9ec2bb58a4cafb8746bc8a4c4090..65f6191ec9879ed9f05944b77d269478d51ad22f 100644 (file)
@@ -5,6 +5,7 @@ from glob import glob
 import logging
 import os
 import re
+import shutil
 import stat
 import subprocess
 import sys
@@ -21,8 +22,8 @@ VERSION_RE = "([0-9\.]+)"
 USER_RE = "([a-zA-Z0-9]+)"
 LANES_PER_FLOWCELL = 8
 
-from htswcommon.util.alphanum import alphanum
-from htswcommon.util.ethelp import indent, flatten
+from gaworkflow.util.alphanum import alphanum
+from gaworkflow.util.ethelp import indent, flatten
 
 
 class PipelineRun(object):
@@ -141,9 +142,9 @@ def get_runs(runfolder):
     generate two different PipelineRun objects that differ
     in their gerald component.
     """
-    from htswdataprod.illumina import firecrest
-    from htswdataprod.illumina import bustard
-    from htswdataprod.illumina import gerald
+    from gaworkflow.pipeline import firecrest
+    from gaworkflow.pipeline import bustard
+    from gaworkflow.pipeline import gerald
 
     datadir = os.path.join(runfolder, 'Data')
 
@@ -210,8 +211,13 @@ def summary_report(runs):
             report.append("Clusters %d +/- %d" % (cluster[0], cluster[1]))
             report.append("Total Reads: %d" % (result.reads))
             mc = result._match_codes
-           report.append("No Match: %d" % (mc['NM']))
-           report.append("QC Failed: %d" % (mc['QC']))
+            nm = mc['NM']
+            nm_percent = float(nm)/result.reads * 100
+            qc = mc['QC']
+            qc_percent = float(qc)/result.reads * 100
+
+            report.append("No Match: %d (%2.2g %%)" % (nm, nm_percent))
+            report.append("QC Failed: %d (%2.2g %%)" % (qc, qc_percent))
             report.append('Unique (0,1,2 mismatches) %d %d %d' % \
                           (mc['U0'], mc['U1'], mc['U2']))
             report.append('Repeat (0,1,2 mismatches) %d %d %d' % \
@@ -250,6 +256,14 @@ def extract_results(runs, output_base_dir=None):
       # save run file
       r.save(cycle_dir)
 
+      # Copy Summary.htm
+      summary_path = os.path.join(r.gerald.pathname, 'Summary.htm')
+      if os.path.exists(summary_path):
+          logging.info('Copying %s to %s' % (summary_path, cycle_dir))
+          shutil.copy(summary_path, cycle_dir)
+      else:
+          logging.info('Summary file %s was not found' % (summary_path,))
+
       # tar score files
       score_files = []
       for f in os.listdir(g.pathname):
@@ -282,4 +296,18 @@ def extract_results(runs, output_base_dir=None):
           logging.info('Saving to %s' % (dest_name, ))
           bzip.wait()
 
-      
+def clean_runs(runs):
+    """
+    Clean up run folders to optimize for compression.
+    """
+    # TODO: implement this.
+    # rm RunLog*.xml
+    # rm pipeline_*.txt
+    # rm gclog.txt
+    # rm NetCopy.log
+    # rm nfn.log
+    # rm Images/L*
+    # cd Data/C1-*_Firecrest*
+    # make clean_intermediate
+
+    pass
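
For reference, a minimal sketch of what the cleanup listed in the TODO comments might look like. It is not the committed implementation: the helper name clean_runfolder and its plain run-folder path argument are assumptions (the stub above takes run objects), and the file patterns and the "make clean_intermediate" step are taken directly from the comments in the diff.

import glob
import logging
import os
import shutil
import subprocess

def clean_runfolder(runfolder):
    """Hypothetical helper: remove the logs and image data listed in the
    TODO comments so the run folder compresses better."""
    patterns = ['RunLog*.xml', 'pipeline_*.txt', 'gclog.txt',
                'NetCopy.log', 'nfn.log', os.path.join('Images', 'L*')]
    for pattern in patterns:
        for path in glob.glob(os.path.join(runfolder, pattern)):
            logging.info('Removing %s' % (path,))
            if os.path.isdir(path):
                shutil.rmtree(path)
            else:
                os.unlink(path)
    # equivalent of "cd Data/C1-*_Firecrest*; make clean_intermediate"
    for firecrest_dir in glob.glob(os.path.join(runfolder, 'Data',
                                                'C1-*_Firecrest*')):
        logging.info('Running make clean_intermediate in %s' % (firecrest_dir,))
        subprocess.check_call(['make', 'clean_intermediate'], cwd=firecrest_dir)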