projects
/
htsworkflow.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
5142604
)
merge in my changes from trunk
author
Diane Trout
<diane@caltech.edu>
Fri, 29 Aug 2008 17:19:52 +0000
(17:19 +0000)
committer
Diane Trout
<diane@caltech.edu>
Fri, 29 Aug 2008 17:19:52 +0000
(17:19 +0000)
htswdataprod/htswdataprod/runfolder.py
patch
|
blob
|
history
diff --git
a/htswdataprod/htswdataprod/runfolder.py
b/htswdataprod/htswdataprod/runfolder.py
index 7682d7c18f1b9ec2bb58a4cafb8746bc8a4c4090..65f6191ec9879ed9f05944b77d269478d51ad22f 100644
(file)
--- a/
htswdataprod/htswdataprod/runfolder.py
+++ b/
htswdataprod/htswdataprod/runfolder.py
@@
-5,6
+5,7
@@
from glob import glob
import logging
import os
import re
import logging
import os
import re
+import shutil
import stat
import subprocess
import sys
import stat
import subprocess
import sys
@@
-21,8
+22,8
@@
VERSION_RE = "([0-9\.]+)"
USER_RE = "([a-zA-Z0-9]+)"
LANES_PER_FLOWCELL = 8
USER_RE = "([a-zA-Z0-9]+)"
LANES_PER_FLOWCELL = 8
-from
htswcommon
.util.alphanum import alphanum
-from
htswcommon
.util.ethelp import indent, flatten
+from
gaworkflow
.util.alphanum import alphanum
+from
gaworkflow
.util.ethelp import indent, flatten
class PipelineRun(object):
class PipelineRun(object):
@@
-141,9
+142,9
@@
def get_runs(runfolder):
generate two different PipelineRun objects that differ
in their gerald component.
"""
generate two different PipelineRun objects that differ
in their gerald component.
"""
- from
htswdataprod.illumina
import firecrest
- from
htswdataprod.illumina
import bustard
- from
htswdataprod.illumina
import gerald
+ from
gaworkflow.pipeline
import firecrest
+ from
gaworkflow.pipeline
import bustard
+ from
gaworkflow.pipeline
import gerald
datadir = os.path.join(runfolder, 'Data')
datadir = os.path.join(runfolder, 'Data')
@@
-210,8
+211,13
@@
def summary_report(runs):
report.append("Clusters %d +/- %d" % (cluster[0], cluster[1]))
report.append("Total Reads: %d" % (result.reads))
mc = result._match_codes
report.append("Clusters %d +/- %d" % (cluster[0], cluster[1]))
report.append("Total Reads: %d" % (result.reads))
mc = result._match_codes
- report.append("No Match: %d" % (mc['NM']))
- report.append("QC Failed: %d" % (mc['QC']))
+ nm = mc['NM']
+ nm_percent = float(nm)/result.reads * 100
+ qc = mc['QC']
+ qc_percent = float(qc)/result.reads * 100
+
+ report.append("No Match: %d (%2.2g %%)" % (nm, nm_percent))
+ report.append("QC Failed: %d (%2.2g %%)" % (qc, qc_percent))
report.append('Unique (0,1,2 mismatches) %d %d %d' % \
(mc['U0'], mc['U1'], mc['U2']))
report.append('Repeat (0,1,2 mismatches) %d %d %d' % \
report.append('Unique (0,1,2 mismatches) %d %d %d' % \
(mc['U0'], mc['U1'], mc['U2']))
report.append('Repeat (0,1,2 mismatches) %d %d %d' % \
@@
-250,6
+256,14
@@
def extract_results(runs, output_base_dir=None):
# save run file
r.save(cycle_dir)
# save run file
r.save(cycle_dir)
+ # Copy Summary.htm
+ summary_path = os.path.join(r.gerald.pathname, 'Summary.htm')
+ if os.path.exists(summary_path):
+ logging.info('Copying %s to %s' % (summary_path, cycle_dir))
+ shutil.copy(summary_path, cycle_dir)
+ else:
+ logging.info('Summary file %s was not found' % (summary_path,))
+
# tar score files
score_files = []
for f in os.listdir(g.pathname):
# tar score files
score_files = []
for f in os.listdir(g.pathname):
@@
-282,4
+296,18
@@
def extract_results(runs, output_base_dir=None):
logging.info('Saving to %s' % (dest_name, ))
bzip.wait()
logging.info('Saving to %s' % (dest_name, ))
bzip.wait()
-
+def clean_runs(runs):
+ """
+ Clean up run folders to optimize for compression.
+ """
+ # TODO: implement this.
+ # rm RunLog*.xml
+ # rm pipeline_*.txt
+ # rm gclog.txt
+ # rm NetCopy.log
+ # rm nfn.log
+ # rm Images/L*
+ # cd Data/C1-*_Firecrest*
+ # make clean_intermediate
+
+ pass