From: Diane Trout <diane@caltech.edu>
Date: Tue, 1 Apr 2008 00:18:07 +0000 (+0000)
Subject: report statistics on the various eland_result sequence status codes
X-Git-Tag: stanford.caltech-merged-database-2009-jan-15~82
X-Git-Url: http://woldlab.caltech.edu/gitweb/?a=commitdiff_plain;h=0a7ad4ff06f523c55a907f93e1fc52f265d5f24d;p=htsworkflow.git

report statistics on the various eland_result sequence status codes
(E.g. how many times does QC/NM/U[012]/R[012] show up)
---

diff --git a/scripts/runfolder.py b/scripts/runfolder.py
index 4a2549a..29b372b 100644
--- a/scripts/runfolder.py
+++ b/scripts/runfolder.py
@@ -419,15 +419,23 @@ class ELAND(object):
             mapped_reads = {}
             genome_dir = self.gerald.lanes[self.sample_name].eland_genome
             self._build_fasta_map(genome_dir)
-
+            match_codes = {'NM':0, 'QC':0, 'RM':0, 
+                           'U0':0, 'U1':0, 'U2':0,
+                           'R0':0, 'R1':0, 'R2':0,
+                          }
             for line in open(self.pathname):
                 reads += 1
                 fields = line.split()
+		# code = fields[2]
+                # match_codes[code] = match_codes.setdefault(code, 0) + 1
+	        # the QC/NM etc codes are in the 3rd field and always present
+                match_codes[fields[2]] += 1
                 # ignore lines that don't have a fasta filename
                 if len(fields) < 7:
                     continue
                 fasta = self._fasta_map.get(fields[6], fields[6])
                 mapped_reads[fasta] = mapped_reads.setdefault(fasta, 0) + 1
+            self._match_codes = match_codes
             self._mapped_reads = mapped_reads
             self._reads = reads
         
@@ -581,6 +589,11 @@ def summary_report(runs):
 	    result = run.gerald.eland_results.results[sample]
             print "Sample name", sample
             print "Total Reads", result.reads
+            mc = result._match_codes
+	    print "No Match", mc['NM']
+	    print "QC Failed", mc['QC']
+            print 'Unique (0,1,2 mismatches)', mc['U0'], mc['U1'], mc['U2']
+            print 'Repeat (0,1,2 mismatches)', mc['R0'], mc['R1'], mc['R2']
             print "Mapped Reads"
             pprint(summarize_mapped_reads(result.mapped_reads))