p.gerald = g
runs.append(p)
except IOError, e:
- print "Ignoring", str(e)
+ logging.error("Ignoring " + str(e))
datadir = os.path.join(runfolder, 'Data')
for run in runs:
run.save()
-def summarize_mapped_reads(mapped_reads):
+def summarize_mapped_reads(genome_map, mapped_reads):
"""
Summarize per chromosome reads into a genome count
But handle spike-in/contamination symlinks separately.
genome = 'unknown'
for k, v in mapped_reads.items():
path, k = os.path.split(k)
- if len(path) > 0:
+ if len(path) > 0 and not genome_map.has_key(path):
genome = path
genome_reads += v
else:
report.append('Repeat (0,1,2 mismatches) %d %d %d' % \
(mc['R0'], mc['R1'], mc['R2']))
report.append("Mapped Reads")
- mapped_reads = summarize_mapped_reads(eland_result.mapped_reads)
+ mapped_reads = summarize_mapped_reads(eland_result.genome_map, eland_result.mapped_reads)
for name, counts in mapped_reads.items():
report.append(" %s: %d" % (name, counts))
report.append('')
>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0
>HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1,chr7.fa:22516603F1,chr9.fa:134886204R
+>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample1:55269838R0
+>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample2:55269838R0
""", """>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM
>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 NNNNNNNNNNNNNNGTGGTATGGCGGTGTCTGGTCGT QC
>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0,chr7.fa:22516603F1,chr9.fa:134886204R
->HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1"""]
+>HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1
+>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample1:55269838R0
+>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample2:55269838R0
+"""]
if paired:
for e in [1,2]:
for i in range(1,9):
for i in range(1,9):
lane = eland.results[0][i]
- self.failUnlessEqual(lane.reads, 4)
+ self.failUnlessEqual(lane.reads, 6)
self.failUnlessEqual(lane.sample_name, "s")
self.failUnlessEqual(lane.lane_id, i)
- self.failUnlessEqual(len(lane.mapped_reads), 15)
+ self.failUnlessEqual(len(lane.mapped_reads), 17)
self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4)
- self.failUnlessEqual(lane.match_codes['U0'], 1)
+ self.failUnlessEqual(lane.match_codes['U0'], 3)
self.failUnlessEqual(lane.match_codes['R0'], 2)
self.failUnlessEqual(lane.match_codes['U1'], 1)
self.failUnlessEqual(lane.match_codes['R1'], 9)
self.failUnlessEqual(l1.sample_name, l2.sample_name)
self.failUnlessEqual(l1.lane_id, l2.lane_id)
self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
- self.failUnlessEqual(len(l1.mapped_reads), 15)
+ self.failUnlessEqual(len(l1.mapped_reads), 17)
for k in l1.mapped_reads.keys():
self.failUnlessEqual(l1.mapped_reads[k],
l2.mapped_reads[k])
for i in range(1,9):
lane = eland.results[0][i]
- self.failUnlessEqual(lane.reads, 4)
+ self.failUnlessEqual(lane.reads, 6)
self.failUnlessEqual(lane.sample_name, "s")
self.failUnlessEqual(lane.lane_id, i)
- self.failUnlessEqual(len(lane.mapped_reads), 15)
+ self.failUnlessEqual(len(lane.mapped_reads), 17)
self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4)
- self.failUnlessEqual(lane.match_codes['U0'], 1)
+ self.failUnlessEqual(lane.mapped_reads['spike.fa/sample1'], 1)
+ self.failUnlessEqual(lane.mapped_reads['spike.fa/sample2'], 1)
+ self.failUnlessEqual(lane.match_codes['U0'], 3)
self.failUnlessEqual(lane.match_codes['R0'], 2)
self.failUnlessEqual(lane.match_codes['U1'], 1)
self.failUnlessEqual(lane.match_codes['R1'], 9)
self.failUnlessEqual(l1.sample_name, l2.sample_name)
self.failUnlessEqual(l1.lane_id, l2.lane_id)
self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
- self.failUnlessEqual(len(l1.mapped_reads), 15)
+ self.failUnlessEqual(len(l1.mapped_reads), 17)
for k in l1.mapped_reads.keys():
self.failUnlessEqual(l1.mapped_reads[k],
l2.mapped_reads[k])
# check first end
for i in range(1,9):
lane = eland.results[0][i]
- self.failUnlessEqual(lane.reads, 4)
+ self.failUnlessEqual(lane.reads, 6)
self.failUnlessEqual(lane.sample_name, "s")
self.failUnlessEqual(lane.lane_id, i)
- self.failUnlessEqual(len(lane.mapped_reads), 15)
+ self.failUnlessEqual(len(lane.mapped_reads), 17)
self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4)
- self.failUnlessEqual(lane.match_codes['U0'], 1)
+ self.failUnlessEqual(lane.match_codes['U0'], 3)
self.failUnlessEqual(lane.match_codes['R0'], 2)
self.failUnlessEqual(lane.match_codes['U1'], 1)
self.failUnlessEqual(lane.match_codes['R1'], 9)
# check second end
for i in range(1,9):
lane = eland.results[1][i]
- self.failUnlessEqual(lane.reads, 5)
+ self.failUnlessEqual(lane.reads, 7)
self.failUnlessEqual(lane.sample_name, "s")
self.failUnlessEqual(lane.lane_id, i)
- self.failUnlessEqual(len(lane.mapped_reads), 15)
+ self.failUnlessEqual(len(lane.mapped_reads), 17)
self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4)
- self.failUnlessEqual(lane.match_codes['U0'], 1)
+ self.failUnlessEqual(lane.match_codes['U0'], 3)
self.failUnlessEqual(lane.match_codes['R0'], 2)
self.failUnlessEqual(lane.match_codes['U1'], 1)
self.failUnlessEqual(lane.match_codes['R1'], 9)
self.failUnlessEqual(l1.sample_name, l2.sample_name)
self.failUnlessEqual(l1.lane_id, l2.lane_id)
self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
- self.failUnlessEqual(len(l1.mapped_reads), 15)
+ self.failUnlessEqual(len(l1.mapped_reads), 17)
for k in l1.mapped_reads.keys():
self.failUnlessEqual(l1.mapped_reads[k],
l2.mapped_reads[k])