"""
Process an eland result file
"""
- XML_VERSION = 1
+ XML_VERSION = 2
LANE = 'ElandLane'
SAMPLE_NAME = 'SampleName'
LANE_ID = 'LaneID'
+ END = 'End'
GENOME_MAP = 'GenomeMap'
GENOME_ITEM = 'GenomeItem'
MAPPED_READS = 'MappedReads'
ELAND_EXTENDED = 2
ELAND_EXPORT = 3
- def __init__(self, pathname=None, genome_map=None, eland_type=None, xml=None):
+ def __init__(self, pathname=None, lane_id=None, end=None, genome_map=None, eland_type=None, xml=None):
self.pathname = pathname
self._sample_name = None
- self._lane_id = None
+ self.lane_id = lane_id
+ self.end = end
self._reads = None
self._mapped_reads = None
self._match_codes = None
path, name = os.path.split(self.pathname)
split_name = name.split('_')
self._sample_name = split_name[0]
- self._lane_id = int(split_name[1])
def _get_sample_name(self):
if self._sample_name is None:
return self._sample_name
sample_name = property(_get_sample_name)
- def _get_lane_id(self):
- if self._lane_id is None:
- self._update_name()
- return self._lane_id
- lane_id = property(_get_lane_id)
-
def _get_reads(self):
if self._reads is None:
self._update()
sample_tag.text = self.sample_name
lane_tag = ElementTree.SubElement(lane, ElandLane.LANE_ID)
lane_tag.text = str(self.lane_id)
+ if self.end is not None:
+ end_tag = ElementTree.SubElement(lane, ElandLane.END)
+ end_tag.text = str(self.end)
genome_map = ElementTree.SubElement(lane, ElandLane.GENOME_MAP)
for k, v in self.genome_map.items():
item = ElementTree.SubElement(
if tag == ElandLane.SAMPLE_NAME.lower():
self._sample_name = element.text
elif tag == ElandLane.LANE_ID.lower():
- self._lane_id = int(element.text)
+ self.lane_id = int(element.text)
+ elif tag == ElandLane.END.lower():
+ self.end = int(element.text)
elif tag == ElandLane.GENOME_MAP.lower():
for child in element:
name = child.attrib['name']
"""
Summarize information from eland files
"""
- XML_VERSION = 1
+ XML_VERSION = 2
ELAND = 'ElandCollection'
LANE = 'Lane'
LANE_ID = 'id'
+ END = 'end'
def __init__(self, xml=None):
# we need information from the gerald config.xml
- self.results = {}
+ self.results = [{},{}]
if xml is not None:
self.set_elements(xml)
- def __len__(self):
- return len(self.results)
-
- def keys(self):
- return self.results.keys()
-
- def values(self):
- return self.results.values()
-
- def items(self):
- return self.results.items()
-
- def __getitem__(self, key):
- return self.results[key]
-
def get_elements(self):
root = ElementTree.Element(ELAND.ELAND,
{'version': unicode(ELAND.XML_VERSION)})
- for lane_id, lane in self.results.items():
- eland_lane = lane.get_elements()
- eland_lane.attrib[ELAND.LANE_ID] = unicode(lane_id)
- root.append(eland_lane)
+ for end in range(len(self.results)):
+ end_results = self.results[end]
+ for lane_id, lane in end_results.items():
+ eland_lane = lane.get_elements()
+ eland_lane.attrib[ELAND.END] = unicode (end)
+ eland_lane.attrib[ELAND.LANE_ID] = unicode(lane_id)
+ root.append(eland_lane)
return root
def set_elements(self, tree):
raise ValueError('Expecting %s', ELAND.ELAND)
for element in list(tree):
lane_id = int(element.attrib[ELAND.LANE_ID])
+ end = int(element.attrib.get(ELAND.END, 0))
lane = ElandLane(xml=element)
- self.results[lane_id] = lane
+ self.results[end][lane_id] = lane
+
+def check_for_eland_file(basedir, pattern, lane_id, end):
+ if end is None:
+ full_lane_id = lane_id
+ else:
+ full_lane_id = "%d_%d" % ( lane_id, end )
-def check_for_eland_file(basedir, lane_id, pattern):
- basename = pattern % (lane_id,)
+ basename = pattern % (full_lane_id,)
pathname = os.path.join(basedir, basename)
if os.path.exists(pathname):
return pathname
def eland(basedir, gerald=None, genome_maps=None):
e = ELAND()
- file_list = glob(os.path.join(basedir, "*_eland_result.txt"))
- if len(file_list) == 0:
- # lets handle compressed eland files too
- file_list = glob(os.path.join(basedir, "*_eland_result.txt.bz2"))
+ #file_list = glob(os.path.join(basedir, "*_eland_result.txt"))
+ #if len(file_list) == 0:
+ # # lets handle compressed eland files too
+ # file_list = glob(os.path.join(basedir, "*_eland_result.txt.bz2"))
lane_ids = range(1,9)
+ ends = [None, 1, 2]
+
# the order in patterns determines the preference for what
# will be found.
- patterns = ['s_%d_eland_result.txt',
- 's_%d_eland_result.txt.bz2',
- 's_%d_eland_result.txt.gz',
- 's_%d_eland_extended.txt',
- 's_%d_eland_extended.txt.bz2',
- 's_%d_eland_extended.txt.gz',
- 's_%d_eland_multi.txt',
- 's_%d_eland_multi.txt.bz2',
- 's_%d_eland_multi.txt.gz',]
-
- for lane_id in lane_ids:
- for p in patterns:
- pathname = check_for_eland_file(basedir, lane_id, p)
- if pathname is not None:
- break
- else:
- continue
- # yes the lane_id is also being computed in ElandLane._update
- # I didn't want to clutter up my constructor
- # but I needed to persist the sample_name/lane_id for
- # runfolder summary_report
- path, name = os.path.split(pathname)
- logging.info("Adding eland file %s" %(name,))
- split_name = name.split('_')
- lane_id = int(split_name[1])
-
- if genome_maps is not None:
- genome_map = genome_maps[lane_id]
- elif gerald is not None:
- genome_dir = gerald.lanes[lane_id].eland_genome
- genome_map = build_genome_fasta_map(genome_dir)
- else:
- genome_map = {}
+ patterns = ['s_%s_eland_result.txt',
+ 's_%s_eland_result.txt.bz2',
+ 's_%s_eland_result.txt.gz',
+ 's_%s_eland_extended.txt',
+ 's_%s_eland_extended.txt.bz2',
+ 's_%s_eland_extended.txt.gz',
+ 's_%s_eland_multi.txt',
+ 's_%s_eland_multi.txt.bz2',
+ 's_%s_eland_multi.txt.gz',]
+
+ for end in ends:
+ for lane_id in lane_ids:
+ for p in patterns:
+ pathname = check_for_eland_file(basedir, p, lane_id, end)
+ if pathname is not None:
+ break
+ else:
+ continue
+ # lane_id and end come from the loop variables and are passed
+ # straight to the ElandLane constructor, so they are persisted
+ # for the runfolder summary_report without re-parsing the file
+ # name; the name is split off below only for the log message.
+ path, name = os.path.split(pathname)
+ logging.info("Adding eland file %s" %(name,))
+ # split_name = name.split('_')
+ # lane_id = int(split_name[1])
+
+ if genome_maps is not None:
+ genome_map = genome_maps[lane_id]
+ elif gerald is not None:
+ genome_dir = gerald.lanes[lane_id].eland_genome
+ genome_map = build_genome_fasta_map(genome_dir)
+ else:
+ genome_map = {}
- eland_result = ElandLane(pathname, genome_map)
- e.results[lane_id] = eland_result
+ eland_result = ElandLane(pathname, lane_id, end, genome_map)
+ if end is None:
+ effective_end = 0
+ else:
+ effective_end = end - 1
+ e.results[effective_end][lane_id] = eland_result
return e
def build_genome_fasta_map(genome_dir):
def summarize_lane(gerald, lane_id):
report = []
summary_results = gerald.summary.lane_results
- eland_result = gerald.eland_results.results[lane_id]
- report.append("Sample name %s" % (eland_result.sample_name))
- report.append("Lane id %s" % (eland_result.lane_id,))
- cluster = summary_results[eland_result.lane_id].cluster
- report.append("Clusters %d +/- %d" % (cluster[0], cluster[1]))
- report.append("Total Reads: %d" % (eland_result.reads))
- mc = eland_result._match_codes
- nm = mc['NM']
- nm_percent = float(nm)/eland_result.reads * 100
- qc = mc['QC']
- qc_percent = float(qc)/eland_result.reads * 100
-
- report.append("No Match: %d (%2.2g %%)" % (nm, nm_percent))
- report.append("QC Failed: %d (%2.2g %%)" % (qc, qc_percent))
- report.append('Unique (0,1,2 mismatches) %d %d %d' % \
- (mc['U0'], mc['U1'], mc['U2']))
- report.append('Repeat (0,1,2 mismatches) %d %d %d' % \
- (mc['R0'], mc['R1'], mc['R2']))
- report.append("Mapped Reads")
- mapped_reads = summarize_mapped_reads(eland_result.mapped_reads)
- for name, counts in mapped_reads.items():
- report.append(" %s: %d" % (name, counts))
+ for end in range(len(summary_results)):
+ eland_result = gerald.eland_results.results[end][lane_id]
+ report.append("Sample name %s" % (eland_result.sample_name))
+ report.append("Lane id %s end %s" % (eland_result.lane_id, end))
+ cluster = summary_results[end][eland_result.lane_id].cluster
+ report.append("Clusters %d +/- %d" % (cluster[0], cluster[1]))
+ report.append("Total Reads: %d" % (eland_result.reads))
+ mc = eland_result._match_codes
+ nm = mc['NM']
+ nm_percent = float(nm)/eland_result.reads * 100
+ qc = mc['QC']
+ qc_percent = float(qc)/eland_result.reads * 100
+
+ report.append("No Match: %d (%2.2g %%)" % (nm, nm_percent))
+ report.append("QC Failed: %d (%2.2g %%)" % (qc, qc_percent))
+ report.append('Unique (0,1,2 mismatches) %d %d %d' % \
+ (mc['U0'], mc['U1'], mc['U2']))
+ report.append('Repeat (0,1,2 mismatches) %d %d %d' % \
+ (mc['R0'], mc['R1'], mc['R2']))
+ report.append("Mapped Reads")
+ mapped_reads = summarize_mapped_reads(eland_result.mapped_reads)
+ for name, counts in mapped_reads.items():
+ report.append(" %s: %d" % (name, counts))
+ report.append('')
return report
def summary_report(runs):
# print a run name?
report.append('Summary for %s' % (run.name,))
# sort the report
- eland_keys = run.gerald.eland_results.results.keys()
+ eland_keys = run.gerald.eland_results.results[0].keys()
eland_keys.sort(alphanum)
for lane_id in eland_keys:
tar.wait()
# copy & bzip eland files
- for eland_lane in g.eland_results.values():
- source_name = eland_lane.pathname
- path, name = os.path.split(eland_lane.pathname)
- dest_name = os.path.join(cycle_dir, name)
- if is_compressed(name):
- logging.info('Already compressed, Saving to %s' % (dest_name, ))
- shutil.copy(source_name, dest_name)
- else:
- # not compressed
- dest_name += '.bz2'
- args = ['bzip2', '-9', '-c', source_name]
- logging.info('Running: %s' % ( " ".join(args) ))
- bzip_dest = open(dest_name, 'w')
- bzip = subprocess.Popen(args, stdout=bzip_dest)
- logging.info('Saving to %s' % (dest_name, ))
- bzip.wait()
+ for lanes_dictionary in g.eland_results.results:
+ for eland_lane in lanes_dictionary.values():
+ source_name = eland_lane.pathname
+ path, name = os.path.split(eland_lane.pathname)
+ dest_name = os.path.join(cycle_dir, name)
+ if is_compressed(name):
+ logging.info('Already compressed, Saving to %s' % (dest_name, ))
+ shutil.copy(source_name, dest_name)
+ else:
+ # not compressed
+ dest_name += '.bz2'
+ args = ['bzip2', '-9', '-c', source_name]
+ logging.info('Running: %s' % ( " ".join(args) ))
+ bzip_dest = open(dest_name, 'w')
+ bzip = subprocess.Popen(args, stdout=bzip_dest)
+ logging.info('Saving to %s' % (dest_name, ))
+ bzip.wait()
def clean_runs(runs):
"""
f.write(eland_result)
f.close()
-def make_eland_multi(gerald_dir):
- eland_multi = """>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM
+def make_eland_multi(gerald_dir, paired=False):
+ eland_multi = [""">HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM
>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0
>HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1,chr7.fa:22516603F1,chr9.fa:134886204R
-"""
- for i in range(1,9):
- pathname = os.path.join(gerald_dir,
- 's_%d_eland_multi.txt' % (i,))
- f = open(pathname, 'w')
- f.write(eland_multi)
- f.close()
+""", """>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM
+>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 NNNNNNNNNNNNNNGTGGTATGGCGGTGTCTGGTCGT QC
+>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0
+>HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0,chr7.fa:22516603F1,chr9.fa:134886204R
+>HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1"""]
+ if paired:
+ for e in [1,2]:
+ for i in range(1,9):
+ pathname = os.path.join(gerald_dir,
+ 's_%d_%d_eland_multi.txt' % (i,e))
+ f = open(pathname, 'w')
+ f.write(eland_multi[e-1])
+ f.close()
+ else:
+ for i in range(1,9):
+ pathname = os.path.join(gerald_dir,
+ 's_%d_eland_multi.txt' % (i,))
+ f = open(pathname, 'w')
+ f.write(eland_multi[0])
+ f.close()
g_eland = g.eland_results
g2_eland = g2.eland_results
- for lane in g_eland.keys():
- self.failUnlessEqual(g_eland[lane].reads,
- g2_eland[lane].reads)
- self.failUnlessEqual(len(g_eland[lane].mapped_reads),
- len(g2_eland[lane].mapped_reads))
- for k in g_eland[lane].mapped_reads.keys():
- self.failUnlessEqual(g_eland[lane].mapped_reads[k],
- g2_eland[lane].mapped_reads[k])
-
- self.failUnlessEqual(len(g_eland[lane].match_codes),
- len(g2_eland[lane].match_codes))
- for k in g_eland[lane].match_codes.keys():
- self.failUnlessEqual(g_eland[lane].match_codes[k],
- g2_eland[lane].match_codes[k])
+ for lane in g_eland.results[0].keys():
+ g_results = g_eland.results[0][lane]
+ g2_results = g2_eland.results[0][lane]
+ self.failUnlessEqual(g_results.reads,
+ g2_results.reads)
+ self.failUnlessEqual(len(g_results.mapped_reads),
+ len(g2_results.mapped_reads))
+ for k in g_results.mapped_reads.keys():
+ self.failUnlessEqual(g_results.mapped_reads[k],
+ g2_results.mapped_reads[k])
+
+ self.failUnlessEqual(len(g_results.match_codes),
+ len(g2_results.match_codes))
+ for k in g_results.match_codes.keys():
+ self.failUnlessEqual(g_results.match_codes[k],
+ g2_results.match_codes[k])
def test_eland(self):
eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
for i in range(1,9):
- lane = eland[i]
+ lane = eland.results[0][i]
self.failUnlessEqual(lane.reads, 4)
self.failUnlessEqual(lane.sample_name, "s")
self.failUnlessEqual(lane.lane_id, i)
e2 = gerald.ELAND(xml=xml)
for i in range(1,9):
- l1 = eland[i]
- l2 = e2[i]
+ l1 = eland.results[0][i]
+ l2 = e2.results[0][i]
self.failUnlessEqual(l1.reads, l2.reads)
self.failUnlessEqual(l1.sample_name, l2.sample_name)
self.failUnlessEqual(l1.lane_id, l2.lane_id)
g_eland = g.eland_results
g2_eland = g2.eland_results
- for lane in g_eland.keys():
- self.failUnlessEqual(g_eland[lane].reads,
- g2_eland[lane].reads)
- self.failUnlessEqual(len(g_eland[lane].mapped_reads),
- len(g2_eland[lane].mapped_reads))
- for k in g_eland[lane].mapped_reads.keys():
- self.failUnlessEqual(g_eland[lane].mapped_reads[k],
- g2_eland[lane].mapped_reads[k])
-
- self.failUnlessEqual(len(g_eland[lane].match_codes),
- len(g2_eland[lane].match_codes))
- for k in g_eland[lane].match_codes.keys():
- self.failUnlessEqual(g_eland[lane].match_codes[k],
- g2_eland[lane].match_codes[k])
+ for lane in g_eland.results[0].keys():
+ g_results = g_eland.results[0][lane]
+ g2_results = g2_eland.results[0][lane]
+ self.failUnlessEqual(g_results.reads,
+ g2_results.reads)
+ self.failUnlessEqual(len(g_results.mapped_reads),
+ len(g2_results.mapped_reads))
+ for k in g_results.mapped_reads.keys():
+ self.failUnlessEqual(g_results.mapped_reads[k],
+ g2_results.mapped_reads[k])
+
+ self.failUnlessEqual(len(g_results.match_codes),
+ len(g2_results.match_codes))
+ for k in g_results.match_codes.keys():
+ self.failUnlessEqual(g_results.match_codes[k],
+ g2_results.match_codes[k])
def test_eland(self):
eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
for i in range(1,9):
- lane = eland[i]
+ lane = eland.results[0][i]
self.failUnlessEqual(lane.reads, 4)
self.failUnlessEqual(lane.sample_name, "s")
self.failUnlessEqual(lane.lane_id, i)
e2 = gerald.ELAND(xml=xml)
for i in range(1,9):
- l1 = eland[i]
- l2 = e2[i]
+ l1 = eland.results[0][i]
+ l2 = e2.results[0][i]
self.failUnlessEqual(l1.reads, l2.reads)
self.failUnlessEqual(l1.sample_name, l2.sample_name)
self.failUnlessEqual(l1.lane_id, l2.lane_id)
g_eland = g.eland_results
g2_eland = g2.eland_results
- for lane in g_eland.keys():
- self.failUnlessEqual(g_eland[lane].reads,
- g2_eland[lane].reads)
- self.failUnlessEqual(len(g_eland[lane].mapped_reads),
- len(g2_eland[lane].mapped_reads))
- for k in g_eland[lane].mapped_reads.keys():
- self.failUnlessEqual(g_eland[lane].mapped_reads[k],
- g2_eland[lane].mapped_reads[k])
-
- self.failUnlessEqual(len(g_eland[lane].match_codes),
- len(g2_eland[lane].match_codes))
- for k in g_eland[lane].match_codes.keys():
- self.failUnlessEqual(g_eland[lane].match_codes[k],
- g2_eland[lane].match_codes[k])
+ for lane in g_eland.results[0].keys():
+ g_results = g_eland.results[0][lane]
+ g2_results = g2_eland.results[0][lane]
+ self.failUnlessEqual(g_results.reads,
+ g2_results.reads)
+ self.failUnlessEqual(len(g_results.mapped_reads),
+ len(g2_results.mapped_reads))
+ for k in g_results.mapped_reads.keys():
+ self.failUnlessEqual(g_results.mapped_reads[k],
+ g2_results.mapped_reads[k])
+
+ self.failUnlessEqual(len(g_results.match_codes),
+ len(g2_results.match_codes))
+ for k in g_results.match_codes.keys():
+ self.failUnlessEqual(g_results.match_codes[k],
+ g2_results.match_codes[k])
def test_eland(self):
eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
for i in range(1,9):
- lane = eland[i]
+ lane = eland.results[0][i]
self.failUnlessEqual(lane.reads, 4)
self.failUnlessEqual(lane.sample_name, "s")
self.failUnlessEqual(lane.lane_id, i)
e2 = gerald.ELAND(xml=xml)
for i in range(1,9):
- l1 = eland[i]
- l2 = e2[i]
+ l1 = eland.results[0][i]
+ l2 = e2.results[0][i]
self.failUnlessEqual(l1.reads, l2.reads)
self.failUnlessEqual(l1.sample_name, l2.sample_name)
self.failUnlessEqual(l1.lane_id, l2.lane_id)
g_eland = g.eland_results
g2_eland = g2.eland_results
- for lane in g_eland.keys():
- self.failUnlessEqual(g_eland[lane].reads,
- g2_eland[lane].reads)
- self.failUnlessEqual(len(g_eland[lane].mapped_reads),
- len(g2_eland[lane].mapped_reads))
- for k in g_eland[lane].mapped_reads.keys():
- self.failUnlessEqual(g_eland[lane].mapped_reads[k],
- g2_eland[lane].mapped_reads[k])
-
- self.failUnlessEqual(len(g_eland[lane].match_codes),
- len(g2_eland[lane].match_codes))
- for k in g_eland[lane].match_codes.keys():
- self.failUnlessEqual(g_eland[lane].match_codes[k],
- g2_eland[lane].match_codes[k])
+ for lane in g_eland.results[0].keys():
+ g_results = g_eland.results[0][lane]
+ g2_results = g2_eland.results[0][lane]
+ self.failUnlessEqual(g_results.reads,
+ g2_results.reads)
+ self.failUnlessEqual(len(g_results.mapped_reads),
+ len(g2_results.mapped_reads))
+ for k in g_results.mapped_reads.keys():
+ self.failUnlessEqual(g_results.mapped_reads[k],
+ g2_results.mapped_reads[k])
+
+ self.failUnlessEqual(len(g_results.match_codes),
+ len(g2_results.match_codes))
+ for k in g_results.match_codes.keys():
+ self.failUnlessEqual(g_results.match_codes[k],
+ g2_results.match_codes[k])
def test_eland(self):
eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
for i in range(1,9):
- lane = eland[i]
+ lane = eland.results[0][i]
self.failUnlessEqual(lane.reads, 4)
self.failUnlessEqual(lane.sample_name, "s")
self.failUnlessEqual(lane.lane_id, i)
e2 = gerald.ELAND(xml=xml)
for i in range(1,9):
- l1 = eland[i]
- l2 = e2[i]
+ l1 = eland.results[0][i]
+ l2 = e2.results[0][i]
self.failUnlessEqual(l1.reads, l2.reads)
self.failUnlessEqual(l1.sample_name, l2.sample_name)
self.failUnlessEqual(l1.lane_id, l2.lane_id)
os.mkdir(gerald_dir)
make_gerald_config(gerald_dir)
make_summary_paired_htm(gerald_dir)
- make_eland_multi(gerald_dir)
+ make_eland_multi(gerald_dir, paired=True)
if obj is not None:
obj.temp_dir = temp_dir
g_eland = g.eland_results
g2_eland = g2.eland_results
- for lane in g_eland.keys():
- self.failUnlessEqual(g_eland[lane].reads,
- g2_eland[lane].reads)
- self.failUnlessEqual(len(g_eland[lane].mapped_reads),
- len(g2_eland[lane].mapped_reads))
- for k in g_eland[lane].mapped_reads.keys():
- self.failUnlessEqual(g_eland[lane].mapped_reads[k],
- g2_eland[lane].mapped_reads[k])
-
- self.failUnlessEqual(len(g_eland[lane].match_codes),
- len(g2_eland[lane].match_codes))
- for k in g_eland[lane].match_codes.keys():
- self.failUnlessEqual(g_eland[lane].match_codes[k],
- g2_eland[lane].match_codes[k])
+ for lane in g_eland.results[end].keys():
+ g_results = g_eland.results[end][lane]
+ g2_results = g2_eland.results[end][lane]  # second run (g2), not g_eland again
+ self.failUnlessEqual(g_results.reads,
+ g2_results.reads)
+ self.failUnlessEqual(len(g_results.mapped_reads),
+ len(g2_results.mapped_reads))
+ for k in g_results.mapped_reads.keys():
+ self.failUnlessEqual(g_results.mapped_reads[k],
+ g2_results.mapped_reads[k])
+
+ self.failUnlessEqual(len(g_results.match_codes),
+ len(g2_results.match_codes))
+ for k in g_results.match_codes.keys():
+ self.failUnlessEqual(g_results.match_codes[k],
+ g2_results.match_codes[k])
def test_eland(self):
5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map }
eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
+ # check first end
for i in range(1,9):
- lane = eland[i]
+ lane = eland.results[0][i]
self.failUnlessEqual(lane.reads, 4)
self.failUnlessEqual(lane.sample_name, "s")
self.failUnlessEqual(lane.lane_id, i)
self.failUnlessEqual(lane.match_codes['NM'], 1)
self.failUnlessEqual(lane.match_codes['QC'], 0)
+ # check second end
+ for i in range(1,9):
+ lane = eland.results[1][i]
+ self.failUnlessEqual(lane.reads, 5)
+ self.failUnlessEqual(lane.sample_name, "s")
+ self.failUnlessEqual(lane.lane_id, i)
+ self.failUnlessEqual(len(lane.mapped_reads), 15)
+ self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4)
+ self.failUnlessEqual(lane.match_codes['U0'], 1)
+ self.failUnlessEqual(lane.match_codes['R0'], 2)
+ self.failUnlessEqual(lane.match_codes['U1'], 1)
+ self.failUnlessEqual(lane.match_codes['R1'], 9)
+ self.failUnlessEqual(lane.match_codes['U2'], 0)
+ self.failUnlessEqual(lane.match_codes['R2'], 12)
+ self.failUnlessEqual(lane.match_codes['NM'], 1)
+ self.failUnlessEqual(lane.match_codes['QC'], 1)
+
xml = eland.get_elements()
# just make sure that element tree can serialize the tree
xml_str = ElementTree.tostring(xml)
e2 = gerald.ELAND(xml=xml)
- for i in range(1,9):
- l1 = eland[i]
- l2 = e2[i]
- self.failUnlessEqual(l1.reads, l2.reads)
- self.failUnlessEqual(l1.sample_name, l2.sample_name)
- self.failUnlessEqual(l1.lane_id, l2.lane_id)
- self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
- self.failUnlessEqual(len(l1.mapped_reads), 15)
- for k in l1.mapped_reads.keys():
- self.failUnlessEqual(l1.mapped_reads[k],
- l2.mapped_reads[k])
-
- self.failUnlessEqual(len(l1.match_codes), 9)
- self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
- for k in l1.match_codes.keys():
- self.failUnlessEqual(l1.match_codes[k],
- l2.match_codes[k])
+ for end in [0, 1]:
+ for i in range(1,9):
+ l1 = eland.results[end][i]
+ l2 = e2.results[end][i]
+ self.failUnlessEqual(l1.reads, l2.reads)
+ self.failUnlessEqual(l1.sample_name, l2.sample_name)
+ self.failUnlessEqual(l1.lane_id, l2.lane_id)
+ self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
+ self.failUnlessEqual(len(l1.mapped_reads), 15)
+ for k in l1.mapped_reads.keys():
+ self.failUnlessEqual(l1.mapped_reads[k],
+ l2.mapped_reads[k])
+
+ self.failUnlessEqual(len(l1.match_codes), 9)
+ self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
+ for k in l1.match_codes.keys():
+ self.failUnlessEqual(l1.match_codes[k],
+ l2.match_codes[k])
def test_runfolder(self):
runs = runfolder.get_runs(self.runfolder_dir)