path, name = os.path.split(self.pathname)
split_name = name.split('_')
self._sample_name = split_name[0]
- self._lane_id = split_name[1]
+ self._lane_id = int(split_name[1])
def _get_sample_name(self):
if self._sample_name is None:
sample_tag = ElementTree.SubElement(lane, ElandLane.SAMPLE_NAME)
sample_tag.text = self.sample_name
lane_tag = ElementTree.SubElement(lane, ElandLane.LANE_ID)
- lane_tag.text = self.lane_id
+ lane_tag.text = str(self.lane_id)
genome_map = ElementTree.SubElement(lane, ElandLane.GENOME_MAP)
for k, v in self.genome_map.items():
item = ElementTree.SubElement(
if tag == ElandLane.SAMPLE_NAME.lower():
self._sample_name = element.text
elif tag == ElandLane.LANE_ID.lower():
- self._lane_id = element.text
+ self._lane_id = int(element.text)
elif tag == ElandLane.GENOME_MAP.lower():
for child in element:
name = child.attrib['name']
if tree.tag.lower() != ELAND.ELAND.lower():
raise ValueError('Expecting %s', ELAND.ELAND)
for element in list(tree):
- lane_id = element.attrib[ELAND.LANE_ID]
+ lane_id = int(element.attrib[ELAND.LANE_ID])
lane = ElandLane(xml=element)
self.results[lane_id] = lane
# lets handle compressed eland files too
file_list = glob(os.path.join(basedir, "*_eland_result.txt.bz2"))
- lane_ids = ['1','2','3','4','5','6','7','8']
+ lane_ids = range(1,9)
# the order in patterns determines the preference for what
# will be found.
- patterns = ['s_%s_eland_result.txt',
- 's_%s_eland_result.txt.bz2',
- 's_%s_eland_result.txt.gz',
- 's_%s_eland_extended.txt',
- 's_%s_eland_extended.txt.bz2',
- 's_%s_eland_extended.txt.gz',
- 's_%s_eland_multi.txt',
- 's_%s_eland_multi.txt.bz2',
- 's_%s_eland_multi.txt.gz',]
+ patterns = ['s_%d_eland_result.txt',
+ 's_%d_eland_result.txt.bz2',
+ 's_%d_eland_result.txt.gz',
+ 's_%d_eland_extended.txt',
+ 's_%d_eland_extended.txt.bz2',
+ 's_%d_eland_extended.txt.gz',
+ 's_%d_eland_multi.txt',
+ 's_%d_eland_multi.txt.bz2',
+ 's_%d_eland_multi.txt.gz',]
for lane_id in lane_ids:
for p in patterns:
path, name = os.path.split(pathname)
logging.info("Adding eland file %s" %(name,))
split_name = name.split('_')
- lane_id = split_name[1]
+ lane_id = int(split_name[1])
if genome_maps is not None:
genome_map = genome_maps[lane_id]
# those consistently.
for element in analysis:
sample, lane_id = element.tag.split('_')
- self._lanes[lane_id] = Gerald.LaneParameters(self._gerald, lane_id)
+ self._lanes[int(lane_id)] = Gerald.LaneParameters(
+ self._gerald, lane_id)
def __getitem__(self, key):
if self._lane is None:
# quick test code
import sys
g = gerald(sys.argv[1])
- #ElementTree.dump(g.get_elements())
\ No newline at end of file
+ #ElementTree.dump(g.get_elements())
"""
Extract some useful information from the Summary.htm file
"""
- XML_VERSION = 2
+ XML_VERSION = 3
SUMMARY = 'Summary'
class LaneResultSummary(object):
def __init__(self, html=None, xml=None):
self.lane = None
+ self.end = 0
self.lane_yield = None
self.cluster = None
self.cluster_pass_filter = None
raise RuntimeError("Summary.htm file format changed")
# same in pre-0.3.0 Summary file and 0.3 summary file
- self.lane = data[0]
+ self.lane = int(data[0])
if len(data) == 8:
parsed_data = [ parse_mean_range(x) for x in data[1:] ]
def get_elements(self):
lane_result = ElementTree.Element(
Summary.LaneResultSummary.LANE_RESULT_SUMMARY,
- {'lane': self.lane})
+ {'lane': str(self.lane), 'end': str(self.end)})
for tag, variable_name in Summary.LaneResultSummary.TAGS.items():
value = getattr(self, variable_name)
if value is None:
if tree.tag != Summary.LaneResultSummary.LANE_RESULT_SUMMARY:
raise ValueError('Expected %s' % (
Summary.LaneResultSummary.LANE_RESULT_SUMMARY))
- self.lane = tree.attrib['lane']
+ self.lane = int(tree.attrib['lane'])
+ # default to the first end, for the older summary files
+ # that are single ended
+ self.end = int(tree.attrib.get('end', 0))
tags = Summary.LaneResultSummary.TAGS
for element in list(tree):
try:
logging.warn('Unrecognized tag %s' % (element.tag,))
def __init__(self, filename=None, xml=None):
- self.lane_results = {}
+ # lane_results is a list with one entry per end (1 or 2 entries);
+ # each entry is a dictionary, keyed by lane number, of the lanes
+ # reported for that end in this summary file
+ self.lane_results = [{}]
if filename is not None:
self._extract_lane_results(filename)
def __len__(self):
return len(self.lane_results)
- def keys(self):
- return self.lane_results.keys()
-
- def values(self):
- return self.lane_results.values()
-
- def items(self):
- return self.lane_results.items()
-
def _flattened_row(self, row):
"""
flatten the children of a <tr>...</tr>
return tables
def _extract_lane_results(self, pathname):
+ """
+ extract the lane results summary table for each end
+ found in the summary file at pathname
+ """
+ tables = self._extract_named_tables(pathname)
+ # (table title, end index): the untitled table and 'Read 1'
+ # both map to end 0, 'Read 2' maps to end 1
+ table_names = [ ('Lane Results Summary', 0),
+ ('Lane Results Summary : Read 1', 0),
+ ('Lane Results Summary : Read 2', 1),]
+ for name, end in table_names:
+ # tables missing from this file are skipped
+ if name in tables:
+ self._extract_lane_results_for_end(tables, name, end)
+
+ def _extract_lane_results_for_end(self, tables, table_name, end):
"""
extract the Lane Results Summary table
"""
-
- tables = self._extract_named_tables(pathname)
-
# parse lane result summary
- lane_summary = tables['Lane Results Summary']
+ lane_summary = tables[table_name]
# this is version 1 of the summary file
if len(lane_summary[-1]) == 8:
# strip header
lane_summary = lane_summary[2:10]
# after the last lane, there's a set of chip wide averages
+ # append an extra dictionary if needed
+ if len(self.lane_results) < (end + 1):
+ self.lane_results.append({})
+
for r in lane_summary:
lrs = Summary.LaneResultSummary(html=r)
- self.lane_results[lrs.lane] = lrs
+ lrs.end = end
+ self.lane_results[lrs.end][lrs.lane] = lrs
def get_elements(self):
+ """
+ build a Summary element, tagged with the xml version, holding
+ the elements of every lane result from every end
+ """
summary = ElementTree.Element(Summary.SUMMARY,
{'version': unicode(Summary.XML_VERSION)})
- for lane in self.lane_results.values():
- summary.append(lane.get_elements())
+ for end in self.lane_results:
+ for lane in end.values():
+ summary.append(lane.get_elements())
return summary
def set_elements(self, tree):
+ """
+ load lane results from the child elements of tree,
+ filing each one under its end and lane number
+ """
for element in list(tree):
lrs = Summary.LaneResultSummary()
lrs.set_elements(element)
- self.lane_results[lrs.lane] = lrs
+ # add a dictionary for the second end the first time
+ # a result with end == 1 shows up
+ if len(self.lane_results) < (lrs.end + 1):
+ self.lane_results.append({})
+ self.lane_results[lrs.end][lrs.lane] = lrs
+
+ def is_paired_end(self):
+ """
+ return True when lane results for two ends have been stored
+ """
+ return len(self.lane_results) == 2
def dump(self):
"""
<tr>
<td>1</td>
<td>277083</td>
-<td>103646 +/- 4515</td>
+<td>103647 +/- 4516</td>
<td>74887 +/- 6080</td>
<td>277 +/- 17</td>
<td>94.42 +/- 5.68</td>
<tr>
<td>2</td>
<td>289563</td>
-<td>106678 +/- 4652</td>
+<td>106679 +/- 4653</td>
<td>78260 +/- 2539</td>
<td>259 +/- 13</td>
<td>93.57 +/- 2.55</td>
<tr>
<td>3</td>
<td>259242</td>
-<td>84583 +/- 5963</td>
+<td>84584 +/- 5964</td>
<td>70065 +/- 4194</td>
<td>252 +/- 12</td>
<td>94.23 +/- 2.19</td>
<tr>
<td>4</td>
<td>210549</td>
-<td>68813 +/- 4782</td>
+<td>68814 +/- 4783</td>
<td>56905 +/- 4145</td>
<td>226 +/- 16</td>
<td>96.82 +/- 7.12</td>
<tr>
<td>5</td>
<td>295555</td>
-<td>104854 +/- 4664</td>
+<td>104855 +/- 4665</td>
<td>79879 +/- 6270</td>
<td>200 +/- 24</td>
<td>103.56 +/- 15.45</td>
<tr>
<td>6</td>
<td>140401</td>
-<td>43555 +/- 1632</td>
+<td>43556 +/- 1633</td>
<td>37946 +/- 2140</td>
<td>179 +/- 10</td>
<td>100.82 +/- 5.47</td>
<tr>
<td>7</td>
<td>154217</td>
-<td>54265 +/- 1588</td>
+<td>54266 +/- 1589</td>
<td>41680 +/- 5319</td>
<td>184 +/- 5</td>
<td>103.42 +/- 3.47</td>
<tr>
<td>8</td>
<td>147969</td>
-<td>64363 +/- 2697</td>
+<td>64364 +/- 2698</td>
<td>39991 +/- 6785</td>
<td>206 +/- 31</td>
<td>99.48 +/- 3.23</td>
# test lane specific parameters from gerald config file
for i in range(1,9):
- cur_lane = g.lanes[str(i)]
+ cur_lane = g.lanes[i]
self.failUnlessEqual(cur_lane.analysis, 'eland')
self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
self.failUnlessEqual(cur_lane.read_length, '32')
(17421, 2139), (20311, 2402), (20193, 2399), (15537, 2531),
(32047, 3356), (32946, 4753), (39504, 4171), (37998, 3792)]
+ self.failUnlessEqual(len(g.summary), 1)
for i in range(1,9):
- summary_lane = g.summary[str(i)]
+ summary_lane = g.summary[0][i]
self.failUnlessEqual(summary_lane.cluster, clusters[i])
- self.failUnlessEqual(summary_lane.lane, str(i))
+ self.failUnlessEqual(summary_lane.lane, i)
xml = g.get_elements()
# just make sure that element tree can serialize the tree
# test lane specific parameters from gerald config file
for i in range(1,9):
- g_lane = g.lanes[str(i)]
- g2_lane = g2.lanes[str(i)]
+ g_lane = g.lanes[i]
+ g2_lane = g2.lanes[i]
self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
+ self.failUnlessEqual(len(g.summary), 1)
# test (some) summary elements
for i in range(1,9):
- g_summary = g.summary[str(i)]
- g2_summary = g2.summary[str(i)]
+ g_summary = g.summary[0][i]
+ g2_summary = g2.summary[0][i]
self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
self.failUnlessEqual(g_summary.lane, g2_summary.lane)
dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
'chr2L.fa': 'dm3/chr2L.fa',
'Lambda.fa': 'Lambda.fa'}
- genome_maps = { '1':dm3_map, '2':dm3_map, '3':dm3_map, '4':dm3_map,
- '5':dm3_map, '6':dm3_map, '7':dm3_map, '8':dm3_map }
+ genome_maps = { 1:dm3_map, 2:dm3_map, 3:dm3_map, 4:dm3_map,
+ 5:dm3_map, 6:dm3_map, 7:dm3_map, 8:dm3_map }
eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
for i in range(1,9):
- lane = eland[str(i)]
+ lane = eland[i]
self.failUnlessEqual(lane.reads, 4)
self.failUnlessEqual(lane.sample_name, "s")
- self.failUnlessEqual(lane.lane_id, unicode(i))
+ self.failUnlessEqual(lane.lane_id, i)
self.failUnlessEqual(len(lane.mapped_reads), 3)
self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
e2 = gerald.ELAND(xml=xml)
for i in range(1,9):
- l1 = eland[str(i)]
- l2 = e2[str(i)]
+ l1 = eland[i]
+ l2 = e2[i]
self.failUnlessEqual(l1.reads, l2.reads)
self.failUnlessEqual(l1.sample_name, l2.sample_name)
self.failUnlessEqual(l1.lane_id, l2.lane_id)
# test lane specific parameters from gerald config file
for i in range(1,9):
- cur_lane = g.lanes[str(i)]
+ cur_lane = g.lanes[i]
self.failUnlessEqual(cur_lane.analysis, 'eland')
self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
self.failUnlessEqual(cur_lane.read_length, '32')
(119735, 8465), (152177, 8146),
(84649, 7325), (54622, 4812),]
+ self.failUnlessEqual(len(g.summary), 1)
for i in range(1,9):
- summary_lane = g.summary[str(i)]
+ summary_lane = g.summary[0][i]
self.failUnlessEqual(summary_lane.cluster, clusters[i])
- self.failUnlessEqual(summary_lane.lane, str(i))
+ self.failUnlessEqual(summary_lane.lane, i)
xml = g.get_elements()
# just make sure that element tree can serialize the tree
# test lane specific parameters from gerald config file
for i in range(1,9):
- g_lane = g.lanes[str(i)]
- g2_lane = g2.lanes[str(i)]
+ g_lane = g.lanes[i]
+ g2_lane = g2.lanes[i]
self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
# test (some) summary elements
+ self.failUnlessEqual(len(g.summary), 1)
for i in range(1,9):
- g_summary = g.summary[str(i)]
- g2_summary = g2.summary[str(i)]
+ g_summary = g.summary[0][i]
+ g2_summary = g2.summary[0][i]
self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
self.failUnlessEqual(g_summary.lane, g2_summary.lane)
dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
'chr2L.fa': 'dm3/chr2L.fa',
'Lambda.fa': 'Lambda.fa'}
- genome_maps = { '1':dm3_map, '2':dm3_map, '3':dm3_map, '4':dm3_map,
- '5':dm3_map, '6':dm3_map, '7':dm3_map, '8':dm3_map }
+ genome_maps = { 1:dm3_map, 2:dm3_map, 3:dm3_map, 4:dm3_map,
+ 5:dm3_map, 6:dm3_map, 7:dm3_map, 8:dm3_map }
eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
for i in range(1,9):
- lane = eland[str(i)]
+ lane = eland[i]
self.failUnlessEqual(lane.reads, 4)
self.failUnlessEqual(lane.sample_name, "s")
- self.failUnlessEqual(lane.lane_id, unicode(i))
+ self.failUnlessEqual(lane.lane_id, i)
self.failUnlessEqual(len(lane.mapped_reads), 3)
self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
e2 = gerald.ELAND(xml=xml)
for i in range(1,9):
- l1 = eland[str(i)]
- l2 = e2[str(i)]
+ l1 = eland[i]
+ l2 = e2[i]
self.failUnlessEqual(l1.reads, l2.reads)
self.failUnlessEqual(l1.sample_name, l2.sample_name)
self.failUnlessEqual(l1.lane_id, l2.lane_id)
# test lane specific parameters from gerald config file
for i in range(1,9):
- cur_lane = g.lanes[str(i)]
+ cur_lane = g.lanes[i]
self.failUnlessEqual(cur_lane.analysis, 'eland')
self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
self.failUnlessEqual(cur_lane.read_length, '32')
(247308, 11600), (204298, 15640),
(202707, 15404), (198075, 14702),]
+ self.failUnlessEqual(len(g.summary), 1)
for i in range(1,9):
- summary_lane = g.summary[str(i)]
+ summary_lane = g.summary[0][i]
self.failUnlessEqual(summary_lane.cluster, clusters[i])
- self.failUnlessEqual(summary_lane.lane, str(i))
+ self.failUnlessEqual(summary_lane.lane, i)
xml = g.get_elements()
# just make sure that element tree can serialize the tree
# test lane specific parameters from gerald config file
for i in range(1,9):
- g_lane = g.lanes[str(i)]
- g2_lane = g2.lanes[str(i)]
+ g_lane = g.lanes[i]
+ g2_lane = g2.lanes[i]
self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
+ self.failUnlessEqual(len(g.summary), 1)
# test (some) summary elements
for i in range(1,9):
- g_summary = g.summary[str(i)]
- g2_summary = g2.summary[str(i)]
+ g_summary = g.summary[0][i]
+ g2_summary = g2.summary[0][i]
self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
self.failUnlessEqual(g_summary.lane, g2_summary.lane)
long_name = 'hg18/chr%d.fa' % (i,)
hg_map[short_name] = long_name
- genome_maps = { '1':hg_map, '2':hg_map, '3':hg_map, '4':hg_map,
- '5':hg_map, '6':hg_map, '7':hg_map, '8':hg_map }
+ genome_maps = { 1:hg_map, 2:hg_map, 3:hg_map, 4:hg_map,
+ 5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map }
eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
for i in range(1,9):
- lane = eland[str(i)]
+ lane = eland[i]
self.failUnlessEqual(lane.reads, 4)
self.failUnlessEqual(lane.sample_name, "s")
- self.failUnlessEqual(lane.lane_id, unicode(i))
+ self.failUnlessEqual(lane.lane_id, i)
self.failUnlessEqual(len(lane.mapped_reads), 15)
self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4)
self.failUnlessEqual(lane.match_codes['U0'], 1)
e2 = gerald.ELAND(xml=xml)
for i in range(1,9):
- l1 = eland[str(i)]
- l2 = e2[str(i)]
+ l1 = eland[i]
+ l2 = e2[i]
self.failUnlessEqual(l1.reads, l2.reads)
self.failUnlessEqual(l1.sample_name, l2.sample_name)
self.failUnlessEqual(l1.lane_id, l2.lane_id)
# test lane specific parameters from gerald config file
for i in range(1,9):
- cur_lane = g.lanes[str(i)]
+ cur_lane = g.lanes[i]
self.failUnlessEqual(cur_lane.analysis, 'eland')
self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
self.failUnlessEqual(cur_lane.read_length, '32')
(119735, 8465), (152177, 8146),
(84649, 7325), (54622, 4812),]
+ self.failUnlessEqual(len(g.summary), 1)
for i in range(1,9):
- summary_lane = g.summary[str(i)]
+ summary_lane = g.summary[0][i]
self.failUnlessEqual(summary_lane.cluster, clusters[i])
- self.failUnlessEqual(summary_lane.lane, str(i))
+ self.failUnlessEqual(summary_lane.lane, i)
xml = g.get_elements()
# just make sure that element tree can serialize the tree
# test lane specific parameters from gerald config file
for i in range(1,9):
- g_lane = g.lanes[str(i)]
- g2_lane = g2.lanes[str(i)]
+ g_lane = g.lanes[i]
+ g2_lane = g2.lanes[i]
self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
# test (some) summary elements
+ self.failUnlessEqual(len(g.summary), 1)
for i in range(1,9):
- g_summary = g.summary[str(i)]
- g2_summary = g2.summary[str(i)]
+ g_summary = g.summary[0][i]
+ g2_summary = g2.summary[0][i]
self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
self.failUnlessEqual(g_summary.lane, g2_summary.lane)
long_name = 'hg18/chr%d.fa' % (i,)
hg_map[short_name] = long_name
- genome_maps = { '1':hg_map, '2':hg_map, '3':hg_map, '4':hg_map,
- '5':hg_map, '6':hg_map, '7':hg_map, '8':hg_map }
+ genome_maps = { 1:hg_map, 2:hg_map, 3:hg_map, 4:hg_map,
+ 5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map }
eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
for i in range(1,9):
- lane = eland[str(i)]
+ lane = eland[i]
self.failUnlessEqual(lane.reads, 4)
self.failUnlessEqual(lane.sample_name, "s")
- self.failUnlessEqual(lane.lane_id, unicode(i))
+ self.failUnlessEqual(lane.lane_id, i)
self.failUnlessEqual(len(lane.mapped_reads), 15)
self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4)
self.failUnlessEqual(lane.match_codes['U0'], 1)
e2 = gerald.ELAND(xml=xml)
for i in range(1,9):
- l1 = eland[str(i)]
- l2 = e2[str(i)]
+ l1 = eland[i]
+ l2 = e2[i]
self.failUnlessEqual(l1.reads, l2.reads)
self.failUnlessEqual(l1.sample_name, l2.sample_name)
self.failUnlessEqual(l1.lane_id, l2.lane_id)
# test lane specific parameters from gerald config file
for i in range(1,9):
- cur_lane = g.lanes[str(i)]
+ cur_lane = g.lanes[i]
self.failUnlessEqual(cur_lane.analysis, 'eland')
self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
self.failUnlessEqual(cur_lane.read_length, '32')
self.failUnlessEqual(l.use_bases, 'Y'*32)
# test data extracted from summary file
- clusters = [None,
- (96483, 9074), (133738, 7938),
- (152142, 10002), (15784, 2162),
- (119735, 8465), (152177, 8146),
- (84649, 7325), (54622, 4812),]
-
- for i in range(1,9):
- summary_lane = g.summary[str(i)]
- self.failUnlessEqual(summary_lane.cluster, clusters[i])
- self.failUnlessEqual(summary_lane.lane, str(i))
+ clusters = [[None,
+ (103646, 4515), (106678, 4652),
+ (84583, 5963), (68813, 4782),
+ (104854, 4664), (43555, 1632),
+ (54265, 1588), (64363, 2697),],
+ [None,
+ (103647, 4516), (106679, 4653),
+ (84584, 5964), (68814, 4783),
+ (104855, 4665), (43556, 1633),
+ (54266, 1589), (64364, 2698),],]
+
+ for end in [0,1]:
+ for lane in range(1,9):
+ summary_lane = g.summary[end][lane]
+ self.failUnlessEqual(summary_lane.cluster, clusters[end][lane])
+ self.failUnlessEqual(summary_lane.lane, lane)
xml = g.get_elements()
# just make sure that element tree can serialize the tree
# test lane specific parameters from gerald config file
for i in range(1,9):
- g_lane = g.lanes[str(i)]
- g2_lane = g2.lanes[str(i)]
+ g_lane = g.lanes[i]
+ g2_lane = g2.lanes[i]
self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
# test (some) summary elements
- for i in range(1,9):
- g_summary = g.summary[str(i)]
- g2_summary = g2.summary[str(i)]
- self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
- self.failUnlessEqual(g_summary.lane, g2_summary.lane)
-
- g_eland = g.eland_results
- g2_eland = g2.eland_results
- for lane in g_eland.keys():
- self.failUnlessEqual(g_eland[lane].reads,
- g2_eland[lane].reads)
- self.failUnlessEqual(len(g_eland[lane].mapped_reads),
- len(g2_eland[lane].mapped_reads))
- for k in g_eland[lane].mapped_reads.keys():
- self.failUnlessEqual(g_eland[lane].mapped_reads[k],
- g2_eland[lane].mapped_reads[k])
-
- self.failUnlessEqual(len(g_eland[lane].match_codes),
- len(g2_eland[lane].match_codes))
- for k in g_eland[lane].match_codes.keys():
- self.failUnlessEqual(g_eland[lane].match_codes[k],
- g2_eland[lane].match_codes[k])
+ for end in [0,1]:
+ for i in range(1,9):
+ g_summary = g.summary[end][i]
+ g2_summary = g2.summary[end][i]
+ self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
+ self.failUnlessEqual(g_summary.lane, g2_summary.lane)
+
+ g_eland = g.eland_results
+ g2_eland = g2.eland_results
+ for lane in g_eland.keys():
+ self.failUnlessEqual(g_eland[lane].reads,
+ g2_eland[lane].reads)
+ self.failUnlessEqual(len(g_eland[lane].mapped_reads),
+ len(g2_eland[lane].mapped_reads))
+ for k in g_eland[lane].mapped_reads.keys():
+ self.failUnlessEqual(g_eland[lane].mapped_reads[k],
+ g2_eland[lane].mapped_reads[k])
+
+ self.failUnlessEqual(len(g_eland[lane].match_codes),
+ len(g2_eland[lane].match_codes))
+ for k in g_eland[lane].match_codes.keys():
+ self.failUnlessEqual(g_eland[lane].match_codes[k],
+ g2_eland[lane].match_codes[k])
def test_eland(self):
long_name = 'hg18/chr%d.fa' % (i,)
hg_map[short_name] = long_name
- genome_maps = { '1':hg_map, '2':hg_map, '3':hg_map, '4':hg_map,
- '5':hg_map, '6':hg_map, '7':hg_map, '8':hg_map }
+ genome_maps = { 1:hg_map, 2:hg_map, 3:hg_map, 4:hg_map,
+ 5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map }
eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
for i in range(1,9):
- lane = eland[str(i)]
+ lane = eland[i]
self.failUnlessEqual(lane.reads, 4)
self.failUnlessEqual(lane.sample_name, "s")
- self.failUnlessEqual(lane.lane_id, unicode(i))
+ self.failUnlessEqual(lane.lane_id, i)
self.failUnlessEqual(len(lane.mapped_reads), 15)
self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4)
self.failUnlessEqual(lane.match_codes['U0'], 1)
e2 = gerald.ELAND(xml=xml)
for i in range(1,9):
- l1 = eland[str(i)]
- l2 = e2[str(i)]
+ l1 = eland[i]
+ l2 = e2[i]
self.failUnlessEqual(l1.reads, l2.reads)
self.failUnlessEqual(l1.sample_name, l2.sample_name)
self.failUnlessEqual(l1.lane_id, l2.lane_id)
# do we get the flowcell id from the filename?
self.failUnlessEqual(len(runs), 1)
- name = 'run_207BTAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
+ # firecrest's date depends on filename not the create time.
+ name = 'run_207BTAAXX_2008-04-19.xml'
self.failUnlessEqual(runs[0].name, name)
# do we get the flowcell id from the FlowcellId.xml file
make_flowcell_id(self.runfolder_dir, '207BTAAXY')
runs = runfolder.get_runs(self.runfolder_dir)
self.failUnlessEqual(len(runs), 1)
- name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
+ name = 'run_207BTAAXY_2008-04-19.xml'
self.failUnlessEqual(runs[0].name, name)
r1 = runs[0]