From b374e7c2d15aca12d5928e7335cbf609c9d7e3a7 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Wed, 10 Dec 2008 01:00:25 +0000 Subject: [PATCH] The summary parsing code now seems to handle paired end runs. This required changing how the lane_results were being stored: previously it was a dictionary indexed by lane; now it is a list of dictionaries, where the list index indicates which "end" of a paired end run it is (0 is the first, 1 is the second). Also, I got tired of being forced to use strings for the lane index by element tree and modified the code so it converts the strings required by element tree to integers for our internal dictionaries. --- htsworkflow/pipelines/eland.py | 30 +++--- htsworkflow/pipelines/gerald.py | 5 +- htsworkflow/pipelines/summary.py | 60 +++++++----- .../pipelines/test/simulate_runfolder.py | 16 ++-- .../pipelines/test/test_runfolder026.py | 28 +++--- .../pipelines/test/test_runfolder030.py | 28 +++--- .../pipelines/test/test_runfolder110.py | 28 +++--- .../pipelines/test/test_runfolder_ipar100.py | 28 +++--- .../pipelines/test/test_runfolder_pair.py | 94 ++++++++++--------- 9 files changed, 175 insertions(+), 142 deletions(-) diff --git a/htsworkflow/pipelines/eland.py b/htsworkflow/pipelines/eland.py index d44dae8..a010e1d 100644 --- a/htsworkflow/pipelines/eland.py +++ b/htsworkflow/pipelines/eland.py @@ -167,7 +167,7 @@ class ElandLane(object): path, name = os.path.split(self.pathname) split_name = name.split('_') self._sample_name = split_name[0] - self._lane_id = split_name[1] + self._lane_id = int(split_name[1]) def _get_sample_name(self): if self._sample_name is None: @@ -206,7 +206,7 @@ class ElandLane(object): sample_tag = ElementTree.SubElement(lane, ElandLane.SAMPLE_NAME) sample_tag.text = self.sample_name lane_tag = ElementTree.SubElement(lane, ElandLane.LANE_ID) - lane_tag.text = self.lane_id + lane_tag.text = str(self.lane_id) genome_map = ElementTree.SubElement(lane, ElandLane.GENOME_MAP) for k, v in 
self.genome_map.items(): item = ElementTree.SubElement( @@ -240,7 +240,7 @@ class ElandLane(object): if tag == ElandLane.SAMPLE_NAME.lower(): self._sample_name = element.text elif tag == ElandLane.LANE_ID.lower(): - self._lane_id = element.text + self._lane_id = int(element.text) elif tag == ElandLane.GENOME_MAP.lower(): for child in element: name = child.attrib['name'] @@ -306,7 +306,7 @@ class ELAND(object): if tree.tag.lower() != ELAND.ELAND.lower(): raise ValueError('Expecting %s', ELAND.ELAND) for element in list(tree): - lane_id = element.attrib[ELAND.LANE_ID] + lane_id = int(element.attrib[ELAND.LANE_ID]) lane = ElandLane(xml=element) self.results[lane_id] = lane @@ -326,18 +326,18 @@ def eland(basedir, gerald=None, genome_maps=None): # lets handle compressed eland files too file_list = glob(os.path.join(basedir, "*_eland_result.txt.bz2")) - lane_ids = ['1','2','3','4','5','6','7','8'] + lane_ids = range(1,9) # the order in patterns determines the preference for what # will be found. 
- patterns = ['s_%s_eland_result.txt', - 's_%s_eland_result.txt.bz2', - 's_%s_eland_result.txt.gz', - 's_%s_eland_extended.txt', - 's_%s_eland_extended.txt.bz2', - 's_%s_eland_extended.txt.gz', - 's_%s_eland_multi.txt', - 's_%s_eland_multi.txt.bz2', - 's_%s_eland_multi.txt.gz',] + patterns = ['s_%d_eland_result.txt', + 's_%d_eland_result.txt.bz2', + 's_%d_eland_result.txt.gz', + 's_%d_eland_extended.txt', + 's_%d_eland_extended.txt.bz2', + 's_%d_eland_extended.txt.gz', + 's_%d_eland_multi.txt', + 's_%d_eland_multi.txt.bz2', + 's_%d_eland_multi.txt.gz',] for lane_id in lane_ids: for p in patterns: @@ -353,7 +353,7 @@ def eland(basedir, gerald=None, genome_maps=None): path, name = os.path.split(pathname) logging.info("Adding eland file %s" %(name,)) split_name = name.split('_') - lane_id = split_name[1] + lane_id = int(split_name[1]) if genome_maps is not None: genome_map = genome_maps[lane_id] diff --git a/htsworkflow/pipelines/gerald.py b/htsworkflow/pipelines/gerald.py index 7e2328a..a5dd323 100644 --- a/htsworkflow/pipelines/gerald.py +++ b/htsworkflow/pipelines/gerald.py @@ -88,7 +88,8 @@ class Gerald(object): # those consistently. 
for element in analysis: sample, lane_id = element.tag.split('_') - self._lanes[lane_id] = Gerald.LaneParameters(self._gerald, lane_id) + self._lanes[int(lane_id)] = Gerald.LaneParameters( + self._gerald, lane_id) def __getitem__(self, key): if self._lane is None: @@ -201,4 +202,4 @@ if __name__ == "__main__": # quick test code import sys g = gerald(sys.argv[1]) - #ElementTree.dump(g.get_elements()) \ No newline at end of file + #ElementTree.dump(g.get_elements()) diff --git a/htsworkflow/pipelines/summary.py b/htsworkflow/pipelines/summary.py index ad07d06..72b3e5e 100644 --- a/htsworkflow/pipelines/summary.py +++ b/htsworkflow/pipelines/summary.py @@ -10,7 +10,7 @@ class Summary(object): """ Extract some useful information from the Summary.htm file """ - XML_VERSION = 2 + XML_VERSION = 3 SUMMARY = 'Summary' class LaneResultSummary(object): @@ -33,6 +33,7 @@ class Summary(object): def __init__(self, html=None, xml=None): self.lane = None + self.end = 0 self.lane_yield = None self.cluster = None self.cluster_pass_filter = None @@ -53,7 +54,7 @@ class Summary(object): raise RuntimeError("Summary.htm file format changed") # same in pre-0.3.0 Summary file and 0.3 summary file - self.lane = data[0] + self.lane = int(data[0]) if len(data) == 8: parsed_data = [ parse_mean_range(x) for x in data[1:] ] @@ -81,7 +82,7 @@ class Summary(object): def get_elements(self): lane_result = ElementTree.Element( Summary.LaneResultSummary.LANE_RESULT_SUMMARY, - {'lane': self.lane}) + {'lane': str(self.lane), 'end': str(self.end)}) for tag, variable_name in Summary.LaneResultSummary.TAGS.items(): value = getattr(self, variable_name) if value is None: @@ -102,7 +103,10 @@ class Summary(object): if tree.tag != Summary.LaneResultSummary.LANE_RESULT_SUMMARY: raise ValueError('Expected %s' % ( Summary.LaneResultSummary.LANE_RESULT_SUMMARY)) - self.lane = tree.attrib['lane'] + self.lane = int(tree.attrib['lane']) + # default to the first end, for the older summary files + # that are single 
ended + self.end = int(tree.attrib.get('end', 0)) tags = Summary.LaneResultSummary.TAGS for element in list(tree): try: @@ -113,7 +117,10 @@ class Summary(object): logging.warn('Unrecognized tag %s' % (element.tag,)) def __init__(self, filename=None, xml=None): - self.lane_results = {} + # lane results is a list of 1 or 2 ends containing + # a dictionary of all the lanes reported in this + # summary file + self.lane_results = [{}] if filename is not None: self._extract_lane_results(filename) @@ -126,15 +133,6 @@ class Summary(object): def __len__(self): return len(self.lane_results) - def keys(self): - return self.lane_results.keys() - - def values(self): - return self.lane_results.values() - - def items(self): - return self.lane_results.items() - def _flattened_row(self, row): """ flatten the children of a ... @@ -178,14 +176,20 @@ class Summary(object): return tables def _extract_lane_results(self, pathname): + tables = self._extract_named_tables(pathname) + table_names = [ ('Lane Results Summary', 0), + ('Lane Results Summary : Read 1', 0), + ('Lane Results Summary : Read 2', 1),] + for name, end in table_names: + if tables.has_key(name): + self._extract_lane_results_for_end(tables, name, end) + + def _extract_lane_results_for_end(self, tables, table_name, end): """ extract the Lane Results Summary table """ - - tables = self._extract_named_tables(pathname) - # parse lane result summary - lane_summary = tables['Lane Results Summary'] + lane_summary = tables[table_name] # this is version 1 of the summary file if len(lane_summary[-1]) == 8: # strip header @@ -200,15 +204,21 @@ class Summary(object): lane_summary = lane_summary[2:10] # after the last lane, there's a set of chip wide averages + # append an extra dictionary if needed + if len(self.lane_results) < (end + 1): + self.lane_results.append({}) + for r in lane_summary: lrs = Summary.LaneResultSummary(html=r) - self.lane_results[lrs.lane] = lrs + lrs.end = end + self.lane_results[lrs.end][lrs.lane] = lrs def 
get_elements(self): summary = ElementTree.Element(Summary.SUMMARY, {'version': unicode(Summary.XML_VERSION)}) - for lane in self.lane_results.values(): - summary.append(lane.get_elements()) + for end in self.lane_results: + for lane in end.values(): + summary.append(lane.get_elements()) return summary def set_elements(self, tree): @@ -220,7 +230,13 @@ class Summary(object): for element in list(tree): lrs = Summary.LaneResultSummary() lrs.set_elements(element) - self.lane_results[lrs.lane] = lrs + print lrs.end, lrs.lane + if len(self.lane_results) < (lrs.end + 1): + self.lane_results.append({}) + self.lane_results[lrs.end][lrs.lane] = lrs + + def is_paired_end(self): + return len(self.lane_results) == 2 def dump(self): """ diff --git a/htsworkflow/pipelines/test/simulate_runfolder.py b/htsworkflow/pipelines/test/simulate_runfolder.py index 112201a..f6f3742 100644 --- a/htsworkflow/pipelines/test/simulate_runfolder.py +++ b/htsworkflow/pipelines/test/simulate_runfolder.py @@ -1499,7 +1499,7 @@ def make_summary_paired_htm(gerald_dir): 1 277083 -103646 +/- 4515 +103647 +/- 4516 74887 +/- 6080 277 +/- 17 94.42 +/- 5.68 @@ -1511,7 +1511,7 @@ def make_summary_paired_htm(gerald_dir): 2 289563 -106678 +/- 4652 +106679 +/- 4653 78260 +/- 2539 259 +/- 13 93.57 +/- 2.55 @@ -1523,7 +1523,7 @@ def make_summary_paired_htm(gerald_dir): 3 259242 -84583 +/- 5963 +84584 +/- 5964 70065 +/- 4194 252 +/- 12 94.23 +/- 2.19 @@ -1535,7 +1535,7 @@ def make_summary_paired_htm(gerald_dir): 4 210549 -68813 +/- 4782 +68814 +/- 4783 56905 +/- 4145 226 +/- 16 96.82 +/- 7.12 @@ -1547,7 +1547,7 @@ def make_summary_paired_htm(gerald_dir): 5 295555 -104854 +/- 4664 +104855 +/- 4665 79879 +/- 6270 200 +/- 24 103.56 +/- 15.45 @@ -1559,7 +1559,7 @@ def make_summary_paired_htm(gerald_dir): 6 140401 -43555 +/- 1632 +43556 +/- 1633 37946 +/- 2140 179 +/- 10 100.82 +/- 5.47 @@ -1571,7 +1571,7 @@ def make_summary_paired_htm(gerald_dir): 7 154217 -54265 +/- 1588 +54266 +/- 1589 41680 +/- 5319 184 +/- 5 
103.42 +/- 3.47 @@ -1583,7 +1583,7 @@ def make_summary_paired_htm(gerald_dir): 8 147969 -64363 +/- 2697 +64364 +/- 2698 39991 +/- 6785 206 +/- 31 99.48 +/- 3.23 diff --git a/htsworkflow/pipelines/test/test_runfolder026.py b/htsworkflow/pipelines/test/test_runfolder026.py index 7b15381..656b281 100644 --- a/htsworkflow/pipelines/test/test_runfolder026.py +++ b/htsworkflow/pipelines/test/test_runfolder026.py @@ -334,7 +334,7 @@ class RunfolderTests(unittest.TestCase): # test lane specific parameters from gerald config file for i in range(1,9): - cur_lane = g.lanes[str(i)] + cur_lane = g.lanes[i] self.failUnlessEqual(cur_lane.analysis, 'eland') self.failUnlessEqual(cur_lane.eland_genome, genomes[i]) self.failUnlessEqual(cur_lane.read_length, '32') @@ -345,10 +345,11 @@ class RunfolderTests(unittest.TestCase): (17421, 2139), (20311, 2402), (20193, 2399), (15537, 2531), (32047, 3356), (32946, 4753), (39504, 4171), (37998, 3792)] + self.failUnlessEqual(len(g.summary), 1) for i in range(1,9): - summary_lane = g.summary[str(i)] + summary_lane = g.summary[0][i] self.failUnlessEqual(summary_lane.cluster, clusters[i]) - self.failUnlessEqual(summary_lane.lane, str(i)) + self.failUnlessEqual(summary_lane.lane, i) xml = g.get_elements() # just make sure that element tree can serialize the tree @@ -363,17 +364,18 @@ class RunfolderTests(unittest.TestCase): # test lane specific parameters from gerald config file for i in range(1,9): - g_lane = g.lanes[str(i)] - g2_lane = g2.lanes[str(i)] + g_lane = g.lanes[i] + g2_lane = g2.lanes[i] self.failUnlessEqual(g_lane.analysis, g2_lane.analysis) self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome) self.failUnlessEqual(g_lane.read_length, g2_lane.read_length) self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases) + self.failUnlessEqual(len(g.summary), 1) # test (some) summary elements for i in range(1,9): - g_summary = g.summary[str(i)] - g2_summary = g2.summary[str(i)] + g_summary = g.summary[0][i] + g2_summary = 
g2.summary[0][i] self.failUnlessEqual(g_summary.cluster, g2_summary.cluster) self.failUnlessEqual(g_summary.lane, g2_summary.lane) @@ -399,15 +401,15 @@ class RunfolderTests(unittest.TestCase): dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa', 'chr2L.fa': 'dm3/chr2L.fa', 'Lambda.fa': 'Lambda.fa'} - genome_maps = { '1':dm3_map, '2':dm3_map, '3':dm3_map, '4':dm3_map, - '5':dm3_map, '6':dm3_map, '7':dm3_map, '8':dm3_map } + genome_maps = { 1:dm3_map, 2:dm3_map, 3:dm3_map, 4:dm3_map, + 5:dm3_map, 6:dm3_map, 7:dm3_map, 8:dm3_map } eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps) for i in range(1,9): - lane = eland[str(i)] + lane = eland[i] self.failUnlessEqual(lane.reads, 4) self.failUnlessEqual(lane.sample_name, "s") - self.failUnlessEqual(lane.lane_id, unicode(i)) + self.failUnlessEqual(lane.lane_id, i) self.failUnlessEqual(len(lane.mapped_reads), 3) self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1) self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1) @@ -420,8 +422,8 @@ class RunfolderTests(unittest.TestCase): e2 = gerald.ELAND(xml=xml) for i in range(1,9): - l1 = eland[str(i)] - l2 = e2[str(i)] + l1 = eland[i] + l2 = e2[i] self.failUnlessEqual(l1.reads, l2.reads) self.failUnlessEqual(l1.sample_name, l2.sample_name) self.failUnlessEqual(l1.lane_id, l2.lane_id) diff --git a/htsworkflow/pipelines/test/test_runfolder030.py b/htsworkflow/pipelines/test/test_runfolder030.py index 6922764..6e97ec6 100644 --- a/htsworkflow/pipelines/test/test_runfolder030.py +++ b/htsworkflow/pipelines/test/test_runfolder030.py @@ -756,7 +756,7 @@ class RunfolderTests(unittest.TestCase): # test lane specific parameters from gerald config file for i in range(1,9): - cur_lane = g.lanes[str(i)] + cur_lane = g.lanes[i] self.failUnlessEqual(cur_lane.analysis, 'eland') self.failUnlessEqual(cur_lane.eland_genome, genomes[i]) self.failUnlessEqual(cur_lane.read_length, '32') @@ -769,10 +769,11 @@ class RunfolderTests(unittest.TestCase): (119735, 8465), (152177, 8146), 
(84649, 7325), (54622, 4812),] + self.failUnlessEqual(len(g.summary), 1) for i in range(1,9): - summary_lane = g.summary[str(i)] + summary_lane = g.summary[0][i] self.failUnlessEqual(summary_lane.cluster, clusters[i]) - self.failUnlessEqual(summary_lane.lane, str(i)) + self.failUnlessEqual(summary_lane.lane, i) xml = g.get_elements() # just make sure that element tree can serialize the tree @@ -787,17 +788,18 @@ class RunfolderTests(unittest.TestCase): # test lane specific parameters from gerald config file for i in range(1,9): - g_lane = g.lanes[str(i)] - g2_lane = g2.lanes[str(i)] + g_lane = g.lanes[i] + g2_lane = g2.lanes[i] self.failUnlessEqual(g_lane.analysis, g2_lane.analysis) self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome) self.failUnlessEqual(g_lane.read_length, g2_lane.read_length) self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases) # test (some) summary elements + self.failUnlessEqual(len(g.summary), 1) for i in range(1,9): - g_summary = g.summary[str(i)] - g2_summary = g2.summary[str(i)] + g_summary = g.summary[0][i] + g2_summary = g2.summary[0][i] self.failUnlessEqual(g_summary.cluster, g2_summary.cluster) self.failUnlessEqual(g_summary.lane, g2_summary.lane) @@ -823,15 +825,15 @@ class RunfolderTests(unittest.TestCase): dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa', 'chr2L.fa': 'dm3/chr2L.fa', 'Lambda.fa': 'Lambda.fa'} - genome_maps = { '1':dm3_map, '2':dm3_map, '3':dm3_map, '4':dm3_map, - '5':dm3_map, '6':dm3_map, '7':dm3_map, '8':dm3_map } + genome_maps = { 1:dm3_map, 2:dm3_map, 3:dm3_map, 4:dm3_map, + 5:dm3_map, 6:dm3_map, 7:dm3_map, 8:dm3_map } eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps) for i in range(1,9): - lane = eland[str(i)] + lane = eland[i] self.failUnlessEqual(lane.reads, 4) self.failUnlessEqual(lane.sample_name, "s") - self.failUnlessEqual(lane.lane_id, unicode(i)) + self.failUnlessEqual(lane.lane_id, i) self.failUnlessEqual(len(lane.mapped_reads), 3) 
self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1) self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1) @@ -844,8 +846,8 @@ class RunfolderTests(unittest.TestCase): e2 = gerald.ELAND(xml=xml) for i in range(1,9): - l1 = eland[str(i)] - l2 = e2[str(i)] + l1 = eland[i] + l2 = e2[i] self.failUnlessEqual(l1.reads, l2.reads) self.failUnlessEqual(l1.sample_name, l2.sample_name) self.failUnlessEqual(l1.lane_id, l2.lane_id) diff --git a/htsworkflow/pipelines/test/test_runfolder110.py b/htsworkflow/pipelines/test/test_runfolder110.py index e1f3fdd..fba2981 100644 --- a/htsworkflow/pipelines/test/test_runfolder110.py +++ b/htsworkflow/pipelines/test/test_runfolder110.py @@ -137,7 +137,7 @@ class RunfolderTests(unittest.TestCase): # test lane specific parameters from gerald config file for i in range(1,9): - cur_lane = g.lanes[str(i)] + cur_lane = g.lanes[i] self.failUnlessEqual(cur_lane.analysis, 'eland') self.failUnlessEqual(cur_lane.eland_genome, genomes[i]) self.failUnlessEqual(cur_lane.read_length, '32') @@ -158,10 +158,11 @@ class RunfolderTests(unittest.TestCase): (247308, 11600), (204298, 15640), (202707, 15404), (198075, 14702),] + self.failUnlessEqual(len(g.summary), 1) for i in range(1,9): - summary_lane = g.summary[str(i)] + summary_lane = g.summary[0][i] self.failUnlessEqual(summary_lane.cluster, clusters[i]) - self.failUnlessEqual(summary_lane.lane, str(i)) + self.failUnlessEqual(summary_lane.lane, i) xml = g.get_elements() # just make sure that element tree can serialize the tree @@ -176,17 +177,18 @@ class RunfolderTests(unittest.TestCase): # test lane specific parameters from gerald config file for i in range(1,9): - g_lane = g.lanes[str(i)] - g2_lane = g2.lanes[str(i)] + g_lane = g.lanes[i] + g2_lane = g2.lanes[i] self.failUnlessEqual(g_lane.analysis, g2_lane.analysis) self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome) self.failUnlessEqual(g_lane.read_length, g2_lane.read_length) self.failUnlessEqual(g_lane.use_bases, 
g2_lane.use_bases) + self.failUnlessEqual(len(g.summary), 1) # test (some) summary elements for i in range(1,9): - g_summary = g.summary[str(i)] - g2_summary = g2.summary[str(i)] + g_summary = g.summary[0][i] + g2_summary = g2.summary[0][i] self.failUnlessEqual(g_summary.cluster, g2_summary.cluster) self.failUnlessEqual(g_summary.lane, g2_summary.lane) @@ -215,15 +217,15 @@ class RunfolderTests(unittest.TestCase): long_name = 'hg18/chr%d.fa' % (i,) hg_map[short_name] = long_name - genome_maps = { '1':hg_map, '2':hg_map, '3':hg_map, '4':hg_map, - '5':hg_map, '6':hg_map, '7':hg_map, '8':hg_map } + genome_maps = { 1:hg_map, 2:hg_map, 3:hg_map, 4:hg_map, + 5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map } eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps) for i in range(1,9): - lane = eland[str(i)] + lane = eland[i] self.failUnlessEqual(lane.reads, 4) self.failUnlessEqual(lane.sample_name, "s") - self.failUnlessEqual(lane.lane_id, unicode(i)) + self.failUnlessEqual(lane.lane_id, i) self.failUnlessEqual(len(lane.mapped_reads), 15) self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4) self.failUnlessEqual(lane.match_codes['U0'], 1) @@ -241,8 +243,8 @@ class RunfolderTests(unittest.TestCase): e2 = gerald.ELAND(xml=xml) for i in range(1,9): - l1 = eland[str(i)] - l2 = e2[str(i)] + l1 = eland[i] + l2 = e2[i] self.failUnlessEqual(l1.reads, l2.reads) self.failUnlessEqual(l1.sample_name, l2.sample_name) self.failUnlessEqual(l1.lane_id, l2.lane_id) diff --git a/htsworkflow/pipelines/test/test_runfolder_ipar100.py b/htsworkflow/pipelines/test/test_runfolder_ipar100.py index 0d546d1..76f1e64 100644 --- a/htsworkflow/pipelines/test/test_runfolder_ipar100.py +++ b/htsworkflow/pipelines/test/test_runfolder_ipar100.py @@ -134,7 +134,7 @@ class RunfolderTests(unittest.TestCase): # test lane specific parameters from gerald config file for i in range(1,9): - cur_lane = g.lanes[str(i)] + cur_lane = g.lanes[i] self.failUnlessEqual(cur_lane.analysis, 'eland') 
self.failUnlessEqual(cur_lane.eland_genome, genomes[i]) self.failUnlessEqual(cur_lane.read_length, '32') @@ -153,10 +153,11 @@ class RunfolderTests(unittest.TestCase): (119735, 8465), (152177, 8146), (84649, 7325), (54622, 4812),] + self.failUnlessEqual(len(g.summary), 1) for i in range(1,9): - summary_lane = g.summary[str(i)] + summary_lane = g.summary[0][i] self.failUnlessEqual(summary_lane.cluster, clusters[i]) - self.failUnlessEqual(summary_lane.lane, str(i)) + self.failUnlessEqual(summary_lane.lane, i) xml = g.get_elements() # just make sure that element tree can serialize the tree @@ -171,17 +172,18 @@ class RunfolderTests(unittest.TestCase): # test lane specific parameters from gerald config file for i in range(1,9): - g_lane = g.lanes[str(i)] - g2_lane = g2.lanes[str(i)] + g_lane = g.lanes[i] + g2_lane = g2.lanes[i] self.failUnlessEqual(g_lane.analysis, g2_lane.analysis) self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome) self.failUnlessEqual(g_lane.read_length, g2_lane.read_length) self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases) # test (some) summary elements + self.failUnlessEqual(len(g.summary), 1) for i in range(1,9): - g_summary = g.summary[str(i)] - g2_summary = g2.summary[str(i)] + g_summary = g.summary[0][i] + g2_summary = g2.summary[0][i] self.failUnlessEqual(g_summary.cluster, g2_summary.cluster) self.failUnlessEqual(g_summary.lane, g2_summary.lane) @@ -210,15 +212,15 @@ class RunfolderTests(unittest.TestCase): long_name = 'hg18/chr%d.fa' % (i,) hg_map[short_name] = long_name - genome_maps = { '1':hg_map, '2':hg_map, '3':hg_map, '4':hg_map, - '5':hg_map, '6':hg_map, '7':hg_map, '8':hg_map } + genome_maps = { 1:hg_map, 2:hg_map, 3:hg_map, 4:hg_map, + 5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map } eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps) for i in range(1,9): - lane = eland[str(i)] + lane = eland[i] self.failUnlessEqual(lane.reads, 4) self.failUnlessEqual(lane.sample_name, "s") - 
self.failUnlessEqual(lane.lane_id, unicode(i)) + self.failUnlessEqual(lane.lane_id, i) self.failUnlessEqual(len(lane.mapped_reads), 15) self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4) self.failUnlessEqual(lane.match_codes['U0'], 1) @@ -236,8 +238,8 @@ class RunfolderTests(unittest.TestCase): e2 = gerald.ELAND(xml=xml) for i in range(1,9): - l1 = eland[str(i)] - l2 = e2[str(i)] + l1 = eland[i] + l2 = e2[i] self.failUnlessEqual(l1.reads, l2.reads) self.failUnlessEqual(l1.sample_name, l2.sample_name) self.failUnlessEqual(l1.lane_id, l2.lane_id) diff --git a/htsworkflow/pipelines/test/test_runfolder_pair.py b/htsworkflow/pipelines/test/test_runfolder_pair.py index 20dc0d7..783a5af 100644 --- a/htsworkflow/pipelines/test/test_runfolder_pair.py +++ b/htsworkflow/pipelines/test/test_runfolder_pair.py @@ -137,7 +137,7 @@ class RunfolderTests(unittest.TestCase): # test lane specific parameters from gerald config file for i in range(1,9): - cur_lane = g.lanes[str(i)] + cur_lane = g.lanes[i] self.failUnlessEqual(cur_lane.analysis, 'eland') self.failUnlessEqual(cur_lane.eland_genome, genomes[i]) self.failUnlessEqual(cur_lane.read_length, '32') @@ -150,16 +150,22 @@ class RunfolderTests(unittest.TestCase): self.failUnlessEqual(l.use_bases, 'Y'*32) # test data extracted from summary file - clusters = [None, - (96483, 9074), (133738, 7938), - (152142, 10002), (15784, 2162), - (119735, 8465), (152177, 8146), - (84649, 7325), (54622, 4812),] - - for i in range(1,9): - summary_lane = g.summary[str(i)] - self.failUnlessEqual(summary_lane.cluster, clusters[i]) - self.failUnlessEqual(summary_lane.lane, str(i)) + clusters = [[None, + (103646, 4515), (106678, 4652), + (84583, 5963), (68813, 4782), + (104854, 4664), (43555, 1632), + (54265, 1588), (64363, 2697),], + [None, + (103647, 4516), (106679, 4653), + (84584, 5964), (68814, 4783), + (104855, 4665), (43556, 1633), + (54266, 1589), (64364, 2698),],] + + for end in [0,1]: + for lane in range(1,9): + summary_lane = 
g.summary[end][lane] + self.failUnlessEqual(summary_lane.cluster, clusters[end][lane]) + self.failUnlessEqual(summary_lane.lane, lane) xml = g.get_elements() # just make sure that element tree can serialize the tree @@ -174,36 +180,37 @@ class RunfolderTests(unittest.TestCase): # test lane specific parameters from gerald config file for i in range(1,9): - g_lane = g.lanes[str(i)] - g2_lane = g2.lanes[str(i)] + g_lane = g.lanes[i] + g2_lane = g2.lanes[i] self.failUnlessEqual(g_lane.analysis, g2_lane.analysis) self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome) self.failUnlessEqual(g_lane.read_length, g2_lane.read_length) self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases) # test (some) summary elements - for i in range(1,9): - g_summary = g.summary[str(i)] - g2_summary = g2.summary[str(i)] - self.failUnlessEqual(g_summary.cluster, g2_summary.cluster) - self.failUnlessEqual(g_summary.lane, g2_summary.lane) - - g_eland = g.eland_results - g2_eland = g2.eland_results - for lane in g_eland.keys(): - self.failUnlessEqual(g_eland[lane].reads, - g2_eland[lane].reads) - self.failUnlessEqual(len(g_eland[lane].mapped_reads), - len(g2_eland[lane].mapped_reads)) - for k in g_eland[lane].mapped_reads.keys(): - self.failUnlessEqual(g_eland[lane].mapped_reads[k], - g2_eland[lane].mapped_reads[k]) - - self.failUnlessEqual(len(g_eland[lane].match_codes), - len(g2_eland[lane].match_codes)) - for k in g_eland[lane].match_codes.keys(): - self.failUnlessEqual(g_eland[lane].match_codes[k], - g2_eland[lane].match_codes[k]) + for end in [0,1]: + for i in range(1,9): + g_summary = g.summary[end][i] + g2_summary = g2.summary[end][i] + self.failUnlessEqual(g_summary.cluster, g2_summary.cluster) + self.failUnlessEqual(g_summary.lane, g2_summary.lane) + + g_eland = g.eland_results + g2_eland = g2.eland_results + for lane in g_eland.keys(): + self.failUnlessEqual(g_eland[lane].reads, + g2_eland[lane].reads) + self.failUnlessEqual(len(g_eland[lane].mapped_reads), + 
len(g2_eland[lane].mapped_reads)) + for k in g_eland[lane].mapped_reads.keys(): + self.failUnlessEqual(g_eland[lane].mapped_reads[k], + g2_eland[lane].mapped_reads[k]) + + self.failUnlessEqual(len(g_eland[lane].match_codes), + len(g2_eland[lane].match_codes)) + for k in g_eland[lane].match_codes.keys(): + self.failUnlessEqual(g_eland[lane].match_codes[k], + g2_eland[lane].match_codes[k]) def test_eland(self): @@ -213,15 +220,15 @@ class RunfolderTests(unittest.TestCase): long_name = 'hg18/chr%d.fa' % (i,) hg_map[short_name] = long_name - genome_maps = { '1':hg_map, '2':hg_map, '3':hg_map, '4':hg_map, - '5':hg_map, '6':hg_map, '7':hg_map, '8':hg_map } + genome_maps = { 1:hg_map, 2:hg_map, 3:hg_map, 4:hg_map, + 5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map } eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps) for i in range(1,9): - lane = eland[str(i)] + lane = eland[i] self.failUnlessEqual(lane.reads, 4) self.failUnlessEqual(lane.sample_name, "s") - self.failUnlessEqual(lane.lane_id, unicode(i)) + self.failUnlessEqual(lane.lane_id, i) self.failUnlessEqual(len(lane.mapped_reads), 15) self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4) self.failUnlessEqual(lane.match_codes['U0'], 1) @@ -239,8 +246,8 @@ class RunfolderTests(unittest.TestCase): e2 = gerald.ELAND(xml=xml) for i in range(1,9): - l1 = eland[str(i)] - l2 = e2[str(i)] + l1 = eland[i] + l2 = e2[i] self.failUnlessEqual(l1.reads, l2.reads) self.failUnlessEqual(l1.sample_name, l2.sample_name) self.failUnlessEqual(l1.lane_id, l2.lane_id) @@ -261,14 +268,15 @@ class RunfolderTests(unittest.TestCase): # do we get the flowcell id from the filename? self.failUnlessEqual(len(runs), 1) - name = 'run_207BTAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),) + # firecrest's date depends on filename not the create time. 
+ name = 'run_207BTAAXX_2008-04-19.xml' self.failUnlessEqual(runs[0].name, name) # do we get the flowcell id from the FlowcellId.xml file make_flowcell_id(self.runfolder_dir, '207BTAAXY') runs = runfolder.get_runs(self.runfolder_dir) self.failUnlessEqual(len(runs), 1) - name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),) + name = 'run_207BTAAXY_2008-04-19.xml' self.failUnlessEqual(runs[0].name, name) r1 = runs[0] -- 2.30.2