From 497886324ef38ebcab2e2ce9a375754044136d63 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Mon, 7 Jul 2008 22:19:51 +0000 Subject: [PATCH] Finish updating the Summary parsing file to handle the new 0.3 format in addition I split test_runfolder into one that tests 0.2.6 files and one that tests 0.3 files. --- gaworkflow/pipeline/gerald.py | 68 +- ...test_runfolder.py => test_runfolder026.py} | 0 gaworkflow/pipeline/test/test_runfolder030.py | 1007 +++++++++++++++++ 3 files changed, 1055 insertions(+), 20 deletions(-) rename gaworkflow/pipeline/test/{test_runfolder.py => test_runfolder026.py} (100%) create mode 100644 gaworkflow/pipeline/test/test_runfolder030.py diff --git a/gaworkflow/pipeline/gerald.py b/gaworkflow/pipeline/gerald.py index 3e43f1b..311987d 100644 --- a/gaworkflow/pipeline/gerald.py +++ b/gaworkflow/pipeline/gerald.py @@ -7,6 +7,7 @@ import logging import os import stat import time +import types from gaworkflow.pipeline.runfolder import \ ElementTree, \ @@ -215,11 +216,20 @@ def parse_mean_range_element(element): return (tonumber(element.attrib['mean']), tonumber(element.attrib['deviation'])) +def parse_summary_element(element): + """ + Determine if we have a simple element or a mean/deviation element + """ + if len(element.attrib) > 0: + return parse_mean_range_element(element) + else: + return element.text + class Summary(object): """ Extract some useful information from the Summary.htm file """ - XML_VERSION = 1 + XML_VERSION = 2 SUMMARY = 'Summary' class LaneResultSummary(object): @@ -229,7 +239,9 @@ class Summary(object): """ LANE_RESULT_SUMMARY = 'LaneResultSummary' TAGS = { - 'Cluster': 'cluster', + 'LaneYield': 'lane_yield', + 'Cluster': 'cluster', # Raw + 'ClusterPF': 'cluster_pass_filter', 'AverageFirstCycleIntensity': 'average_first_cycle_intensity', 'PercentIntensityAfter20Cycles': 'percent_intensity_after_20_cycles', 'PercentPassFilterClusters': 'percent_pass_filter_clusters', @@ -240,7 +252,9 @@ class Summary(object): def __init__(self, html=None, xml=None): self.lane = None + self.lane_yield = None self.cluster = None + self.cluster_pass_filter = None self.average_first_cycle_intensity = None self.percent_intensity_after_20_cycles = None self.percent_pass_filter_clusters = None @@ -261,32 +275,46 @@ class Summary(object): self.lane = data[0] if len(data) == 8: + parsed_data = [ parse_mean_range(x) for x in data[1:] ] # this is the < 0.3 Pipeline version - self.cluster = parse_mean_range(data[1]) - self.average_first_cycle_intensity = parse_mean_range(data[2]) - self.percent_intensity_after_20_cycles = \ - parse_mean_range(data[3]) - self.percent_pass_filter_clusters = parse_mean_range(data[4]) - self.percent_pass_filter_align = parse_mean_range(data[5]) - self.average_alignment_score = parse_mean_range(data[6]) - self.percent_error_rate = parse_mean_range(data[7]) + self.cluster = parsed_data[0] + self.average_first_cycle_intensity = parsed_data[1] + self.percent_intensity_after_20_cycles = parsed_data[2] + self.percent_pass_filter_clusters = parsed_data[3] + self.percent_pass_filter_align = parsed_data[4] + self.average_alignment_score = parsed_data[5] + self.percent_error_rate = parsed_data[6] elif len(data) == 10: + parsed_data = [ parse_mean_range(x) for x in data[2:] ] # this is the >= 0.3 summary file - self.cluster_raw = data[1] - self.cluster = parse_mean_range(data[2]) - # FIXME: think of generic way to capture the variable data - + self.lane_yield = data[1] + self.cluster = parsed_data[0] + self.cluster_pass_filter = parsed_data[1] + self.average_first_cycle_intensity = parsed_data[2] + self.percent_intensity_after_20_cycles = parsed_data[3] + self.percent_pass_filter_clusters = parsed_data[4] + self.percent_pass_filter_align = parsed_data[5] + self.average_alignment_score = parsed_data[6] + self.percent_error_rate = parsed_data[7] def get_elements(self): lane_result = ElementTree.Element( Summary.LaneResultSummary.LANE_RESULT_SUMMARY, {'lane': self.lane}) for tag, variable_name in Summary.LaneResultSummary.TAGS.items(): - element = make_mean_range_element( - lane_result, - tag, - *getattr(self, variable_name) - ) + value = getattr(self, variable_name) + if value is None: + continue + # it looks like a sequence + elif type(value) in (types.TupleType, types.ListType): + element = make_mean_range_element( + lane_result, + tag, + *value + ) + else: + element = ElementTree.SubElement(lane_result, tag) + element.text = value return lane_result def set_elements(self, tree): @@ -299,7 +327,7 @@ class Summary(object): try: variable_name = tags[element.tag] setattr(self, variable_name, - parse_mean_range_element(element)) + parse_summary_element(element)) except KeyError, e: logging.warn('Unrecognized tag %s' % (element.tag,)) diff --git a/gaworkflow/pipeline/test/test_runfolder.py b/gaworkflow/pipeline/test/test_runfolder026.py similarity index 100% rename from gaworkflow/pipeline/test/test_runfolder.py rename to gaworkflow/pipeline/test/test_runfolder026.py diff --git a/gaworkflow/pipeline/test/test_runfolder030.py b/gaworkflow/pipeline/test/test_runfolder030.py new file mode 100644 index 0000000..3f1ef78 --- /dev/null +++ b/gaworkflow/pipeline/test/test_runfolder030.py @@ -0,0 +1,1007 @@ +#!/usr/bin/env python + +from datetime import datetime, date +import os +import tempfile +import shutil +import unittest + +from gaworkflow.pipeline import firecrest +from gaworkflow.pipeline import bustard +from gaworkflow.pipeline import gerald +from gaworkflow.pipeline import runfolder +from gaworkflow.pipeline.runfolder import ElementTree + + +def make_flowcell_id(runfolder_dir, flowcell_id=None): + if flowcell_id is None: + flowcell_id = '207BTAAXY' + + config = """ + + %s +""" % (flowcell_id,) + config_dir = os.path.join(runfolder_dir, 'Config') + + if not os.path.exists(config_dir): + os.mkdir(config_dir) + pathname = os.path.join(config_dir, 'FlowcellId.xml') + f = open(pathname,'w') + f.write(config) + f.close() + +def make_matrix(matrix_dir): + contents = """# Auto-generated frequency response matrix +> A +> C +> G +> T +0.77 0.15 -0.04 -0.04 +0.76 1.02 -0.05 -0.06 +-0.10 -0.10 1.17 -0.03 +-0.13 -0.12 0.80 1.27 +""" + s_matrix = os.path.join(matrix_dir, 's_matrix.txt') + f = open(s_matrix, 'w') + f.write(contents) + f.close() + +def make_phasing_params(bustard_dir): + for lane in range(1,9): + pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane)) + f = open(pathname, 'w') + f.write(""" + 0.009900 + 0.003500 + +""") + f.close() + +def make_gerald_config(gerald_dir): + config_xml = """ + + default + + + + + Need_to_specify_ELAND_genome_directory + 8 + + domain.com + diane + localhost:25 + /home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane + /home/diane/gec + 1 + /home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald/../../Genomes + Need_to_specify_genome_file_name + genome + /home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane/GERALD_19-04-2008_diane + + _prb.txt + 12 + '((CHASTITY>=0.6))' + _qhg.txt + --symbolic + 32 + --scarf + _seq.txt + _sig2.txt + _sig.txt + @(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp + s_[1-8]_[0-9][0-9][0-9][0-9] + s + Sat Apr 19 19:08:30 2008 + /home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald + all + http://host.domain.com/yourshare/ + + + + eland + eland + eland + eland + eland + eland + eland + eland + + + /g/dm3 + /g/equcab1 + /g/equcab1 + /g/canfam2 + /g/hg18 + /g/hg18 + /g/hg18 + /g/hg18 + + + 32 + 32 + 32 + 32 + 32 + 32 + 32 + 32 + + + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + + + +""" + pathname = os.path.join(gerald_dir, 'config.xml') + f = open(pathname,'w') + f.write(config_xml) + f.close() + +def make_summary_htm(gerald_dir): + summary_htm=""" + + + + +

080627_HWI-EAS229_0036_3055HAXX Summary

+

Summary Information For Experiment 080627_HWI-EAS229_0036_3055HAXX on Machine HWI-EAS229

+



Chip Summary

+ + + + +
MachineHWI-EAS229
Run Folder080627_HWI-EAS229_0036_3055HAXX
Chip IDunknown
+



Chip Results Summary

+ + + + + + + + + + +
ClustersClusters (PF)Yield (kbases)
80933224435778031133022
+



Lane Parameter Summary

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
LaneSample IDSample TargetSample TypeLengthFilterNum TilesTiles
1unknownmm9ELAND26'((CHASTITY>=0.6))'100Lane 1
2unknownmm9ELAND26'((CHASTITY>=0.6))'100Lane 2
3unknownmm9ELAND26'((CHASTITY>=0.6))'100Lane 3
4unknownelegans170ELAND26'((CHASTITY>=0.6))'100Lane 4
5unknownelegans170ELAND26'((CHASTITY>=0.6))'100Lane 5
6unknownelegans170ELAND26'((CHASTITY>=0.6))'100Lane 6
7unknownelegans170ELAND26'((CHASTITY>=0.6))'100Lane 7
8unknownelegans170ELAND26'((CHASTITY>=0.6))'100Lane 8
+



Lane Results Summary

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Lane InfoTile Mean +/- SD for Lane
Lane Lane Yield (kbases) Clusters (raw)Clusters (PF) 1st Cycle Int (PF) % intensity after 20 cycles (PF) % PF Clusters % Align (PF) Alignment Score (PF) % Error Rate (PF)
115804696483 +/- 907460787 +/- 4240329 +/- 35101.88 +/- 6.0363.21 +/- 3.2970.33 +/- 0.249054.08 +/- 59.160.46 +/- 0.18
2156564133738 +/- 793860217 +/- 1926444 +/- 3992.62 +/- 7.5845.20 +/- 3.3151.98 +/- 0.746692.04 +/- 92.490.46 +/- 0.09
3185818152142 +/- 1000271468 +/- 2827366 +/- 3691.53 +/- 8.6647.19 +/- 3.8082.24 +/- 0.4410598.68 +/- 64.130.41 +/- 0.04
43495315784 +/- 216213443 +/- 1728328 +/- 4097.53 +/- 9.8785.29 +/- 1.9180.02 +/- 0.5310368.82 +/- 71.080.15 +/- 0.05
5167936119735 +/- 846564590 +/- 2529417 +/- 3788.69 +/- 14.7954.10 +/- 2.5976.95 +/- 0.329936.47 +/- 65.750.28 +/- 0.02
6173463152177 +/- 814666716 +/- 2493372 +/- 3987.06 +/- 9.8643.98 +/- 3.1278.80 +/- 0.4310162.28 +/- 49.650.38 +/- 0.03
714928784649 +/- 732557418 +/- 3617295 +/- 2889.40 +/- 8.2367.97 +/- 1.8233.38 +/- 0.254247.92 +/- 32.371.00 +/- 0.03
810695354622 +/- 481241136 +/- 3309284 +/- 3790.21 +/- 9.1075.39 +/- 2.2748.33 +/- 0.296169.21 +/- 169.500.86 +/- 1.22
Tile mean across chip
Av.1011665447235492.3660.2965.258403.690.50
+



Expanded Lane Summary

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Lane InfoPhasing InfoRaw Data (tile mean)Filtered Data (tile mean)
Lane Clusters (tile mean) (raw)% Phasing % Prephasing % Error Rate (raw) Equiv Perfect Clusters (raw) % retained Cycle 2-4 Av Int (PF) Cycle 2-10 Av % Loss (PF) Cycle 10-20 Av % Loss (PF) % Align (PF) % Error Rate (PF) Equiv Perfect Clusters (PF)
1964830.77000.31001.004967663.21317 +/- 320.13 +/- 0.44-1.14 +/- 0.3470.330.4641758
21337380.77000.31001.224046745.20415 +/- 330.29 +/- 0.40-0.79 +/- 0.3551.980.4630615
31521420.77000.31001.307858847.19344 +/- 260.68 +/- 0.51-0.77 +/- 0.4282.240.4157552
4157840.77000.31000.291109585.29306 +/- 340.20 +/- 0.69-1.28 +/- 0.6680.020.1510671
51197350.77000.31000.856033554.10380 +/- 320.34 +/- 0.49-1.55 +/- 4.6976.950.2849015
61521770.77000.31001.217090543.98333 +/- 270.57 +/- 0.50-0.91 +/- 0.3978.800.3851663
7846490.77000.31001.382106967.97272 +/- 201.15 +/- 0.52-0.84 +/- 0.5833.381.0018265
8546220.77000.31001.172133575.39262 +/- 311.10 +/- 0.59-1.01 +/- 0.4748.330.8619104
+

IVC Plots
+

IVC.htm +

+

All Intensity Plots
+

All.htm +

+

Error graphs:
+

Error.htm +

+Back to top +



Lane 1

+ + + + + + + + + + + + + + + + + + + + + + + +
Lane Tile Clusters (raw)Av 1st Cycle Int (PF) Av % intensity after 20 cycles (PF) % PF Clusters % Align (PF) Av Alignment Score (PF) % Error Rate (PF)
10001114972326.4894.3957.4470.29038.60.44
+Back to top +



Lane 2

+ + + + + + + + + + + + + + + + + + + + + + + +
Lane Tile Clusters (raw)Av 1st Cycle Int (PF) Av % intensity after 20 cycles (PF) % PF Clusters % Align (PF) Av Alignment Score (PF) % Error Rate (PF)
20001147793448.1283.6838.5753.76905.40.54
+Back to top +



Lane 3

+ + + + + + + + + + + + + + + + + + + + + + + +
Lane Tile Clusters (raw)Av 1st Cycle Int (PF) Av % intensity after 20 cycles (PF) % PF Clusters % Align (PF) Av Alignment Score (PF) % Error Rate (PF)
30001167904374.0586.9140.3681.310465.00.47
+Back to top +



Lane 4

+ + + + + + + + + + + + + + + + + + + + + + + +
Lane Tile Clusters (raw)Av 1st Cycle Int (PF) Av % intensity after 20 cycles (PF) % PF Clusters % Align (PF) Av Alignment Score (PF) % Error Rate (PF)
4000120308276.8592.8784.2680.410413.80.16
+Back to top +



Lane 5

+ + + + + + + + + + + + +
Lane Tile Clusters (raw)Av 1st Cycle Int (PF) Av % intensity after 20 cycles (PF) % PF Clusters % Align (PF) Av Alignment Score (PF) % Error Rate (PF)
+Back to top +



Lane 6

+ + + + + + + + + + + + + + + + + + + + + + + +
Lane Tile Clusters (raw)Av 1st Cycle Int (PF) Av % intensity after 20 cycles (PF) % PF Clusters % Align (PF) Av Alignment Score (PF) % Error Rate (PF)
60001166844348.1277.5938.1379.710264.40.44
+Back to top +



Lane 7

+ + + + + + + + + + + + + + + + + + + + + + + +
Lane Tile Clusters (raw)Av 1st Cycle Int (PF) Av % intensity after 20 cycles (PF) % PF Clusters % Align (PF) Av Alignment Score (PF) % Error Rate (PF)
7000198913269.9086.6664.5533.24217.51.02
+Back to top +



Lane 8

+ + + + + + + + + + + + + + + + + + + + + + + +
Lane Tile Clusters (raw)Av 1st Cycle Int (PF) Av % intensity after 20 cycles (PF) % PF Clusters % Align (PF) Av Alignment Score (PF) % Error Rate (PF)
8000164972243.6089.4073.1748.36182.80.71
+Back to top + + +""" + pathname = os.path.join(gerald_dir, 'Summary.htm') + f = open(pathname, 'w') + f.write(summary_htm) + f.close() + +def make_eland_results(gerald_dir): + eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D. +>HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T +>HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0 +>HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T +""" + for i in range(1,9): + pathname = os.path.join(gerald_dir, + 's_%d_eland_result.txt' % (i,)) + f = open(pathname, 'w') + f.write(eland_result) + f.close() + +class RunfolderTests(unittest.TestCase): + """ + Test components of the runfolder processing code + which includes firecrest, bustard, and gerald + """ + def setUp(self): + # make a fake runfolder directory + self.temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_') + + self.runfolder_dir = os.path.join(self.temp_dir, + '080102_HWI-EAS229_0010_207BTAAXX') + os.mkdir(self.runfolder_dir) + + self.data_dir = os.path.join(self.runfolder_dir, 'Data') + os.mkdir(self.data_dir) + + self.firecrest_dir = os.path.join(self.data_dir, + 'C1-33_Firecrest1.8.28_12-04-2008_diane' + ) + os.mkdir(self.firecrest_dir) + self.matrix_dir = os.path.join(self.firecrest_dir, 'Matrix') + os.mkdir(self.matrix_dir) + make_matrix(self.matrix_dir) + + self.bustard_dir = os.path.join(self.firecrest_dir, + 'Bustard1.8.28_12-04-2008_diane') + os.mkdir(self.bustard_dir) + make_phasing_params(self.bustard_dir) + + self.gerald_dir = os.path.join(self.bustard_dir, + 'GERALD_12-04-2008_diane') + os.mkdir(self.gerald_dir) + make_gerald_config(self.gerald_dir) + make_summary_htm(self.gerald_dir) + make_eland_results(self.gerald_dir) + + def tearDown(self): + shutil.rmtree(self.temp_dir) + + def test_firecrest(self): + """ + Construct a firecrest object + """ + f = firecrest.firecrest(self.firecrest_dir) + self.failUnlessEqual(f.version, '1.8.28') + self.failUnlessEqual(f.start, 1) + self.failUnlessEqual(f.stop, 33) + self.failUnlessEqual(f.user, 'diane') + self.failUnlessEqual(f.date, date(2008,4,12)) + + xml = f.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + + f2 = firecrest.Firecrest(xml=xml) + self.failUnlessEqual(f.version, f2.version) + self.failUnlessEqual(f.start, f2.start) + self.failUnlessEqual(f.stop, f2.stop) + self.failUnlessEqual(f.user, f2.user) + self.failUnlessEqual(f.date, f2.date) + + def test_bustard(self): + """ + construct a bustard object + """ + b = bustard.bustard(self.bustard_dir) + self.failUnlessEqual(b.version, '1.8.28') + self.failUnlessEqual(b.date, date(2008,4,12)) + self.failUnlessEqual(b.user, 'diane') + self.failUnlessEqual(len(b.phasing), 8) + self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099) + + xml = b.get_elements() + b2 = bustard.Bustard(xml=xml) + self.failUnlessEqual(b.version, b2.version) + self.failUnlessEqual(b.date, b2.date ) + self.failUnlessEqual(b.user, b2.user) + self.failUnlessEqual(len(b.phasing), len(b2.phasing)) + for key in b.phasing.keys(): + self.failUnlessEqual(b.phasing[key].lane, + b2.phasing[key].lane) + self.failUnlessEqual(b.phasing[key].phasing, + b2.phasing[key].phasing) + self.failUnlessEqual(b.phasing[key].prephasing, + b2.phasing[key].prephasing) + + def test_gerald(self): + # need to update gerald and make tests for it + g = gerald.gerald(self.gerald_dir) + + self.failUnlessEqual(g.version, + '@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp') + self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30)) + self.failUnlessEqual(len(g.lanes), len(g.lanes.keys())) + self.failUnlessEqual(len(g.lanes), len(g.lanes.items())) + + + # list of genomes, matches what was defined up in + # make_gerald_config. + # the first None is to offset the genomes list to be 1..9 + # instead of pythons default 0..8 + genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2', + '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ] + + # test lane specific parameters from gerald config file + for i in range(1,9): + cur_lane = g.lanes[str(i)] + self.failUnlessEqual(cur_lane.analysis, 'eland') + self.failUnlessEqual(cur_lane.eland_genome, genomes[i]) + self.failUnlessEqual(cur_lane.read_length, '32') + self.failUnlessEqual(cur_lane.use_bases, 'Y'*32) + + # test data extracted from summary file + clusters = [None, + (96483, 9074), (133738, 7938), + (152142, 10002), (15784, 2162), + (119735, 8465), (152177, 8146), + (84649, 7325), (54622, 4812),] + + for i in range(1,9): + summary_lane = g.summary[str(i)] + self.failUnlessEqual(summary_lane.cluster, clusters[i]) + self.failUnlessEqual(summary_lane.lane, str(i)) + + xml = g.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + g2 = gerald.Gerald(xml=xml) + + # do it all again after extracting from the xml file + self.failUnlessEqual(g.version, g2.version) + self.failUnlessEqual(g.date, g2.date) + self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys())) + self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items())) + + # test lane specific parameters from gerald config file + for i in range(1,9): + g_lane = g.lanes[str(i)] + g2_lane = g2.lanes[str(i)] + self.failUnlessEqual(g_lane.analysis, g2_lane.analysis) + self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome) + self.failUnlessEqual(g_lane.read_length, g2_lane.read_length) + self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases) + + # test (some) summary elements + for i in range(1,9): + g_summary = g.summary[str(i)] + g2_summary = g2.summary[str(i)] + self.failUnlessEqual(g_summary.cluster, g2_summary.cluster) + self.failUnlessEqual(g_summary.lane, g2_summary.lane) + + g_eland = g.eland_results + g2_eland = g2.eland_results + for lane in g_eland.keys(): + self.failUnlessEqual(g_eland[lane].reads, + g2_eland[lane].reads) + self.failUnlessEqual(len(g_eland[lane].mapped_reads), + len(g2_eland[lane].mapped_reads)) + for k in g_eland[lane].mapped_reads.keys(): + self.failUnlessEqual(g_eland[lane].mapped_reads[k], + g2_eland[lane].mapped_reads[k]) + + self.failUnlessEqual(len(g_eland[lane].match_codes), + len(g2_eland[lane].match_codes)) + for k in g_eland[lane].match_codes.keys(): + self.failUnlessEqual(g_eland[lane].match_codes[k], + g2_eland[lane].match_codes[k]) + + + def test_eland(self): + dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa', + 'chr2L.fa': 'dm3/chr2L.fa', + 'Lambda.fa': 'Lambda.fa'} + genome_maps = { '1':dm3_map, '2':dm3_map, '3':dm3_map, '4':dm3_map, + '5':dm3_map, '6':dm3_map, '7':dm3_map, '8':dm3_map } + eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps) + + for i in range(1,9): + lane = eland[str(i)] + self.failUnlessEqual(lane.reads, 4) + self.failUnlessEqual(lane.sample_name, "s") + self.failUnlessEqual(lane.lane_id, unicode(i)) + self.failUnlessEqual(len(lane.mapped_reads), 3) + self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1) + self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1) + self.failUnlessEqual(lane.match_codes['U1'], 2) + self.failUnlessEqual(lane.match_codes['NM'], 1) + + xml = eland.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + e2 = gerald.ELAND(xml=xml) + + for i in range(1,9): + l1 = eland[str(i)] + l2 = e2[str(i)] + self.failUnlessEqual(l1.reads, l2.reads) + self.failUnlessEqual(l1.sample_name, l2.sample_name) + self.failUnlessEqual(l1.lane_id, l2.lane_id) + self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads)) + self.failUnlessEqual(len(l1.mapped_reads), 3) + for k in l1.mapped_reads.keys(): + self.failUnlessEqual(l1.mapped_reads[k], + l2.mapped_reads[k]) + + self.failUnlessEqual(len(l1.match_codes), 9) + self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes)) + for k in l1.match_codes.keys(): + self.failUnlessEqual(l1.match_codes[k], + l2.match_codes[k]) + + def test_runfolder(self): + runs = runfolder.get_runs(self.runfolder_dir) + + # do we get the flowcell id from the filename? + self.failUnlessEqual(len(runs), 1) + self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml') + + # do we get the flowcell id from the FlowcellId.xml file + make_flowcell_id(self.runfolder_dir, '207BTAAXY') + runs = runfolder.get_runs(self.runfolder_dir) + self.failUnlessEqual(len(runs), 1) + self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml') + + r1 = runs[0] + xml = r1.get_elements() + xml_str = ElementTree.tostring(xml) + + r2 = runfolder.PipelineRun(xml=xml) + self.failUnlessEqual(r1.name, r2.name) + self.failIfEqual(r2.firecrest, None) + self.failIfEqual(r2.bustard, None) + self.failIfEqual(r2.gerald, None) + + +def suite(): + return unittest.makeSuite(RunfolderTests,'test') + +if __name__ == "__main__": + unittest.main(defaultTest="suite") + -- 2.30.2