From: Diane Trout Date: Thu, 12 Jul 2012 17:52:53 +0000 (-0700) Subject: Re-enable some suppressed tests and update them for the SampleKey API. X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=43a792219b99d8865f64318866db857498c68895 Re-enable some suppressed tests and update them for the SampleKey API. Also try to test getting the HiSeq flowcell ID out of RunInfo.xml --- diff --git a/htsworkflow/pipelines/test/simulate_runfolder.py b/htsworkflow/pipelines/test/simulate_runfolder.py index 3eebce9..6734b96 100644 --- a/htsworkflow/pipelines/test/simulate_runfolder.py +++ b/htsworkflow/pipelines/test/simulate_runfolder.py @@ -50,6 +50,31 @@ def make_flowcell_id(runfolder_dir, flowcell_id=None): f.write(config) f.close() +def make_runinfo(runfolder_dir, flowcell_id): + """Simulate a RunInfo.xml file created by >= RTA 1.9 + """ + xml = ''' + + + {flowcell} + SN787 + 110815 + + + + + + + + +''' + path, runfolder = os.path.split(runfolder_dir) + runinfo = os.path.join(runfolder_dir, 'RunInfo.xml') + stream = open(runinfo, 'w') + stream.write(xml.format(runfolder=runfolder, flowcell=flowcell_id)) + stream.close() + return runinfo + def make_bustard_config132(image_dir): source = os.path.join(TESTDATA_DIR, 'bustard-config132.xml') destination = os.path.join(image_dir, 'config.xml') diff --git a/htsworkflow/pipelines/test/test_runfolder_rta.py b/htsworkflow/pipelines/test/test_runfolder_rta.py index 02b8e44..90126d8 100644 --- a/htsworkflow/pipelines/test/test_runfolder_rta.py +++ b/htsworkflow/pipelines/test/test_runfolder_rta.py @@ -11,6 +11,7 @@ from htsworkflow.pipelines import ipar from htsworkflow.pipelines import bustard from htsworkflow.pipelines import gerald from htsworkflow.pipelines import runfolder +from htsworkflow.pipelines.samplekey import SampleKey from htsworkflow.pipelines.runfolder import ElementTree from htsworkflow.pipelines.test.simulate_runfolder import * @@ -157,7 +158,6 @@ class RunfolderTests(unittest.TestCase): # just make sure that element tree can serialize the tree xml_str = ElementTree.tostring(xml) g2 = gerald.Gerald(xml=xml) - return # do it all again after extracting from the xml file self.failUnlessEqual(g.version, g2.version) @@ -184,9 +184,9 @@ class RunfolderTests(unittest.TestCase): g_eland = g.eland_results g2_eland = g2.eland_results - for lane in g_eland.results[0].keys(): - g_results = g_eland.results[0][lane] - g2_results = g2_eland.results[0][lane] + for lane in g_eland: + g_results = g_eland[lane] + g2_results = g2_eland[lane] self.failUnlessEqual(g_results.reads, g2_results.reads) if isinstance(g_results, eland.ElandLane): @@ -204,7 +204,6 @@ class RunfolderTests(unittest.TestCase): def test_eland(self): - return hg_map = {'Lambda.fa': 'Lambda.fa'} for i in range(1,22): short_name = 'chr%d.fa' % (i,) @@ -216,11 +215,12 @@ class RunfolderTests(unittest.TestCase): eland_container = gerald.eland(self.gerald_dir, genome_maps=genome_maps) # I added sequence lanes to the last 2 lanes of this test case - for i in range(1,7): - lane = eland_container.results[0][i] + lanes = [ SampleKey(lane=i, read=1, sample='s') for i in range(1,7) ] + for key in lanes: + lane = eland_container[key] self.failUnlessEqual(lane.reads, 6) self.failUnlessEqual(lane.sample_name, "s") - self.failUnlessEqual(lane.lane_id, i) + self.failUnlessEqual(lane.lane_id, key.lane) self.failUnlessEqual(len(lane.mapped_reads), 17) self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4) self.failUnlessEqual(lane.match_codes['U0'], 3) @@ -233,14 +233,14 @@ class RunfolderTests(unittest.TestCase): self.failUnlessEqual(lane.match_codes['QC'], 0) # test scarf - lane = eland_container.results[0][7] + lane = eland_container[SampleKey(lane=7, read=1, sample='s')] self.failUnlessEqual(lane.reads, 5) self.failUnlessEqual(lane.sample_name, 's') self.failUnlessEqual(lane.lane_id, 7) self.failUnlessEqual(lane.sequence_type, eland.SequenceLane.SCARF_TYPE) # test fastq - lane = eland_container.results[0][8] + lane = eland_container[SampleKey(lane=8, read=1, sample='s')] self.failUnlessEqual(lane.reads, 3) self.failUnlessEqual(lane.sample_name, 's') self.failUnlessEqual(lane.lane_id, 8) @@ -251,9 +251,9 @@ class RunfolderTests(unittest.TestCase): xml_str = ElementTree.tostring(xml) e2 = gerald.ELAND(xml=xml) - for i in range(1,9): - l1 = eland_container.results[0][i] - l2 = e2.results[0][i] + for key in eland_container: + l1 = eland_container[key] + l2 = e2[key] self.failUnlessEqual(l1.reads, l2.reads) self.failUnlessEqual(l1.sample_name, l2.sample_name) self.failUnlessEqual(l1.lane_id, l2.lane_id) @@ -273,7 +273,6 @@ class RunfolderTests(unittest.TestCase): self.failUnlessEqual(l1.sequence_type, l2.sequence_type) def test_runfolder(self): - return runs = runfolder.get_runs(self.runfolder_dir) # do we get the flowcell id from the filename? diff --git a/htsworkflow/pipelines/test/test_runfolder_rta160.py b/htsworkflow/pipelines/test/test_runfolder_rta160.py index 137fc41..49f8ffa 100644 --- a/htsworkflow/pipelines/test/test_runfolder_rta160.py +++ b/htsworkflow/pipelines/test/test_runfolder_rta160.py @@ -11,6 +11,7 @@ from htsworkflow.pipelines import ipar from htsworkflow.pipelines import bustard from htsworkflow.pipelines import gerald from htsworkflow.pipelines import runfolder +from htsworkflow.pipelines.samplekey import SampleKey from htsworkflow.pipelines.runfolder import ElementTree from htsworkflow.pipelines.test.simulate_runfolder import * @@ -127,7 +128,6 @@ class RunfolderTests(unittest.TestCase): # just make sure that element tree can serialize the tree xml_str = ElementTree.tostring(xml) g2 = gerald.Gerald(xml=xml) - return # do it all again after extracting from the xml file self.failUnlessEqual(g.software, g2.software) @@ -155,9 +155,9 @@ class RunfolderTests(unittest.TestCase): g_eland = g.eland_results g2_eland = g2.eland_results - for lane in g_eland.results[0].keys(): - g_results = g_eland.results[0][lane] - g2_results = g2_eland.results[0][lane] + for key in g_eland: + g_results = g_eland[key] + g2_results = g2_eland[key] self.failUnlessEqual(g_results.reads, g2_results.reads) if isinstance(g_results, eland.ElandLane): @@ -175,7 +175,6 @@ class RunfolderTests(unittest.TestCase): def test_eland(self): - return hg_map = {'Lambda.fa': 'Lambda.fa'} for i in range(1,22): short_name = 'chr%d.fa' % (i,) @@ -187,11 +186,12 @@ class RunfolderTests(unittest.TestCase): eland_container = gerald.eland(self.gerald_dir, genome_maps=genome_maps) # I added sequence lanes to the last 2 lanes of this test case - for i in range(1,7): - lane = eland_container.results[0][i] + keys = [ SampleKey(lane=i, read=1, sample='s') for i in range(1,7)] + for key in keys: + lane = eland_container[key] self.failUnlessEqual(lane.reads, 6) self.failUnlessEqual(lane.sample_name, "s") - self.failUnlessEqual(lane.lane_id, i) + self.failUnlessEqual(lane.lane_id, key.lane) self.failUnlessEqual(len(lane.mapped_reads), 17) self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4) self.failUnlessEqual(lane.match_codes['U0'], 3) @@ -204,14 +204,14 @@ class RunfolderTests(unittest.TestCase): self.failUnlessEqual(lane.match_codes['QC'], 0) # test scarf - lane = eland_container.results[0][7] + lane = eland_container[SampleKey(lane=7, read=1, sample='s')] self.failUnlessEqual(lane.reads, 5) self.failUnlessEqual(lane.sample_name, 's') self.failUnlessEqual(lane.lane_id, 7) self.failUnlessEqual(lane.sequence_type, eland.SequenceLane.SCARF_TYPE) # test fastq - lane = eland_container.results[0][8] + lane = eland_container[SampleKey(lane=8, read=1, sample='s')] self.failUnlessEqual(lane.reads, 3) self.failUnlessEqual(lane.sample_name, 's') self.failUnlessEqual(lane.lane_id, 8) @@ -222,9 +222,9 @@ class RunfolderTests(unittest.TestCase): xml_str = ElementTree.tostring(xml) e2 = gerald.ELAND(xml=xml) - for i in range(1,9): - l1 = eland_container.results[0][i] - l2 = e2.results[0][i] + for key in eland_container: + l1 = eland_container[key] + l2 = e2[key] self.failUnlessEqual(l1.reads, l2.reads) self.failUnlessEqual(l1.sample_name, l2.sample_name) self.failUnlessEqual(l1.lane_id, l2.lane_id) @@ -244,7 +244,6 @@ class RunfolderTests(unittest.TestCase): self.failUnlessEqual(l1.sequence_type, l2.sequence_type) def test_runfolder(self): - return runs = runfolder.get_runs(self.runfolder_dir) # do we get the flowcell id from the filename? diff --git a/htsworkflow/pipelines/test/test_runfolder_rta180.py b/htsworkflow/pipelines/test/test_runfolder_rta180.py index 8340064..b803787 100644 --- a/htsworkflow/pipelines/test/test_runfolder_rta180.py +++ b/htsworkflow/pipelines/test/test_runfolder_rta180.py @@ -11,6 +11,7 @@ from htsworkflow.pipelines import ipar from htsworkflow.pipelines import bustard from htsworkflow.pipelines import gerald from htsworkflow.pipelines import runfolder +from htsworkflow.pipelines.samplekey import SampleKey from htsworkflow.pipelines.runfolder import ElementTree from htsworkflow.pipelines.test.simulate_runfolder import * @@ -23,8 +24,10 @@ def make_runfolder(obj=None): # make a fake runfolder directory temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_') - runfolder_dir = os.path.join(temp_dir, - '090608_HWI-EAS229_0117_4286GAAXX') + flowcell_id = '4286GAAXX' + runfolder_dir = os.path.join( + temp_dir, + '090608_HWI-EAS229_0117_{0}'.format(flowcell_id)) os.mkdir(runfolder_dir) data_dir = os.path.join(runfolder_dir, 'Data') @@ -45,6 +48,7 @@ def make_runfolder(obj=None): make_fastq(gerald_dir, lane_list=[8,]) if obj is not None: + obj.flowcell_id = flowcell_id obj.temp_dir = temp_dir obj.runfolder_dir = runfolder_dir obj.data_dir = data_dir @@ -137,10 +141,9 @@ class RunfolderTests(unittest.TestCase): # just make sure that element tree can serialize the tree xml_str = ElementTree.tostring(xml) g2 = gerald.Gerald(xml=xml) - return # do it all again after extracting from the xml file - self.failUnlessEqual(g.software, g2.version) + self.failUnlessEqual(g.software, g2.software) self.failUnlessEqual(g.version, g2.version) self.failUnlessEqual(g.date, g2.date) self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys())) @@ -165,9 +168,9 @@ class RunfolderTests(unittest.TestCase): g_eland = g.eland_results g2_eland = g2.eland_results - for lane in g_eland.results[0].keys(): - g_results = g_eland.results[0][lane] - g2_results = g2_eland.results[0][lane] + for lane in g_eland: + g_results = g_eland[lane] + g2_results = g2_eland[lane] self.failUnlessEqual(g_results.reads, g2_results.reads) if isinstance(g_results, eland.ElandLane): @@ -185,7 +188,6 @@ class RunfolderTests(unittest.TestCase): def test_eland(self): - return hg_map = {'Lambda.fa': 'Lambda.fa'} for i in range(1,22): short_name = 'chr%d.fa' % (i,) @@ -197,31 +199,33 @@ class RunfolderTests(unittest.TestCase): eland_container = gerald.eland(self.gerald_dir, genome_maps=genome_maps) # I added sequence lanes to the last 2 lanes of this test case - for i in range(1,7): - lane = eland_container.results[0][i] - self.failUnlessEqual(lane.reads, 6) + keys = [SampleKey(lane=i, read=1, sample='s') for i in range(1,7)] + for key in keys: + lane = eland_container[key] + self.failUnlessEqual(lane.reads, 28) self.failUnlessEqual(lane.sample_name, "s") - self.failUnlessEqual(lane.lane_id, i) - self.failUnlessEqual(len(lane.mapped_reads), 17) - self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4) - self.failUnlessEqual(lane.match_codes['U0'], 3) - self.failUnlessEqual(lane.match_codes['R0'], 2) + self.failUnlessEqual(lane.lane_id, key.lane) + self.failUnlessEqual(len(lane.mapped_reads), 7) + self.failUnlessEqual(lane.mapped_reads['hg18/chr7.fa'], 4) + self.failUnlessEqual(lane.mapped_reads['Lambda_1-1_11936nts.fa'], 1) + self.failUnlessEqual(lane.match_codes['U0'], 1) + self.failUnlessEqual(lane.match_codes['R0'], 20) self.failUnlessEqual(lane.match_codes['U1'], 1) - self.failUnlessEqual(lane.match_codes['R1'], 9) - self.failUnlessEqual(lane.match_codes['U2'], 0) - self.failUnlessEqual(lane.match_codes['R2'], 12) - self.failUnlessEqual(lane.match_codes['NM'], 1) - self.failUnlessEqual(lane.match_codes['QC'], 0) + self.failUnlessEqual(lane.match_codes['R1'], 2) + self.failUnlessEqual(lane.match_codes['U2'], 11) + self.failUnlessEqual(lane.match_codes['R2'], 0) + self.failUnlessEqual(lane.match_codes['NM'], 2) + self.failUnlessEqual(lane.match_codes['QC'], 9) # test scarf - lane = eland_container.results[0][7] + lane = eland_container[SampleKey(lane=7, read=1, sample='s')] self.failUnlessEqual(lane.reads, 5) self.failUnlessEqual(lane.sample_name, 's') self.failUnlessEqual(lane.lane_id, 7) self.failUnlessEqual(lane.sequence_type, eland.SequenceLane.SCARF_TYPE) # test fastq - lane = eland_container.results[0][8] + lane = eland_container[SampleKey(lane=8, read=1, sample='s')] self.failUnlessEqual(lane.reads, 3) self.failUnlessEqual(lane.sample_name, 's') self.failUnlessEqual(lane.lane_id, 8) @@ -232,15 +236,15 @@ class RunfolderTests(unittest.TestCase): xml_str = ElementTree.tostring(xml) e2 = gerald.ELAND(xml=xml) - for i in range(1,9): - l1 = eland_container.results[0][i] - l2 = e2.results[0][i] + for key in eland_container: + l1 = eland_container[key] + l2 = e2.results[key] self.failUnlessEqual(l1.reads, l2.reads) self.failUnlessEqual(l1.sample_name, l2.sample_name) self.failUnlessEqual(l1.lane_id, l2.lane_id) if isinstance(l1, eland.ElandLane): self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads)) - self.failUnlessEqual(len(l1.mapped_reads), 17) + self.failUnlessEqual(len(l1.mapped_reads), 7) for k in l1.mapped_reads.keys(): self.failUnlessEqual(l1.mapped_reads[k], l2.mapped_reads[k]) @@ -254,7 +258,6 @@ class RunfolderTests(unittest.TestCase): self.failUnlessEqual(l1.sequence_type, l2.sequence_type) def test_runfolder(self): - return runs = runfolder.get_runs(self.runfolder_dir) # do we get the flowcell id from the filename? diff --git a/htsworkflow/pipelines/test/test_runfolder_rta1_12.py b/htsworkflow/pipelines/test/test_runfolder_rta1_12.py index c6233d9..81f7263 100644 --- a/htsworkflow/pipelines/test/test_runfolder_rta1_12.py +++ b/htsworkflow/pipelines/test/test_runfolder_rta1_12.py @@ -26,10 +26,13 @@ def make_runfolder(obj=None): flowcell_id = 'D07K6ACXX' temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_') - runfolder_dir = os.path.join(temp_dir, - '110815_SN787_0101_A{0}'.format(flowcell_id)) + runfolder_dir = os.path.join( + temp_dir, + '110815_SN787_0101_A{0}'.format(flowcell_id)) os.mkdir(runfolder_dir) + make_runinfo(runfolder_dir, flowcell_id) + data_dir = os.path.join(runfolder_dir, 'Data') os.mkdir(data_dir) @@ -50,6 +53,7 @@ def make_runfolder(obj=None): make_aligned_config_1_12(aligned_dir) if obj is not None: + obj.flowcell_id = flowcell_id obj.temp_dir = temp_dir obj.runfolder_dir = runfolder_dir obj.data_dir = data_dir @@ -245,19 +249,13 @@ class RunfolderTests(unittest.TestCase): self.failUnlessEqual(l1.sequence_type, l2.sequence_type) def test_runfolder(self): - return runs = runfolder.get_runs(self.runfolder_dir) # do we get the flowcell id from the filename? self.failUnlessEqual(len(runs), 1) - name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),) - self.failUnlessEqual(runs[0].name, name) - - # do we get the flowcell id from the FlowcellId.xml file - make_flowcell_id(self.runfolder_dir, '207BTAAXY') - runs = runfolder.get_runs(self.runfolder_dir) - self.failUnlessEqual(len(runs), 1) - name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),) + self.assertEqual(runs[0].flowcell_id, self.flowcell_id) + name = 'run_%s_%s.xml' % ( self.flowcell_id, + date.today().strftime('%Y-%m-%d'),) self.failUnlessEqual(runs[0].name, name) r1 = runs[0]