Massively rework eland file finding and indexing.
[htsworkflow.git] / htsworkflow / pipelines / test / test_runfolder_rta1_12.py
index c57859bb04f9ebf0c1bf7a7209f2c6c98a917cf5..9b7bec8ab8169d2a9c39b01f64ae63753158ae99 100644 (file)
@@ -8,6 +8,7 @@ import shutil
 import unittest
 
 from htsworkflow.pipelines import eland
+from htsworkflow.pipelines.samplekey import SampleKey
 from htsworkflow.pipelines import ipar
 from htsworkflow.pipelines import bustard
 from htsworkflow.pipelines import gerald
@@ -157,9 +158,9 @@ class RunfolderTests(unittest.TestCase):
 
             g_eland = g.eland_results
             g2_eland = g2.eland_results
-            for lane in g_eland.results[0].keys():
-                g_results = g_eland.results[0][lane]
-                g2_results = g2_eland.results[0][lane]
+            for key in g_eland:
+                g_results = g_eland[key]
+                g2_results = g2_eland[key]
                 self.failUnlessEqual(g_results.reads,
                                      g2_results.reads)
                 if isinstance(g_results, eland.ElandLane):
@@ -183,55 +184,54 @@ class RunfolderTests(unittest.TestCase):
           long_name = 'hg18/chr%d.fa' % (i,)
           hg_map[short_name] = long_name
 
-        genome_maps = { 1:hg_map, 2:hg_map, 3:hg_map, 4:hg_map,
-                        5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map }
+        samples = set(('11111', '11112', '11113', '11114', '11115',
+                       '11116', '11117', '11118', '11119', '11120'))
+        genome_maps = {}
+        for i in range(1,9):
+            genome_maps[i] = hg_map
+
         eland_container = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
 
-        # I added sequence lanes to the last 2 lanes of this test case
-        for i in range(1,7):
-            lane = eland_container.results[0][i]
-            self.failUnlessEqual(lane.reads, 6)
-            self.failUnlessEqual(lane.sample_name, "s")
-            self.failUnlessEqual(lane.lane_id, i)
-            self.failUnlessEqual(len(lane.mapped_reads), 17)
-            self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4)
-            self.failUnlessEqual(lane.match_codes['U0'], 3)
-            self.failUnlessEqual(lane.match_codes['R0'], 2)
-            self.failUnlessEqual(lane.match_codes['U1'], 1)
-            self.failUnlessEqual(lane.match_codes['R1'], 9)
-            self.failUnlessEqual(lane.match_codes['U2'], 0)
-            self.failUnlessEqual(lane.match_codes['R2'], 12)
-            self.failUnlessEqual(lane.match_codes['NM'], 1)
-            self.failUnlessEqual(lane.match_codes['QC'], 0)
-
-        # test scarf
-        lane = eland_container.results[0][7]
-        self.failUnlessEqual(lane.reads, 5)
-        self.failUnlessEqual(lane.sample_name, 's')
-        self.failUnlessEqual(lane.lane_id, 7)
-        self.failUnlessEqual(lane.sequence_type, eland.SequenceLane.SCARF_TYPE)
-
-        # test fastq
-        lane = eland_container.results[0][8]
-        self.failUnlessEqual(lane.reads, 3)
-        self.failUnlessEqual(lane.sample_name, 's')
-        self.failUnlessEqual(lane.lane_id, 8)
-        self.failUnlessEqual(lane.sequence_type, eland.SequenceLane.FASTQ_TYPE)
+        for lane in eland_container.values():
+            # I added sequence lanes to the last 2 lanes of this test case
+            if lane.sample_name == '11113':
+                self.assertEqual(lane.reads, 24)
+                self.assertEqual(lane.mapped_reads['hg18/chr9.fa'], 6)
+                self.assertEqual(lane.match_codes['U0'], 6)
+                self.assertEqual(lane.match_codes['R0'], 18)
+                self.assertEqual(lane.match_codes['R1'], 24)
+                self.assertEqual(lane.match_codes['R2'], 18)
+                self.assertEqual(lane.match_codes['NM'], 12)
+            else:
+                self.assertEqual(lane.reads, 8)
+                self.assertEqual(lane.mapped_reads['hg18/chr9.fa'], 2)
+                self.assertEqual(lane.match_codes['U0'], 2)
+                self.assertEqual(lane.match_codes['R0'], 6)
+                self.assertEqual(lane.match_codes['R1'], 8)
+                self.assertEqual(lane.match_codes['R2'], 6)
+                self.assertEqual(lane.match_codes['NM'], 4)
+
+            self.assertIn(lane.sample_name, samples)
+            #self.assertEqual(lane.lane_id, 1)
+            self.assertEqual(len(lane.mapped_reads), 1)
+            self.assertEqual(lane.match_codes['U1'], 0)
+            self.assertEqual(lane.match_codes['U2'], 0)
+            self.assertEqual(lane.match_codes['QC'], 0)
 
         xml = eland_container.get_elements()
         # just make sure that element tree can serialize the tree
         xml_str = ElementTree.tostring(xml)
         e2 = gerald.ELAND(xml=xml)
 
-        for i in range(1,9):
-            l1 = eland_container.results[0][i]
-            l2 = e2.results[0][i]
+        for key in eland_container.results:
+            l1 = eland_container.results[key]
+            l2 = e2.results[key]
             self.failUnlessEqual(l1.reads, l2.reads)
             self.failUnlessEqual(l1.sample_name, l2.sample_name)
             self.failUnlessEqual(l1.lane_id, l2.lane_id)
             if isinstance(l1, eland.ElandLane):
               self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
-              self.failUnlessEqual(len(l1.mapped_reads), 17)
+              self.failUnlessEqual(len(l1.mapped_reads), 1)
               for k in l1.mapped_reads.keys():
                   self.failUnlessEqual(l1.mapped_reads[k],
                                        l2.mapped_reads[k])