5 from htsworkflow.pipelines import sequences
class SequenceFileTests(unittest.TestCase):
    """
    Make sure the sequence archive class works
    """

    def test_flowcell_cycle(self):
        """
        Make sure code to parse directory hierarchy works
        """
        path = '/root/42BW9AAXX/C1-152'
        flowcell, start, stop = sequences.get_flowcell_cycle(path)

        self.assertEqual(flowcell, '42BW9AAXX')
        self.assertEqual(start, 1)
        self.assertEqual(stop, 152)

        # A final path component that is not a cycle range must be rejected.
        path = '/root/42BW9AAXX/other'
        self.assertRaises(ValueError, sequences.get_flowcell_cycle, path)

    def test_srf(self):
        """
        Check the attributes parse_srf reports for an srf archive name
        """
        path = '/root/42BW9AAXX/C1-38'
        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_4.srf'
        pathname = os.path.join(path, name)
        f = sequences.parse_srf(path, name)

        self.assertEqual(f.filetype, 'srf')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertIsNone(f.read)
        self.assertIsNone(f.pf)
        self.assertEqual(f.cycle, 38)

    def test_qseq(self):
        """
        Check the attributes parse_qseq reports for qseq tarball names
        """
        path = '/root/42BW9AAXX/C1-36'
        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1.tar.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_qseq(path, name)

        self.assertEqual(f.filetype, 'qseq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, 1)
        self.assertIsNone(f.pf)
        self.assertEqual(f.cycle, 36)

        # Second sample uses a differently shaped flowcell id; the flowcell
        # attribute itself is not asserted for it.
        path = '/root/ilmn200901/C1-202'
        name = 'woldlab_090125_HWI-EAS_0000_ilmn200901_l1_r1.tar.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_qseq(path, name)

        self.assertEqual(f.filetype, 'qseq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.lane, 1)
        self.assertEqual(f.read, 1)
        self.assertIsNone(f.pf)
        self.assertEqual(f.cycle, 202)

    def test_fastq(self):
        """
        Check parse_fastq on both a 'pass' and a 'nopass' fastq name
        """
        path = '/root/42BW9AAXX/C1-38'
        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1_pass.fastq.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_fastq(path, name)

        self.assertEqual(f.filetype, 'fastq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, 1)
        self.assertEqual(f.pf, True)
        self.assertEqual(f.cycle, 38)

        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r2_nopass.fastq.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_fastq(path, name)

        self.assertEqual(f.filetype, 'fastq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, 2)
        self.assertEqual(f.pf, False)
        self.assertEqual(f.cycle, 38)

    def test_eland(self):
        """
        Check parse_eland on names with and without a read number
        """
        path = '/root/42BW9AAXX/C1-38'
        name = 's_4_eland_extended.txt.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_eland(path, name)

        self.assertEqual(f.filetype, 'eland')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertIsNone(f.read)
        self.assertIsNone(f.pf)
        self.assertEqual(f.cycle, 38)

        path = '/root/42BW9AAXX/C1-152'
        name = 's_4_1_eland_extended.txt.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_eland(path, name)

        self.assertEqual(f.filetype, 'eland')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, 1)
        self.assertIsNone(f.pf)
        self.assertEqual(f.cycle, 152)

    def test_sequence_file_equality(self):
        """
        Two objects parsed from the same path and name must compare equal
        """
        path = '/root/42BW9AAXX/C1-38'
        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1.tar.bz2'

        f1_qseq = sequences.parse_qseq(path, name)
        f2_qseq = sequences.parse_qseq(path, name)

        self.assertEqual(f1_qseq, f2_qseq)

    def test_sql(self):
        """
        Make sure that the quick and dirty sql interface in sequences works
        """
        # Imported locally so the test does not rely on a module-level import.
        import sqlite3

        db = sqlite3.connect(":memory:")
        c = db.cursor()
        sequences.create_sequence_table(c)

        data = [('/root/42BW9AAXX/C1-152',
                 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l1_r1.tar.bz2'),
                ('/root/42BW9AAXX/C1-152',
                 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l1_r2.tar.bz2'),
                ('/root/42BW9AAXX/C1-152',
                 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l2_r1.tar.bz2'),
                ('/root/42BW9AAXX/C1-152',
                 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l2_r21.tar.bz2'),]

        for path, name in data:
            seq = sequences.parse_qseq(path, name)
            # NOTE(review): assumes the parsed object exposes save(cursor) to
            # insert itself into the sequences table -- confirm against
            # htsworkflow.pipelines.sequences.
            seq.save(c)

        # One row is expected per entry in data.
        count = c.execute("select count(*) from sequences")
        row = count.fetchone()
        self.assertEqual(row[0], 4)
def suite():
    """
    Return a test suite holding every ``test*`` method of SequenceFileTests.
    """
    # unittest.makeSuite was deprecated in Python 3.11 and removed in 3.13.
    # The default loader uses the same 'test' method prefix that was
    # previously passed explicitly.
    return unittest.defaultTestLoader.loadTestsFromTestCase(SequenceFileTests)
# Script entry point: when no test names are given on the command line,
# unittest resolves the module-level name "suite" and runs what it returns.
if __name__ == "__main__":
    unittest.main(defaultTest="suite")