5 from pprint import pprint
10 from htsworkflow.submission.condorfastq import CondorFastqExtract
11 from htsworkflow.submission.results import ResultMap
12 from htsworkflow.util.rdfhelp import load_string_into_model, dump_model
17 'C02F9ACXX/C1-202/Project_11154',
18 'C02F9ACXX/C1-202/Project_12342_Index1',
19 'C02F9ACXX/C1-202/Project_12342_Index2',
20 'C02F9ACXX/C1-202/Project_12345',
32 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_001.fastq.gz',
33 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_002.fastq.gz',
34 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_001.fastq.gz',
35 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_002.fastq.gz',
36 'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R1_001.fastq.gz',
37 'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R2_001.fastq.gz',
38 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R1_001.fastq.gz',
39 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R2_001.fastq.gz',
40 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R1_001.fastq.gz',
41 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R2_001.fastq.gz',
42 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_001.fastq.gz',
43 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_002.fastq.gz',
44 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_003.fastq.gz',
45 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_001.fastq.gz',
46 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_002.fastq.gz',
47 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_003.fastq.gz',
48 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r1.tar.bz2',
49 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r1.tar.bz2',
50 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r1.tar.bz2',
51 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r1.tar.bz2',
52 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2',
53 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r1.tar.bz2',
54 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r1.tar.bz2',
55 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r1.tar.bz2',
56 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r2.tar.bz2',
57 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r2.tar.bz2',
58 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r2.tar.bz2',
59 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r2.tar.bz2',
60 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r2.tar.bz2',
61 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2',
62 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r2.tar.bz2',
63 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r2.tar.bz2',
64 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r2.tar.bz2',
65 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_1.srf',
66 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_2.srf',
67 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_3.srf',
68 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf',
69 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_5.srf',
70 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_6.srf',
71 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_7.srf',
72 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_8.srf',
73 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_1.srf',
74 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_2.srf',
75 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_3.srf',
76 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_4.srf',
77 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_5.srf',
78 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_6.srf',
79 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_7.srf',
80 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf',
81 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l1_r1.tar.bz2',
82 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l2_r1.tar.bz2',
83 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l3_r1.tar.bz2',
84 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l4_r1.tar.bz2',
85 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l5_r1.tar.bz2',
86 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2',
87 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l7_r1.tar.bz2',
88 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l8_r1.tar.bz2',
91 lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
92 @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
93 @prefix dc: <http://purl.org/dc/elements/1.1/> .
94 @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
95 @prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
96 @prefix seqns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
97 @prefix invns: <http://jumpgate.caltech.edu/wiki/InventoryOntology#> .
99 <http://localhost/flowcell/30221AAXX/>
100 a libns:illumina_flowcell ;
101 libns:read_length 33 ;
102 libns:flowcell_type "Single"@en ;
103 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
104 libns:has_lane <http://localhost/lane/3401> ;
105 libns:has_lane <http://localhost/lane/3402> ;
106 libns:has_lane <http://localhost/lane/3403> ;
107 libns:has_lane <http://localhost/lane/3404> ;
108 libns:has_lane <http://localhost/lane/3405> ;
109 libns:has_lane <http://localhost/lane/3406> ;
110 libns:has_lane <http://localhost/lane/3407> ;
111 libns:has_lane <http://localhost/lane/3408> ;
112 libns:flowcell_id "30221AAXX"@en .
114 <http://localhost/lane/3401>
115 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
116 libns:library <http://localhost/library/10000/> ;
117 libns:lane_number 1 .
118 <http://localhost/lane/3402>
119 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
120 libns:library <http://localhost/library/10000/> ;
121 libns:lane_number 2 .
122 <http://localhost/lane/3403>
123 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
124 libns:library <http://localhost/library/10000/> ;
125 libns:lane_number 3 .
126 <http://localhost/lane/3404>
127 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
128 libns:library <http://localhost/library/11154/> ;
129 libns:lane_number 4 .
132 # status "Unknown"@en .
133 <http://localhost/lane/3405>
134 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
135 libns:library <http://localhost/library/10000/> ;
136 libns:lane_number 5 .
137 <http://localhost/lane/3406>
138 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
139 libns:library <http://localhost/library/10000/> ;
140 libns:lane_number 6 .
141 <http://localhost/lane/3407>
142 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
143 libns:library <http://localhost/library/10000/> ;
144 libns:lane_number 7 .
145 <http://localhost/lane/3408>
146 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
147 libns:library <http://localhost/library/10000/> ;
148 libns:lane_number 8 .
150 <http://localhost/flowcell/42JUYAAXX/>
151 a libns:illumina_flowcell ;
152 libns:read_length 76 ;
153 libns:flowcell_type "Paired"@en ;
154 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
155 libns:has_lane <http://localhost/lane/4201> ;
156 libns:has_lane <http://localhost/lane/4202> ;
157 libns:has_lane <http://localhost/lane/4203> ;
158 libns:has_lane <http://localhost/lane/4204> ;
159 libns:has_lane <http://localhost/lane/4205> ;
160 libns:has_lane <http://localhost/lane/4206> ;
161 libns:has_lane <http://localhost/lane/4207> ;
162 libns:has_lane <http://localhost/lane/4208> ;
163 libns:flowcell_id "42JUYAAXX"@en .
165 <http://localhost/lane/4201>
166 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
167 libns:library <http://localhost/library/1421/> ;
168 libns:lane_number 1 .
169 <http://localhost/lane/4202>
170 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
171 libns:library <http://localhost/library/1421/> ;
172 libns:lane_number 2 .
173 <http://localhost/lane/4203>
174 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
175 libns:library <http://localhost/library/1421/> ;
176 libns:lane_number 3 .
177 <http://localhost/lane/4204>
178 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
179 libns:library <http://localhost/library/1421/> ;
180 libns:lane_number 4 .
181 <http://localhost/lane/4205>
182 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
183 libns:library <http://localhost/library/11154/> ;
184 libns:lane_number 5 .
187 # status "Unknown"@en .
188 <http://localhost/lane/4206>
189 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
190 libns:library <http://localhost/library/1421/> ;
191 libns:lane_number 6 .
192 <http://localhost/lane/4207>
193 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
194 libns:library <http://localhost/library/1421/> ;
195 libns:lane_number 7 .
196 <http://localhost/lane/4208>
197 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
198 libns:library <http://localhost/library/1421/> ;
199 libns:lane_number 8 .
201 <http://localhost/flowcell/61MJTAAXX/>
202 a libns:illumina_flowcell ;
203 libns:read_length 76 ;
204 libns:flowcell_type "Single"@en ;
205 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
206 libns:has_lane <http://localhost/lane/6601> ;
207 libns:has_lane <http://localhost/lane/6602> ;
208 libns:has_lane <http://localhost/lane/6603> ;
209 libns:has_lane <http://localhost/lane/6604> ;
210 libns:has_lane <http://localhost/lane/6605> ;
211 libns:has_lane <http://localhost/lane/6606> ;
212 libns:has_lane <http://localhost/lane/6607> ;
213 libns:has_lane <http://localhost/lane/6608> ;
214 libns:flowcell_id "61MJTAAXX"@en .
216 <http://localhost/lane/6601>
217 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
218 libns:library <http://localhost/library/1661/> ;
219 libns:lane_number 1 .
220 <http://localhost/lane/6602>
221 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
222 libns:library <http://localhost/library/1661/> ;
223 libns:lane_number 2 .
224 <http://localhost/lane/6603>
225 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
226 libns:library <http://localhost/library/1661/> ;
227 libns:lane_number 3 .
228 <http://localhost/lane/6604>
229 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
230 libns:library <http://localhost/library/1661/> ;
231 libns:lane_number 4 .
232 <http://localhost/lane/6605>
233 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
234 libns:library <http://localhost/library/1661/> ;
235 libns:lane_number 5 .
236 <http://localhost/lane/6606>
237 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
238 libns:library <http://localhost/library/11154/> ;
239 libns:lane_number 6 .
242 # status "Unknown"@en .
243 <http://localhost/lane/6607>
244 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
245 libns:library <http://localhost/library/1661/> ;
246 libns:lane_number 7 .
247 <http://localhost/lane/6608>
248 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
249 libns:library <http://localhost/library/1661/> ;
250 libns:lane_number 8 .
252 <http://localhost/flowcell/30DY0AAXX/>
253 a libns:illumina_flowcell ;
254 libns:read_length 76 ;
255 libns:flowcell_type "Paired"@en ;
256 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
257 libns:has_lane <http://localhost/lane/3801> ;
258 libns:has_lane <http://localhost/lane/3802> ;
259 libns:has_lane <http://localhost/lane/3803> ;
260 libns:has_lane <http://localhost/lane/3804> ;
261 libns:has_lane <http://localhost/lane/3805> ;
262 libns:has_lane <http://localhost/lane/3806> ;
263 libns:has_lane <http://localhost/lane/3807> ;
264 libns:has_lane <http://localhost/lane/3808> ;
265 libns:flowcell_id "30DY0AAXX"@en .
267 <http://localhost/lane/3801>
268 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
269 libns:library <http://localhost/library/1331/> ;
270 libns:lane_number 1 .
271 <http://localhost/lane/3802>
272 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
273 libns:library <http://localhost/library/1331/> ;
274 libns:lane_number 2 .
275 <http://localhost/lane/3803>
276 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
277 libns:library <http://localhost/library/1331/> ;
278 libns:lane_number 3 .
279 <http://localhost/lane/3804>
280 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
281 libns:library <http://localhost/library/1331/> ;
282 libns:lane_number 4 .
283 <http://localhost/lane/3805>
284 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
285 libns:library <http://localhost/library/1331/> ;
286 libns:lane_number 5 .
287 <http://localhost/lane/3806>
288 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
289 libns:library <http://localhost/library/1331/> ;
290 libns:lane_number 6 .
291 <http://localhost/lane/3807>
292 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
293 libns:library <http://localhost/library/1331/> ;
294 libns:lane_number 7 .
295 <http://localhost/lane/3808>
296 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
297 libns:library <http://localhost/library/11154/> ;
298 libns:lane_number 8 .
301 # status "Unknown"@en .
303 <http://localhost/flowcell/C02F9ACXX/>
304 a libns:illumina_flowcell ;
305 libns:read_length 101 ;
306 libns:flowcell_type "Paired"@en ;
307 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
308 libns:has_lane <http://localhost/lane/12300> ;
309 libns:has_lane <http://localhost/lane/12500> ;
310 libns:flowcell_id "C02F9ACXX"@en .
312 <http://localhost/lane/12300>
313 libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
314 libns:library <http://localhost/library/12345/> ;
315 libns:lane_number 3 .
318 # status "Unknown"@en .
320 <http://localhost/lane/12500>
321 libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
322 libns:library <http://localhost/library/11154/> ;
323 libns:lane_number 3 .
326 # status "Unknown"@en .
328 <http://localhost/library/11154/>
330 libns:affiliation "TSR"@en;
331 libns:concentration "29.7";
332 libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
333 libns:experiment_type "RNA-seq"@en ;
335 libns:has_lane <http://localhost/lane/3404> ;
336 libns:has_lane <http://localhost/lane/4205> ;
337 libns:has_lane <http://localhost/lane/6606> ;
338 libns:has_lane <http://localhost/lane/3808> ;
339 libns:has_lane <http://localhost/lane/12500> ;
340 libns:insert_size 2000 ;
341 libns:library_id "11154"@en ;
342 libns:library_type "Paired End (Multiplexed)"@en ;
343 libns:made_by "Gary Gygax"@en ;
344 libns:name "Paired Ends ASDF"@en ;
345 libns:replicate "1"@en;
346 libns:species "Mus musculus"@en ;
347 libns:stopping_point "Completed"@en ;
348 libns:total_unique_locations 8841201 .
352 <http://localhost/library/12345/>
354 libns:affiliation "TSR"@en;
355 libns:concentration "12.345";
356 libns:cell_line "Unknown"@en ;
357 libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
358 libns:experiment_type "RNA-seq"@en ;
360 libns:has_lane <http://localhost/lane/12300> ;
361 libns:insert_size 2000 ;
362 libns:library_id "12345"@en ;
363 libns:library_type "Paired End (Multiplexed)"@en ;
364 libns:made_by "Gary Gygax"@en ;
365 libns:name "Paired Ends THING"@en ;
366 libns:replicate "1"@en;
367 libns:species "Mus musculus"@en ;
368 libns:stopping_point "Completed"@en ;
369 libns:total_unique_locations 8841201 .
372 HOST = "http://localhost"
374 class TestCondorFastq(unittest.TestCase):
376 self.cwd = os.getcwd()
378 self.tempdir = tempfile.mkdtemp(prefix='condorfastq_test')
379 self.flowcelldir = os.path.join(self.tempdir, 'flowcells')
380 os.mkdir(self.flowcelldir)
382 self.logdir = os.path.join(self.tempdir, 'log')
383 os.mkdir(self.logdir)
386 os.mkdir(os.path.join(self.flowcelldir, d))
389 filename = os.path.join(self.flowcelldir, f)
390 with open(filename, 'w') as stream:
391 stream.write('testfile')
393 self.result_map = ResultMap()
394 for lib_id in [u'11154', u'12345']:
395 subname = 'sub-%s' % (lib_id,)
396 sub_dir = os.path.join(self.tempdir, subname)
398 self.result_map[lib_id] = sub_dir
400 self.extract = CondorFastqExtract(HOST,
403 load_string_into_model(self.extract.model, 'turtle', lib_turtle)
406 shutil.rmtree(self.tempdir)
409 def test_find_relavant_flowcell_ids(self):
410 expected = set(('30221AAXX',
415 flowcell_ids = self.extract.find_relavant_flowcell_ids()
416 self.assertEqual(flowcell_ids, expected)
418 def test_find_archive_sequence(self):
419 seqs = self.extract.find_archive_sequence_files(self.result_map)
422 (u'11154', u'42JUYAAXX', 5, 1, 76, True, 'qseq'),
423 (u'11154', u'42JUYAAXX', 5, 2, 76, True, 'qseq'),
424 (u'11154', u'61MJTAAXX', 6, 1, 76, False, 'qseq'),
425 (u'11154', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
426 (u'11154', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
427 (u'11154', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
428 (u'11154', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
429 (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
430 (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
431 (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
432 (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
433 (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
434 (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
435 (u'11154', u'30221AAXX', 4, 1, 33, False, 'srf'),
436 (u'11154', u'30DY0AAXX', 8, 1, 151, True, 'srf')
438 found = set([(l.library_id, l.flowcell_id, l.lane_number, l.read, l.cycle, l.ispaired, l.filetype) for l in seqs])
439 self.assertEqual(expected, found)
441 def test_find_needed_targets(self):
442 lib_db = self.extract.find_archive_sequence_files(self.result_map)
444 needed_targets = self.extract.find_missing_targets(self.result_map,
446 self.assertEqual(len(needed_targets), 9)
447 srf_30221 = needed_targets[
448 self.result_map['11154'] + u'/11154_30221AAXX_c33_l4.fastq']
449 qseq_42JUY_r1 = needed_targets[
450 self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r1.fastq']
451 qseq_42JUY_r2 = needed_targets[
452 self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r2.fastq']
453 qseq_61MJT = needed_targets[
454 self.result_map['11154'] + u'/11154_61MJTAAXX_c76_l6.fastq']
455 split_C02F9_r1 = needed_targets[
456 self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r1.fastq']
457 split_C02F9_r2 = needed_targets[
458 self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r2.fastq']
460 self.assertEqual(len(srf_30221['srf']), 1)
461 self.assertEqual(len(qseq_42JUY_r1['qseq']), 1)
462 self.assertEqual(len(qseq_42JUY_r2['qseq']), 1)
463 self.assertEqual(len(qseq_61MJT['qseq']), 1)
464 self.assertEqual(len(split_C02F9_r1['split_fastq']), 2)
465 self.assertEqual(len(split_C02F9_r2['split_fastq']), 2)
467 def test_generate_fastqs(self):
468 commands = self.extract.build_condor_arguments(self.result_map)
470 srf = commands['srf']
471 qseq = commands['qseq']
472 split = commands['split_fastq']
474 self.assertEqual(len(srf), 2)
475 self.assertEqual(len(qseq), 3)
476 self.assertEqual(len(split), 4)
479 os.path.join(self.result_map['11154'],
480 '11154_30221AAXX_c33_l4.fastq'): {
483 'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
484 'flowcell': u'30221AAXX',
485 'target': os.path.join(self.result_map['11154'],
486 u'11154_30221AAXX_c33_l4.fastq'),
488 os.path.join(self.result_map['11154'],
489 '11154_30DY0AAXX_c151_l8_r1.fastq'): {
492 'flowcell': u'30DY0AAXX',
493 'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'],
496 os.path.join(self.result_map['11154'],
497 u'11154_30DY0AAXX_c151_l8_r1.fastq'),
499 os.path.join(self.result_map['11154'],
500 u'11154_30DY0AAXX_c151_l8_r2.fastq'),
504 expected = srf_data[args['target']]
505 self.assertEqual(args['ispaired'], expected['ispaired'])
506 self.assertEqual(len(args['sources']), 1)
507 _, source_filename = os.path.split(args['sources'][0])
508 self.assertEqual(source_filename, expected['sources'][0])
509 self.assertEqual(args['target'], expected['target'])
511 self.assertEqual(args['target_right'],
512 expected['target_right'])
513 if 'mid' in expected:
514 self.assertEqual(args['mid'], expected['mid'])
517 os.path.join(self.result_map['11154'],
518 '11154_42JUYAAXX_c76_l5_r1.fastq'): {
522 u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2']
524 os.path.join(self.result_map['11154'],
525 '11154_42JUYAAXX_c76_l5_r2.fastq'): {
529 u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2']
531 os.path.join(self.result_map['11154'],
532 '11154_61MJTAAXX_c76_l6.fastq'): {
536 u'woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2'],
540 expected = qseq_data[args['target']]
541 self.assertEqual(args['istar'], expected['istar'])
542 self.assertEqual(args['ispaired'], expected['ispaired'])
543 for i in range(len(expected['sources'])):
544 _, filename = os.path.split(args['sources'][i])
545 self.assertEqual(filename, expected['sources'][i])
548 split_test = dict((( x['target'], x) for x in
549 [{'sources': [u'11154_NoIndex_L003_R1_001.fastq.gz',
550 u'11154_NoIndex_L003_R1_002.fastq.gz'],
551 'pyscript': 'desplit_fastq.pyc',
552 'target': u'11154_C02F9ACXX_c202_l3_r1.fastq'},
553 {'sources': [u'11154_NoIndex_L003_R2_001.fastq.gz',
554 u'11154_NoIndex_L003_R2_002.fastq.gz'],
555 'pyscript': 'desplit_fastq.pyc',
556 'target': u'11154_C02F9ACXX_c202_l3_r2.fastq'},
557 {'sources': [u'12345_CGATGT_L003_R1_001.fastq.gz',
558 u'12345_CGATGT_L003_R1_002.fastq.gz',
559 u'12345_CGATGT_L003_R1_003.fastq.gz',
561 'pyscript': 'desplit_fastq.pyc',
562 'target': u'12345_C02F9ACXX_c202_l3_r1.fastq'},
563 {'sources': [u'12345_CGATGT_L003_R2_001.fastq.gz',
564 u'12345_CGATGT_L003_R2_002.fastq.gz',
565 u'12345_CGATGT_L003_R2_003.fastq.gz',
567 'pyscript': 'desplit_fastq.pyc',
568 'target': u'12345_C02F9ACXX_c202_l3_r2.fastq'}
572 _, target = os.path.split(arg['target'])
573 pyscript = split_test[target]['pyscript']
574 self.assertTrue(arg['pyscript'].endswith(pyscript))
575 filename = split_test[target]['target']
576 self.assertTrue(arg['target'].endswith(filename))
577 for s_index in range(len(arg['sources'])):
578 s1 = arg['sources'][s_index]
579 s2 = split_test[target]['sources'][s_index]
580 self.assertTrue(s1.endswith(s2))
582 def test_create_scripts(self):
583 self.extract.create_scripts(self.result_map)
585 self.assertTrue(os.path.exists('srf.condor'))
586 with open('srf.condor', 'r') as srf:
587 arguments = [ l for l in srf if l.startswith('argument') ]
589 self.assertEqual(len(arguments), 2)
590 self.assertTrue('sub-11154/11154_30221AAXX_c33_l4.fastq'
593 'sub-11154/11154_30DY0AAXX_c151_l8_r2.fastq' in
596 self.assertTrue(os.path.exists('qseq.condor'))
597 with open('qseq.condor', 'r') as srf:
598 arguments = [ l for l in srf if l.startswith('argument') ]
600 self.assertEqual(len(arguments), 3)
601 self.assertTrue('sub-11154/11154_42JUYAAXX_c76_l5_r1.fastq ' in
604 'C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2' in
606 self.assertTrue('61MJTAAXX_c76_l6.fastq -f 61MJTAAXX' in
609 self.assertTrue(os.path.exists('split_fastq.condor'))
610 with open('split_fastq.condor', 'r') as split:
611 arguments = [ l for l in split if l.startswith('argument') ]
613 self.assertEqual(len(arguments), 4)
615 self.assertTrue('11154_NoIndex_L003_R1_001.fastq.gz' in \
618 self.assertTrue('11154_NoIndex_L003_R2_002.fastq.gz' in \
621 self.assertTrue('12345_CGATGT_L003_R1_001.fastq.gz' in arguments[2])
622 self.assertTrue('12345_CGATGT_L003_R1_002.fastq.gz' in arguments[2])
623 self.assertTrue('12345_CGATGT_L003_R1_003.fastq.gz' in arguments[2])
624 self.assertTrue('12345_C02F9ACXX_c202_l3_r1.fastq' in arguments[2])
627 self.assertTrue('12345_CGATGT_L003_R2_001.fastq.gz' in arguments[3])
628 self.assertTrue('12345_CGATGT_L003_R2_002.fastq.gz' in arguments[3])
629 self.assertTrue('12345_CGATGT_L003_R2_003.fastq.gz' in arguments[3])
630 self.assertTrue('12345_C02F9ACXX_c202_l3_r2.fastq' in arguments[3])
# Collect every TestCondorFastq method whose name starts with 'test'.
# unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13;
# the default loader uses the same 'test' method prefix that the old call
# passed explicitly, so the resulting suite is the same.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestCondorFastq)
# Script entry point: when this file is executed directly, hand control to
# unittest and run the tests named by the module-level ``suite`` unless
# specific test names are given on the command line.
if __name__ == "__main__":
    unittest.main(
        defaultTest='suite',
    )