5 from pprint import pprint
9 from django.test import TestCase
11 from htsworkflow.submission.condorfastq import CondorFastqExtract
12 from htsworkflow.submission.results import ResultMap
13 from htsworkflow.util.rdfhelp import \
14 add_default_schemas, load_string_into_model, dump_model
15 from htsworkflow.util.rdfinfer import Infer
20 'C02F9ACXX/C1-202/Project_11154',
21 'C02F9ACXX/C1-202/Project_12342_Index1',
22 'C02F9ACXX/C1-202/Project_12342_Index2',
23 'C02F9ACXX/C1-202/Project_12345',
35 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_001.fastq.gz',
36 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_002.fastq.gz',
37 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_001.fastq.gz',
38 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_002.fastq.gz',
39 'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R1_001.fastq.gz',
40 'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R2_001.fastq.gz',
41 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R1_001.fastq.gz',
42 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R2_001.fastq.gz',
43 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R1_001.fastq.gz',
44 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R2_001.fastq.gz',
45 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_001.fastq.gz',
46 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_002.fastq.gz',
47 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_003.fastq.gz',
48 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_001.fastq.gz',
49 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_002.fastq.gz',
50 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_003.fastq.gz',
51 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r1.tar.bz2',
52 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r1.tar.bz2',
53 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r1.tar.bz2',
54 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r1.tar.bz2',
55 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2',
56 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r1.tar.bz2',
57 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r1.tar.bz2',
58 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r1.tar.bz2',
59 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r2.tar.bz2',
60 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r2.tar.bz2',
61 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r2.tar.bz2',
62 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r2.tar.bz2',
63 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r2.tar.bz2',
64 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2',
65 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r2.tar.bz2',
66 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r2.tar.bz2',
67 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r2.tar.bz2',
68 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_1.srf',
69 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_2.srf',
70 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_3.srf',
71 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf',
72 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_5.srf',
73 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_6.srf',
74 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_7.srf',
75 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_8.srf',
76 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_1.srf',
77 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_2.srf',
78 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_3.srf',
79 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_4.srf',
80 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_5.srf',
81 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_6.srf',
82 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_7.srf',
83 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf',
84 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l1_r1.tar.bz2',
85 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l2_r1.tar.bz2',
86 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l3_r1.tar.bz2',
87 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l4_r1.tar.bz2',
88 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l5_r1.tar.bz2',
89 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2',
90 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l7_r1.tar.bz2',
91 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l8_r1.tar.bz2',
94 lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
95 @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
96 @prefix dc: <http://purl.org/dc/elements/1.1/> .
97 @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
98 @prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
99 @prefix seqns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
100 @prefix invns: <http://jumpgate.caltech.edu/wiki/InventoryOntology#> .
102 <http://localhost/library/10000/> a libns:Library .
103 <http://localhost/library/1331/> a libns:Library .
104 <http://localhost/library/1421/> a libns:Library .
105 <http://localhost/library/1661/> a libns:Library .
107 <http://localhost/flowcell/30221AAXX/>
108 a libns:IlluminaFlowcell ;
109 libns:read_length 33 ;
110 libns:flowcell_type "Single"@en ;
111 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
112 libns:has_lane <http://localhost/lane/3401> ;
113 libns:has_lane <http://localhost/lane/3402> ;
114 libns:has_lane <http://localhost/lane/3403> ;
115 libns:has_lane <http://localhost/lane/3404> ;
116 libns:has_lane <http://localhost/lane/3405> ;
117 libns:has_lane <http://localhost/lane/3406> ;
118 libns:has_lane <http://localhost/lane/3407> ;
119 libns:has_lane <http://localhost/lane/3408> ;
120 libns:flowcell_id "30221AAXX"@en .
122 <http://localhost/lane/3401>
123 a libns:IlluminaLane ;
124 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
125 libns:library <http://localhost/library/10000/> ;
126 libns:lane_number "1" .
127 <http://localhost/lane/3402>
128 a libns:IlluminaLane ;
129 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
130 libns:library <http://localhost/library/10000/> ;
131 libns:lane_number "2" .
132 <http://localhost/lane/3403>
133 a libns:IlluminaLane ;
134 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
135 libns:library <http://localhost/library/10000/> ;
136 libns:lane_number "3" .
137 <http://localhost/lane/3404>
138 a libns:IlluminaLane ;
139 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
140 libns:library <http://localhost/library/11154/> ;
141 libns:lane_number "4" .
144 # status "Unknown"@en .
145 <http://localhost/lane/3405>
146 a libns:IlluminaLane ;
147 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
148 libns:library <http://localhost/library/10000/> ;
149 libns:lane_number "5" .
150 <http://localhost/lane/3406>
151 a libns:IlluminaLane ;
152 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
153 libns:library <http://localhost/library/10000/> ;
154 libns:lane_number "6" .
155 <http://localhost/lane/3407>
156 a libns:IlluminaLane ;
157 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
158 libns:library <http://localhost/library/10000/> ;
159 libns:lane_number "7" .
160 <http://localhost/lane/3408>
161 a libns:IlluminaLane ;
162 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
163 libns:library <http://localhost/library/10000/> ;
164 libns:lane_number "8" .
166 <http://localhost/flowcell/42JUYAAXX/>
167 a libns:IlluminaFlowcell ;
168 libns:read_length 76 ;
169 libns:flowcell_type "Paired"@en ;
170 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
171 libns:has_lane <http://localhost/lane/4201> ;
172 libns:has_lane <http://localhost/lane/4202> ;
173 libns:has_lane <http://localhost/lane/4203> ;
174 libns:has_lane <http://localhost/lane/4204> ;
175 libns:has_lane <http://localhost/lane/4205> ;
176 libns:has_lane <http://localhost/lane/4206> ;
177 libns:has_lane <http://localhost/lane/4207> ;
178 libns:has_lane <http://localhost/lane/4208> ;
179 libns:flowcell_id "42JUYAAXX"@en .
181 <http://localhost/lane/4201>
182 a libns:IlluminaLane ;
183 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
184 libns:library <http://localhost/library/1421/> ;
185 libns:lane_number "1" .
186 <http://localhost/lane/4202>
187 a libns:IlluminaLane ;
188 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
189 libns:library <http://localhost/library/1421/> ;
190 libns:lane_number "2" .
191 <http://localhost/lane/4203>
192 a libns:IlluminaLane ;
193 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
194 libns:library <http://localhost/library/1421/> ;
195 libns:lane_number "3" .
196 <http://localhost/lane/4204>
197 a libns:IlluminaLane ;
198 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
199 libns:library <http://localhost/library/1421/> ;
200 libns:lane_number "4" .
201 <http://localhost/lane/4205>
202 a libns:IlluminaLane ;
203 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
204 libns:library <http://localhost/library/11154/> ;
205 libns:lane_number "5" .
208 # status "Unknown"@en .
209 <http://localhost/lane/4206>
210 a libns:IlluminaLane ;
211 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
212 libns:library <http://localhost/library/1421/> ;
213 libns:lane_number "6" .
214 <http://localhost/lane/4207>
215 a libns:IlluminaLane ;
216 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
217 libns:library <http://localhost/library/1421/> ;
218 libns:lane_number "7" .
219 <http://localhost/lane/4208>
220 a libns:IlluminaLane ;
221 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
222 libns:library <http://localhost/library/1421/> ;
223 libns:lane_number "8" .
225 <http://localhost/flowcell/61MJTAAXX/>
226 a libns:IlluminaFlowcell ;
227 libns:read_length 76 ;
228 libns:flowcell_type "Single"@en ;
229 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
230 libns:has_lane <http://localhost/lane/6601> ;
231 libns:has_lane <http://localhost/lane/6602> ;
232 libns:has_lane <http://localhost/lane/6603> ;
233 libns:has_lane <http://localhost/lane/6604> ;
234 libns:has_lane <http://localhost/lane/6605> ;
235 libns:has_lane <http://localhost/lane/6606> ;
236 libns:has_lane <http://localhost/lane/6607> ;
237 libns:has_lane <http://localhost/lane/6608> ;
238 libns:flowcell_id "61MJTAAXX"@en .
240 <http://localhost/lane/6601>
241 a libns:IlluminaLane ;
242 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
243 libns:library <http://localhost/library/1661/> ;
244 libns:lane_number "1" .
245 <http://localhost/lane/6602>
246 a libns:IlluminaLane ;
247 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
248 libns:library <http://localhost/library/1661/> ;
249 libns:lane_number "2" .
250 <http://localhost/lane/6603>
251 a libns:IlluminaLane ;
252 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
253 libns:library <http://localhost/library/1661/> ;
254 libns:lane_number "3" .
255 <http://localhost/lane/6604>
256 a libns:IlluminaLane ;
257 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
258 libns:library <http://localhost/library/1661/> ;
259 libns:lane_number "4" .
260 <http://localhost/lane/6605>
261 a libns:IlluminaLane ;
262 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
263 libns:library <http://localhost/library/1661/> ;
264 libns:lane_number "5" .
265 <http://localhost/lane/6606>
266 a libns:IlluminaLane ;
267 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
268 libns:library <http://localhost/library/11154/> ;
269 libns:lane_number "6" .
272 # status "Unknown"@en .
273 <http://localhost/lane/6607>
274 a libns:IlluminaLane ;
275 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
276 libns:library <http://localhost/library/1661/> ;
277 libns:lane_number "7" .
278 <http://localhost/lane/6608>
279 a libns:IlluminaLane ;
280 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
281 libns:library <http://localhost/library/1661/> ;
282 libns:lane_number "8" .
284 <http://localhost/flowcell/30DY0AAXX/>
285 a libns:IlluminaFlowcell ;
286 libns:read_length 76 ;
287 libns:flowcell_type "Paired"@en ;
288 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
289 libns:has_lane <http://localhost/lane/3801> ;
290 libns:has_lane <http://localhost/lane/3802> ;
291 libns:has_lane <http://localhost/lane/3803> ;
292 libns:has_lane <http://localhost/lane/3804> ;
293 libns:has_lane <http://localhost/lane/3805> ;
294 libns:has_lane <http://localhost/lane/3806> ;
295 libns:has_lane <http://localhost/lane/3807> ;
296 libns:has_lane <http://localhost/lane/3808> ;
297 libns:flowcell_id "30DY0AAXX"@en .
299 <http://localhost/lane/3801>
300 a libns:IlluminaLane ;
301 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
302 libns:library <http://localhost/library/1331/> ;
303 libns:lane_number "1" .
304 <http://localhost/lane/3802>
305 a libns:IlluminaLane ;
306 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
307 libns:library <http://localhost/library/1331/> ;
308 libns:lane_number "2" .
309 <http://localhost/lane/3803>
310 a libns:IlluminaLane ;
311 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
312 libns:library <http://localhost/library/1331/> ;
313 libns:lane_number "3" .
314 <http://localhost/lane/3804>
315 a libns:IlluminaLane ;
316 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
317 libns:library <http://localhost/library/1331/> ;
318 libns:lane_number "4" .
319 <http://localhost/lane/3805>
320 a libns:IlluminaLane ;
321 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
322 libns:library <http://localhost/library/1331/> ;
323 libns:lane_number "5" .
324 <http://localhost/lane/3806>
325 a libns:IlluminaLane ;
326 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
327 libns:library <http://localhost/library/1331/> ;
328 libns:lane_number "6" .
329 <http://localhost/lane/3807>
330 a libns:IlluminaLane ;
331 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
332 libns:library <http://localhost/library/1331/> ;
333 libns:lane_number "7" .
334 <http://localhost/lane/3808>
335 a libns:IlluminaLane ;
336 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
337 libns:library <http://localhost/library/11154/> ;
338 libns:lane_number "8" .
341 # status "Unknown"@en .
343 <http://localhost/flowcell/C02F9ACXX/>
344 a libns:IlluminaFlowcell ;
345 libns:read_length 101 ;
346 libns:flowcell_type "Paired"@en ;
347 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
348 libns:has_lane <http://localhost/lane/12300> ;
349 libns:has_lane <http://localhost/lane/12500> ;
350 libns:flowcell_id "C02F9ACXX"@en .
352 <http://localhost/lane/12300>
353 a libns:IlluminaLane ;
354 libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
355 libns:library <http://localhost/library/12345/> ;
356 libns:lane_number "3" .
359 # status "Unknown"@en .
361 <http://localhost/lane/12500>
362 a libns:IlluminaLane ;
363 libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
364 libns:library <http://localhost/library/11154/> ;
365 libns:lane_number "3" .
368 # status "Unknown"@en .
370 <http://localhost/library/11154/>
372 libns:affiliation "TSR"@en;
373 libns:concentration "29.7";
374 libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
375 libns:experiment_type "RNA-seq"@en ;
377 libns:has_lane <http://localhost/lane/3404> ;
378 libns:has_lane <http://localhost/lane/4205> ;
379 libns:has_lane <http://localhost/lane/6606> ;
380 libns:has_lane <http://localhost/lane/3808> ;
381 libns:has_lane <http://localhost/lane/12500> ;
382 libns:insert_size 2000 ;
383 libns:library_id "11154"@en ;
384 libns:library_type "Paired End (Multiplexed)"@en ;
385 libns:made_by "Gary Gygax"@en ;
386 libns:name "Paired Ends ASDF"@en ;
387 libns:replicate "1"@en;
388 libns:species_name "Mus musculus"@en ;
389 libns:stopping_point "Completed"@en ;
390 libns:total_unique_locations 8841201 .
393 <http://localhost/library/12345/>
395 libns:affiliation "TSR"@en;
396 libns:concentration "12.345";
397 libns:cell_line "Unknown"@en ;
398 libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
399 libns:experiment_type "RNA-seq"@en ;
401 libns:has_lane <http://localhost/lane/12300> ;
402 libns:insert_size 2000 ;
403 libns:library_id "12345"@en ;
404 libns:library_type "Paired End (Multiplexed)"@en ;
405 libns:made_by "Gary Gygax"@en ;
406 libns:name "Paired Ends THING"@en ;
407 libns:replicate "1"@en;
408 libns:species_name "Mus musculus"@en ;
409 libns:stopping_point "Completed"@en ;
410 libns:total_unique_locations 8841201 .
# Base URL handed to CondorFastqExtract as its first argument; it matches the
# http://localhost prefix used by the resource URIs in the lib_turtle fixture.
413 HOST = "http://localhost"
# Tests for CondorFastqExtract, run against a scratch directory tree of
# placeholder sequence files and an RDF model loaded from lib_turtle.
# NOTE(review): the line numbering of this listing has gaps; the fixture
# method headers and some loop headers below are not visible here.
415 class TestCondorFastq(TestCase):
# Remember the starting directory; the fixture chdirs into tempdir further down.
417 self.cwd = os.getcwd()
# Scratch tree: <tempdir>/flowcells and <tempdir>/log.
419 self.tempdir = tempfile.mkdtemp(prefix='condorfastq_test')
420 self.flowcelldir = os.path.join(self.tempdir, 'flowcells')
421 os.mkdir(self.flowcelldir)
423 self.logdir = os.path.join(self.tempdir, 'log')
424 os.mkdir(self.logdir)
# Create one directory per entry `d` under flowcells/ (loop header not
# visible; presumably iterating the directory list near the top of the file).
427 os.mkdir(os.path.join(self.flowcelldir, d))
# Create each data file `f` with dummy content (loop header not visible;
# presumably iterating the data-file list near the top of the file).
430 filename = os.path.join(self.flowcelldir, f)
431 with open(filename, 'w') as stream:
432 stream.write('testfile')
# Map each library id under test to its own 'sub-<id>' directory in tempdir.
434 self.result_map = ResultMap()
435 for lib_id in [u'11154', u'12345']:
436 subname = 'sub-%s' % (lib_id,)
437 sub_dir = os.path.join(self.tempdir, subname)
439 self.result_map[lib_id] = sub_dir
# Object under test; the remaining constructor arguments are not visible here.
441 self.extract = CondorFastqExtract(HOST,
# Populate the extractor's model from the turtle fixture, add the default
# schemas, and require that validation reports no error messages.
444 load_string_into_model(self.extract.model, 'turtle', lib_turtle)
445 add_default_schemas(self.extract.model)
446 inference = Infer(self.extract.model)
447 errmsgs = list(inference.run_validation())
448 self.assertEqual(len(errmsgs), 0)
# Work inside the scratch tree; test_create_scripts opens the generated
# *.condor files by relative path.
449 os.chdir(self.tempdir)
# Cleanup of the scratch tree (enclosing method header not visible;
# presumably tearDown).
452 shutil.rmtree(self.tempdir)
# Verify that find_relevant_flowcell_ids() returns exactly the expected set of
# flowcell ids. Only the first member of the expected set is visible in this
# listing; the rest of the literal is missing.
455 def test_find_relevant_flowcell_ids(self):
456 expected = set(('30221AAXX',
461 flowcell_ids = self.extract.find_relevant_flowcell_ids()
462 self.assertEqual(flowcell_ids, expected)
# Verify the sequence files discovered by find_archive_sequence_files() for
# the libraries in result_map.
464 def test_find_archive_sequence(self):
465 seqs = self.extract.find_archive_sequence_files(self.result_map)
# Each expected tuple is
#   (library_id, flowcell_id, lane_number, read, cycle, ispaired, filetype)
# in the same order as the attributes collected into `found` below.
# The opening line of this collection is not visible in the listing.
468 (u'11154', u'42JUYAAXX', '5', 1, 76, True, 'qseq'),
469 (u'11154', u'42JUYAAXX', '5', 2, 76, True, 'qseq'),
470 (u'11154', u'61MJTAAXX', '6', 1, 76, False, 'qseq'),
471 (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
472 (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
473 (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
474 (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
475 (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
476 (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
477 (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
478 (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
479 (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
480 (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
481 (u'11154', u'30221AAXX', '4', 1, 33, False, 'srf'),
482 (u'11154', u'30DY0AAXX', '8', 1, 151, True, 'srf')
# `found` is a set, so several files that share the same attributes (the split
# fastq parts) collapse into a single tuple before the comparison.
484 found = set([(l.library_id, l.flowcell_id, l.lane_number, l.read, l.cycle, l.ispaired, l.filetype) for l in seqs])
485 self.assertEqual(expected, found)
# Verify the fastq targets that update_fastq_targets() derives from the
# discovered archive files.
487 def test_find_needed_targets(self):
488 lib_db = self.extract.find_archive_sequence_files(self.result_map)
# The remaining argument line of this call is not visible in the listing.
490 needed_targets = self.extract.update_fastq_targets(self.result_map,
492 self.assertEqual(len(needed_targets), 9)
# needed_targets is indexed by the full target path: the library's result
# directory followed by the fastq file name. A missing key raises KeyError,
# so each lookup below also asserts that the target exists.
493 srf_30221 = needed_targets[
494 self.result_map['11154'] + u'/11154_30221AAXX_c33_l4.fastq']
495 qseq_42JUY_r1 = needed_targets[
496 self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r1.fastq']
497 qseq_42JUY_r2 = needed_targets[
498 self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r2.fastq']
499 qseq_61MJT = needed_targets[
500 self.result_map['11154'] + u'/11154_61MJTAAXX_c76_l6.fastq']
501 split_C02F9_r1 = needed_targets[
502 self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r1.fastq']
503 split_C02F9_r2 = needed_targets[
504 self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r2.fastq']
# Each target entry maps a file type to its source files: one source for the
# srf and qseq targets, two parts for each split_fastq target.
506 self.assertEqual(len(srf_30221['srf']), 1)
507 self.assertEqual(len(qseq_42JUY_r1['qseq']), 1)
508 self.assertEqual(len(qseq_42JUY_r2['qseq']), 1)
509 self.assertEqual(len(qseq_61MJT['qseq']), 1)
510 self.assertEqual(len(split_C02F9_r1['split_fastq']), 2)
511 self.assertEqual(len(split_C02F9_r2['split_fastq']), 2)
# Check the argument dictionaries produced by build_condor_arguments() for
# the srf, qseq and split_fastq source types.
# NOTE(review): several lines of this method (dict openers, loop headers and
# some dict entries) are missing from this listing; the comments below only
# describe what is visible.
513 def test_generate_fastqs(self):
514 commands = self.extract.build_condor_arguments(self.result_map)
# The result is grouped by source file type.
516 srf = commands['srf']
517 qseq = commands['qseq']
518 split = commands['split_fastq']
# Expected number of conversion commands per source type.
520 self.assertEqual(len(srf), 2)
521 self.assertEqual(len(qseq), 3)
522 self.assertEqual(len(split), 4)
# Expected srf arguments, keyed by target fastq path.
525 os.path.join(self.result_map['11154'],
526 '11154_30221AAXX_c33_l4.fastq'): {
529 'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
530 'flowcell': u'30221AAXX',
531 'target': os.path.join(self.result_map['11154'],
532 u'11154_30221AAXX_c33_l4.fastq'),
534 os.path.join(self.result_map['11154'],
535 '11154_30DY0AAXX_c151_l8_r1.fastq'): {
538 'flowcell': u'30DY0AAXX',
539 'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'],
542 os.path.join(self.result_map['11154'],
543 u'11154_30DY0AAXX_c151_l8_r1.fastq'),
545 os.path.join(self.result_map['11154'],
546 u'11154_30DY0AAXX_c151_l8_r2.fastq'),
# Compare a generated srf command `args` with the expectation registered for
# its target (the loop header that binds `args` is not visible).
550 expected = srf_data[args['target']]
551 self.assertEqual(args['ispaired'], expected['ispaired'])
552 self.assertEqual(len(args['sources']), 1)
# Only the file name of the source is compared; its directory part is dropped.
553 _, source_filename = os.path.split(args['sources'][0])
554 self.assertEqual(source_filename, expected['sources'][0])
555 self.assertEqual(args['target'], expected['target'])
557 self.assertEqual(args['target_right'],
558 expected['target_right'])
559 if 'mid' in expected:
560 self.assertEqual(args['mid'], expected['mid'])
# Expected qseq arguments, keyed by target fastq path.
563 os.path.join(self.result_map['11154'],
564 '11154_42JUYAAXX_c76_l5_r1.fastq'): {
568 u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2']
570 os.path.join(self.result_map['11154'],
571 '11154_42JUYAAXX_c76_l5_r2.fastq'): {
575 u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2']
577 os.path.join(self.result_map['11154'],
578 '11154_61MJTAAXX_c76_l6.fastq'): {
582 u'woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2'],
# Compare a generated qseq command `args` with its expectation (loop header
# not visible); sources are matched positionally by file name.
586 expected = qseq_data[args['target']]
587 self.assertEqual(args['istar'], expected['istar'])
588 self.assertEqual(args['ispaired'], expected['ispaired'])
589 for i in range(len(expected['sources'])):
590 _, filename = os.path.split(args['sources'][i])
591 self.assertEqual(filename, expected['sources'][i])
# Expected split_fastq arguments, indexed by the target's file name.
594 split_test = dict((( x['target'], x) for x in
595 [{'sources': [u'11154_NoIndex_L003_R1_001.fastq.gz',
596 u'11154_NoIndex_L003_R1_002.fastq.gz'],
597 'pyscript': 'desplit_fastq.pyc',
598 'target': u'11154_C02F9ACXX_c202_l3_r1.fastq'},
599 {'sources': [u'11154_NoIndex_L003_R2_001.fastq.gz',
600 u'11154_NoIndex_L003_R2_002.fastq.gz'],
601 'pyscript': 'desplit_fastq.pyc',
602 'target': u'11154_C02F9ACXX_c202_l3_r2.fastq'},
603 {'sources': [u'12345_CGATGT_L003_R1_001.fastq.gz',
604 u'12345_CGATGT_L003_R1_002.fastq.gz',
605 u'12345_CGATGT_L003_R1_003.fastq.gz',
607 'pyscript': 'desplit_fastq.pyc',
608 'target': u'12345_C02F9ACXX_c202_l3_r1.fastq'},
609 {'sources': [u'12345_CGATGT_L003_R2_001.fastq.gz',
610 u'12345_CGATGT_L003_R2_002.fastq.gz',
611 u'12345_CGATGT_L003_R2_003.fastq.gz',
613 'pyscript': 'desplit_fastq.pyc',
614 'target': u'12345_C02F9ACXX_c202_l3_r2.fastq'}
# Look up the expectation by the base name of the generated target (the loop
# header that binds `arg` is not visible).
618 _, target = os.path.split(arg['target'])
619 pyscript = split_test[target]['pyscript']
620 self.assertTrue(arg['pyscript'].endswith(pyscript))
621 filename = split_test[target]['target']
622 self.assertTrue(arg['target'].endswith(filename))
# Sources are compared positionally and by suffix, so the directory prefix
# of the generated paths does not matter.
623 for s_index in range(len(arg['sources'])):
624 s1 = arg['sources'][s_index]
625 s2 = split_test[target]['sources'][s_index]
626 self.assertTrue(s1.endswith(s2))
# Run create_scripts() and inspect the condor submit files it is expected to
# write into the current working directory, one file per source type.
# NOTE(review): the continuation lines of several assertions are missing from
# this listing.
628 def test_create_scripts(self):
629 self.extract.create_scripts(self.result_map)
# srf.condor: collect the lines that start with 'argument' and check that the
# expected targets show up in them.
631 self.assertTrue(os.path.exists('srf.condor'))
632 with open('srf.condor', 'r') as srf:
633 arguments = [ l for l in srf if l.startswith('argument') ]
635 self.assertEqual(len(arguments), 2)
636 self.assertTrue('sub-11154/11154_30221AAXX_c33_l4.fastq'
639 'sub-11154/11154_30DY0AAXX_c151_l8_r2.fastq' in
# qseq.condor: same approach.
# NOTE(review): the file handle is still named `srf` although it reads
# qseq.condor.
642 self.assertTrue(os.path.exists('qseq.condor'))
643 with open('qseq.condor', 'r') as srf:
644 arguments = [ l for l in srf if l.startswith('argument') ]
646 self.assertEqual(len(arguments), 3)
647 self.assertTrue('sub-11154/11154_42JUYAAXX_c76_l5_r1.fastq ' in
650 'C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2' in
652 self.assertTrue('61MJTAAXX_c76_l6.fastq -f 61MJTAAXX' in
# split_fastq.condor: four argument lines are expected; the visible checks
# tie arguments[2] to read 1 and arguments[3] to read 2 of library 12345.
655 self.assertTrue(os.path.exists('split_fastq.condor'))
656 with open('split_fastq.condor', 'r') as split:
657 arguments = [ l for l in split if l.startswith('argument') ]
659 self.assertEqual(len(arguments), 4)
661 self.assertTrue('11154_NoIndex_L003_R1_001.fastq.gz' in \
664 self.assertTrue('11154_NoIndex_L003_R2_002.fastq.gz' in \
667 self.assertTrue('12345_CGATGT_L003_R1_001.fastq.gz' in arguments[2])
668 self.assertTrue('12345_CGATGT_L003_R1_002.fastq.gz' in arguments[2])
669 self.assertTrue('12345_CGATGT_L003_R1_003.fastq.gz' in arguments[2])
670 self.assertTrue('12345_C02F9ACXX_c202_l3_r1.fastq' in arguments[2])
673 self.assertTrue('12345_CGATGT_L003_R2_001.fastq.gz' in arguments[3])
674 self.assertTrue('12345_CGATGT_L003_R2_002.fastq.gz' in arguments[3])
675 self.assertTrue('12345_CGATGT_L003_R2_003.fastq.gz' in arguments[3])
676 self.assertTrue('12345_C02F9ACXX_c202_l3_r2.fastq' in arguments[3])
# Fragment of the suite builder: it adds every test of TestCondorFastq to
# `suite`. The enclosing def line and the creation of `suite` are not visible
# in this listing (presumably a TestSuite instance, given the import).
680 from unittest2 import TestSuite, defaultTestLoader
682 suite.addTests(defaultTestLoader.loadTestsFromTestCase(TestCondorFastq))
# Script entry point: run the tests through unittest2, naming 'suite' as the
# default test so the module-level suite builder selects what is executed.
685 if __name__ == "__main__":
686 from unittest2 import main
687 main(defaultTest='suite')