6 from pprint import pprint
10 from django.test import TestCase
11 from django.test.utils import setup_test_environment, \
12 teardown_test_environment
13 from django.db import connection
14 from django.conf import settings
16 from htsworkflow.submission.condorfastq import CondorFastqExtract
17 from htsworkflow.submission.results import ResultMap
18 from htsworkflow.util.rdfhelp import \
19 add_default_schemas, dump_model
20 from htsworkflow.util.rdfinfer import Infer
25 'C02F9ACXX/C1-202/Project_11154',
26 'C02F9ACXX/C1-202/Project_12342_Index1',
27 'C02F9ACXX/C1-202/Project_12342_Index2',
28 'C02F9ACXX/C1-202/Project_12345',
40 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_001.fastq.gz',
41 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_002.fastq.gz',
42 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_001.fastq.gz',
43 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_002.fastq.gz',
44 'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R1_001.fastq.gz',
45 'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R2_001.fastq.gz',
46 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R1_001.fastq.gz',
47 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R2_001.fastq.gz',
48 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R1_001.fastq.gz',
49 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R2_001.fastq.gz',
50 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_001.fastq.gz',
51 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_002.fastq.gz',
52 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_003.fastq.gz',
53 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_001.fastq.gz',
54 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_002.fastq.gz',
55 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_003.fastq.gz',
56 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r1.tar.bz2',
57 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r1.tar.bz2',
58 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r1.tar.bz2',
59 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r1.tar.bz2',
60 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2',
61 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r1.tar.bz2',
62 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r1.tar.bz2',
63 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r1.tar.bz2',
64 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r2.tar.bz2',
65 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r2.tar.bz2',
66 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r2.tar.bz2',
67 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r2.tar.bz2',
68 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r2.tar.bz2',
69 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2',
70 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r2.tar.bz2',
71 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r2.tar.bz2',
72 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r2.tar.bz2',
73 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_1.srf',
74 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_2.srf',
75 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_3.srf',
76 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf',
77 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_5.srf',
78 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_6.srf',
79 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_7.srf',
80 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_8.srf',
81 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_1.srf',
82 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_2.srf',
83 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_3.srf',
84 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_4.srf',
85 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_5.srf',
86 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_6.srf',
87 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_7.srf',
88 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf',
89 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l1_r1.tar.bz2',
90 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l2_r1.tar.bz2',
91 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l3_r1.tar.bz2',
92 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l4_r1.tar.bz2',
93 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l5_r1.tar.bz2',
94 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2',
95 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l7_r1.tar.bz2',
96 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l8_r1.tar.bz2',
99 lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
100 @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
101 @prefix dc: <http://purl.org/dc/elements/1.1/> .
102 @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
103 @prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
104 @prefix seqns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
105 @prefix invns: <http://jumpgate.caltech.edu/wiki/InventoryOntology#> .
107 <http://localhost/library/10000/> a libns:Library .
108 <http://localhost/library/1331/> a libns:Library .
109 <http://localhost/library/1421/> a libns:Library .
110 <http://localhost/library/1661/> a libns:Library .
112 <http://localhost/flowcell/30221AAXX/>
113 a libns:IlluminaFlowcell ;
114 libns:read_length 33 ;
115 libns:flowcell_type "Single"@en ;
116 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
117 libns:has_lane <http://localhost/lane/3401> ;
118 libns:has_lane <http://localhost/lane/3402> ;
119 libns:has_lane <http://localhost/lane/3403> ;
120 libns:has_lane <http://localhost/lane/3404> ;
121 libns:has_lane <http://localhost/lane/3405> ;
122 libns:has_lane <http://localhost/lane/3406> ;
123 libns:has_lane <http://localhost/lane/3407> ;
124 libns:has_lane <http://localhost/lane/3408> ;
125 libns:flowcell_id "30221AAXX"@en .
127 <http://localhost/lane/3401>
128 a libns:IlluminaLane ;
129 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
130 libns:library <http://localhost/library/10000/> ;
131 libns:lane_number "1" .
132 <http://localhost/lane/3402>
133 a libns:IlluminaLane ;
134 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
135 libns:library <http://localhost/library/10000/> ;
136 libns:lane_number "2" .
137 <http://localhost/lane/3403>
138 a libns:IlluminaLane ;
139 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
140 libns:library <http://localhost/library/10000/> ;
141 libns:lane_number "3" .
142 <http://localhost/lane/3404>
143 a libns:IlluminaLane ;
144 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
145 libns:library <http://localhost/library/11154/> ;
146 libns:lane_number "4" .
149 # status "Unknown"@en .
150 <http://localhost/lane/3405>
151 a libns:IlluminaLane ;
152 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
153 libns:library <http://localhost/library/10000/> ;
154 libns:lane_number "5" .
155 <http://localhost/lane/3406>
156 a libns:IlluminaLane ;
157 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
158 libns:library <http://localhost/library/10000/> ;
159 libns:lane_number "6" .
160 <http://localhost/lane/3407>
161 a libns:IlluminaLane ;
162 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
163 libns:library <http://localhost/library/10000/> ;
164 libns:lane_number "7" .
165 <http://localhost/lane/3408>
166 a libns:IlluminaLane ;
167 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
168 libns:library <http://localhost/library/10000/> ;
169 libns:lane_number "8" .
171 <http://localhost/flowcell/42JUYAAXX/>
172 a libns:IlluminaFlowcell ;
173 libns:read_length 76 ;
174 libns:flowcell_type "Paired"@en ;
175 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
176 libns:has_lane <http://localhost/lane/4201> ;
177 libns:has_lane <http://localhost/lane/4202> ;
178 libns:has_lane <http://localhost/lane/4203> ;
179 libns:has_lane <http://localhost/lane/4204> ;
180 libns:has_lane <http://localhost/lane/4205> ;
181 libns:has_lane <http://localhost/lane/4206> ;
182 libns:has_lane <http://localhost/lane/4207> ;
183 libns:has_lane <http://localhost/lane/4208> ;
184 libns:flowcell_id "42JUYAAXX"@en .
186 <http://localhost/lane/4201>
187 a libns:IlluminaLane ;
188 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
189 libns:library <http://localhost/library/1421/> ;
190 libns:lane_number "1" .
191 <http://localhost/lane/4202>
192 a libns:IlluminaLane ;
193 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
194 libns:library <http://localhost/library/1421/> ;
195 libns:lane_number "2" .
196 <http://localhost/lane/4203>
197 a libns:IlluminaLane ;
198 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
199 libns:library <http://localhost/library/1421/> ;
200 libns:lane_number "3" .
201 <http://localhost/lane/4204>
202 a libns:IlluminaLane ;
203 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
204 libns:library <http://localhost/library/1421/> ;
205 libns:lane_number "4" .
206 <http://localhost/lane/4205>
207 a libns:IlluminaLane ;
208 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
209 libns:library <http://localhost/library/11154/> ;
210 libns:lane_number "5" .
213 # status "Unknown"@en .
214 <http://localhost/lane/4206>
215 a libns:IlluminaLane ;
216 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
217 libns:library <http://localhost/library/1421/> ;
218 libns:lane_number "6" .
219 <http://localhost/lane/4207>
220 a libns:IlluminaLane ;
221 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
222 libns:library <http://localhost/library/1421/> ;
223 libns:lane_number "7" .
224 <http://localhost/lane/4208>
225 a libns:IlluminaLane ;
226 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
227 libns:library <http://localhost/library/1421/> ;
228 libns:lane_number "8" .
230 <http://localhost/flowcell/61MJTAAXX/>
231 a libns:IlluminaFlowcell ;
232 libns:read_length 76 ;
233 libns:flowcell_type "Single"@en ;
234 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
235 libns:has_lane <http://localhost/lane/6601> ;
236 libns:has_lane <http://localhost/lane/6602> ;
237 libns:has_lane <http://localhost/lane/6603> ;
238 libns:has_lane <http://localhost/lane/6604> ;
239 libns:has_lane <http://localhost/lane/6605> ;
240 libns:has_lane <http://localhost/lane/6606> ;
241 libns:has_lane <http://localhost/lane/6607> ;
242 libns:has_lane <http://localhost/lane/6608> ;
243 libns:flowcell_id "61MJTAAXX"@en .
245 <http://localhost/lane/6601>
246 a libns:IlluminaLane ;
247 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
248 libns:library <http://localhost/library/1661/> ;
249 libns:lane_number "1" .
250 <http://localhost/lane/6602>
251 a libns:IlluminaLane ;
252 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
253 libns:library <http://localhost/library/1661/> ;
254 libns:lane_number "2" .
255 <http://localhost/lane/6603>
256 a libns:IlluminaLane ;
257 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
258 libns:library <http://localhost/library/1661/> ;
259 libns:lane_number "3" .
260 <http://localhost/lane/6604>
261 a libns:IlluminaLane ;
262 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
263 libns:library <http://localhost/library/1661/> ;
264 libns:lane_number "4" .
265 <http://localhost/lane/6605>
266 a libns:IlluminaLane ;
267 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
268 libns:library <http://localhost/library/1661/> ;
269 libns:lane_number "5" .
270 <http://localhost/lane/6606>
271 a libns:IlluminaLane ;
272 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
273 libns:library <http://localhost/library/11154/> ;
274 libns:lane_number "6" .
277 # status "Unknown"@en .
278 <http://localhost/lane/6607>
279 a libns:IlluminaLane ;
280 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
281 libns:library <http://localhost/library/1661/> ;
282 libns:lane_number "7" .
283 <http://localhost/lane/6608>
284 a libns:IlluminaLane ;
285 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
286 libns:library <http://localhost/library/1661/> ;
287 libns:lane_number "8" .
289 <http://localhost/flowcell/30DY0AAXX/>
290 a libns:IlluminaFlowcell ;
291 libns:read_length 76 ;
292 libns:flowcell_type "Paired"@en ;
293 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
294 libns:has_lane <http://localhost/lane/3801> ;
295 libns:has_lane <http://localhost/lane/3802> ;
296 libns:has_lane <http://localhost/lane/3803> ;
297 libns:has_lane <http://localhost/lane/3804> ;
298 libns:has_lane <http://localhost/lane/3805> ;
299 libns:has_lane <http://localhost/lane/3806> ;
300 libns:has_lane <http://localhost/lane/3807> ;
301 libns:has_lane <http://localhost/lane/3808> ;
302 libns:flowcell_id "30DY0AAXX"@en .
304 <http://localhost/lane/3801>
305 a libns:IlluminaLane ;
306 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
307 libns:library <http://localhost/library/1331/> ;
308 libns:lane_number "1" .
309 <http://localhost/lane/3802>
310 a libns:IlluminaLane ;
311 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
312 libns:library <http://localhost/library/1331/> ;
313 libns:lane_number "2" .
314 <http://localhost/lane/3803>
315 a libns:IlluminaLane ;
316 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
317 libns:library <http://localhost/library/1331/> ;
318 libns:lane_number "3" .
319 <http://localhost/lane/3804>
320 a libns:IlluminaLane ;
321 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
322 libns:library <http://localhost/library/1331/> ;
323 libns:lane_number "4" .
324 <http://localhost/lane/3805>
325 a libns:IlluminaLane ;
326 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
327 libns:library <http://localhost/library/1331/> ;
328 libns:lane_number "5" .
329 <http://localhost/lane/3806>
330 a libns:IlluminaLane ;
331 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
332 libns:library <http://localhost/library/1331/> ;
333 libns:lane_number "6" .
334 <http://localhost/lane/3807>
335 a libns:IlluminaLane ;
336 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
337 libns:library <http://localhost/library/1331/> ;
338 libns:lane_number "7" .
339 <http://localhost/lane/3808>
340 a libns:IlluminaLane ;
341 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
342 libns:library <http://localhost/library/11154/> ;
343 libns:lane_number "8" .
346 # status "Unknown"@en .
348 <http://localhost/flowcell/C02F9ACXX/>
349 a libns:IlluminaFlowcell ;
350 libns:read_length 101 ;
351 libns:flowcell_type "Paired"@en ;
352 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
353 libns:has_lane <http://localhost/lane/12300> ;
354 libns:has_lane <http://localhost/lane/12500> ;
355 libns:flowcell_id "C02F9ACXX"@en .
357 <http://localhost/lane/12300>
358 a libns:IlluminaLane ;
359 libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
360 libns:library <http://localhost/library/12345/> ;
361 libns:lane_number "3" .
364 # status "Unknown"@en .
366 <http://localhost/lane/12500>
367 a libns:IlluminaLane ;
368 libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
369 libns:library <http://localhost/library/11154/> ;
370 libns:lane_number "3" .
373 # status "Unknown"@en .
375 <http://localhost/library/11154/>
377 libns:affiliation "TSR"@en;
378 libns:concentration "29.7";
379 libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
380 libns:experiment_type "RNA-seq"@en ;
382 libns:has_lane <http://localhost/lane/3404> ;
383 libns:has_lane <http://localhost/lane/4205> ;
384 libns:has_lane <http://localhost/lane/6606> ;
385 libns:has_lane <http://localhost/lane/3808> ;
386 libns:has_lane <http://localhost/lane/12500> ;
387 libns:insert_size 2000 ;
388 libns:library_id "11154"@en ;
389 libns:library_type "Paired End (Multiplexed)"@en ;
390 libns:made_by "Gary Gygax"@en ;
391 libns:name "Paired Ends ASDF"@en ;
392 libns:replicate "1"@en;
393 libns:species_name "Mus musculus"@en ;
394 libns:stopping_point "Completed"@en ;
395 libns:total_unique_locations 8841201 .
398 <http://localhost/library/12345/>
400 libns:affiliation "TSR"@en;
401 libns:concentration "12.345";
402 libns:cell_line "Unknown"@en ;
403 libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
404 libns:experiment_type "RNA-seq"@en ;
406 libns:has_lane <http://localhost/lane/12300> ;
407 libns:insert_size 2000 ;
408 libns:library_id "12345"@en ;
409 libns:library_type "Paired End (Multiplexed)"@en ;
410 libns:made_by "Gary Gygax"@en ;
411 libns:name "Paired Ends THING"@en ;
412 libns:replicate "1"@en;
413 libns:species_name "Mus musculus"@en ;
414 libns:stopping_point "Completed"@en ;
415 libns:total_unique_locations 8841201 .
# Base URL passed as the first argument to CondorFastqExtract when the test
# fixture is built; the resource URIs in lib_turtle use the same
# http://localhost prefix.
418 HOST = "http://localhost"
# Test case exercising CondorFastqExtract against a scratch directory tree
# and the lib_turtle RDF fixture.
# NOTE(review): this listing omits some original lines (the def lines of the
# fixture setup/cleanup, several loop headers and literal openers/closers),
# so the comments below describe only the statements that are visible.
420 class TestCondorFastq(TestCase):
# Fixture setup (presumably setUp -- its def line is not visible here).
# Remember the starting working directory; the code that restores it is not
# visible in this excerpt.
422 self.cwd = os.getcwd()
# Create a scratch root containing 'flowcells' and 'log' subdirectories.
424 self.tempdir = tempfile.mkdtemp(prefix='condorfastq_test')
425 self.flowcelldir = os.path.join(self.tempdir, 'flowcells')
426 os.mkdir(self.flowcelldir)
428 self.logdir = os.path.join(self.tempdir, 'log')
429 os.mkdir(self.logdir)
# One directory per entry `d` and one placeholder file per entry `f` under
# the flowcell directory; the loops and the sequences they iterate over are
# not visible here.
432 os.mkdir(os.path.join(self.flowcelldir, d))
435 filename = os.path.join(self.flowcelldir, f)
436 with open(filename, 'w') as stream:
437 stream.write('testfile')
# Map each library id to <tempdir>/sub-<id>.
439 self.result_map = ResultMap()
440 for lib_id in [u'11154', u'12345']:
441 subname = 'sub-%s' % (lib_id,)
442 sub_dir = os.path.join(self.tempdir, subname)
444 self.result_map[lib_id] = sub_dir
# Build the extractor for HOST (the remaining constructor arguments are not
# visible), load the Turtle fixture into its model, add the default schemas,
# and require that inference validation reports no messages.
446 self.extract = CondorFastqExtract(HOST,
449 self.extract.model.parse(data=lib_turtle, format='turtle')
450 add_default_schemas(self.extract.model)
451 inference = Infer(self.extract.model)
452 errmsgs = list(inference.run_validation())
453 self.assertEqual(len(errmsgs), 0)
# Run the tests from inside the scratch root: test_create_scripts checks
# relative paths such as 'srf.condor'.
454 os.chdir(self.tempdir)
# Cleanup (presumably tearDown -- its def line is not visible here): remove
# the whole scratch tree.
457 shutil.rmtree(self.tempdir)
# Checks that find_relevant_flowcell_ids() returns exactly the expected set
# of flowcell ids; only the first expected id is visible in this excerpt.
460 def test_find_relevant_flowcell_ids(self):
461 expected = set(('30221AAXX',
466 flowcell_ids = self.extract.find_relevant_flowcell_ids()
467 self.assertEqual(flowcell_ids, expected)
# Checks the sequence files reported by find_archive_sequence_files() for
# the mapped libraries. Each expected tuple is
# (library_id, flowcell_id, lane_number, read, cycle, ispaired, filetype).
# NOTE(review): several expected tuples below are repeated verbatim. `found`
# is a set, so the repeats only collapse harmlessly if `expected` is also
# built as a set -- its constructor line is not visible; confirm.
469 def test_find_archive_sequence(self):
470 seqs = self.extract.find_archive_sequence_files(self.result_map)
473 (u'11154', u'42JUYAAXX', u'5', 1, 76, True, 'qseq'),
474 (u'11154', u'42JUYAAXX', u'5', 2, 76, True, 'qseq'),
475 (u'11154', u'61MJTAAXX', u'6', 1, 76, False, 'qseq'),
476 (u'11154', u'C02F9ACXX', u'3', 2, 202, True, 'split_fastq'),
477 (u'11154', u'C02F9ACXX', u'3', 1, 202, True, 'split_fastq'),
478 (u'11154', u'C02F9ACXX', u'3', 1, 202, True, 'split_fastq'),
479 (u'11154', u'C02F9ACXX', u'3', 2, 202, True, 'split_fastq'),
480 (u'12345', u'C02F9ACXX', u'3', 1, 202, True, 'split_fastq'),
481 (u'12345', u'C02F9ACXX', u'3', 2, 202, True, 'split_fastq'),
482 (u'12345', u'C02F9ACXX', u'3', 2, 202, True, 'split_fastq'),
483 (u'12345', u'C02F9ACXX', u'3', 1, 202, True, 'split_fastq'),
484 (u'12345', u'C02F9ACXX', u'3', 1, 202, True, 'split_fastq'),
485 (u'12345', u'C02F9ACXX', u'3', 2, 202, True, 'split_fastq'),
486 (u'11154', u'30221AAXX', u'4', 1, 33, False, 'srf'),
487 (u'11154', u'30DY0AAXX', u'8', 1, 151, True, 'srf')
# Reduce every returned object to the same 7-field tuple so the comparison
# ignores ordering and any other attributes.
489 found = set([(l.library_id, l.flowcell_id, l.lane_number, l.read, l.cycle, l.ispaired, l.filetype) for l in seqs])
490 self.assertEqual(expected, found)
# Checks the mapping returned by update_fastq_targets(): nine targets are
# expected in total, and six of them (all for library 11154) are looked up by
# target path and checked for the number of sources under their file-type key.
# NOTE(review): the lookup keys are built with '/' string concatenation here,
# whereas test_generate_fastqs builds equivalent paths with os.path.join.
492 def test_find_needed_targets(self):
493 lib_db = self.extract.find_archive_sequence_files(self.result_map)
495 needed_targets = self.extract.update_fastq_targets(self.result_map,
497 self.assertEqual(len(needed_targets), 9)
498 srf_30221 = needed_targets[
499 self.result_map['11154'] + u'/11154_30221AAXX_c33_l4.fastq']
500 qseq_42JUY_r1 = needed_targets[
501 self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r1.fastq']
502 qseq_42JUY_r2 = needed_targets[
503 self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r2.fastq']
504 qseq_61MJT = needed_targets[
505 self.result_map['11154'] + u'/11154_61MJTAAXX_c76_l6.fastq']
506 split_C02F9_r1 = needed_targets[
507 self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r1.fastq']
508 split_C02F9_r2 = needed_targets[
509 self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r2.fastq']
511 self.assertEqual(len(srf_30221['srf']), 1)
512 self.assertEqual(len(qseq_42JUY_r1['qseq']), 1)
513 self.assertEqual(len(qseq_42JUY_r2['qseq']), 1)
514 self.assertEqual(len(qseq_61MJT['qseq']), 1)
515 self.assertEqual(len(split_C02F9_r1['split_fastq']), 2)
516 self.assertEqual(len(split_C02F9_r2['split_fastq']), 2)
# Checks the per-file-type argument records returned by
# build_condor_arguments(): 2 'srf', 3 'qseq' and 4 'split_fastq' entries,
# each compared against a table of expected values keyed by target.
# The loop headers and parts of the expected-value literals are not visible
# in this excerpt.
518 def test_generate_fastqs(self):
519 commands = self.extract.build_condor_arguments(self.result_map)
521 srf = commands['srf']
522 qseq = commands['qseq']
523 split = commands['split_fastq']
525 self.assertEqual(len(srf), 2)
526 self.assertEqual(len(qseq), 3)
527 self.assertEqual(len(split), 4)
530 os.path.join(self.result_map['11154'],
531 '11154_30221AAXX_c33_l4.fastq'): {
534 'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
535 'flowcell': u'30221AAXX',
536 'target': os.path.join(self.result_map['11154'],
537 u'11154_30221AAXX_c33_l4.fastq'),
539 os.path.join(self.result_map['11154'],
540 '11154_30DY0AAXX_c151_l8_r1.fastq'): {
543 'flowcell': u'30DY0AAXX',
544 'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'],
547 os.path.join(self.result_map['11154'],
548 u'11154_30DY0AAXX_c151_l8_r1.fastq'),
550 os.path.join(self.result_map['11154'],
551 u'11154_30DY0AAXX_c151_l8_r2.fastq'),
# srf check: look up the expectation by the record's full target path; each
# record must have exactly one source, compared by basename only.
555 expected = srf_data[args['target']]
556 self.assertEqual(args['ispaired'], expected['ispaired'])
557 self.assertEqual(len(args['sources']), 1)
558 _, source_filename = os.path.split(args['sources'][0])
559 self.assertEqual(source_filename, expected['sources'][0])
560 self.assertEqual(args['target'], expected['target'])
562 self.assertEqual(args['target_right'],
563 expected['target_right'])
564 if 'mid' in expected:
565 self.assertEqual(args['mid'], expected['mid'])
568 os.path.join(self.result_map['11154'],
569 '11154_42JUYAAXX_c76_l5_r1.fastq'): {
573 u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2']
575 os.path.join(self.result_map['11154'],
576 '11154_42JUYAAXX_c76_l5_r2.fastq'): {
580 u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2']
582 os.path.join(self.result_map['11154'],
583 '11154_61MJTAAXX_c76_l6.fastq'): {
587 u'woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2'],
# qseq check: sources are compared by basename, position by position.
# NOTE(review): the loop runs over the expected list's length, so extra
# sources in the actual record would go unnoticed.
591 expected = qseq_data[args['target']]
592 self.assertEqual(args['istar'], expected['istar'])
593 self.assertEqual(args['ispaired'], expected['ispaired'])
594 for i in range(len(expected['sources'])):
595 _, filename = os.path.split(args['sources'][i])
596 self.assertEqual(filename, expected['sources'][i])
# split_fastq expectations, indexed by target file name. 'pyscript' is a
# regular expression applied with re.search below; it accepts a script path
# ending in desplit_fastq.py or desplit_fastq.pyc (the '.' is unescaped, so it
# matches any single character).
599 split_test = dict((( x['target'], x) for x in
600 [{'sources': [u'11154_NoIndex_L003_R1_001.fastq.gz',
601 u'11154_NoIndex_L003_R1_002.fastq.gz'],
602 'pyscript': 'desplit_fastq.pyc?$',
603 'target': u'11154_C02F9ACXX_c202_l3_r1.fastq'},
604 {'sources': [u'11154_NoIndex_L003_R2_001.fastq.gz',
605 u'11154_NoIndex_L003_R2_002.fastq.gz'],
606 'pyscript': 'desplit_fastq.pyc?$',
607 'target': u'11154_C02F9ACXX_c202_l3_r2.fastq'},
608 {'sources': [u'12345_CGATGT_L003_R1_001.fastq.gz',
609 u'12345_CGATGT_L003_R1_002.fastq.gz',
610 u'12345_CGATGT_L003_R1_003.fastq.gz',
612 'pyscript': 'desplit_fastq.pyc?$',
613 'target': u'12345_C02F9ACXX_c202_l3_r1.fastq'},
614 {'sources': [u'12345_CGATGT_L003_R2_001.fastq.gz',
615 u'12345_CGATGT_L003_R2_002.fastq.gz',
616 u'12345_CGATGT_L003_R2_003.fastq.gz',
618 'pyscript': 'desplit_fastq.pyc?$',
619 'target': u'12345_C02F9ACXX_c202_l3_r2.fastq'}
# split_fastq check: match each record to its expectation by target basename,
# then compare script, target and sources by suffix.
# NOTE(review): the source loop runs over the actual list's length, so a
# record with fewer sources than expected would still pass, and one with more
# would raise IndexError rather than fail an assertion.
623 _, target = os.path.split(arg['target'])
624 pyscript = split_test[target]['pyscript']
625 self.assertTrue(re.search(pyscript, arg['pyscript']))
626 filename = split_test[target]['target']
627 self.assertTrue(arg['target'].endswith(filename))
628 for s_index in range(len(arg['sources'])):
629 s1 = arg['sources'][s_index]
630 s2 = split_test[target]['sources'][s_index]
631 self.assertTrue(s1.endswith(s2))
# Checks the files written by create_scripts(): srf.condor, qseq.condor and
# split_fastq.condor must exist in the current directory, and the lines that
# start with 'argument' are counted and searched for expected substrings.
# NOTE(review): the checks index `arguments` by position, so they depend on
# the order in which the lines appear in each file.
633 def test_create_scripts(self):
634 self.extract.create_scripts(self.result_map)
# srf.condor: two argument lines expected.
636 self.assertTrue(os.path.exists('srf.condor'))
637 with open('srf.condor', 'r') as srf:
638 arguments = [ l for l in srf if l.startswith('argument') ]
640 self.assertEqual(len(arguments), 2)
641 self.assertTrue('sub-11154/11154_30221AAXX_c33_l4.fastq'
644 'sub-11154/11154_30DY0AAXX_c151_l8_r2.fastq' in
# qseq.condor: three argument lines expected.
# NOTE(review): the file handle below is still named `srf` although it reads
# qseq.condor.
647 self.assertTrue(os.path.exists('qseq.condor'))
648 with open('qseq.condor', 'r') as srf:
649 arguments = [ l for l in srf if l.startswith('argument') ]
651 self.assertEqual(len(arguments), 3)
652 self.assertTrue('sub-11154/11154_42JUYAAXX_c76_l5_r1.fastq ' in
655 'C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2' in
657 self.assertTrue('61MJTAAXX_c76_l6.fastq -f 61MJTAAXX' in
# split_fastq.condor: four argument lines expected.
660 self.assertTrue(os.path.exists('split_fastq.condor'))
661 with open('split_fastq.condor', 'r') as split:
662 arguments = [ l for l in split if l.startswith('argument') ]
664 self.assertEqual(len(arguments), 4)
666 self.assertTrue('11154_NoIndex_L003_R1_001.fastq.gz' in \
669 self.assertTrue('11154_NoIndex_L003_R2_002.fastq.gz' in \
672 self.assertTrue('12345_CGATGT_L003_R1_001.fastq.gz' in arguments[2])
673 self.assertTrue('12345_CGATGT_L003_R1_002.fastq.gz' in arguments[2])
674 self.assertTrue('12345_CGATGT_L003_R1_003.fastq.gz' in arguments[2])
675 self.assertTrue('12345_C02F9ACXX_c202_l3_r1.fastq' in arguments[2])
678 self.assertTrue('12345_CGATGT_L003_R2_001.fastq.gz' in arguments[3])
679 self.assertTrue('12345_CGATGT_L003_R2_002.fastq.gz' in arguments[3])
680 self.assertTrue('12345_CGATGT_L003_R2_003.fastq.gz' in arguments[3])
681 self.assertTrue('12345_C02F9ACXX_c202_l3_r2.fastq' in arguments[3])
# Adds every test loaded from TestCondorFastq to `suite`.
# NOTE(review): the enclosing definition and the statement that creates
# `suite` are not visible in this excerpt -- presumably a suite() factory,
# given the defaultTest='suite' argument below; confirm.
685 from unittest import TestSuite, defaultTestLoader
687 suite.addTests(defaultTestLoader.loadTestsFromTestCase(TestCondorFastq))
# When executed as a script, run unittest's main() with 'suite' as the
# default test.
690 if __name__ == "__main__":
691 from unittest import main
692 main(defaultTest='suite')