5 from pprint import pprint
9 from django.test import TestCase
10 from django.test.utils import setup_test_environment, \
11 teardown_test_environment
12 from django.db import connection
13 from django.conf import settings
15 from htsworkflow.submission.condorfastq import CondorFastqExtract
16 from htsworkflow.submission.results import ResultMap
17 from htsworkflow.util.rdfhelp import \
18 add_default_schemas, load_string_into_model, dump_model
19 from htsworkflow.util.rdfinfer import Infer
24 'C02F9ACXX/C1-202/Project_11154',
25 'C02F9ACXX/C1-202/Project_12342_Index1',
26 'C02F9ACXX/C1-202/Project_12342_Index2',
27 'C02F9ACXX/C1-202/Project_12345',
39 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_001.fastq.gz',
40 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_002.fastq.gz',
41 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_001.fastq.gz',
42 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_002.fastq.gz',
43 'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R1_001.fastq.gz',
44 'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R2_001.fastq.gz',
45 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R1_001.fastq.gz',
46 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R2_001.fastq.gz',
47 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R1_001.fastq.gz',
48 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R2_001.fastq.gz',
49 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_001.fastq.gz',
50 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_002.fastq.gz',
51 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_003.fastq.gz',
52 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_001.fastq.gz',
53 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_002.fastq.gz',
54 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_003.fastq.gz',
55 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r1.tar.bz2',
56 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r1.tar.bz2',
57 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r1.tar.bz2',
58 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r1.tar.bz2',
59 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2',
60 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r1.tar.bz2',
61 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r1.tar.bz2',
62 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r1.tar.bz2',
63 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r2.tar.bz2',
64 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r2.tar.bz2',
65 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r2.tar.bz2',
66 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r2.tar.bz2',
67 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r2.tar.bz2',
68 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2',
69 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r2.tar.bz2',
70 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r2.tar.bz2',
71 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r2.tar.bz2',
72 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_1.srf',
73 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_2.srf',
74 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_3.srf',
75 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf',
76 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_5.srf',
77 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_6.srf',
78 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_7.srf',
79 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_8.srf',
80 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_1.srf',
81 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_2.srf',
82 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_3.srf',
83 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_4.srf',
84 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_5.srf',
85 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_6.srf',
86 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_7.srf',
87 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf',
88 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l1_r1.tar.bz2',
89 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l2_r1.tar.bz2',
90 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l3_r1.tar.bz2',
91 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l4_r1.tar.bz2',
92 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l5_r1.tar.bz2',
93 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2',
94 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l7_r1.tar.bz2',
95 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l8_r1.tar.bz2',
98 lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
99 @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
100 @prefix dc: <http://purl.org/dc/elements/1.1/> .
101 @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
102 @prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
103 @prefix seqns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
104 @prefix invns: <http://jumpgate.caltech.edu/wiki/InventoryOntology#> .
106 <http://localhost/library/10000/> a libns:Library .
107 <http://localhost/library/1331/> a libns:Library .
108 <http://localhost/library/1421/> a libns:Library .
109 <http://localhost/library/1661/> a libns:Library .
111 <http://localhost/flowcell/30221AAXX/>
112 a libns:IlluminaFlowcell ;
113 libns:read_length 33 ;
114 libns:flowcell_type "Single"@en ;
115 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
116 libns:has_lane <http://localhost/lane/3401> ;
117 libns:has_lane <http://localhost/lane/3402> ;
118 libns:has_lane <http://localhost/lane/3403> ;
119 libns:has_lane <http://localhost/lane/3404> ;
120 libns:has_lane <http://localhost/lane/3405> ;
121 libns:has_lane <http://localhost/lane/3406> ;
122 libns:has_lane <http://localhost/lane/3407> ;
123 libns:has_lane <http://localhost/lane/3408> ;
124 libns:flowcell_id "30221AAXX"@en .
126 <http://localhost/lane/3401>
127 a libns:IlluminaLane ;
128 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
129 libns:library <http://localhost/library/10000/> ;
130 libns:lane_number "1" .
131 <http://localhost/lane/3402>
132 a libns:IlluminaLane ;
133 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
134 libns:library <http://localhost/library/10000/> ;
135 libns:lane_number "2" .
136 <http://localhost/lane/3403>
137 a libns:IlluminaLane ;
138 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
139 libns:library <http://localhost/library/10000/> ;
140 libns:lane_number "3" .
141 <http://localhost/lane/3404>
142 a libns:IlluminaLane ;
143 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
144 libns:library <http://localhost/library/11154/> ;
145 libns:lane_number "4" .
148 # status "Unknown"@en .
149 <http://localhost/lane/3405>
150 a libns:IlluminaLane ;
151 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
152 libns:library <http://localhost/library/10000/> ;
153 libns:lane_number "5" .
154 <http://localhost/lane/3406>
155 a libns:IlluminaLane ;
156 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
157 libns:library <http://localhost/library/10000/> ;
158 libns:lane_number "6" .
159 <http://localhost/lane/3407>
160 a libns:IlluminaLane ;
161 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
162 libns:library <http://localhost/library/10000/> ;
163 libns:lane_number "7" .
164 <http://localhost/lane/3408>
165 a libns:IlluminaLane ;
166 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
167 libns:library <http://localhost/library/10000/> ;
168 libns:lane_number "8" .
170 <http://localhost/flowcell/42JUYAAXX/>
171 a libns:IlluminaFlowcell ;
172 libns:read_length 76 ;
173 libns:flowcell_type "Paired"@en ;
174 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
175 libns:has_lane <http://localhost/lane/4201> ;
176 libns:has_lane <http://localhost/lane/4202> ;
177 libns:has_lane <http://localhost/lane/4203> ;
178 libns:has_lane <http://localhost/lane/4204> ;
179 libns:has_lane <http://localhost/lane/4205> ;
180 libns:has_lane <http://localhost/lane/4206> ;
181 libns:has_lane <http://localhost/lane/4207> ;
182 libns:has_lane <http://localhost/lane/4208> ;
183 libns:flowcell_id "42JUYAAXX"@en .
185 <http://localhost/lane/4201>
186 a libns:IlluminaLane ;
187 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
188 libns:library <http://localhost/library/1421/> ;
189 libns:lane_number "1" .
190 <http://localhost/lane/4202>
191 a libns:IlluminaLane ;
192 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
193 libns:library <http://localhost/library/1421/> ;
194 libns:lane_number "2" .
195 <http://localhost/lane/4203>
196 a libns:IlluminaLane ;
197 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
198 libns:library <http://localhost/library/1421/> ;
199 libns:lane_number "3" .
200 <http://localhost/lane/4204>
201 a libns:IlluminaLane ;
202 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
203 libns:library <http://localhost/library/1421/> ;
204 libns:lane_number "4" .
205 <http://localhost/lane/4205>
206 a libns:IlluminaLane ;
207 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
208 libns:library <http://localhost/library/11154/> ;
209 libns:lane_number "5" .
212 # status "Unknown"@en .
213 <http://localhost/lane/4206>
214 a libns:IlluminaLane ;
215 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
216 libns:library <http://localhost/library/1421/> ;
217 libns:lane_number "6" .
218 <http://localhost/lane/4207>
219 a libns:IlluminaLane ;
220 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
221 libns:library <http://localhost/library/1421/> ;
222 libns:lane_number "7" .
223 <http://localhost/lane/4208>
224 a libns:IlluminaLane ;
225 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
226 libns:library <http://localhost/library/1421/> ;
227 libns:lane_number "8" .
229 <http://localhost/flowcell/61MJTAAXX/>
230 a libns:IlluminaFlowcell ;
231 libns:read_length 76 ;
232 libns:flowcell_type "Single"@en ;
233 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
234 libns:has_lane <http://localhost/lane/6601> ;
235 libns:has_lane <http://localhost/lane/6602> ;
236 libns:has_lane <http://localhost/lane/6603> ;
237 libns:has_lane <http://localhost/lane/6604> ;
238 libns:has_lane <http://localhost/lane/6605> ;
239 libns:has_lane <http://localhost/lane/6606> ;
240 libns:has_lane <http://localhost/lane/6607> ;
241 libns:has_lane <http://localhost/lane/6608> ;
242 libns:flowcell_id "61MJTAAXX"@en .
244 <http://localhost/lane/6601>
245 a libns:IlluminaLane ;
246 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
247 libns:library <http://localhost/library/1661/> ;
248 libns:lane_number "1" .
249 <http://localhost/lane/6602>
250 a libns:IlluminaLane ;
251 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
252 libns:library <http://localhost/library/1661/> ;
253 libns:lane_number "2" .
254 <http://localhost/lane/6603>
255 a libns:IlluminaLane ;
256 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
257 libns:library <http://localhost/library/1661/> ;
258 libns:lane_number "3" .
259 <http://localhost/lane/6604>
260 a libns:IlluminaLane ;
261 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
262 libns:library <http://localhost/library/1661/> ;
263 libns:lane_number "4" .
264 <http://localhost/lane/6605>
265 a libns:IlluminaLane ;
266 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
267 libns:library <http://localhost/library/1661/> ;
268 libns:lane_number "5" .
269 <http://localhost/lane/6606>
270 a libns:IlluminaLane ;
271 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
272 libns:library <http://localhost/library/11154/> ;
273 libns:lane_number "6" .
276 # status "Unknown"@en .
277 <http://localhost/lane/6607>
278 a libns:IlluminaLane ;
279 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
280 libns:library <http://localhost/library/1661/> ;
281 libns:lane_number "7" .
282 <http://localhost/lane/6608>
283 a libns:IlluminaLane ;
284 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
285 libns:library <http://localhost/library/1661/> ;
286 libns:lane_number "8" .
288 <http://localhost/flowcell/30DY0AAXX/>
289 a libns:IlluminaFlowcell ;
290 libns:read_length 76 ;
291 libns:flowcell_type "Paired"@en ;
292 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
293 libns:has_lane <http://localhost/lane/3801> ;
294 libns:has_lane <http://localhost/lane/3802> ;
295 libns:has_lane <http://localhost/lane/3803> ;
296 libns:has_lane <http://localhost/lane/3804> ;
297 libns:has_lane <http://localhost/lane/3805> ;
298 libns:has_lane <http://localhost/lane/3806> ;
299 libns:has_lane <http://localhost/lane/3807> ;
300 libns:has_lane <http://localhost/lane/3808> ;
301 libns:flowcell_id "30DY0AAXX"@en .
303 <http://localhost/lane/3801>
304 a libns:IlluminaLane ;
305 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
306 libns:library <http://localhost/library/1331/> ;
307 libns:lane_number "1" .
308 <http://localhost/lane/3802>
309 a libns:IlluminaLane ;
310 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
311 libns:library <http://localhost/library/1331/> ;
312 libns:lane_number "2" .
313 <http://localhost/lane/3803>
314 a libns:IlluminaLane ;
315 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
316 libns:library <http://localhost/library/1331/> ;
317 libns:lane_number "3" .
318 <http://localhost/lane/3804>
319 a libns:IlluminaLane ;
320 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
321 libns:library <http://localhost/library/1331/> ;
322 libns:lane_number "4" .
323 <http://localhost/lane/3805>
324 a libns:IlluminaLane ;
325 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
326 libns:library <http://localhost/library/1331/> ;
327 libns:lane_number "5" .
328 <http://localhost/lane/3806>
329 a libns:IlluminaLane ;
330 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
331 libns:library <http://localhost/library/1331/> ;
332 libns:lane_number "6" .
333 <http://localhost/lane/3807>
334 a libns:IlluminaLane ;
335 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
336 libns:library <http://localhost/library/1331/> ;
337 libns:lane_number "7" .
338 <http://localhost/lane/3808>
339 a libns:IlluminaLane ;
340 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
341 libns:library <http://localhost/library/11154/> ;
342 libns:lane_number "8" .
345 # status "Unknown"@en .
347 <http://localhost/flowcell/C02F9ACXX/>
348 a libns:IlluminaFlowcell ;
349 libns:read_length 101 ;
350 libns:flowcell_type "Paired"@en ;
351 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
352 libns:has_lane <http://localhost/lane/12300> ;
353 libns:has_lane <http://localhost/lane/12500> ;
354 libns:flowcell_id "C02F9ACXX"@en .
356 <http://localhost/lane/12300>
357 a libns:IlluminaLane ;
358 libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
359 libns:library <http://localhost/library/12345/> ;
360 libns:lane_number "3" .
363 # status "Unknown"@en .
365 <http://localhost/lane/12500>
366 a libns:IlluminaLane ;
367 libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
368 libns:library <http://localhost/library/11154/> ;
369 libns:lane_number "3" .
372 # status "Unknown"@en .
374 <http://localhost/library/11154/>
376 libns:affiliation "TSR"@en;
377 libns:concentration "29.7";
378 libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
379 libns:experiment_type "RNA-seq"@en ;
381 libns:has_lane <http://localhost/lane/3404> ;
382 libns:has_lane <http://localhost/lane/4205> ;
383 libns:has_lane <http://localhost/lane/6606> ;
384 libns:has_lane <http://localhost/lane/3808> ;
385 libns:has_lane <http://localhost/lane/12500> ;
386 libns:insert_size 2000 ;
387 libns:library_id "11154"@en ;
388 libns:library_type "Paired End (Multiplexed)"@en ;
389 libns:made_by "Gary Gygax"@en ;
390 libns:name "Paired Ends ASDF"@en ;
391 libns:replicate "1"@en;
392 libns:species_name "Mus musculus"@en ;
393 libns:stopping_point "Completed"@en ;
394 libns:total_unique_locations 8841201 .
397 <http://localhost/library/12345/>
399 libns:affiliation "TSR"@en;
400 libns:concentration "12.345";
401 libns:cell_line "Unknown"@en ;
402 libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
403 libns:experiment_type "RNA-seq"@en ;
405 libns:has_lane <http://localhost/lane/12300> ;
406 libns:insert_size 2000 ;
407 libns:library_id "12345"@en ;
408 libns:library_type "Paired End (Multiplexed)"@en ;
409 libns:made_by "Gary Gygax"@en ;
410 libns:name "Paired Ends THING"@en ;
411 libns:replicate "1"@en;
412 libns:species_name "Mus musculus"@en ;
413 libns:stopping_point "Completed"@en ;
414 libns:total_unique_locations 8841201 .
417 HOST = "http://localhost"
419 class TestCondorFastq(TestCase):
421 self.cwd = os.getcwd()
423 self.tempdir = tempfile.mkdtemp(prefix='condorfastq_test')
424 self.flowcelldir = os.path.join(self.tempdir, 'flowcells')
425 os.mkdir(self.flowcelldir)
427 self.logdir = os.path.join(self.tempdir, 'log')
428 os.mkdir(self.logdir)
431 os.mkdir(os.path.join(self.flowcelldir, d))
434 filename = os.path.join(self.flowcelldir, f)
435 with open(filename, 'w') as stream:
436 stream.write('testfile')
438 self.result_map = ResultMap()
439 for lib_id in [u'11154', u'12345']:
440 subname = 'sub-%s' % (lib_id,)
441 sub_dir = os.path.join(self.tempdir, subname)
443 self.result_map[lib_id] = sub_dir
445 self.extract = CondorFastqExtract(HOST,
448 load_string_into_model(self.extract.model, 'turtle', lib_turtle)
449 add_default_schemas(self.extract.model)
450 inference = Infer(self.extract.model)
451 errmsgs = list(inference.run_validation())
452 self.assertEqual(len(errmsgs), 0)
453 os.chdir(self.tempdir)
456 shutil.rmtree(self.tempdir)
459 def test_find_relevant_flowcell_ids(self):
460 expected = set(('30221AAXX',
465 flowcell_ids = self.extract.find_relevant_flowcell_ids()
466 self.assertEqual(flowcell_ids, expected)
468 def test_find_archive_sequence(self):
469 seqs = self.extract.find_archive_sequence_files(self.result_map)
472 (u'11154', u'42JUYAAXX', '5', 1, 76, True, 'qseq'),
473 (u'11154', u'42JUYAAXX', '5', 2, 76, True, 'qseq'),
474 (u'11154', u'61MJTAAXX', '6', 1, 76, False, 'qseq'),
475 (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
476 (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
477 (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
478 (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
479 (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
480 (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
481 (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
482 (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
483 (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
484 (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
485 (u'11154', u'30221AAXX', '4', 1, 33, False, 'srf'),
486 (u'11154', u'30DY0AAXX', '8', 1, 151, True, 'srf')
488 found = set([(l.library_id, l.flowcell_id, l.lane_number, l.read, l.cycle, l.ispaired, l.filetype) for l in seqs])
489 self.assertEqual(expected, found)
491 def test_find_needed_targets(self):
492 lib_db = self.extract.find_archive_sequence_files(self.result_map)
494 needed_targets = self.extract.update_fastq_targets(self.result_map,
496 self.assertEqual(len(needed_targets), 9)
497 srf_30221 = needed_targets[
498 self.result_map['11154'] + u'/11154_30221AAXX_c33_l4.fastq']
499 qseq_42JUY_r1 = needed_targets[
500 self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r1.fastq']
501 qseq_42JUY_r2 = needed_targets[
502 self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r2.fastq']
503 qseq_61MJT = needed_targets[
504 self.result_map['11154'] + u'/11154_61MJTAAXX_c76_l6.fastq']
505 split_C02F9_r1 = needed_targets[
506 self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r1.fastq']
507 split_C02F9_r2 = needed_targets[
508 self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r2.fastq']
510 self.assertEqual(len(srf_30221['srf']), 1)
511 self.assertEqual(len(qseq_42JUY_r1['qseq']), 1)
512 self.assertEqual(len(qseq_42JUY_r2['qseq']), 1)
513 self.assertEqual(len(qseq_61MJT['qseq']), 1)
514 self.assertEqual(len(split_C02F9_r1['split_fastq']), 2)
515 self.assertEqual(len(split_C02F9_r2['split_fastq']), 2)
517 def test_generate_fastqs(self):
518 commands = self.extract.build_condor_arguments(self.result_map)
520 srf = commands['srf']
521 qseq = commands['qseq']
522 split = commands['split_fastq']
524 self.assertEqual(len(srf), 2)
525 self.assertEqual(len(qseq), 3)
526 self.assertEqual(len(split), 4)
529 os.path.join(self.result_map['11154'],
530 '11154_30221AAXX_c33_l4.fastq'): {
533 'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
534 'flowcell': u'30221AAXX',
535 'target': os.path.join(self.result_map['11154'],
536 u'11154_30221AAXX_c33_l4.fastq'),
538 os.path.join(self.result_map['11154'],
539 '11154_30DY0AAXX_c151_l8_r1.fastq'): {
542 'flowcell': u'30DY0AAXX',
543 'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'],
546 os.path.join(self.result_map['11154'],
547 u'11154_30DY0AAXX_c151_l8_r1.fastq'),
549 os.path.join(self.result_map['11154'],
550 u'11154_30DY0AAXX_c151_l8_r2.fastq'),
554 expected = srf_data[args['target']]
555 self.assertEqual(args['ispaired'], expected['ispaired'])
556 self.assertEqual(len(args['sources']), 1)
557 _, source_filename = os.path.split(args['sources'][0])
558 self.assertEqual(source_filename, expected['sources'][0])
559 self.assertEqual(args['target'], expected['target'])
561 self.assertEqual(args['target_right'],
562 expected['target_right'])
563 if 'mid' in expected:
564 self.assertEqual(args['mid'], expected['mid'])
567 os.path.join(self.result_map['11154'],
568 '11154_42JUYAAXX_c76_l5_r1.fastq'): {
572 u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2']
574 os.path.join(self.result_map['11154'],
575 '11154_42JUYAAXX_c76_l5_r2.fastq'): {
579 u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2']
581 os.path.join(self.result_map['11154'],
582 '11154_61MJTAAXX_c76_l6.fastq'): {
586 u'woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2'],
590 expected = qseq_data[args['target']]
591 self.assertEqual(args['istar'], expected['istar'])
592 self.assertEqual(args['ispaired'], expected['ispaired'])
593 for i in range(len(expected['sources'])):
594 _, filename = os.path.split(args['sources'][i])
595 self.assertEqual(filename, expected['sources'][i])
598 split_test = dict((( x['target'], x) for x in
599 [{'sources': [u'11154_NoIndex_L003_R1_001.fastq.gz',
600 u'11154_NoIndex_L003_R1_002.fastq.gz'],
601 'pyscript': 'desplit_fastq.pyc',
602 'target': u'11154_C02F9ACXX_c202_l3_r1.fastq'},
603 {'sources': [u'11154_NoIndex_L003_R2_001.fastq.gz',
604 u'11154_NoIndex_L003_R2_002.fastq.gz'],
605 'pyscript': 'desplit_fastq.pyc',
606 'target': u'11154_C02F9ACXX_c202_l3_r2.fastq'},
607 {'sources': [u'12345_CGATGT_L003_R1_001.fastq.gz',
608 u'12345_CGATGT_L003_R1_002.fastq.gz',
609 u'12345_CGATGT_L003_R1_003.fastq.gz',
611 'pyscript': 'desplit_fastq.pyc',
612 'target': u'12345_C02F9ACXX_c202_l3_r1.fastq'},
613 {'sources': [u'12345_CGATGT_L003_R2_001.fastq.gz',
614 u'12345_CGATGT_L003_R2_002.fastq.gz',
615 u'12345_CGATGT_L003_R2_003.fastq.gz',
617 'pyscript': 'desplit_fastq.pyc',
618 'target': u'12345_C02F9ACXX_c202_l3_r2.fastq'}
622 _, target = os.path.split(arg['target'])
623 pyscript = split_test[target]['pyscript']
624 self.assertTrue(arg['pyscript'].endswith(pyscript))
625 filename = split_test[target]['target']
626 self.assertTrue(arg['target'].endswith(filename))
627 for s_index in range(len(arg['sources'])):
628 s1 = arg['sources'][s_index]
629 s2 = split_test[target]['sources'][s_index]
630 self.assertTrue(s1.endswith(s2))
632 def test_create_scripts(self):
633 self.extract.create_scripts(self.result_map)
635 self.assertTrue(os.path.exists('srf.condor'))
636 with open('srf.condor', 'r') as srf:
637 arguments = [ l for l in srf if l.startswith('argument') ]
639 self.assertEqual(len(arguments), 2)
640 self.assertTrue('sub-11154/11154_30221AAXX_c33_l4.fastq'
643 'sub-11154/11154_30DY0AAXX_c151_l8_r2.fastq' in
646 self.assertTrue(os.path.exists('qseq.condor'))
647 with open('qseq.condor', 'r') as srf:
648 arguments = [ l for l in srf if l.startswith('argument') ]
650 self.assertEqual(len(arguments), 3)
651 self.assertTrue('sub-11154/11154_42JUYAAXX_c76_l5_r1.fastq ' in
654 'C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2' in
656 self.assertTrue('61MJTAAXX_c76_l6.fastq -f 61MJTAAXX' in
659 self.assertTrue(os.path.exists('split_fastq.condor'))
660 with open('split_fastq.condor', 'r') as split:
661 arguments = [ l for l in split if l.startswith('argument') ]
663 self.assertEqual(len(arguments), 4)
665 self.assertTrue('11154_NoIndex_L003_R1_001.fastq.gz' in \
668 self.assertTrue('11154_NoIndex_L003_R2_002.fastq.gz' in \
671 self.assertTrue('12345_CGATGT_L003_R1_001.fastq.gz' in arguments[2])
672 self.assertTrue('12345_CGATGT_L003_R1_002.fastq.gz' in arguments[2])
673 self.assertTrue('12345_CGATGT_L003_R1_003.fastq.gz' in arguments[2])
674 self.assertTrue('12345_C02F9ACXX_c202_l3_r1.fastq' in arguments[2])
677 self.assertTrue('12345_CGATGT_L003_R2_001.fastq.gz' in arguments[3])
678 self.assertTrue('12345_CGATGT_L003_R2_002.fastq.gz' in arguments[3])
679 self.assertTrue('12345_CGATGT_L003_R2_003.fastq.gz' in arguments[3])
680 self.assertTrue('12345_C02F9ACXX_c202_l3_r2.fastq' in arguments[3])
683 OLD_DB = settings.DATABASES['default']['NAME']
685 setup_test_environment()
686 connection.creation.create_test_db()
def tearDownModule():
    """Undo the module-level Django test fixtures.

    Destroys the test database through ``connection.creation``, passing
    ``OLD_DB`` (the ``settings.DATABASES['default']['NAME']`` value saved
    at import time) as the name to restore, and then calls
    ``teardown_test_environment()``.

    Any exception raised while destroying the database still propagates
    to the caller.
    """
    try:
        connection.creation.destroy_test_db(OLD_DB)
    finally:
        # Run the environment teardown even when dropping the database
        # fails, so the patches installed by setup_test_environment() do
        # not leak into whatever runs after this module.
        teardown_test_environment()
694 from unittest2 import TestSuite, defaultTestLoader
696 suite.addTests(defaultTestLoader.loadTestsFromTestCase(TestCondorFastq))
if __name__ == "__main__":
    # Executed as a script: hand control to the unittest2 command-line
    # runner, using the name 'suite' as the default test to load.
    import unittest2
    unittest2.main(defaultTest='suite')