5 from pprint import pprint
10 from htsworkflow.submission.condorfastq import CondorFastqExtract
11 from htsworkflow.submission.results import ResultMap
12 from htsworkflow.util.rdfhelp import \
13 add_default_schemas, load_string_into_model, dump_model
14 from htsworkflow.util.rdfinfer import Infer
19 'C02F9ACXX/C1-202/Project_11154',
20 'C02F9ACXX/C1-202/Project_12342_Index1',
21 'C02F9ACXX/C1-202/Project_12342_Index2',
22 'C02F9ACXX/C1-202/Project_12345',
34 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_001.fastq.gz',
35 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_002.fastq.gz',
36 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_001.fastq.gz',
37 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_002.fastq.gz',
38 'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R1_001.fastq.gz',
39 'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R2_001.fastq.gz',
40 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R1_001.fastq.gz',
41 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R2_001.fastq.gz',
42 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R1_001.fastq.gz',
43 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R2_001.fastq.gz',
44 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_001.fastq.gz',
45 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_002.fastq.gz',
46 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_003.fastq.gz',
47 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_001.fastq.gz',
48 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_002.fastq.gz',
49 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_003.fastq.gz',
50 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r1.tar.bz2',
51 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r1.tar.bz2',
52 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r1.tar.bz2',
53 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r1.tar.bz2',
54 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2',
55 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r1.tar.bz2',
56 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r1.tar.bz2',
57 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r1.tar.bz2',
58 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r2.tar.bz2',
59 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r2.tar.bz2',
60 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r2.tar.bz2',
61 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r2.tar.bz2',
62 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r2.tar.bz2',
63 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2',
64 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r2.tar.bz2',
65 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r2.tar.bz2',
66 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r2.tar.bz2',
67 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_1.srf',
68 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_2.srf',
69 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_3.srf',
70 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf',
71 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_5.srf',
72 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_6.srf',
73 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_7.srf',
74 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_8.srf',
75 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_1.srf',
76 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_2.srf',
77 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_3.srf',
78 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_4.srf',
79 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_5.srf',
80 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_6.srf',
81 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_7.srf',
82 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf',
83 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l1_r1.tar.bz2',
84 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l2_r1.tar.bz2',
85 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l3_r1.tar.bz2',
86 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l4_r1.tar.bz2',
87 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l5_r1.tar.bz2',
88 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2',
89 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l7_r1.tar.bz2',
90 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l8_r1.tar.bz2',
93 lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
94 @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
95 @prefix dc: <http://purl.org/dc/elements/1.1/> .
96 @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
97 @prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
98 @prefix seqns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
99 @prefix invns: <http://jumpgate.caltech.edu/wiki/InventoryOntology#> .
101 <http://localhost/library/10000/> a libns:Library .
102 <http://localhost/library/1331/> a libns:Library .
103 <http://localhost/library/1421/> a libns:Library .
104 <http://localhost/library/1661/> a libns:Library .
106 <http://localhost/flowcell/30221AAXX/>
107 a libns:IlluminaFlowcell ;
108 libns:read_length 33 ;
109 libns:flowcell_type "Single"@en ;
110 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
111 libns:has_lane <http://localhost/lane/3401> ;
112 libns:has_lane <http://localhost/lane/3402> ;
113 libns:has_lane <http://localhost/lane/3403> ;
114 libns:has_lane <http://localhost/lane/3404> ;
115 libns:has_lane <http://localhost/lane/3405> ;
116 libns:has_lane <http://localhost/lane/3406> ;
117 libns:has_lane <http://localhost/lane/3407> ;
118 libns:has_lane <http://localhost/lane/3408> ;
119 libns:flowcell_id "30221AAXX"@en .
121 <http://localhost/lane/3401>
122 a libns:IlluminaLane ;
123 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
124 libns:library <http://localhost/library/10000/> ;
125 libns:lane_number "1" .
126 <http://localhost/lane/3402>
127 a libns:IlluminaLane ;
128 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
129 libns:library <http://localhost/library/10000/> ;
130 libns:lane_number "2" .
131 <http://localhost/lane/3403>
132 a libns:IlluminaLane ;
133 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
134 libns:library <http://localhost/library/10000/> ;
135 libns:lane_number "3" .
136 <http://localhost/lane/3404>
137 a libns:IlluminaLane ;
138 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
139 libns:library <http://localhost/library/11154/> ;
140 libns:lane_number "4" .
143 # status "Unknown"@en .
144 <http://localhost/lane/3405>
145 a libns:IlluminaLane ;
146 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
147 libns:library <http://localhost/library/10000/> ;
148 libns:lane_number "5" .
149 <http://localhost/lane/3406>
150 a libns:IlluminaLane ;
151 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
152 libns:library <http://localhost/library/10000/> ;
153 libns:lane_number "6" .
154 <http://localhost/lane/3407>
155 a libns:IlluminaLane ;
156 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
157 libns:library <http://localhost/library/10000/> ;
158 libns:lane_number "7" .
159 <http://localhost/lane/3408>
160 a libns:IlluminaLane ;
161 libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
162 libns:library <http://localhost/library/10000/> ;
163 libns:lane_number "8" .
165 <http://localhost/flowcell/42JUYAAXX/>
166 a libns:IlluminaFlowcell ;
167 libns:read_length 76 ;
168 libns:flowcell_type "Paired"@en ;
169 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
170 libns:has_lane <http://localhost/lane/4201> ;
171 libns:has_lane <http://localhost/lane/4202> ;
172 libns:has_lane <http://localhost/lane/4203> ;
173 libns:has_lane <http://localhost/lane/4204> ;
174 libns:has_lane <http://localhost/lane/4205> ;
175 libns:has_lane <http://localhost/lane/4206> ;
176 libns:has_lane <http://localhost/lane/4207> ;
177 libns:has_lane <http://localhost/lane/4208> ;
178 libns:flowcell_id "42JUYAAXX"@en .
180 <http://localhost/lane/4201>
181 a libns:IlluminaLane ;
182 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
183 libns:library <http://localhost/library/1421/> ;
184 libns:lane_number "1" .
185 <http://localhost/lane/4202>
186 a libns:IlluminaLane ;
187 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
188 libns:library <http://localhost/library/1421/> ;
189 libns:lane_number "2" .
190 <http://localhost/lane/4203>
191 a libns:IlluminaLane ;
192 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
193 libns:library <http://localhost/library/1421/> ;
194 libns:lane_number "3" .
195 <http://localhost/lane/4204>
196 a libns:IlluminaLane ;
197 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
198 libns:library <http://localhost/library/1421/> ;
199 libns:lane_number "4" .
200 <http://localhost/lane/4205>
201 a libns:IlluminaLane ;
202 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
203 libns:library <http://localhost/library/11154/> ;
204 libns:lane_number "5" .
207 # status "Unknown"@en .
208 <http://localhost/lane/4206>
209 a libns:IlluminaLane ;
210 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
211 libns:library <http://localhost/library/1421/> ;
212 libns:lane_number "6" .
213 <http://localhost/lane/4207>
214 a libns:IlluminaLane ;
215 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
216 libns:library <http://localhost/library/1421/> ;
217 libns:lane_number "7" .
218 <http://localhost/lane/4208>
219 a libns:IlluminaLane ;
220 libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
221 libns:library <http://localhost/library/1421/> ;
222 libns:lane_number "8" .
224 <http://localhost/flowcell/61MJTAAXX/>
225 a libns:IlluminaFlowcell ;
226 libns:read_length 76 ;
227 libns:flowcell_type "Single"@en ;
228 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
229 libns:has_lane <http://localhost/lane/6601> ;
230 libns:has_lane <http://localhost/lane/6602> ;
231 libns:has_lane <http://localhost/lane/6603> ;
232 libns:has_lane <http://localhost/lane/6604> ;
233 libns:has_lane <http://localhost/lane/6605> ;
234 libns:has_lane <http://localhost/lane/6606> ;
235 libns:has_lane <http://localhost/lane/6607> ;
236 libns:has_lane <http://localhost/lane/6608> ;
237 libns:flowcell_id "61MJTAAXX"@en .
239 <http://localhost/lane/6601>
240 a libns:IlluminaLane ;
241 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
242 libns:library <http://localhost/library/1661/> ;
243 libns:lane_number "1" .
244 <http://localhost/lane/6602>
245 a libns:IlluminaLane ;
246 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
247 libns:library <http://localhost/library/1661/> ;
248 libns:lane_number "2" .
249 <http://localhost/lane/6603>
250 a libns:IlluminaLane ;
251 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
252 libns:library <http://localhost/library/1661/> ;
253 libns:lane_number "3" .
254 <http://localhost/lane/6604>
255 a libns:IlluminaLane ;
256 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
257 libns:library <http://localhost/library/1661/> ;
258 libns:lane_number "4" .
259 <http://localhost/lane/6605>
260 a libns:IlluminaLane ;
261 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
262 libns:library <http://localhost/library/1661/> ;
263 libns:lane_number "5" .
264 <http://localhost/lane/6606>
265 a libns:IlluminaLane ;
266 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
267 libns:library <http://localhost/library/11154/> ;
268 libns:lane_number "6" .
271 # status "Unknown"@en .
272 <http://localhost/lane/6607>
273 a libns:IlluminaLane ;
274 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
275 libns:library <http://localhost/library/1661/> ;
276 libns:lane_number "7" .
277 <http://localhost/lane/6608>
278 a libns:IlluminaLane ;
279 libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
280 libns:library <http://localhost/library/1661/> ;
281 libns:lane_number "8" .
283 <http://localhost/flowcell/30DY0AAXX/>
284 a libns:IlluminaFlowcell ;
285 libns:read_length 76 ;
286 libns:flowcell_type "Paired"@en ;
287 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
288 libns:has_lane <http://localhost/lane/3801> ;
289 libns:has_lane <http://localhost/lane/3802> ;
290 libns:has_lane <http://localhost/lane/3803> ;
291 libns:has_lane <http://localhost/lane/3804> ;
292 libns:has_lane <http://localhost/lane/3805> ;
293 libns:has_lane <http://localhost/lane/3806> ;
294 libns:has_lane <http://localhost/lane/3807> ;
295 libns:has_lane <http://localhost/lane/3808> ;
296 libns:flowcell_id "30DY0AAXX"@en .
298 <http://localhost/lane/3801>
299 a libns:IlluminaLane ;
300 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
301 libns:library <http://localhost/library/1331/> ;
302 libns:lane_number "1" .
303 <http://localhost/lane/3802>
304 a libns:IlluminaLane ;
305 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
306 libns:library <http://localhost/library/1331/> ;
307 libns:lane_number "2" .
308 <http://localhost/lane/3803>
309 a libns:IlluminaLane ;
310 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
311 libns:library <http://localhost/library/1331/> ;
312 libns:lane_number "3" .
313 <http://localhost/lane/3804>
314 a libns:IlluminaLane ;
315 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
316 libns:library <http://localhost/library/1331/> ;
317 libns:lane_number "4" .
318 <http://localhost/lane/3805>
319 a libns:IlluminaLane ;
320 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
321 libns:library <http://localhost/library/1331/> ;
322 libns:lane_number "5" .
323 <http://localhost/lane/3806>
324 a libns:IlluminaLane ;
325 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
326 libns:library <http://localhost/library/1331/> ;
327 libns:lane_number "6" .
328 <http://localhost/lane/3807>
329 a libns:IlluminaLane ;
330 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
331 libns:library <http://localhost/library/1331/> ;
332 libns:lane_number "7" .
333 <http://localhost/lane/3808>
334 a libns:IlluminaLane ;
335 libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
336 libns:library <http://localhost/library/11154/> ;
337 libns:lane_number "8" .
340 # status "Unknown"@en .
342 <http://localhost/flowcell/C02F9ACXX/>
343 a libns:IlluminaFlowcell ;
344 libns:read_length 101 ;
345 libns:flowcell_type "Paired"@en ;
346 libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
347 libns:has_lane <http://localhost/lane/12300> ;
348 libns:has_lane <http://localhost/lane/12500> ;
349 libns:flowcell_id "C02F9ACXX"@en .
351 <http://localhost/lane/12300>
352 a libns:IlluminaLane ;
353 libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
354 libns:library <http://localhost/library/12345/> ;
355 libns:lane_number "3" .
358 # status "Unknown"@en .
360 <http://localhost/lane/12500>
361 a libns:IlluminaLane ;
362 libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
363 libns:library <http://localhost/library/11154/> ;
364 libns:lane_number "3" .
367 # status "Unknown"@en .
369 <http://localhost/library/11154/>
371 libns:affiliation "TSR"@en;
372 libns:concentration "29.7";
373 libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
374 libns:experiment_type "RNA-seq"@en ;
376 libns:has_lane <http://localhost/lane/3404> ;
377 libns:has_lane <http://localhost/lane/4205> ;
378 libns:has_lane <http://localhost/lane/6606> ;
379 libns:has_lane <http://localhost/lane/3808> ;
380 libns:has_lane <http://localhost/lane/12500> ;
381 libns:insert_size 2000 ;
382 libns:library_id "11154"@en ;
383 libns:library_type "Paired End (Multiplexed)"@en ;
384 libns:made_by "Gary Gygax"@en ;
385 libns:name "Paired Ends ASDF"@en ;
386 libns:replicate "1"@en;
387 libns:species "Mus musculus"@en ;
388 libns:stopping_point "Completed"@en ;
389 libns:total_unique_locations 8841201 .
393 <http://localhost/library/12345/>
395 libns:affiliation "TSR"@en;
396 libns:concentration "12.345";
397 libns:cell_line "Unknown"@en ;
398 libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
399 libns:experiment_type "RNA-seq"@en ;
401 libns:has_lane <http://localhost/lane/12300> ;
402 libns:insert_size 2000 ;
403 libns:library_id "12345"@en ;
404 libns:library_type "Paired End (Multiplexed)"@en ;
405 libns:made_by "Gary Gygax"@en ;
406 libns:name "Paired Ends THING"@en ;
407 libns:replicate "1"@en;
408 libns:species "Mus musculus"@en ;
409 libns:stopping_point "Completed"@en ;
410 libns:total_unique_locations 8841201 .
# Base URL passed as the first argument to CondorFastqExtract when the test fixture builds self.extract.
413 HOST = "http://localhost"
415 class TestCondorFastq(unittest.TestCase):
417 self.cwd = os.getcwd()
419 self.tempdir = tempfile.mkdtemp(prefix='condorfastq_test')
420 self.flowcelldir = os.path.join(self.tempdir, 'flowcells')
421 os.mkdir(self.flowcelldir)
423 self.logdir = os.path.join(self.tempdir, 'log')
424 os.mkdir(self.logdir)
427 os.mkdir(os.path.join(self.flowcelldir, d))
430 filename = os.path.join(self.flowcelldir, f)
431 with open(filename, 'w') as stream:
432 stream.write('testfile')
434 self.result_map = ResultMap()
435 for lib_id in [u'11154', u'12345']:
436 subname = 'sub-%s' % (lib_id,)
437 sub_dir = os.path.join(self.tempdir, subname)
439 self.result_map[lib_id] = sub_dir
441 self.extract = CondorFastqExtract(HOST,
444 load_string_into_model(self.extract.model, 'turtle', lib_turtle)
445 add_default_schemas(self.extract.model)
446 inference = Infer(self.extract.model)
447 errmsgs = list(inference.run_validation())
448 self.assertEqual(len(errmsgs), 0)
449 os.chdir(self.tempdir)
452 shutil.rmtree(self.tempdir)
455 def test_find_relevant_flowcell_ids(self):
# The value returned by find_relevant_flowcell_ids() must equal the expected set of flowcell IDs.
456 expected = set(('30221AAXX',
461 flowcell_ids = self.extract.find_relevant_flowcell_ids()
462 self.assertEqual(flowcell_ids, expected)
464 def test_find_archive_sequence(self):
# Checks the sequence records returned by find_archive_sequence_files() for the fixture's result map.
465 seqs = self.extract.find_archive_sequence_files(self.result_map)
# Each expected tuple is (library_id, flowcell_id, lane_number, read, cycle, ispaired, filetype),
# the same attribute order used to build `found` below.
468 (u'11154', u'42JUYAAXX', '5', 1, 76, True, 'qseq'),
469 (u'11154', u'42JUYAAXX', '5', 2, 76, True, 'qseq'),
470 (u'11154', u'61MJTAAXX', '6', 1, 76, False, 'qseq'),
471 (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
472 (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
473 (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
474 (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
475 (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
476 (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
477 (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
478 (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
479 (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
480 (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
481 (u'11154', u'30221AAXX', '4', 1, 33, False, 'srf'),
482 (u'11154', u'30DY0AAXX', '8', 1, 151, True, 'srf')
# Reduce every record to a plain tuple; collecting them in a set makes the
# comparison independent of order and collapses identical tuples.
484 found = set([(l.library_id, l.flowcell_id, l.lane_number, l.read, l.cycle, l.ispaired, l.filetype) for l in seqs])
485 self.assertEqual(expected, found)
487 def test_find_needed_targets(self):
# Checks the targets returned by update_fastq_targets(): nine in total, with
# per-file-type entry counts verified for library 11154.
488 lib_db = self.extract.find_archive_sequence_files(self.result_map)
490 needed_targets = self.extract.update_fastq_targets(self.result_map,
492 self.assertEqual(len(needed_targets), 9)
# needed_targets is indexed by target fastq path: the library's result_map
# entry followed by '/<fastq file name>'.
493 srf_30221 = needed_targets[
494 self.result_map['11154'] + u'/11154_30221AAXX_c33_l4.fastq']
495 qseq_42JUY_r1 = needed_targets[
496 self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r1.fastq']
497 qseq_42JUY_r2 = needed_targets[
498 self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r2.fastq']
499 qseq_61MJT = needed_targets[
500 self.result_map['11154'] + u'/11154_61MJTAAXX_c76_l6.fastq']
501 split_C02F9_r1 = needed_targets[
502 self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r1.fastq']
503 split_C02F9_r2 = needed_targets[
504 self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r2.fastq']
# Each target entry is keyed by file type: the srf and qseq entries must have
# length 1, the split_fastq entries length 2.
506 self.assertEqual(len(srf_30221['srf']), 1)
507 self.assertEqual(len(qseq_42JUY_r1['qseq']), 1)
508 self.assertEqual(len(qseq_42JUY_r2['qseq']), 1)
509 self.assertEqual(len(qseq_61MJT['qseq']), 1)
510 self.assertEqual(len(split_C02F9_r1['split_fastq']), 2)
511 self.assertEqual(len(split_C02F9_r2['split_fastq']), 2)
513 def test_generate_fastqs(self):
# Checks the entries returned by build_condor_arguments(), grouped under the
# 'srf', 'qseq' and 'split_fastq' keys.
514 commands = self.extract.build_condor_arguments(self.result_map)
516 srf = commands['srf']
517 qseq = commands['qseq']
518 split = commands['split_fastq']
# Expected number of entries per file type.
520 self.assertEqual(len(srf), 2)
521 self.assertEqual(len(qseq), 3)
522 self.assertEqual(len(split), 4)
# Expected srf values, keyed by target fastq path (looked up via args['target'] below).
525 os.path.join(self.result_map['11154'],
526 '11154_30221AAXX_c33_l4.fastq'): {
529 'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
530 'flowcell': u'30221AAXX',
531 'target': os.path.join(self.result_map['11154'],
532 u'11154_30221AAXX_c33_l4.fastq'),
534 os.path.join(self.result_map['11154'],
535 '11154_30DY0AAXX_c151_l8_r1.fastq'): {
538 'flowcell': u'30DY0AAXX',
539 'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'],
542 os.path.join(self.result_map['11154'],
543 u'11154_30DY0AAXX_c151_l8_r1.fastq'),
545 os.path.join(self.result_map['11154'],
546 u'11154_30DY0AAXX_c151_l8_r2.fastq'),
# Compare one srf entry with the expectation for its target. The single source
# is compared by file name only (directory part dropped with os.path.split).
550 expected = srf_data[args['target']]
551 self.assertEqual(args['ispaired'], expected['ispaired'])
552 self.assertEqual(len(args['sources']), 1)
553 _, source_filename = os.path.split(args['sources'][0])
554 self.assertEqual(source_filename, expected['sources'][0])
555 self.assertEqual(args['target'], expected['target'])
557 self.assertEqual(args['target_right'],
558 expected['target_right'])
559 if 'mid' in expected:
560 self.assertEqual(args['mid'], expected['mid'])
# Expected qseq values, keyed by target fastq path (looked up via args['target'] below).
563 os.path.join(self.result_map['11154'],
564 '11154_42JUYAAXX_c76_l5_r1.fastq'): {
568 u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2']
570 os.path.join(self.result_map['11154'],
571 '11154_42JUYAAXX_c76_l5_r2.fastq'): {
575 u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2']
577 os.path.join(self.result_map['11154'],
578 '11154_61MJTAAXX_c76_l6.fastq'): {
582 u'woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2'],
# Compare one qseq entry with the expectation for its target; each expected
# source is matched against the file-name part of the actual source path.
586 expected = qseq_data[args['target']]
587 self.assertEqual(args['istar'], expected['istar'])
588 self.assertEqual(args['ispaired'], expected['ispaired'])
589 for i in range(len(expected['sources'])):
590 _, filename = os.path.split(args['sources'][i])
591 self.assertEqual(filename, expected['sources'][i])
# Expected split_fastq values, indexed by each entry's own 'target' file name.
594 split_test = dict((( x['target'], x) for x in
595 [{'sources': [u'11154_NoIndex_L003_R1_001.fastq.gz',
596 u'11154_NoIndex_L003_R1_002.fastq.gz'],
597 'pyscript': 'desplit_fastq.pyc',
598 'target': u'11154_C02F9ACXX_c202_l3_r1.fastq'},
599 {'sources': [u'11154_NoIndex_L003_R2_001.fastq.gz',
600 u'11154_NoIndex_L003_R2_002.fastq.gz'],
601 'pyscript': 'desplit_fastq.pyc',
602 'target': u'11154_C02F9ACXX_c202_l3_r2.fastq'},
603 {'sources': [u'12345_CGATGT_L003_R1_001.fastq.gz',
604 u'12345_CGATGT_L003_R1_002.fastq.gz',
605 u'12345_CGATGT_L003_R1_003.fastq.gz',
607 'pyscript': 'desplit_fastq.pyc',
608 'target': u'12345_C02F9ACXX_c202_l3_r1.fastq'},
609 {'sources': [u'12345_CGATGT_L003_R2_001.fastq.gz',
610 u'12345_CGATGT_L003_R2_002.fastq.gz',
611 u'12345_CGATGT_L003_R2_003.fastq.gz',
613 'pyscript': 'desplit_fastq.pyc',
614 'target': u'12345_C02F9ACXX_c202_l3_r2.fastq'}
# Look up the expectation by the file-name part of the actual target; the
# script, target and every source are then matched by suffix (endswith).
618 _, target = os.path.split(arg['target'])
619 pyscript = split_test[target]['pyscript']
620 self.assertTrue(arg['pyscript'].endswith(pyscript))
621 filename = split_test[target]['target']
622 self.assertTrue(arg['target'].endswith(filename))
623 for s_index in range(len(arg['sources'])):
624 s1 = arg['sources'][s_index]
625 s2 = split_test[target]['sources'][s_index]
626 self.assertTrue(s1.endswith(s2))
628 def test_create_scripts(self):
# After create_scripts() runs, three condor files must exist under relative
# names (resolved against the current working directory); each is scanned
# for the lines that start with 'argument'.
629 self.extract.create_scripts(self.result_map)
# srf.condor: two argument lines expected.
631 self.assertTrue(os.path.exists('srf.condor'))
632 with open('srf.condor', 'r') as srf:
633 arguments = [ l for l in srf if l.startswith('argument') ]
635 self.assertEqual(len(arguments), 2)
636 self.assertTrue('sub-11154/11154_30221AAXX_c33_l4.fastq'
639 'sub-11154/11154_30DY0AAXX_c151_l8_r2.fastq' in
# qseq.condor: three argument lines expected.
642 self.assertTrue(os.path.exists('qseq.condor'))
643 with open('qseq.condor', 'r') as srf:
644 arguments = [ l for l in srf if l.startswith('argument') ]
646 self.assertEqual(len(arguments), 3)
647 self.assertTrue('sub-11154/11154_42JUYAAXX_c76_l5_r1.fastq ' in
650 'C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2' in
652 self.assertTrue('61MJTAAXX_c76_l6.fastq -f 61MJTAAXX' in
# split_fastq.condor: four argument lines expected.
655 self.assertTrue(os.path.exists('split_fastq.condor'))
656 with open('split_fastq.condor', 'r') as split:
657 arguments = [ l for l in split if l.startswith('argument') ]
659 self.assertEqual(len(arguments), 4)
# The split_fastq argument lines must mention the expected source fastq.gz
# files; arguments[2] and arguments[3] are checked for the 12345 R1 and R2
# sources and their r1/r2 targets respectively.
661 self.assertTrue('11154_NoIndex_L003_R1_001.fastq.gz' in \
664 self.assertTrue('11154_NoIndex_L003_R2_002.fastq.gz' in \
667 self.assertTrue('12345_CGATGT_L003_R1_001.fastq.gz' in arguments[2])
668 self.assertTrue('12345_CGATGT_L003_R1_002.fastq.gz' in arguments[2])
669 self.assertTrue('12345_CGATGT_L003_R1_003.fastq.gz' in arguments[2])
670 self.assertTrue('12345_C02F9ACXX_c202_l3_r1.fastq' in arguments[2])
# Same checks for the read-2 line.
673 self.assertTrue('12345_CGATGT_L003_R2_001.fastq.gz' in arguments[3])
674 self.assertTrue('12345_CGATGT_L003_R2_002.fastq.gz' in arguments[3])
675 self.assertTrue('12345_CGATGT_L003_R2_003.fastq.gz' in arguments[3])
676 self.assertTrue('12345_C02F9ACXX_c202_l3_r2.fastq' in arguments[3])
680 suite = unittest.makeSuite(TestCondorFastq, 'test')
# Script entry point: hand control to unittest.main, naming 'suite' as the default test to run.
683 if __name__ == "__main__":
684 unittest.main(defaultTest='suite')