ffb9f88620356ba0e61800db60fa4715463515e2
[htsworkflow.git] / htsworkflow / submission / test / test_condorfastq.py
1 #!/usr/bin/env python
2
3 import copy
4 import os
5 from pprint import pprint
6 import shutil
7 import tempfile
8
9 from django.test import TestCase
10
11 from htsworkflow.submission.condorfastq import CondorFastqExtract
12 from htsworkflow.submission.results import ResultMap
13 from htsworkflow.util.rdfhelp import \
14      add_default_schemas, load_string_into_model, dump_model
15 from htsworkflow.util.rdfinfer import Infer
16
# Directory skeleton of the fake flowcell archive, relative to the
# temporary flowcell root created in TestCondorFastq.setUp.
#
# setUp creates these one at a time with os.mkdir, which does not create
# intermediate directories, so every parent must be listed before any of
# its children.
FCDIRS = [
    'C02F9ACXX',
    'C02F9ACXX/C1-202',
    'C02F9ACXX/C1-202/Project_11154',
    'C02F9ACXX/C1-202/Project_12342_Index1',
    'C02F9ACXX/C1-202/Project_12342_Index2',
    'C02F9ACXX/C1-202/Project_12345',
    '42JUYAAXX',
    '42JUYAAXX/C1-76',
    '30221AAXX',
    '30221AAXX/C1-33',
    '30DY0AAXX',
    '30DY0AAXX/C1-151',
    '61MJTAAXX',
    '61MJTAAXX/C1-76',
]
33
# Placeholder sequence files written into the fake flowcell archive by
# TestCondorFastq.setUp (paths are relative to the flowcell root, and every
# directory used here must appear in FCDIRS).
#
# Three archive layouts are represented:
#   * split, gzipped fastq files under Project_* directories (C02F9ACXX)
#   * per-lane, per-read qseq tarballs (42JUYAAXX, 61MJTAAXX)
#   * per-lane srf files (30221AAXX, 30DY0AAXX)
#
# Each path is listed exactly once; a repeated entry would only cause the
# same placeholder file to be rewritten.
DATAFILES = [
    'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_002.fastq.gz',
    'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_002.fastq.gz',
    'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R1_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R2_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R1_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R2_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R1_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R2_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_002.fastq.gz',
    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_003.fastq.gz',
    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_002.fastq.gz',
    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_003.fastq.gz',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r1.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r1.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r1.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r1.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r1.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r1.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r1.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r2.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r2.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r2.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r2.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r2.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r2.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r2.tar.bz2',
    '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_1.srf',
    '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_2.srf',
    '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_3.srf',
    '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf',
    '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_5.srf',
    '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_6.srf',
    '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_7.srf',
    '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_8.srf',
    '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_1.srf',
    '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_2.srf',
    '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_3.srf',
    '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_4.srf',
    '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_5.srf',
    '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_6.srf',
    '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_7.srf',
    '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf',
    '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l1_r1.tar.bz2',
    '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l2_r1.tar.bz2',
    '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l3_r1.tar.bz2',
    '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l4_r1.tar.bz2',
    '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l5_r1.tar.bz2',
    '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2',
    '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l7_r1.tar.bz2',
    '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l8_r1.tar.bz2',
]
93
# RDF (Turtle) fixture loaded into the CondorFastqExtract model in setUp.
#
# It describes five flowcells (30221AAXX, 42JUYAAXX, 61MJTAAXX, 30DY0AAXX
# and C02F9ACXX) together with their lanes, and gives full metadata for
# libraries 11154 and 12345.  Libraries 10000, 1331, 1421 and 1661 are only
# declared as libns:Library; they are the libraries attached to the other
# lanes of the first four flowcells.  Library 11154 has exactly one lane on
# every flowcell; library 12345 has a single lane on C02F9ACXX.
#
# The '#' lines inside the literal are Turtle comments, not Python comments;
# they are part of the string handed to the parser.
lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
@prefix seqns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
@prefix invns: <http://jumpgate.caltech.edu/wiki/InventoryOntology#> .

<http://localhost/library/10000/> a libns:Library .
<http://localhost/library/1331/> a libns:Library .
<http://localhost/library/1421/> a libns:Library .
<http://localhost/library/1661/> a libns:Library .

<http://localhost/flowcell/30221AAXX/>
        a libns:IlluminaFlowcell ;
        libns:read_length 33 ;
        libns:flowcell_type "Single"@en ;
        libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
        libns:has_lane <http://localhost/lane/3401> ;
        libns:has_lane <http://localhost/lane/3402> ;
        libns:has_lane <http://localhost/lane/3403> ;
        libns:has_lane <http://localhost/lane/3404> ;
        libns:has_lane <http://localhost/lane/3405> ;
        libns:has_lane <http://localhost/lane/3406> ;
        libns:has_lane <http://localhost/lane/3407> ;
        libns:has_lane <http://localhost/lane/3408> ;
        libns:flowcell_id "30221AAXX"@en .

<http://localhost/lane/3401>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
        libns:library <http://localhost/library/10000/> ;
        libns:lane_number "1" .
<http://localhost/lane/3402>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
        libns:library <http://localhost/library/10000/> ;
        libns:lane_number "2" .
<http://localhost/lane/3403>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
        libns:library <http://localhost/library/10000/> ;
        libns:lane_number "3" .
<http://localhost/lane/3404>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
        libns:library <http://localhost/library/11154/> ;
        libns:lane_number "4" .
        # paired_end 1;
        # read_length 33;
        # status "Unknown"@en .
<http://localhost/lane/3405>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
        libns:library <http://localhost/library/10000/> ;
        libns:lane_number "5" .
<http://localhost/lane/3406>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
        libns:library <http://localhost/library/10000/> ;
        libns:lane_number "6" .
<http://localhost/lane/3407>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
        libns:library <http://localhost/library/10000/> ;
        libns:lane_number "7" .
<http://localhost/lane/3408>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
        libns:library <http://localhost/library/10000/> ;
        libns:lane_number "8" .

<http://localhost/flowcell/42JUYAAXX/>
        a libns:IlluminaFlowcell ;
        libns:read_length 76 ;
        libns:flowcell_type "Paired"@en ;
        libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
        libns:has_lane <http://localhost/lane/4201> ;
        libns:has_lane <http://localhost/lane/4202> ;
        libns:has_lane <http://localhost/lane/4203> ;
        libns:has_lane <http://localhost/lane/4204> ;
        libns:has_lane <http://localhost/lane/4205> ;
        libns:has_lane <http://localhost/lane/4206> ;
        libns:has_lane <http://localhost/lane/4207> ;
        libns:has_lane <http://localhost/lane/4208> ;
        libns:flowcell_id "42JUYAAXX"@en .

<http://localhost/lane/4201>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
        libns:library <http://localhost/library/1421/> ;
        libns:lane_number "1" .
<http://localhost/lane/4202>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
        libns:library <http://localhost/library/1421/> ;
        libns:lane_number "2" .
<http://localhost/lane/4203>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
        libns:library <http://localhost/library/1421/> ;
        libns:lane_number "3" .
<http://localhost/lane/4204>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
        libns:library <http://localhost/library/1421/> ;
        libns:lane_number "4" .
<http://localhost/lane/4205>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
        libns:library <http://localhost/library/11154/> ;
        libns:lane_number "5" .
        # paired_end 1;
        # read_length 76;
        # status "Unknown"@en .
<http://localhost/lane/4206>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
        libns:library <http://localhost/library/1421/> ;
        libns:lane_number "6" .
<http://localhost/lane/4207>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
        libns:library <http://localhost/library/1421/> ;
        libns:lane_number "7" .
<http://localhost/lane/4208>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
        libns:library <http://localhost/library/1421/> ;
        libns:lane_number "8" .

<http://localhost/flowcell/61MJTAAXX/>
        a libns:IlluminaFlowcell ;
        libns:read_length 76 ;
        libns:flowcell_type "Single"@en ;
        libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
        libns:has_lane <http://localhost/lane/6601> ;
        libns:has_lane <http://localhost/lane/6602> ;
        libns:has_lane <http://localhost/lane/6603> ;
        libns:has_lane <http://localhost/lane/6604> ;
        libns:has_lane <http://localhost/lane/6605> ;
        libns:has_lane <http://localhost/lane/6606> ;
        libns:has_lane <http://localhost/lane/6607> ;
        libns:has_lane <http://localhost/lane/6608> ;
        libns:flowcell_id "61MJTAAXX"@en .

<http://localhost/lane/6601>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
        libns:library <http://localhost/library/1661/> ;
        libns:lane_number "1" .
<http://localhost/lane/6602>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
        libns:library <http://localhost/library/1661/> ;
        libns:lane_number "2" .
<http://localhost/lane/6603>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
        libns:library <http://localhost/library/1661/> ;
        libns:lane_number "3" .
<http://localhost/lane/6604>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
        libns:library <http://localhost/library/1661/> ;
        libns:lane_number "4" .
<http://localhost/lane/6605>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
        libns:library <http://localhost/library/1661/> ;
        libns:lane_number "5" .
<http://localhost/lane/6606>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
        libns:library <http://localhost/library/11154/> ;
        libns:lane_number "6" .
        # paired_end 1;
        # read_length 76;
        # status "Unknown"@en .
<http://localhost/lane/6607>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
        libns:library <http://localhost/library/1661/> ;
        libns:lane_number "7" .
<http://localhost/lane/6608>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
        libns:library <http://localhost/library/1661/> ;
        libns:lane_number "8" .

<http://localhost/flowcell/30DY0AAXX/>
        a libns:IlluminaFlowcell ;
        libns:read_length 76 ;
        libns:flowcell_type "Paired"@en ;
        libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
        libns:has_lane <http://localhost/lane/3801> ;
        libns:has_lane <http://localhost/lane/3802> ;
        libns:has_lane <http://localhost/lane/3803> ;
        libns:has_lane <http://localhost/lane/3804> ;
        libns:has_lane <http://localhost/lane/3805> ;
        libns:has_lane <http://localhost/lane/3806> ;
        libns:has_lane <http://localhost/lane/3807> ;
        libns:has_lane <http://localhost/lane/3808> ;
        libns:flowcell_id "30DY0AAXX"@en .

<http://localhost/lane/3801>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
        libns:library <http://localhost/library/1331/> ;
        libns:lane_number "1" .
<http://localhost/lane/3802>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
        libns:library <http://localhost/library/1331/> ;
        libns:lane_number "2" .
<http://localhost/lane/3803>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
        libns:library <http://localhost/library/1331/> ;
        libns:lane_number "3" .
<http://localhost/lane/3804>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
        libns:library <http://localhost/library/1331/> ;
        libns:lane_number "4" .
<http://localhost/lane/3805>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
        libns:library <http://localhost/library/1331/> ;
        libns:lane_number "5" .
<http://localhost/lane/3806>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
        libns:library <http://localhost/library/1331/> ;
        libns:lane_number "6" .
<http://localhost/lane/3807>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
        libns:library <http://localhost/library/1331/> ;
        libns:lane_number "7" .
<http://localhost/lane/3808>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
        libns:library <http://localhost/library/11154/> ;
        libns:lane_number "8" .
        # paired_end 1;
        # read_length 76;
        # status "Unknown"@en .

<http://localhost/flowcell/C02F9ACXX/>
        a libns:IlluminaFlowcell ;
        libns:read_length 101 ;
        libns:flowcell_type "Paired"@en ;
        libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
        libns:has_lane <http://localhost/lane/12300> ;
        libns:has_lane <http://localhost/lane/12500> ;
        libns:flowcell_id "C02F9ACXX"@en .

<http://localhost/lane/12300>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
        libns:library <http://localhost/library/12345/> ;
        libns:lane_number "3" .
        # paired_end 1;
        # read_length 101;
        # status "Unknown"@en .

<http://localhost/lane/12500>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
        libns:library <http://localhost/library/11154/> ;
        libns:lane_number "3" .
        # paired_end 1;
        # read_length 101;
        # status "Unknown"@en .

<http://localhost/library/11154/>
        a libns:Library ;
        libns:affiliation "TSR"@en;
        libns:concentration "29.7";
        libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
        libns:experiment_type "RNA-seq"@en ;
        libns:gel_cut 300 ;
        libns:has_lane <http://localhost/lane/3404> ;
        libns:has_lane <http://localhost/lane/4205> ;
        libns:has_lane <http://localhost/lane/6606> ;
        libns:has_lane <http://localhost/lane/3808> ;
        libns:has_lane <http://localhost/lane/12500> ;
        libns:insert_size 2000 ;
        libns:library_id "11154"@en ;
        libns:library_type "Paired End (Multiplexed)"@en ;
        libns:made_by "Gary Gygax"@en ;
        libns:name "Paired Ends ASDF"@en ;
        libns:replicate "1"@en;
        libns:species_name "Mus musculus"@en ;
        libns:stopping_point "Completed"@en ;
        libns:total_unique_locations 8841201 .
        # cell_line

<http://localhost/library/12345/>
        a libns:Library ;
        libns:affiliation "TSR"@en;
        libns:concentration "12.345";
        libns:cell_line "Unknown"@en ;
        libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
        libns:experiment_type "RNA-seq"@en ;
        libns:gel_cut 300 ;
        libns:has_lane <http://localhost/lane/12300> ;
        libns:insert_size 2000 ;
        libns:library_id "12345"@en ;
        libns:library_type "Paired End (Multiplexed)"@en ;
        libns:made_by "Gary Gygax"@en ;
        libns:name "Paired Ends THING"@en ;
        libns:replicate "1"@en;
        libns:species_name "Mus musculus"@en ;
        libns:stopping_point "Completed"@en ;
        libns:total_unique_locations 8841201 .
        # cell_line
"""
# Host URL passed to CondorFastqExtract; it matches the http://localhost
# prefix used by every resource URI in lib_turtle.
HOST = "http://localhost"
414
class TestCondorFastq(TestCase):
    """Tests for CondorFastqExtract against a fake flowcell archive.

    Every test runs inside a fresh temporary directory that contains the
    directory tree from FCDIRS, placeholder files from DATAFILES, a log
    directory and one ``sub-<library id>`` result directory per library of
    interest.  The extractor's RDF model is populated from lib_turtle.
    """

    def setUp(self):
        """Build the temporary archive, result map and extractor."""
        self.cwd = os.getcwd()

        self.tempdir = tempfile.mkdtemp(prefix='condorfastq_test')
        # Register removal immediately: tearDown is not called when setUp
        # itself fails, but cleanups are, so the directory can never leak.
        self.addCleanup(shutil.rmtree, self.tempdir)

        self.flowcelldir = os.path.join(self.tempdir, 'flowcells')
        os.mkdir(self.flowcelldir)

        self.logdir = os.path.join(self.tempdir, 'log')
        os.mkdir(self.logdir)

        # FCDIRS lists parents before children, so plain mkdir is enough.
        for d in FCDIRS:
            os.mkdir(os.path.join(self.flowcelldir, d))

        for f in DATAFILES:
            filename = os.path.join(self.flowcelldir, f)
            with open(filename, 'w') as stream:
                stream.write('testfile')

        self.result_map = ResultMap()
        for lib_id in [u'11154', u'12345']:
            subname = 'sub-%s' % (lib_id,)
            sub_dir = os.path.join(self.tempdir, subname)
            os.mkdir(sub_dir)
            self.result_map[lib_id] = sub_dir

        self.extract = CondorFastqExtract(HOST,
                                          self.flowcelldir,
                                          self.logdir)
        load_string_into_model(self.extract.model, 'turtle', lib_turtle)
        add_default_schemas(self.extract.model)
        inference = Infer(self.extract.model)
        errmsgs = list(inference.run_validation())
        # Compare the list itself so a failure shows the validation messages.
        self.assertEqual(errmsgs, [])

        # Cleanups run last-in first-out: this chdir back to the original
        # directory therefore happens before the rmtree registered above,
        # so the temporary directory is never removed while it is the
        # current working directory.
        self.addCleanup(os.chdir, self.cwd)
        os.chdir(self.tempdir)

    def _condor_arguments(self, filename):
        """Return the sorted ``argument...`` lines of a condor script.

        Fails the test if *filename* does not exist.
        """
        self.assertTrue(os.path.exists(filename))
        with open(filename, 'r') as stream:
            return sorted(l for l in stream if l.startswith('argument'))

    def test_find_relevant_flowcell_ids(self):
        """All five flowcells in the model are reported."""
        expected = set(('30221AAXX',
                        '42JUYAAXX',
                        '61MJTAAXX',
                        '30DY0AAXX',
                        'C02F9ACXX'))
        flowcell_ids = self.extract.find_relevant_flowcell_ids()
        self.assertEqual(flowcell_ids, expected)

    def test_find_archive_sequence(self):
        """Archive files for both libraries are found and classified."""
        seqs = self.extract.find_archive_sequence_files(self.result_map)

        # Tuples are (library_id, flowcell_id, lane_number, read, cycle,
        # ispaired, filetype).  The comparison is between sets, so several
        # archive files that share these attributes (for example the
        # _001/_002/_003 pieces of a split fastq) collapse to one entry.
        expected = set([
            (u'11154', u'42JUYAAXX', '5', 1, 76, True, 'qseq'),
            (u'11154', u'42JUYAAXX', '5', 2, 76, True, 'qseq'),
            (u'11154', u'61MJTAAXX', '6', 1, 76, False, 'qseq'),
            (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
            (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
            (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
            (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
            (u'11154', u'30221AAXX', '4', 1, 33, False, 'srf'),
            (u'11154', u'30DY0AAXX', '8', 1, 151, True, 'srf'),
        ])
        found = set([(l.library_id,
                      l.flowcell_id,
                      l.lane_number,
                      l.read,
                      l.cycle,
                      l.ispaired,
                      l.filetype) for l in seqs])
        self.assertEqual(expected, found)

    def test_find_needed_targets(self):
        """Each fastq target is mapped to the expected number of sources."""
        lib_db = self.extract.find_archive_sequence_files(self.result_map)

        needed_targets = self.extract.update_fastq_targets(self.result_map,
                                                           lib_db)
        self.assertEqual(len(needed_targets), 9)

        lib_dir = self.result_map['11154']
        srf_30221 = needed_targets[
            lib_dir + u'/11154_30221AAXX_c33_l4.fastq']
        qseq_42JUY_r1 = needed_targets[
            lib_dir + u'/11154_42JUYAAXX_c76_l5_r1.fastq']
        qseq_42JUY_r2 = needed_targets[
            lib_dir + u'/11154_42JUYAAXX_c76_l5_r2.fastq']
        qseq_61MJT = needed_targets[
            lib_dir + u'/11154_61MJTAAXX_c76_l6.fastq']
        split_C02F9_r1 = needed_targets[
            lib_dir + u'/11154_C02F9ACXX_c202_l3_r1.fastq']
        split_C02F9_r2 = needed_targets[
            lib_dir + u'/11154_C02F9ACXX_c202_l3_r2.fastq']

        self.assertEqual(len(srf_30221['srf']), 1)
        self.assertEqual(len(qseq_42JUY_r1['qseq']), 1)
        self.assertEqual(len(qseq_42JUY_r2['qseq']), 1)
        self.assertEqual(len(qseq_61MJT['qseq']), 1)
        self.assertEqual(len(split_C02F9_r1['split_fastq']), 2)
        self.assertEqual(len(split_C02F9_r2['split_fastq']), 2)

    def test_generate_fastqs(self):
        """Condor argument dictionaries are built for every file type."""
        commands = self.extract.build_condor_arguments(self.result_map)

        srf = commands['srf']
        qseq = commands['qseq']
        split = commands['split_fastq']

        self.assertEqual(len(srf), 2)
        self.assertEqual(len(qseq), 3)
        self.assertEqual(len(split), 4)

        lib_dir = self.result_map['11154']

        # Expected srf jobs, keyed by their (left) target path.
        srf_data = {
            os.path.join(lib_dir, '11154_30221AAXX_c33_l4.fastq'): {
                'mid': None,
                'ispaired': False,
                'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
                'flowcell': u'30221AAXX',
                'target': os.path.join(lib_dir,
                                       u'11154_30221AAXX_c33_l4.fastq'),
            },
            os.path.join(lib_dir, '11154_30DY0AAXX_c151_l8_r1.fastq'): {
                'ispaired': True,
                'flowcell': u'30DY0AAXX',
                'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'],
                'mid': 76,
                'target': os.path.join(lib_dir,
                                       u'11154_30DY0AAXX_c151_l8_r1.fastq'),
                'target_right': os.path.join(
                    lib_dir, u'11154_30DY0AAXX_c151_l8_r2.fastq'),
            },
        }
        for args in srf:
            expected = srf_data[args['target']]
            self.assertEqual(args['ispaired'], expected['ispaired'])
            self.assertEqual(len(args['sources']), 1)
            _, source_filename = os.path.split(args['sources'][0])
            self.assertEqual(source_filename, expected['sources'][0])
            self.assertEqual(args['target'], expected['target'])
            if args['ispaired']:
                self.assertEqual(args['target_right'],
                                 expected['target_right'])
            if 'mid' in expected:
                self.assertEqual(args['mid'], expected['mid'])

        # Expected qseq jobs, keyed by target path.
        qseq_data = {
            os.path.join(lib_dir, '11154_42JUYAAXX_c76_l5_r1.fastq'): {
                'istar': True,
                'ispaired': True,
                'sources': [
                    u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2'],
            },
            os.path.join(lib_dir, '11154_42JUYAAXX_c76_l5_r2.fastq'): {
                'istar': True,
                'ispaired': True,
                'sources': [
                    u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2'],
            },
            os.path.join(lib_dir, '11154_61MJTAAXX_c76_l6.fastq'): {
                'istar': True,
                'ispaired': False,
                'sources': [
                    u'woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2'],
            },
        }
        for args in qseq:
            expected = qseq_data[args['target']]
            self.assertEqual(args['istar'], expected['istar'])
            self.assertEqual(args['ispaired'], expected['ispaired'])
            for i, expected_name in enumerate(expected['sources']):
                _, filename = os.path.split(args['sources'][i])
                self.assertEqual(filename, expected_name)

        # Expected split_fastq jobs, keyed by the target's base name.
        split_test = dict(((x['target'], x) for x in [
            {'sources': [u'11154_NoIndex_L003_R1_001.fastq.gz',
                         u'11154_NoIndex_L003_R1_002.fastq.gz'],
             'pyscript': 'desplit_fastq.pyc',
             'target': u'11154_C02F9ACXX_c202_l3_r1.fastq'},
            {'sources': [u'11154_NoIndex_L003_R2_001.fastq.gz',
                         u'11154_NoIndex_L003_R2_002.fastq.gz'],
             'pyscript': 'desplit_fastq.pyc',
             'target': u'11154_C02F9ACXX_c202_l3_r2.fastq'},
            {'sources': [u'12345_CGATGT_L003_R1_001.fastq.gz',
                         u'12345_CGATGT_L003_R1_002.fastq.gz',
                         u'12345_CGATGT_L003_R1_003.fastq.gz'],
             'pyscript': 'desplit_fastq.pyc',
             'target': u'12345_C02F9ACXX_c202_l3_r1.fastq'},
            {'sources': [u'12345_CGATGT_L003_R2_001.fastq.gz',
                         u'12345_CGATGT_L003_R2_002.fastq.gz',
                         u'12345_CGATGT_L003_R2_003.fastq.gz'],
             'pyscript': 'desplit_fastq.pyc',
             'target': u'12345_C02F9ACXX_c202_l3_r2.fastq'},
        ]))
        for arg in split:
            _, target = os.path.split(arg['target'])
            expected = split_test[target]
            self.assertTrue(arg['pyscript'].endswith(expected['pyscript']))
            self.assertTrue(arg['target'].endswith(expected['target']))
            for s_index, source in enumerate(arg['sources']):
                self.assertTrue(
                    source.endswith(expected['sources'][s_index]))

    def test_create_scripts(self):
        """create_scripts writes one condor file per file type into cwd."""
        self.extract.create_scripts(self.result_map)

        arguments = self._condor_arguments('srf.condor')
        self.assertEqual(len(arguments), 2)
        self.assertIn('sub-11154/11154_30221AAXX_c33_l4.fastq',
                      arguments[0])
        self.assertIn('sub-11154/11154_30DY0AAXX_c151_l8_r2.fastq',
                      arguments[1])

        arguments = self._condor_arguments('qseq.condor')
        self.assertEqual(len(arguments), 3)
        self.assertIn('sub-11154/11154_42JUYAAXX_c76_l5_r1.fastq ',
                      arguments[0])
        self.assertIn(
            'C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2',
            arguments[1])
        self.assertIn('61MJTAAXX_c76_l6.fastq -f 61MJTAAXX',
                      arguments[2])

        arguments = self._condor_arguments('split_fastq.condor')
        self.assertEqual(len(arguments), 4)
        # Library 11154, lane 3, read 1
        self.assertIn('11154_NoIndex_L003_R1_001.fastq.gz', arguments[0])
        # Library 11154, lane 3, read 2
        self.assertIn('11154_NoIndex_L003_R2_002.fastq.gz', arguments[1])
        # Library 12345, lane 3, read 1
        self.assertIn('12345_CGATGT_L003_R1_001.fastq.gz', arguments[2])
        self.assertIn('12345_CGATGT_L003_R1_002.fastq.gz', arguments[2])
        self.assertIn('12345_CGATGT_L003_R1_003.fastq.gz', arguments[2])
        self.assertIn('12345_C02F9ACXX_c202_l3_r1.fastq', arguments[2])
        # Library 12345, lane 3, read 2
        self.assertIn('12345_CGATGT_L003_R2_001.fastq.gz', arguments[3])
        self.assertIn('12345_CGATGT_L003_R2_002.fastq.gz', arguments[3])
        self.assertIn('12345_CGATGT_L003_R2_003.fastq.gz', arguments[3])
        self.assertIn('12345_C02F9ACXX_c202_l3_r2.fastq', arguments[3])
677
678
def suite():
    """Return a TestSuite containing every test in TestCondorFastq.

    The unittest2 backport is used when it is installed; otherwise the
    standard library's unittest, which provides the same TestSuite and
    defaultTestLoader names, is used instead.
    """
    try:
        from unittest2 import TestSuite, defaultTestLoader
    except ImportError:
        from unittest import TestSuite, defaultTestLoader
    tests = TestSuite()
    tests.addTests(defaultTestLoader.loadTestsFromTestCase(TestCondorFastq))
    return tests
684
if __name__ == "__main__":
    # Prefer the unittest2 backport when present, but do not require it:
    # the standard library's unittest offers the same main() entry point.
    try:
        from unittest2 import main
    except ImportError:
        from unittest import main
    main(defaultTest='suite')