Remove django test environment setup code.
[htsworkflow.git] / htsworkflow / submission / test / test_condorfastq.py
1 #!/usr/bin/env python
2
3 import copy
4 import os
5 from pprint import pprint
6 import shutil
7 import tempfile
8
9 from django.test import TestCase
10 from django.test.utils import setup_test_environment, \
11      teardown_test_environment
12 from django.db import connection
13 from django.conf import settings
14
15 from htsworkflow.submission.condorfastq import CondorFastqExtract
16 from htsworkflow.submission.results import ResultMap
17 from htsworkflow.util.rdfhelp import \
18      add_default_schemas, load_string_into_model, dump_model
19 from htsworkflow.util.rdfinfer import Infer
20
# Directories created beneath the temporary flowcell root by
# TestCondorFastq.setUp.  setUp creates them one at a time with os.mkdir,
# so every parent directory has to appear before its children.
FCDIRS = [
    'C02F9ACXX',
    'C02F9ACXX/C1-202',
    'C02F9ACXX/C1-202/Project_11154',
    'C02F9ACXX/C1-202/Project_12342_Index1',
    'C02F9ACXX/C1-202/Project_12342_Index2',
    'C02F9ACXX/C1-202/Project_12345',
    '42JUYAAXX',
    '42JUYAAXX/C1-76',
    '30221AAXX',
    '30221AAXX/C1-33',
    '30DY0AAXX',
    '30DY0AAXX/C1-151',
    '61MJTAAXX',
    '61MJTAAXX/C1-76',
]
37
# Placeholder sequence archive files, relative to the flowcell root.
# TestCondorFastq.setUp creates each one with dummy content; every path
# must live inside a directory listed in FCDIRS.  Each path is listed
# exactly once.
DATAFILES = [
    'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_002.fastq.gz',
    'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_002.fastq.gz',
    'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R1_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R2_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R1_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R2_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R1_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R2_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_002.fastq.gz',
    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_003.fastq.gz',
    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_001.fastq.gz',
    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_002.fastq.gz',
    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_003.fastq.gz',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r1.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r1.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r1.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r1.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r1.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r1.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r1.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r2.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r2.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r2.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r2.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r2.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r2.tar.bz2',
    '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r2.tar.bz2',
    '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_1.srf',
    '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_2.srf',
    '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_3.srf',
    '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf',
    '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_5.srf',
    '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_6.srf',
    '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_7.srf',
    '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_8.srf',
    '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_1.srf',
    '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_2.srf',
    '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_3.srf',
    '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_4.srf',
    '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_5.srf',
    '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_6.srf',
    '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_7.srf',
    '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf',
    '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l1_r1.tar.bz2',
    '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l2_r1.tar.bz2',
    '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l3_r1.tar.bz2',
    '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l4_r1.tar.bz2',
    '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l5_r1.tar.bz2',
    '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2',
    '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l7_r1.tar.bz2',
    '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l8_r1.tar.bz2',
]
97
# Turtle (RDF) fixture that TestCondorFastq.setUp loads into the
# extractor's model.  It declares five flowcells (30221AAXX, 42JUYAAXX,
# 61MJTAAXX, 30DY0AAXX and C02F9ACXX) together with their lanes.
# Libraries 11154 and 12345 carry full metadata; libraries 10000, 1331,
# 1421 and 1661 are only typed as libns:Library.
# NOTE: lines starting with '#' inside the string are Turtle comments and
# part of the fixture text, not Python comments.
lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
@prefix seqns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
@prefix invns: <http://jumpgate.caltech.edu/wiki/InventoryOntology#> .

<http://localhost/library/10000/> a libns:Library .
<http://localhost/library/1331/> a libns:Library .
<http://localhost/library/1421/> a libns:Library .
<http://localhost/library/1661/> a libns:Library .

<http://localhost/flowcell/30221AAXX/>
        a libns:IlluminaFlowcell ;
        libns:read_length 33 ;
        libns:flowcell_type "Single"@en ;
        libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
        libns:has_lane <http://localhost/lane/3401> ;
        libns:has_lane <http://localhost/lane/3402> ;
        libns:has_lane <http://localhost/lane/3403> ;
        libns:has_lane <http://localhost/lane/3404> ;
        libns:has_lane <http://localhost/lane/3405> ;
        libns:has_lane <http://localhost/lane/3406> ;
        libns:has_lane <http://localhost/lane/3407> ;
        libns:has_lane <http://localhost/lane/3408> ;
        libns:flowcell_id "30221AAXX"@en .

<http://localhost/lane/3401>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
        libns:library <http://localhost/library/10000/> ;
        libns:lane_number "1" .
<http://localhost/lane/3402>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
        libns:library <http://localhost/library/10000/> ;
        libns:lane_number "2" .
<http://localhost/lane/3403>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
        libns:library <http://localhost/library/10000/> ;
        libns:lane_number "3" .
<http://localhost/lane/3404>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
        libns:library <http://localhost/library/11154/> ;
        libns:lane_number "4" .
        # paired_end 1;
        # read_length 33;
        # status "Unknown"@en .
<http://localhost/lane/3405>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
        libns:library <http://localhost/library/10000/> ;
        libns:lane_number "5" .
<http://localhost/lane/3406>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
        libns:library <http://localhost/library/10000/> ;
        libns:lane_number "6" .
<http://localhost/lane/3407>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
        libns:library <http://localhost/library/10000/> ;
        libns:lane_number "7" .
<http://localhost/lane/3408>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
        libns:library <http://localhost/library/10000/> ;
        libns:lane_number "8" .

<http://localhost/flowcell/42JUYAAXX/>
        a libns:IlluminaFlowcell ;
        libns:read_length 76 ;
        libns:flowcell_type "Paired"@en ;
        libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
        libns:has_lane <http://localhost/lane/4201> ;
        libns:has_lane <http://localhost/lane/4202> ;
        libns:has_lane <http://localhost/lane/4203> ;
        libns:has_lane <http://localhost/lane/4204> ;
        libns:has_lane <http://localhost/lane/4205> ;
        libns:has_lane <http://localhost/lane/4206> ;
        libns:has_lane <http://localhost/lane/4207> ;
        libns:has_lane <http://localhost/lane/4208> ;
        libns:flowcell_id "42JUYAAXX"@en .

<http://localhost/lane/4201>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
        libns:library <http://localhost/library/1421/> ;
        libns:lane_number "1" .
<http://localhost/lane/4202>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
        libns:library <http://localhost/library/1421/> ;
        libns:lane_number "2" .
<http://localhost/lane/4203>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
        libns:library <http://localhost/library/1421/> ;
        libns:lane_number "3" .
<http://localhost/lane/4204>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
        libns:library <http://localhost/library/1421/> ;
        libns:lane_number "4" .
<http://localhost/lane/4205>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
        libns:library <http://localhost/library/11154/> ;
        libns:lane_number "5" .
        # paired_end 1;
        # read_length 76;
        # status "Unknown"@en .
<http://localhost/lane/4206>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
        libns:library <http://localhost/library/1421/> ;
        libns:lane_number "6" .
<http://localhost/lane/4207>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
        libns:library <http://localhost/library/1421/> ;
        libns:lane_number "7" .
<http://localhost/lane/4208>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
        libns:library <http://localhost/library/1421/> ;
        libns:lane_number "8" .

<http://localhost/flowcell/61MJTAAXX/>
        a libns:IlluminaFlowcell ;
        libns:read_length 76 ;
        libns:flowcell_type "Single"@en ;
        libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
        libns:has_lane <http://localhost/lane/6601> ;
        libns:has_lane <http://localhost/lane/6602> ;
        libns:has_lane <http://localhost/lane/6603> ;
        libns:has_lane <http://localhost/lane/6604> ;
        libns:has_lane <http://localhost/lane/6605> ;
        libns:has_lane <http://localhost/lane/6606> ;
        libns:has_lane <http://localhost/lane/6607> ;
        libns:has_lane <http://localhost/lane/6608> ;
        libns:flowcell_id "61MJTAAXX"@en .

<http://localhost/lane/6601>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
        libns:library <http://localhost/library/1661/> ;
        libns:lane_number "1" .
<http://localhost/lane/6602>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
        libns:library <http://localhost/library/1661/> ;
        libns:lane_number "2" .
<http://localhost/lane/6603>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
        libns:library <http://localhost/library/1661/> ;
        libns:lane_number "3" .
<http://localhost/lane/6604>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
        libns:library <http://localhost/library/1661/> ;
        libns:lane_number "4" .
<http://localhost/lane/6605>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
        libns:library <http://localhost/library/1661/> ;
        libns:lane_number "5" .
<http://localhost/lane/6606>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
        libns:library <http://localhost/library/11154/> ;
        libns:lane_number "6" .
        # paired_end 1;
        # read_length 76;
        # status "Unknown"@en .
<http://localhost/lane/6607>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
        libns:library <http://localhost/library/1661/> ;
        libns:lane_number "7" .
<http://localhost/lane/6608>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
        libns:library <http://localhost/library/1661/> ;
        libns:lane_number "8" .

<http://localhost/flowcell/30DY0AAXX/>
        a libns:IlluminaFlowcell ;
        libns:read_length 76 ;
        libns:flowcell_type "Paired"@en ;
        libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
        libns:has_lane <http://localhost/lane/3801> ;
        libns:has_lane <http://localhost/lane/3802> ;
        libns:has_lane <http://localhost/lane/3803> ;
        libns:has_lane <http://localhost/lane/3804> ;
        libns:has_lane <http://localhost/lane/3805> ;
        libns:has_lane <http://localhost/lane/3806> ;
        libns:has_lane <http://localhost/lane/3807> ;
        libns:has_lane <http://localhost/lane/3808> ;
        libns:flowcell_id "30DY0AAXX"@en .

<http://localhost/lane/3801>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
        libns:library <http://localhost/library/1331/> ;
        libns:lane_number "1" .
<http://localhost/lane/3802>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
        libns:library <http://localhost/library/1331/> ;
        libns:lane_number "2" .
<http://localhost/lane/3803>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
        libns:library <http://localhost/library/1331/> ;
        libns:lane_number "3" .
<http://localhost/lane/3804>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
        libns:library <http://localhost/library/1331/> ;
        libns:lane_number "4" .
<http://localhost/lane/3805>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
        libns:library <http://localhost/library/1331/> ;
        libns:lane_number "5" .
<http://localhost/lane/3806>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
        libns:library <http://localhost/library/1331/> ;
        libns:lane_number "6" .
<http://localhost/lane/3807>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
        libns:library <http://localhost/library/1331/> ;
        libns:lane_number "7" .
<http://localhost/lane/3808>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
        libns:library <http://localhost/library/11154/> ;
        libns:lane_number "8" .
        # paired_end 1;
        # read_length 76;
        # status "Unknown"@en .

<http://localhost/flowcell/C02F9ACXX/>
        a libns:IlluminaFlowcell ;
        libns:read_length 101 ;
        libns:flowcell_type "Paired"@en ;
        libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
        libns:has_lane <http://localhost/lane/12300> ;
        libns:has_lane <http://localhost/lane/12500> ;
        libns:flowcell_id "C02F9ACXX"@en .

<http://localhost/lane/12300>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
        libns:library <http://localhost/library/12345/> ;
        libns:lane_number "3" .
        # paired_end 1;
        # read_length 101;
        # status "Unknown"@en .

<http://localhost/lane/12500>
        a libns:IlluminaLane ;
        libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
        libns:library <http://localhost/library/11154/> ;
        libns:lane_number "3" .
        # paired_end 1;
        # read_length 101;
        # status "Unknown"@en .

<http://localhost/library/11154/>
        a libns:Library ;
        libns:affiliation "TSR"@en;
        libns:concentration "29.7";
        libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
        libns:experiment_type "RNA-seq"@en ;
        libns:gel_cut 300 ;
        libns:has_lane <http://localhost/lane/3404> ;
        libns:has_lane <http://localhost/lane/4205> ;
        libns:has_lane <http://localhost/lane/6606> ;
        libns:has_lane <http://localhost/lane/3808> ;
        libns:has_lane <http://localhost/lane/12500> ;
        libns:insert_size 2000 ;
        libns:library_id "11154"@en ;
        libns:library_type "Paired End (Multiplexed)"@en ;
        libns:made_by "Gary Gygax"@en ;
        libns:name "Paired Ends ASDF"@en ;
        libns:replicate "1"@en;
        libns:species_name "Mus musculus"@en ;
        libns:stopping_point "Completed"@en ;
        libns:total_unique_locations 8841201 .
        # cell_line

<http://localhost/library/12345/>
        a libns:Library ;
        libns:affiliation "TSR"@en;
        libns:concentration "12.345";
        libns:cell_line "Unknown"@en ;
        libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
        libns:experiment_type "RNA-seq"@en ;
        libns:gel_cut 300 ;
        libns:has_lane <http://localhost/lane/12300> ;
        libns:insert_size 2000 ;
        libns:library_id "12345"@en ;
        libns:library_type "Paired End (Multiplexed)"@en ;
        libns:made_by "Gary Gygax"@en ;
        libns:name "Paired Ends THING"@en ;
        libns:replicate "1"@en;
        libns:species_name "Mus musculus"@en ;
        libns:stopping_point "Completed"@en ;
        libns:total_unique_locations 8841201 .
        # cell_line
"""
# Host URL handed to CondorFastqExtract in setUp; it matches the
# http://localhost/... URIs used throughout lib_turtle.
HOST = "http://localhost"
418
class TestCondorFastq(TestCase):
    """Tests for CondorFastqExtract run against a throw-away flowcell tree.

    setUp builds a temporary directory holding the directories in FCDIRS
    and a placeholder file for every entry of DATAFILES, loads lib_turtle
    into the extractor's RDF model and makes the temporary directory the
    current working directory.
    """

    def setUp(self):
        """Create the temporary fixture tree and the extractor under test."""
        self.cwd = os.getcwd()

        self.tempdir = tempfile.mkdtemp(prefix='condorfastq_test')
        # Registered right away so the directory is removed even when a
        # later step of setUp fails (tearDown is skipped in that case).
        # Cleanups run after tearDown, i.e. after the cwd was restored.
        self.addCleanup(shutil.rmtree, self.tempdir)

        self.flowcelldir = os.path.join(self.tempdir, 'flowcells')
        os.mkdir(self.flowcelldir)

        self.logdir = os.path.join(self.tempdir, 'log')
        os.mkdir(self.logdir)

        # FCDIRS lists parents before children, so plain mkdir suffices.
        for d in FCDIRS:
            os.mkdir(os.path.join(self.flowcelldir, d))

        for f in DATAFILES:
            filename = os.path.join(self.flowcelldir, f)
            with open(filename, 'w') as stream:
                stream.write('testfile')

        # One result (submission) directory per library under test.
        self.result_map = ResultMap()
        for lib_id in [u'11154', u'12345']:
            subname = 'sub-%s' % (lib_id,)
            sub_dir = os.path.join(self.tempdir, subname)
            os.mkdir(sub_dir)
            self.result_map[lib_id] = sub_dir

        self.extract = CondorFastqExtract(HOST,
                                          self.flowcelldir,
                                          self.logdir)
        load_string_into_model(self.extract.model, 'turtle', lib_turtle)
        add_default_schemas(self.extract.model)
        inference = Infer(self.extract.model)
        errmsgs = list(inference.run_validation())
        self.assertEqual(len(errmsgs), 0)
        # test_create_scripts opens the generated condor files by relative
        # name, so the tests run from inside the temporary directory.
        os.chdir(self.tempdir)

    def tearDown(self):
        """Restore the original working directory.

        The temporary directory itself is deleted by the cleanup that
        setUp registered; leaving it first avoids removing the directory
        the process is currently sitting in.
        """
        os.chdir(self.cwd)

    def _sorted_arguments(self, filename):
        """Return the sorted lines of *filename* that start with 'argument'.

        Fails the test when *filename* does not exist.
        """
        self.assertTrue(os.path.exists(filename))
        with open(filename, 'r') as stream:
            return sorted(line for line in stream
                          if line.startswith('argument'))

    def test_find_relevant_flowcell_ids(self):
        """All five flowcells of the fixture are reported as relevant."""
        expected = set(('30221AAXX',
                        '42JUYAAXX',
                        '61MJTAAXX',
                        '30DY0AAXX',
                        'C02F9ACXX'))
        flowcell_ids = self.extract.find_relevant_flowcell_ids()
        self.assertEqual(flowcell_ids, expected)

    def test_find_archive_sequence(self):
        """Archive files found for the mapped libraries have the expected
        (library, flowcell, lane, read, cycle, paired, filetype) values."""
        seqs = self.extract.find_archive_sequence_files(self.result_map)

        # Several tuples repeat (presumably one row was written per archive
        # file); duplicates collapse inside the set and do not affect the
        # comparison.
        expected = set([
            (u'11154', u'42JUYAAXX', '5', 1, 76, True, 'qseq'),
            (u'11154', u'42JUYAAXX', '5', 2, 76, True, 'qseq'),
            (u'11154', u'61MJTAAXX', '6', 1, 76, False, 'qseq'),
            (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
            (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
            (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
            (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
            (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
            (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
            (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
            (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
            (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
            (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
            (u'11154', u'30221AAXX', '4', 1, 33, False, 'srf'),
            (u'11154', u'30DY0AAXX', '8', 1, 151, True, 'srf')
        ])
        found = set([
            (l.library_id, l.flowcell_id, l.lane_number, l.read, l.cycle,
             l.ispaired, l.filetype)
            for l in seqs
        ])
        self.assertEqual(expected, found)

    def test_find_needed_targets(self):
        """update_fastq_targets maps each fastq target to its source files."""
        lib_db = self.extract.find_archive_sequence_files(self.result_map)

        needed_targets = self.extract.update_fastq_targets(self.result_map,
                                                           lib_db)
        self.assertEqual(len(needed_targets), 9)
        srf_30221 = needed_targets[
            self.result_map['11154'] + u'/11154_30221AAXX_c33_l4.fastq']
        qseq_42JUY_r1 = needed_targets[
            self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r1.fastq']
        qseq_42JUY_r2 = needed_targets[
            self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r2.fastq']
        qseq_61MJT = needed_targets[
            self.result_map['11154'] + u'/11154_61MJTAAXX_c76_l6.fastq']
        split_C02F9_r1 = needed_targets[
            self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r1.fastq']
        split_C02F9_r2 = needed_targets[
            self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r2.fastq']

        self.assertEqual(len(srf_30221['srf']), 1)
        self.assertEqual(len(qseq_42JUY_r1['qseq']), 1)
        self.assertEqual(len(qseq_42JUY_r2['qseq']), 1)
        self.assertEqual(len(qseq_61MJT['qseq']), 1)
        self.assertEqual(len(split_C02F9_r1['split_fastq']), 2)
        self.assertEqual(len(split_C02F9_r2['split_fastq']), 2)

    def test_generate_fastqs(self):
        """build_condor_arguments yields the expected srf, qseq and
        split_fastq job descriptions."""
        commands = self.extract.build_condor_arguments(self.result_map)

        srf = commands['srf']
        qseq = commands['qseq']
        split = commands['split_fastq']

        self.assertEqual(len(srf), 2)
        self.assertEqual(len(qseq), 3)
        self.assertEqual(len(split), 4)

        # Expected srf jobs, keyed by their (left) target path.
        srf_data = {
            os.path.join(self.result_map['11154'],
                         '11154_30221AAXX_c33_l4.fastq'): {
                'mid': None,
                'ispaired': False,
                'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
                'flowcell': u'30221AAXX',
                'target': os.path.join(self.result_map['11154'],
                                       u'11154_30221AAXX_c33_l4.fastq'),
            },
            os.path.join(self.result_map['11154'],
                         '11154_30DY0AAXX_c151_l8_r1.fastq'): {
                # The literal used to carry a second, shadowed
                # 'mid': None entry; 76 is the value that took effect.
                'mid': 76,
                'ispaired': True,
                'flowcell': u'30DY0AAXX',
                'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'],
                'target':
                    os.path.join(self.result_map['11154'],
                                 u'11154_30DY0AAXX_c151_l8_r1.fastq'),
                'target_right':
                    os.path.join(self.result_map['11154'],
                                 u'11154_30DY0AAXX_c151_l8_r2.fastq'),
            }
        }
        for args in srf:
            expected = srf_data[args['target']]
            self.assertEqual(args['ispaired'], expected['ispaired'])
            self.assertEqual(len(args['sources']), 1)
            _, source_filename = os.path.split(args['sources'][0])
            self.assertEqual(source_filename, expected['sources'][0])
            self.assertEqual(args['target'], expected['target'])
            if args['ispaired']:
                self.assertEqual(args['target_right'],
                                 expected['target_right'])
            if 'mid' in expected:
                self.assertEqual(args['mid'], expected['mid'])

        # Expected qseq jobs, keyed by target path.
        qseq_data = {
            os.path.join(self.result_map['11154'],
                         '11154_42JUYAAXX_c76_l5_r1.fastq'): {
                'istar': True,
                'ispaired': True,
                'sources': [
                    u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2']
            },
            os.path.join(self.result_map['11154'],
                         '11154_42JUYAAXX_c76_l5_r2.fastq'): {
                'istar': True,
                'ispaired': True,
                'sources': [
                    u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2']
            },
            os.path.join(self.result_map['11154'],
                         '11154_61MJTAAXX_c76_l6.fastq'): {
                'istar': True,
                'ispaired': False,
                'sources': [
                    u'woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2'],
            },
        }
        for args in qseq:
            expected = qseq_data[args['target']]
            self.assertEqual(args['istar'], expected['istar'])
            self.assertEqual(args['ispaired'], expected['ispaired'])
            for i, expected_source in enumerate(expected['sources']):
                _, filename = os.path.split(args['sources'][i])
                self.assertEqual(filename, expected_source)

        # Expected split_fastq jobs, keyed by the target's base name.
        split_test = dict(((x['target'], x) for x in
            [{'sources': [u'11154_NoIndex_L003_R1_001.fastq.gz',
                          u'11154_NoIndex_L003_R1_002.fastq.gz'],
              'pyscript': 'desplit_fastq.pyc',
              'target': u'11154_C02F9ACXX_c202_l3_r1.fastq'},
             {'sources': [u'11154_NoIndex_L003_R2_001.fastq.gz',
                          u'11154_NoIndex_L003_R2_002.fastq.gz'],
              'pyscript': 'desplit_fastq.pyc',
              'target': u'11154_C02F9ACXX_c202_l3_r2.fastq'},
             {'sources': [u'12345_CGATGT_L003_R1_001.fastq.gz',
                          u'12345_CGATGT_L003_R1_002.fastq.gz',
                          u'12345_CGATGT_L003_R1_003.fastq.gz',
                          ],
              'pyscript': 'desplit_fastq.pyc',
              'target': u'12345_C02F9ACXX_c202_l3_r1.fastq'},
             {'sources': [u'12345_CGATGT_L003_R2_001.fastq.gz',
                          u'12345_CGATGT_L003_R2_002.fastq.gz',
                          u'12345_CGATGT_L003_R2_003.fastq.gz',
                          ],
              'pyscript': 'desplit_fastq.pyc',
              'target': u'12345_C02F9ACXX_c202_l3_r2.fastq'}
             ]
        ))
        for arg in split:
            _, target = os.path.split(arg['target'])
            expected = split_test[target]
            self.assertTrue(arg['pyscript'].endswith(expected['pyscript']))
            self.assertTrue(arg['target'].endswith(expected['target']))
            for s_index, source in enumerate(arg['sources']):
                self.assertTrue(
                    source.endswith(expected['sources'][s_index]))

    def test_create_scripts(self):
        """create_scripts writes srf, qseq and split_fastq condor files
        into the current directory with the expected argument lines."""
        self.extract.create_scripts(self.result_map)

        arguments = self._sorted_arguments('srf.condor')
        self.assertEqual(len(arguments), 2)
        self.assertIn('sub-11154/11154_30221AAXX_c33_l4.fastq',
                      arguments[0])
        self.assertIn('sub-11154/11154_30DY0AAXX_c151_l8_r2.fastq',
                      arguments[1])

        arguments = self._sorted_arguments('qseq.condor')
        self.assertEqual(len(arguments), 3)
        self.assertIn('sub-11154/11154_42JUYAAXX_c76_l5_r1.fastq ',
                      arguments[0])
        self.assertIn(
            'C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2',
            arguments[1])
        self.assertIn('61MJTAAXX_c76_l6.fastq -f 61MJTAAXX',
                      arguments[2])

        arguments = self._sorted_arguments('split_fastq.condor')
        self.assertEqual(len(arguments), 4)
        # Lane 3 Read 1
        self.assertIn('11154_NoIndex_L003_R1_001.fastq.gz', arguments[0])
        # Lane 3 Read 2
        self.assertIn('11154_NoIndex_L003_R2_002.fastq.gz', arguments[1])
        # Lane 3 Read 1
        self.assertIn('12345_CGATGT_L003_R1_001.fastq.gz', arguments[2])
        self.assertIn('12345_CGATGT_L003_R1_002.fastq.gz', arguments[2])
        self.assertIn('12345_CGATGT_L003_R1_003.fastq.gz', arguments[2])
        self.assertIn('12345_C02F9ACXX_c202_l3_r1.fastq', arguments[2])

        # Lane 3 Read 2
        self.assertIn('12345_CGATGT_L003_R2_001.fastq.gz', arguments[3])
        self.assertIn('12345_CGATGT_L003_R2_002.fastq.gz', arguments[3])
        self.assertIn('12345_CGATGT_L003_R2_003.fastq.gz', arguments[3])
        self.assertIn('12345_C02F9ACXX_c202_l3_r2.fastq', arguments[3])

681
682
def suite():
    """Return a TestSuite containing every test of TestCondorFastq.

    unittest2 is used when it is installed, exactly as before; when the
    backport is missing the standard library's unittest provides the same
    TestSuite / defaultTestLoader names, so the module no longer fails
    with ImportError in that environment.
    """
    try:
        from unittest2 import TestSuite, defaultTestLoader
    except ImportError:
        from unittest import TestSuite, defaultTestLoader
    # Named 'tests' so the local does not shadow this function's own name.
    tests = TestSuite()
    tests.addTests(defaultTestLoader.loadTestsFromTestCase(TestCondorFastq))
    return tests
688
if __name__ == "__main__":
    # Prefer the unittest2 backport when present (original behaviour) and
    # fall back to the standard library runner otherwise, so the module
    # can still be executed directly where unittest2 is not installed.
    try:
        from unittest2 import main
    except ImportError:
        from unittest import main
    # 'suite' is resolved by name in this module and called to build the
    # tests to run.
    main(defaultTest='suite')