Add support for CASAVA 1.7
[htsworkflow.git] / htsworkflow / pipelines / test / simulate_runfolder.py
1 """
2 Create simulated solexa/illumina runfolders for testing
3 """
4
5 import os
6 import shutil
7
# Directory holding this module, and the fixture directory beneath it
# that the make_* helpers copy their template files from.
TEST_CODE_DIR = os.path.dirname(__file__)
TESTDATA_DIR = os.path.join(TEST_CODE_DIR, 'testdata')
# Lane numbers (1-8) and tile numbers (1-100) the helpers generate files for.
LANE_LIST = range(1, 9)
TILE_LIST = range(1, 101)
12
def make_firecrest_dir(data_dir, version="1.9.2", start=1, stop=37):
    """
    Create an empty Firecrest directory inside data_dir

    The directory is named
    C<start>-<stop>_Firecrest<version>_12-04-2008_diane.
    os.mkdir is called unconditionally, so an already existing
    directory raises OSError.

    Returns the path of the new directory.
    """
    dirname = 'C%d-%d_Firecrest%s_12-04-2008_diane' % (start, stop, version)
    firecrest_dir = os.path.join(data_dir, dirname)
    os.mkdir(firecrest_dir)
    return firecrest_dir
19     
def make_ipar_dir(data_dir, version='1.01'):
    """
    Construct an artificial ipar parameter file and directory

    The IPAR1.01.params fixture is copied to <data_dir>/.params (the
    same fixture is used whatever version is requested), then
    <data_dir>/IPAR_<version> is created unless it already exists.

    Returns the path of the IPAR directory.
    """
    params_source = os.path.join(TESTDATA_DIR, 'IPAR1.01.params')
    params_dest = os.path.join(data_dir, '.params')
    shutil.copy(params_source, params_dest)

    ipar_dir = os.path.join(data_dir, 'IPAR_%s' % (version,))
    if os.path.exists(ipar_dir):
        return ipar_dir
    os.mkdir(ipar_dir)
    return ipar_dir
31
def make_flowcell_id(runfolder_dir, flowcell_id=None):
    """
    Write <runfolder_dir>/Config/FlowcellId.xml naming the flowcell

    runfolder_dir -- existing run folder; its Config subdirectory is
                     created when missing.
    flowcell_id   -- id stored in the file; None selects '207BTAAXY'.
    """
    if flowcell_id is None:
        flowcell_id = '207BTAAXY'

    config = """<?xml version="1.0"?>
<FlowcellId>
  <Text>%s</Text>
</FlowcellId>""" % (flowcell_id,)

    config_dir = os.path.join(runfolder_dir, 'Config')
    if not os.path.exists(config_dir):
        os.mkdir(config_dir)

    pathname = os.path.join(config_dir, 'FlowcellId.xml')
    # the with block closes the file even when the write fails
    with open(pathname, 'w') as f:
        f.write(config)
48
def make_bustard_config132(image_dir):
    """
    Copy the bustard-config132.xml fixture to <image_dir>/config.xml
    """
    shutil.copy(os.path.join(TESTDATA_DIR, 'bustard-config132.xml'),
                os.path.join(image_dir, 'config.xml'))
53
def make_rta_intensities_1460(data_dir, version='1.4.6.0'):
    """
    Construct an artificial RTA Intensities parameter file and directory

    Creates <data_dir>/Intensities when it is missing and copies the
    rta_intensities_config.xml fixture into it as config.xml.
    The version argument is accepted but not used.

    Returns the path of the Intensities directory.
    """
    intensities_dir = os.path.join(data_dir, 'Intensities')
    if not os.path.exists(intensities_dir):
        os.mkdir(intensities_dir)

    config_source = os.path.join(TESTDATA_DIR, 'rta_intensities_config.xml')
    config_dest = os.path.join(intensities_dir, 'config.xml')
    shutil.copy(config_source, config_dest)

    return intensities_dir
66
def make_rta_basecalls_1460(intensities_dir):
    """
    Construct an artificial RTA BaseCalls parameter file and directory

    Creates <intensities_dir>/BaseCalls when it is missing and copies
    the rta_basecalls_config.xml fixture into it as config.xml.

    Returns the path of the BaseCalls directory.
    """
    basecalls_dir = os.path.join(intensities_dir, 'BaseCalls')
    if not os.path.exists(basecalls_dir):
        os.mkdir(basecalls_dir)

    config_source = os.path.join(TESTDATA_DIR, 'rta_basecalls_config.xml')
    config_dest = os.path.join(basecalls_dir, 'config.xml')
    shutil.copy(config_source, config_dest)

    return basecalls_dir
79
def make_rta_intensities_1870(data_dir, version='1.8.70.0'):
    """
    Construct an artificial RTA Intensities parameter file and directory

    Creates <data_dir>/Intensities when it is missing and copies the
    rta_intensities_config_1870.xml fixture into it as config.xml.
    The version argument is accepted but not used.

    Returns the path of the Intensities directory.
    """
    intensities_dir = os.path.join(data_dir, 'Intensities')
    if not os.path.exists(intensities_dir):
        os.mkdir(intensities_dir)

    config_source = os.path.join(TESTDATA_DIR,
                                 'rta_intensities_config_1870.xml')
    config_dest = os.path.join(intensities_dir, 'config.xml')
    shutil.copy(config_source, config_dest)

    return intensities_dir
92
def make_rta_basecalls_1870(intensities_dir):
    """
    Construct an artificial RTA BaseCalls parameter file and directory

    Creates <intensities_dir>/BaseCalls when it is missing and copies
    the rta_basecalls_config_1870.xml fixture into it as config.xml.

    Returns the path of the BaseCalls directory.
    """
    basecalls_dir = os.path.join(intensities_dir, 'BaseCalls')
    if not os.path.exists(basecalls_dir):
        os.mkdir(basecalls_dir)

    config_source = os.path.join(TESTDATA_DIR,
                                 'rta_basecalls_config_1870.xml')
    config_dest = os.path.join(basecalls_dir, 'config.xml')
    shutil.copy(config_source, config_dest)

    return basecalls_dir
105
def make_qseqs(bustard_dir, in_temp=True):
    """
    Fill a bustard directory with qseq files and their support files

    bustard_dir is created when it is missing.  One copy of the qseq
    fixture is written per lane and tile as s_<lane>_1_<tile>_qseq.txt,
    the Matrix and Phasing subdirectories are filled in by
    make_matrix_dir and make_phasing_dir, and the BustardSummary.xml
    fixture is copied in.  The in_temp argument is accepted but ignored.

    Returns bustard_dir.
    """
    if not os.path.isdir(bustard_dir):
        os.mkdir(bustard_dir)

    # 42BRJ 8 1 0039 happened to be a better than usual tile, in that there
    # was actually sequence at the start
    qseq_source = os.path.join(TESTDATA_DIR, '42BRJAAXX_8_1_0039_qseq.txt')
    for lane in LANE_LIST:
        for tile in TILE_LIST:
            qseq_name = 's_%d_1_%04d_qseq.txt' % (lane, tile)
            shutil.copy(qseq_source, os.path.join(bustard_dir, qseq_name))

    make_matrix_dir(bustard_dir)
    make_phasing_dir(bustard_dir)

    shutil.copy(os.path.join(TESTDATA_DIR, '42BRJAAXX_BustardSummary.xml'),
                os.path.join(bustard_dir, 'BustardSummary.xml'))

    return bustard_dir
130
def make_scores(gerald_dir, in_temp=True):
    """
    Fill gerald directory with score temp files

    When in_temp is true the files go into <gerald_dir>/Temp, otherwise
    directly into gerald_dir.  The destination directory is created if
    it doesn't exist (only that last path component, via os.mkdir).
    One copy of the score fixture is written per lane and tile as
    s_<lane>_<tile>_score.txt.

    Returns the directory the score files were written to.
    """
    if in_temp:
        destdir = os.path.join(gerald_dir, 'Temp')
    else:
        destdir = gerald_dir
    if not os.path.isdir(destdir):
        os.mkdir(destdir)

    score_source = os.path.join(TESTDATA_DIR, 's_1_0001_score.txt')
    for lane in LANE_LIST:
        for tile in TILE_LIST:
            score_name = 's_%d_%04d_score.txt' % (lane, tile)
            shutil.copy(score_source, os.path.join(destdir, score_name))

    return destdir
149
def make_matrix_dir(bustard_dir):
    """
    Create several matrix files in <bustard_dir>/Matrix/

    from pipeline 1.4

    The Matrix directory is created when missing and the
    42BRJAAXX_8_02_matrix.txt fixture is copied once per lane as
    s_<lane>_02_matrix.txt.
    """
    matrix_dir = os.path.join(bustard_dir, 'Matrix')
    if not os.path.isdir(matrix_dir):
        os.mkdir(matrix_dir)

    matrix_source = os.path.join(TESTDATA_DIR, '42BRJAAXX_8_02_matrix.txt')
    for lane in LANE_LIST:
        matrix_name = 's_%d_02_matrix.txt' % (lane,)
        shutil.copy(matrix_source, os.path.join(matrix_dir, matrix_name))
164         
def make_matrix(matrix_filename):
    """
    Write a fixed frequency response matrix to matrix_filename

    The file holds a comment line, the four base headers (A, C, G, T)
    and a 4x4 table of values.  An existing file is overwritten.
    """
    contents = """# Auto-generated frequency response matrix
> A
> C
> G
> T
0.77 0.15 -0.04 -0.04
0.76 1.02 -0.05 -0.06
-0.10 -0.10 1.17 -0.03
-0.13 -0.12 0.80 1.27
"""
    # the with block closes the file even when the write fails
    with open(matrix_filename, 'w') as f:
        f.write(contents)
179
def make_matrix_dir_rta160(bustard_dir):
    """
    Create several matrix files in <bustard_dir>/Matrix/

    The Matrix directory is created when missing and the
    61MMFAAXX_4_1_matrix.txt fixture is copied once per lane as
    s_<lane>_1_matrix.txt.
    """
    matrix_dir = os.path.join(bustard_dir, 'Matrix')
    if not os.path.isdir(matrix_dir):
        os.mkdir(matrix_dir)

    matrix_source = os.path.join(TESTDATA_DIR, '61MMFAAXX_4_1_matrix.txt')
    for lane in LANE_LIST:
        matrix_name = 's_%d_1_matrix.txt' % (lane,)
        shutil.copy(matrix_source, os.path.join(matrix_dir, matrix_name))
192         
def make_phasing_dir(bustard_dir):
    """
    Create several phasing files in <bustard_dir>/Phasing/

    from pipeline 1.4

    The Phasing directory is created when missing and the
    42BRJAAXX_8_01_phasing.xml fixture is copied once per lane as
    s_<lane>_01_phasing.xml.
    """
    phasing_dir = os.path.join(bustard_dir, 'Phasing')
    if not os.path.isdir(phasing_dir):
        os.mkdir(phasing_dir)

    phasing_source = os.path.join(TESTDATA_DIR, '42BRJAAXX_8_01_phasing.xml')
    for lane in LANE_LIST:
        phasing_name = 's_%d_01_phasing.xml' % (lane,)
        shutil.copy(phasing_source, os.path.join(phasing_dir, phasing_name))
207     
def make_phasing_params(bustard_dir):
    """
    Write a params<lane>.xml phasing file into bustard_dir for each lane

    Every file carries the same fixed Phasing and Prephasing values.
    bustard_dir must already exist.
    """
    params = """<Parameters>
  <Phasing>0.009900</Phasing>
  <Prephasing>0.003500</Prephasing>
</Parameters>
"""
    for lane in LANE_LIST:
        pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane,))
        # the with block closes the file even when the write fails
        with open(pathname, 'w') as f:
            f.write(params)
218
def make_gerald_config_026(gerald_dir):
    """
    Copy the gerald_config_0.2.6.xml fixture to <gerald_dir>/config.xml
    """
    shutil.copy(os.path.join(TESTDATA_DIR, 'gerald_config_0.2.6.xml'),
                os.path.join(gerald_dir, 'config.xml'))
223
def make_gerald_config_100(gerald_dir):
    """
    Copy the gerald_config_1.0.xml fixture to <gerald_dir>/config.xml
    """
    shutil.copy(os.path.join(TESTDATA_DIR, 'gerald_config_1.0.xml'),
                os.path.join(gerald_dir, 'config.xml'))
228
def make_summary_htm_100(gerald_dir):
    """
    Copy the Summary-pipeline100.htm fixture to <gerald_dir>/Summary.htm
    """
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-pipeline100.htm'),
                os.path.join(gerald_dir, 'Summary.htm'))
233
def make_summary_htm_110(gerald_dir):
    """
    Copy the Summary-pipeline110.htm fixture to <gerald_dir>/Summary.htm
    """
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-pipeline110.htm'),
                os.path.join(gerald_dir, 'Summary.htm'))
238
def make_summary_paired_htm(gerald_dir):
    """
    Copy the Summary-paired-pipeline110.htm fixture to
    <gerald_dir>/Summary.htm
    """
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-paired-pipeline110.htm'),
                os.path.join(gerald_dir, 'Summary.htm'))
243
def make_summary_ipar130_htm(gerald_dir):
    """
    Copy the Summary-ipar130.htm fixture to <gerald_dir>/Summary.htm
    """
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-ipar130.htm'),
                os.path.join(gerald_dir, 'Summary.htm'))
248
def make_summary_rta160_xml(gerald_dir):
    """
    Copy the Summary-rta160.xml fixture to <gerald_dir>/Summary.xml
    """
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-rta160.xml'),
                os.path.join(gerald_dir, 'Summary.xml'))
253
def make_eland_results(gerald_dir):
    """
    Write an s_<lane>_eland_result.txt file into gerald_dir for each lane

    Every lane receives the same four canned records.  gerald_dir must
    already exist.
    """
    eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759    ACATAGNCACAGACATAAACATAGACATAGAC U0      1       1       3       chrUextra.fa    28189829        R       D.
>HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA  U1      0       1       0       chr2L.fa        8796855 R       DD      24T
>HWI-EAS229_24_207BTAAXX:1:7:776:582    AGCTCANCCGATCGAAAACCTCNCCAAGCAAT        NM      0       0       0
>HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA        U1      0       1       0       Lambda.fa        8796855 R       DD      24T
"""
    for lane in LANE_LIST:
        pathname = os.path.join(gerald_dir,
                                's_%d_eland_result.txt' % (lane,))
        # the with block closes the file even when the write fails
        with open(pathname, 'w') as f:
            f.write(eland_result)
266
def make_eland_multi(gerald_dir, paired=False, lane_list=LANE_LIST):
    """
    Write canned eland multi files into gerald_dir

    gerald_dir -- existing directory that receives the files.
    paired     -- when true, write s_<lane>_1_eland_multi.txt and
                  s_<lane>_2_eland_multi.txt using the first and second
                  canned record sets; otherwise write a single
                  s_<lane>_eland_multi.txt with the first set.
    lane_list  -- lane numbers to generate files for.
    """
    eland_multi = [""">HWI-EAS229_60_30DP9AAXX:1:1:1221:788   AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT      NM
>HWI-EAS229_60_30DP9AAXX:1:1:931:747    AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA   1:0:2   chr5.fa:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:1121:379   AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG   2:1:0   chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0
>HWI-EAS229_60_30DP9AAXX:1:1:892:1155   ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT   0:9:10  chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1,chr7.fa:22516603F1,chr9.fa:134886204R
>HWI-EAS229_60_30DP9AAXX:1:1:931:747    AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA   1:0:0   spike.fa/sample1:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:931:747    AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA   1:0:0   spike.fa/sample2:55269838R0
""", """>HWI-EAS229_60_30DP9AAXX:1:1:1221:788   AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT      NM
>HWI-EAS229_60_30DP9AAXX:1:1:1221:788   NNNNNNNNNNNNNNGTGGTATGGCGGTGTCTGGTCGT     QC 
>HWI-EAS229_60_30DP9AAXX:1:1:931:747    AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA   1:0:2   chr5.fa:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:1121:379   AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG   2:1:0   chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0,chr7.fa:22516603F1,chr9.fa:134886204R
>HWI-EAS229_60_30DP9AAXX:1:1:892:1155   ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT   0:9:10  chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1
>HWI-EAS229_60_30DP9AAXX:1:1:931:747    AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA   1:0:0   spike.fa/sample1:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:931:747    AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA   1:0:0   spike.fa/sample2:55269838R0
"""]
    if paired:
        for end in [1, 2]:
            for lane in lane_list:
                pathname = os.path.join(gerald_dir,
                                        's_%d_%d_eland_multi.txt' % (lane, end))
                # the with block closes the file even when the write fails
                with open(pathname, 'w') as f:
                    f.write(eland_multi[end - 1])
    else:
        for lane in lane_list:
            pathname = os.path.join(gerald_dir,
                                    's_%d_eland_multi.txt' % (lane,))
            with open(pathname, 'w') as f:
                f.write(eland_multi[0])
297
def make_eland_export(gerald_dir, paired=False, lane_list=LANE_LIST):
    """
    Copy the casava_1.7_export.txt fixture into gerald_dir for each lane

    One s_<lane>_export.txt file is written per entry of lane_list.
    The paired argument is accepted but not used.
    """
    export_source = os.path.join(TESTDATA_DIR, 'casava_1.7_export.txt')

    for lane in lane_list:
        export_name = 's_%d_export.txt' % (lane,)
        shutil.copy(export_source, os.path.join(gerald_dir, export_name))
305
306
def make_scarf(gerald_dir, lane_list=LANE_LIST):
    """
    Write a canned s_<lane>_sequence.txt file into gerald_dir per lane

    Each file holds five colon separated records whose last two fields
    are the read and its quality string.  gerald_dir must already exist.

    gerald_dir -- existing directory that receives the files.
    lane_list  -- lane numbers to generate files for.
    """
    # Raw string: the quality fields contain literal backslashes, and in
    # a normal string \a and \b would become BEL and backspace
    # characters, leaving the quality shorter than the 37 base read.
    seq = r"""HWI-EAS229_92_30VNBAAXX:1:1:0:161:NCAATTACACGACGCTAGCCCTAAAGCTATTTCGAGG:E[aaaabb^a\a_^^a[S`ba_WZUXaaaaaaUKPER
HWI-EAS229_92_30VNBAAXX:1:1:0:447:NAGATGCGCATTTGAAGTAGGAGCAAAAGATCAAGGT:EUabaab^baabaaaaaaaa^^Uaaaaa\aaaa__`a
HWI-EAS229_92_30VNBAAXX:1:1:0:1210:NATAGCCTCTATAGAAGCCACTATTATTTTTTTCTTA:EUa`]`baaaaa^XQU^a`S``S_`J_aaaaaabb^V
HWI-EAS229_92_30VNBAAXX:1:1:0:1867:NTGGAGCAGATATAAAAACAGATGGTGACGTTGAAGT:E[^UaaaUaba^aaa^aa^XV\baaLaLaaaaQVXV^
HWI-EAS229_92_30VNBAAXX:1:1:0:1898:NAGCTCGTGTCGTGAGATGTTAGGTTAAGTCCTGCAA:EK_aaaaaaaaaaaUZaaZaXM[aaaXSM\aaZ]URE
"""
    for lane in lane_list:
        pathname = os.path.join(gerald_dir, 's_%d_sequence.txt' % (lane,))
        # the with block closes the file even when the write fails
        with open(pathname, 'w') as f:
            f.write(seq)
319
def make_fastq(gerald_dir, lane_list=LANE_LIST):
    """
    Write a canned fastq s_<lane>_sequence.txt file into gerald_dir per lane

    Each file holds three four-line fastq records.  gerald_dir must
    already exist.

    gerald_dir -- existing directory that receives the files.
    lane_list  -- lane numbers to generate files for.
    """
    # Raw string: the first quality line contains literal backslashes,
    # and in a normal string \b would become a backspace character,
    # leaving 35 quality characters for a 37 base read.
    seq = r"""@HWI-EAS229:1:2:182:712#0/1
AAAAAAAAAAAAAAAAAAAAANAAAAAAAAAAAAAAA
+HWI-EAS229:1:2:182:712#0/1
\bab_bbaabbababbaaa]]D]bb_baabbab\baa
@HWI-EAS229:1:2:198:621#0/1
CCCCCCCCCCCCCCCCCCCCCNCCCCCCCCCCCCCCC
+HWI-EAS229:1:2:198:621#0/1
[aaaaaaa`_`aaaaaaa[`ZDZaaaaaaaaaaaaaa
@HWI-EAS229:1:2:209:1321#0/1
AAAAAAAAAAAAAAAAAAAAANAAAAAAAAAAAAAAA
+HWI-EAS229:1:2:209:1321#0/1
_bbbbbaaababaabbbbab]D]aaaaaaaaaaaaaa
"""
    for lane in lane_list:
        pathname = os.path.join(gerald_dir, 's_%d_sequence.txt' % (lane,))
        # the with block closes the file even when the write fails
        with open(pathname, 'w') as f:
            f.write(seq)
339
340