2 Create simulated solexa/illumina runfolders for testing
# Directory holding this module, and the fixture directory next to it.
TEST_CODE_DIR = os.path.dirname(__file__)
TESTDATA_DIR = os.path.join(TEST_CODE_DIR, 'testdata')

# Lanes 1..8 and tiles 1..100 used when fabricating per-lane/per-tile files.
LANE_LIST = range(1, 9)
TILE_LIST = range(1, 101)

# Tile numbers referenced by ABXX_BASE_CALL_INFO: each of the prefixes
# 11, 12, 21 and 22 followed by 01 through 08.
HISEQ_TILE_LIST = list(
    prefix * 100 + tile
    for prefix in (11, 12, 21, 22)
    for tile in range(1, 9)
)
def make_firecrest_dir(data_dir, version="1.9.2", start=1, stop=37):
    """
    Create an empty simulated Firecrest directory inside data_dir

    The directory is named
    C<start>-<stop>_Firecrest<version>_12-04-2008_diane and is created
    with os.mkdir, so an OSError is raised when it already exists or
    when data_dir is missing.

    Returns the path of the newly created directory.
    """
    dirname = 'C%d-%d_Firecrest%s_12-04-2008_diane' % (start, stop, version)
    firecrest_dir = os.path.join(data_dir, dirname)
    os.mkdir(firecrest_dir)
    return firecrest_dir
def make_ipar_dir(data_dir, version='1.01'):
    """
    Construct an artificial ipar parameter file and directory

    Copies the IPAR1.01.params fixture to <data_dir>/.params and makes
    sure <data_dir>/IPAR_<version> exists.

    Returns the path of the IPAR directory.
    """
    ipar1_01_file = os.path.join(TESTDATA_DIR, 'IPAR1.01.params')
    shutil.copy(ipar1_01_file, os.path.join(data_dir, '.params'))

    ipar_dir = os.path.join(data_dir, 'IPAR_%s' % (version,))
    if not os.path.exists(ipar_dir):
        os.mkdir(ipar_dir)
    # Hand the path back like the make_rta_intensities_* helpers do.
    return ipar_dir
def make_flowcell_id(runfolder_dir, flowcell_id=None):
    """
    Write <runfolder_dir>/Config/FlowcellId.xml for a simulated run

    flowcell_id defaults to '207BTAAXY' when None is given. The Config
    directory is created if it does not exist yet; an existing
    FlowcellId.xml is overwritten.
    """
    if flowcell_id is None:
        flowcell_id = '207BTAAXY'

    # NOTE(review): the element wrapping the id between the XML
    # declaration and the closing tag is assumed to be <Text> -- confirm
    # against a real FlowcellId.xml.
    config = """<?xml version="1.0"?>
<FlowcellId>
  <Text>%s</Text>
</FlowcellId>""" % (flowcell_id,)
    config_dir = os.path.join(runfolder_dir, 'Config')

    if not os.path.exists(config_dir):
        os.mkdir(config_dir)
    pathname = os.path.join(config_dir, 'FlowcellId.xml')
    # The context manager closes the file even if the write fails.
    with open(pathname, 'w') as stream:
        stream.write(config)
def make_bustard_config132(image_dir):
    """
    Install the bustard-config132.xml fixture as <image_dir>/config.xml
    """
    shutil.copy(
        os.path.join(TESTDATA_DIR, 'bustard-config132.xml'),
        os.path.join(image_dir, 'config.xml'),
    )
def make_rta_intensities_1460(data_dir, version='1.4.6.0'):
    """
    Build a simulated RTA Intensities directory under data_dir

    Ensures <data_dir>/Intensities exists and installs the
    rta_intensities_config.xml fixture there as config.xml.
    The version argument is accepted but not used.

    Returns the path of the Intensities directory.
    """
    intensities_dir = os.path.join(data_dir, 'Intensities')
    already_present = os.path.exists(intensities_dir)
    if not already_present:
        os.mkdir(intensities_dir)

    config_source = os.path.join(TESTDATA_DIR, 'rta_intensities_config.xml')
    config_target = os.path.join(intensities_dir, 'config.xml')
    shutil.copy(config_source, config_target)

    return intensities_dir
def make_rta_basecalls_1460(intensities_dir):
    """
    Construct an artificial RTA BaseCalls parameter file and directory

    Ensures <intensities_dir>/BaseCalls exists and installs the
    rta_basecalls_config.xml fixture there as config.xml.

    Returns the path of the BaseCalls directory.
    """
    basecalls_dir = os.path.join(intensities_dir, 'BaseCalls')
    if not os.path.exists(basecalls_dir):
        os.mkdir(basecalls_dir)

    param_file = os.path.join(TESTDATA_DIR, 'rta_basecalls_config.xml')
    shutil.copy(param_file, os.path.join(basecalls_dir, 'config.xml'))

    # Mirror make_rta_intensities_1460, which returns the directory it built.
    return basecalls_dir
def make_rta_intensities_1870(data_dir, version='1.8.70.0'):
    """
    Build a simulated RTA Intensities directory under data_dir

    Ensures <data_dir>/Intensities exists and installs the
    rta_intensities_config_1870.xml fixture there as config.xml.
    The version argument is accepted but not used.

    Returns the path of the Intensities directory.
    """
    intensities_dir = os.path.join(data_dir, 'Intensities')
    if os.path.exists(intensities_dir):
        pass
    else:
        os.mkdir(intensities_dir)

    shutil.copy(
        os.path.join(TESTDATA_DIR, 'rta_intensities_config_1870.xml'),
        os.path.join(intensities_dir, 'config.xml'),
    )
    return intensities_dir
def make_rta_intensities_1_10(data_dir, version='1.10.36.0'):
    """
    Build a simulated RTA Intensities directory under data_dir

    Ensures <data_dir>/Intensities exists and installs the
    rta_intensities_config_1.10.xml fixture there as config.xml.
    The version argument is accepted but not used.

    Returns the path of the Intensities directory.
    """
    target_dir = os.path.join(data_dir, 'Intensities')
    missing = not os.path.exists(target_dir)
    if missing:
        os.mkdir(target_dir)

    fixture = os.path.join(TESTDATA_DIR, 'rta_intensities_config_1.10.xml')
    installed = os.path.join(target_dir, 'config.xml')
    shutil.copy(fixture, installed)

    return target_dir
def make_rta_basecalls_1870(intensities_dir):
    """
    Construct an artificial RTA BaseCalls parameter file and directory

    Ensures <intensities_dir>/BaseCalls exists and installs the
    rta_basecalls_config_1870.xml fixture there as config.xml.

    Returns the path of the BaseCalls directory.
    """
    basecalls_dir = os.path.join(intensities_dir, 'BaseCalls')
    if not os.path.exists(basecalls_dir):
        os.mkdir(basecalls_dir)

    param_file = os.path.join(TESTDATA_DIR, 'rta_basecalls_config_1870.xml')
    shutil.copy(param_file, os.path.join(basecalls_dir, 'config.xml'))

    # Mirror make_rta_intensities_1870, which returns the directory it built.
    return basecalls_dir
def make_rta_basecalls_1_10(intensities_dir):
    """
    Construct an artificial RTA BaseCalls parameter file and directory

    Ensures <intensities_dir>/BaseCalls exists, fills it through
    make_qseqs using ABXX_BASE_CALL_INFO, then installs the
    rta_basecalls_config_1.10.xml fixture there as config.xml.

    Returns the path of the BaseCalls directory.
    """
    basecalls_dir = os.path.join(intensities_dir, 'BaseCalls')
    if not os.path.exists(basecalls_dir):
        os.mkdir(basecalls_dir)

    make_qseqs(basecalls_dir, basecall_info=ABXX_BASE_CALL_INFO)
    param_file = os.path.join(TESTDATA_DIR, 'rta_basecalls_config_1.10.xml')
    shutil.copy(param_file, os.path.join(basecalls_dir, 'config.xml'))

    # Mirror make_rta_intensities_1_10, which returns the directory it built.
    return basecalls_dir
def make_qseqs(bustard_dir, in_temp=True, basecall_info=None):
    """
    Fill bustard directory with qseq files

    One fixture qseq file is copied to s_<lane>_1_<tile>_qseq.txt for
    every lane in LANE_LIST and every tile in the chosen tile list.
    The Matrix and Phasing subdirectories are populated with
    make_matrix_dir and make_phasing_dir, and a summary fixture is
    installed as BustardSummary.xml.

    basecall_info, when given, supplies qseq_file, tile_list and
    basecall_summary attributes (see BaseCallInfo); otherwise the
    42BRJAAXX fixtures and TILE_LIST are used.
    in_temp is accepted but not used.

    Returns bustard_dir.
    """
    if basecall_info is None:
        qseq_file = '42BRJAAXX_8_1_0039_qseq.txt'
        tile_list = TILE_LIST
        summary_file = '42BRJAAXX_BustardSummary.xml'
    else:
        qseq_file = basecall_info.qseq_file
        tile_list = basecall_info.tile_list
        summary_file = basecall_info.basecall_summary

    # 42BRJ 8 1 0039 happened to be a better than usual tile, in that there
    # was actually sequence at the start
    source = os.path.join(TESTDATA_DIR, qseq_file)
    destdir = bustard_dir
    if not os.path.isdir(destdir):
        os.mkdir(destdir)

    for lane in LANE_LIST:
        for tile in tile_list:
            destination = os.path.join(bustard_dir, 's_%d_1_%04d_qseq.txt' % (lane, tile))
            shutil.copy(source, destination)

    make_matrix_dir(bustard_dir)
    make_phasing_dir(bustard_dir)

    summary_source = os.path.join(TESTDATA_DIR, summary_file)
    summary_dest = os.path.join(bustard_dir, 'BustardSummary.xml')
    shutil.copy(summary_source, summary_dest)

    return destdir
def make_scores(gerald_dir, in_temp=True):
    """
    Fill gerald directory with score temp files
    will create the directory if it doesn't exist.

    The s_1_0001_score.txt fixture is copied to
    s_<lane>_<tile>_score.txt for every lane in LANE_LIST and tile in
    TILE_LIST. With in_temp true the files go into <gerald_dir>/Temp,
    otherwise directly into gerald_dir.

    Returns the directory the score files were written to.
    """
    source = os.path.join(TESTDATA_DIR, 's_1_0001_score.txt')
    destdir = gerald_dir
    if in_temp:
        destdir = os.path.join(destdir, 'Temp')
    if not os.path.isdir(destdir):
        # makedirs so that a missing gerald_dir is created along with Temp,
        # as the docstring promises.
        os.makedirs(destdir)

    for lane in LANE_LIST:
        for tile in TILE_LIST:
            destination = os.path.join(destdir, 's_%d_%04d_score.txt' % (lane, tile))
            shutil.copy(source, destination)

    return destdir
def make_matrix_dir(bustard_dir):
    """
    Create several matrix files in <bustard_dir>/Matrix/

    The Matrix directory is created when missing, then the
    42BRJAAXX_8_02_matrix.txt fixture is copied to
    s_<lane>_02_matrix.txt for every lane in LANE_LIST.
    """
    destdir = os.path.join(bustard_dir, 'Matrix')
    if not os.path.isdir(destdir):
        os.mkdir(destdir)

    source = os.path.join(TESTDATA_DIR, '42BRJAAXX_8_02_matrix.txt')
    for lane in LANE_LIST:
        destination = os.path.join(destdir, 's_%d_02_matrix.txt' % (lane,))
        shutil.copy(source, destination)
def make_matrix(matrix_filename):
    """
    Write a fixed 4x4 frequency response matrix to matrix_filename

    Any existing file of that name is overwritten.
    """
    # NOTE(review): the four "> <base>" header lines between the title
    # comment and the numeric rows are assumed -- confirm against a
    # real pipeline matrix file.
    contents = """# Auto-generated frequency response matrix
> A
> C
> G
> T
0.77 0.15 -0.04 -0.04
0.76 1.02 -0.05 -0.06
-0.10 -0.10 1.17 -0.03
-0.13 -0.12 0.80 1.27
"""
    # The context manager closes the file even if the write fails.
    with open(matrix_filename, 'w') as stream:
        stream.write(contents)
def make_matrix_dir_rta160(bustard_dir):
    """
    Create several matrix files in <bustard_dir>/Matrix/

    The Matrix directory is created when missing, then the
    61MMFAAXX_4_1_matrix.txt fixture is copied to
    s_<lane>_1_matrix.txt for every lane in LANE_LIST.
    """
    destdir = os.path.join(bustard_dir, 'Matrix')
    if not os.path.isdir(destdir):
        os.mkdir(destdir)

    source = os.path.join(TESTDATA_DIR, '61MMFAAXX_4_1_matrix.txt')
    for lane in LANE_LIST:
        # Same names as before ('s' + '_<lane>' + '_1_matrix.txt'), built
        # in one step instead of via an intermediate fragment list.
        destination = os.path.join(destdir, 's_%d_1_matrix.txt' % (lane,))
        shutil.copy(source, destination)
def make_phasing_dir(bustard_dir):
    """
    Create several phasing files in <bustard_dir>/Phasing/

    The Phasing directory is created when missing, then the
    42BRJAAXX_8_01_phasing.xml fixture is copied to
    s_<lane>_01_phasing.xml for every lane in LANE_LIST.
    """
    destdir = os.path.join(bustard_dir, 'Phasing')
    if not os.path.isdir(destdir):
        os.mkdir(destdir)

    source = os.path.join(TESTDATA_DIR, '42BRJAAXX_8_01_phasing.xml')
    for lane in LANE_LIST:
        destination = os.path.join(destdir, 's_%d_01_phasing.xml' % (lane,))
        shutil.copy(source, destination)
def make_phasing_params(bustard_dir):
    """
    Write a params<lane>.xml phasing file into bustard_dir for each lane

    Every lane in LANE_LIST gets the same fixed Phasing and Prephasing
    values. Existing files are overwritten.
    """
    # NOTE(review): the closing </Parameters> tag and the indentation of
    # the inner elements are assumed -- confirm against a real params file.
    params = """<Parameters>
  <Phasing>0.009900</Phasing>
  <Prephasing>0.003500</Prephasing>
</Parameters>
"""
    for lane in LANE_LIST:
        pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane))
        # The context manager closes each file even if the write fails.
        with open(pathname, 'w') as stream:
            stream.write(params)
def make_gerald_config_026(gerald_dir):
    """
    Install the gerald_config_0.2.6.xml fixture as <gerald_dir>/config.xml
    """
    fixture = os.path.join(TESTDATA_DIR, 'gerald_config_0.2.6.xml')
    shutil.copy(fixture, os.path.join(gerald_dir, 'config.xml'))
def make_gerald_config_100(gerald_dir):
    """
    Install the gerald_config_1.0.xml fixture as <gerald_dir>/config.xml
    """
    fixture = os.path.join(TESTDATA_DIR, 'gerald_config_1.0.xml')
    shutil.copy(fixture, os.path.join(gerald_dir, 'config.xml'))
def make_gerald_config_1_10(gerald_dir):
    """
    Install the gerald_config_1.10.xml fixture as <gerald_dir>/config.xml
    """
    fixture = os.path.join(TESTDATA_DIR, 'gerald_config_1.10.xml')
    shutil.copy(fixture, os.path.join(gerald_dir, 'config.xml'))
def make_summary_htm_100(gerald_dir):
    """
    Install the Summary-pipeline100.htm fixture as <gerald_dir>/Summary.htm
    """
    shutil.copy(
        os.path.join(TESTDATA_DIR, 'Summary-pipeline100.htm'),
        os.path.join(gerald_dir, 'Summary.htm'),
    )
def make_summary_htm_110(gerald_dir):
    """
    Install the Summary-pipeline110.htm fixture as <gerald_dir>/Summary.htm
    """
    shutil.copy(
        os.path.join(TESTDATA_DIR, 'Summary-pipeline110.htm'),
        os.path.join(gerald_dir, 'Summary.htm'),
    )
def make_summary_paired_htm(gerald_dir):
    """
    Install the Summary-paired-pipeline110.htm fixture as
    <gerald_dir>/Summary.htm
    """
    shutil.copy(
        os.path.join(TESTDATA_DIR, 'Summary-paired-pipeline110.htm'),
        os.path.join(gerald_dir, 'Summary.htm'),
    )
def make_summary_ipar130_htm(gerald_dir):
    """
    Install the Summary-ipar130.htm fixture as <gerald_dir>/Summary.htm
    """
    summary_fixture = os.path.join(TESTDATA_DIR, 'Summary-ipar130.htm')
    summary_target = os.path.join(gerald_dir, 'Summary.htm')
    shutil.copy(summary_fixture, summary_target)
def make_summary_rta160_xml(gerald_dir):
    """
    Install the Summary-rta160.xml fixture as <gerald_dir>/Summary.xml
    """
    summary_fixture = os.path.join(TESTDATA_DIR, 'Summary-rta160.xml')
    summary_target = os.path.join(gerald_dir, 'Summary.xml')
    shutil.copy(summary_fixture, summary_target)
def make_summary_rta1_10_xml(gerald_dir):
    """
    Install the Summary-rta1.10.xml fixture as <gerald_dir>/Summary.xml
    """
    summary_fixture = os.path.join(TESTDATA_DIR, 'Summary-rta1.10.xml')
    summary_target = os.path.join(gerald_dir, 'Summary.xml')
    shutil.copy(summary_fixture, summary_target)
def make_eland_results(gerald_dir):
    """
    Write the same four-record eland result file for each lane

    Files are named s_<lane>_eland_result.txt inside gerald_dir and
    overwrite any existing file of that name.
    """
    eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D.
>HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T
>HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0
>HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T
"""
    # NOTE(review): the lanes iterated are assumed to be LANE_LIST, as in
    # the other per-lane helpers that take no lane_list argument -- confirm.
    for i in LANE_LIST:
        pathname = os.path.join(gerald_dir,
                                's_%d_eland_result.txt' % (i,))
        # The context manager closes each file even if the write fails.
        with open(pathname, 'w') as stream:
            stream.write(eland_result)
def make_eland_multi(gerald_dir, paired=False, lane_list=LANE_LIST):
    """
    Write simulated eland multi files into gerald_dir

    For every lane in lane_list:
      * paired: s_<lane>_1_eland_multi.txt and s_<lane>_2_eland_multi.txt
        are written with the first and second record sets respectively.
      * not paired: s_<lane>_eland_multi.txt is written with the first
        record set.
    Existing files are overwritten.
    """
    eland_multi = [""">HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM
>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0
>HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1,chr7.fa:22516603F1,chr9.fa:134886204R
>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample1:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample2:55269838R0
""", """>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM
>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 NNNNNNNNNNNNNNGTGGTATGGCGGTGTCTGGTCGT QC
>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0,chr7.fa:22516603F1,chr9.fa:134886204R
>HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1
>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample1:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample2:55269838R0
"""]
    # NOTE(review): the loop over lane_list and the paired/unpaired branch
    # are inferred from the two file-name patterns and the eland_multi[e-1]
    # versus eland_multi[0] indexing -- confirm.
    for i in lane_list:
        if paired:
            for e in (1, 2):
                pathname = os.path.join(gerald_dir,
                                        's_%d_%d_eland_multi.txt' % (i, e))
                # Context managers close each file even if a write fails.
                with open(pathname, 'w') as stream:
                    stream.write(eland_multi[e - 1])
        else:
            pathname = os.path.join(gerald_dir,
                                    's_%d_eland_multi.txt' % (i,))
            with open(pathname, 'w') as stream:
                stream.write(eland_multi[0])
def make_eland_export(gerald_dir, paired=False, lane_list=LANE_LIST):
    """
    Copy the casava_1.7_export.txt fixture to s_<lane>_export.txt

    One copy is made in gerald_dir for each lane in lane_list.
    The paired argument is accepted but no use of it is visible here.
    """
    source = os.path.join(TESTDATA_DIR, 'casava_1.7_export.txt')

    # NOTE(review): iterating lane_list is inferred from the signature
    # and the per-lane file name -- confirm.
    for i in lane_list:
        destination = os.path.join(gerald_dir,
                                   's_%d_export.txt' % (i,))
        shutil.copy(source, destination)
def make_scarf(gerald_dir, lane_list=LANE_LIST):
    """
    Write a five-read scarf file, s_<lane>_sequence.txt, for each lane

    Files are created in gerald_dir for every lane in lane_list and
    overwrite any existing file of that name.
    """
    # Raw string: the quality fields contain backslashes. In a normal
    # string literal "\a" and "\b" are the BEL and backspace escapes, which
    # would corrupt the qualities and leave them one character shorter
    # than their 37-base reads.
    seq = r"""HWI-EAS229_92_30VNBAAXX:1:1:0:161:NCAATTACACGACGCTAGCCCTAAAGCTATTTCGAGG:E[aaaabb^a\a_^^a[S`ba_WZUXaaaaaaUKPER
HWI-EAS229_92_30VNBAAXX:1:1:0:447:NAGATGCGCATTTGAAGTAGGAGCAAAAGATCAAGGT:EUabaab^baabaaaaaaaa^^Uaaaaa\aaaa__`a
HWI-EAS229_92_30VNBAAXX:1:1:0:1210:NATAGCCTCTATAGAAGCCACTATTATTTTTTTCTTA:EUa`]`baaaaa^XQU^a`S``S_`J_aaaaaabb^V
HWI-EAS229_92_30VNBAAXX:1:1:0:1867:NTGGAGCAGATATAAAAACAGATGGTGACGTTGAAGT:E[^UaaaUaba^aaa^aa^XV\baaLaLaaaaQVXV^
HWI-EAS229_92_30VNBAAXX:1:1:0:1898:NAGCTCGTGTCGTGAGATGTTAGGTTAAGTCCTGCAA:EK_aaaaaaaaaaaUZaaZaXM[aaaXSM\aaZ]URE
"""
    for lane in lane_list:
        pathname = os.path.join(gerald_dir, 's_%d_sequence.txt' % (lane,))
        # The context manager closes each file even if the write fails.
        with open(pathname, 'w') as stream:
            stream.write(seq)
def make_fastq(gerald_dir, lane_list=LANE_LIST):
    """
    Write a three-read fastq file, s_<lane>_sequence.txt, for each lane

    Files are created in gerald_dir for every lane in lane_list and
    overwrite any existing file of that name.
    """
    # Raw string: the first quality line contains backslashes. In a normal
    # string literal "\b" is the backspace escape, which would corrupt the
    # qualities and leave them shorter than the 37-base read.
    seq = r"""@HWI-EAS229:1:2:182:712#0/1
AAAAAAAAAAAAAAAAAAAAANAAAAAAAAAAAAAAA
+HWI-EAS229:1:2:182:712#0/1
\bab_bbaabbababbaaa]]D]bb_baabbab\baa
@HWI-EAS229:1:2:198:621#0/1
CCCCCCCCCCCCCCCCCCCCCNCCCCCCCCCCCCCCC
+HWI-EAS229:1:2:198:621#0/1
[aaaaaaa`_`aaaaaaa[`ZDZaaaaaaaaaaaaaa
@HWI-EAS229:1:2:209:1321#0/1
AAAAAAAAAAAAAAAAAAAAANAAAAAAAAAAAAAAA
+HWI-EAS229:1:2:209:1321#0/1
_bbbbbaaababaabbbbab]D]aaaaaaaaaaaaaa
"""
    for lane in lane_list:
        pathname = os.path.join(gerald_dir, 's_%d_sequence.txt' % (lane,))
        # The context manager closes each file even if the write fails.
        with open(pathname, 'w') as stream:
            stream.write(seq)
class BaseCallInfo(object):
    """Bundle the fixture choices used when mocking up base call data

    The three constructor arguments are stored unchanged as attributes
    of the same names; make_qseqs reads them when it is handed a
    basecall_info argument.
    """
    def __init__(self, qseq_file, tile_list, basecall_summary):
        # qseq_file: name of the qseq fixture to replicate
        # tile_list: tile numbers to generate files for
        # basecall_summary: name of the summary fixture to install
        self.qseq_file, self.tile_list, self.basecall_summary = (
            qseq_file, tile_list, basecall_summary)
# First generation HiSeq Flowcell: the AA01CCABXX fixtures paired with
# the HiSeq tile numbering.
ABXX_BASE_CALL_INFO = BaseCallInfo(
    qseq_file='AA01CCABXX_8_2_2207_qseq.txt',
    tile_list=HISEQ_TILE_LIST,
    basecall_summary='AA01CCABXX_BustardSummary.xml',
)