2 Create simulated solexa/illumina runfolders for testing
4 from __future__ import print_function, unicode_literals
10 TEST_CODE_DIR = os.path.split(__file__)[0]
11 TESTDATA_DIR = os.path.join(TEST_CODE_DIR, 'testdata')
12 LANE_LIST = range(1,9)
13 TILE_LIST = range(1,101)
14 HISEQ_TILE_LIST = [1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108,
15 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208,
16 2101, 2102, 2103, 2104, 2105, 2106, 2107, 2108,
17 2201, 2202, 2203, 2204, 2205, 2206, 2207, 2208,]
19 def make_firecrest_dir(data_dir, version="1.9.2", start=1, stop=37):
20 firecrest_dir = os.path.join(data_dir,
21 'C%d-%d_Firecrest%s_12-04-2008_diane' % (start, stop, version)
23 os.mkdir(firecrest_dir)
26 def make_ipar_dir(data_dir, version='1.01'):
28 Construct an artificial ipar parameter file and directory
30 ipar1_01_file = os.path.join(TESTDATA_DIR, 'IPAR1.01.params')
31 shutil.copy(ipar1_01_file, os.path.join(data_dir, '.params'))
33 ipar_dir = os.path.join(data_dir, 'IPAR_%s' % (version,))
34 if not os.path.exists(ipar_dir):
38 def make_flowcell_id(runfolder_dir, flowcell_id=None):
39 if flowcell_id is None:
40 flowcell_id = '207BTAAXY'
42 config = """<?xml version="1.0"?>
45 </FlowcellId>""" % (flowcell_id,)
46 config_dir = os.path.join(runfolder_dir, 'Config')
48 if not os.path.exists(config_dir):
50 pathname = os.path.join(config_dir, 'FlowcellId.xml')
51 f = open(pathname,'w')
55 def make_runinfo(runfolder_dir, flowcell_id):
56 """Simulate a RunInfo.xml file created by >= RTA 1.9
58 xml = '''<?xml version="1.0"?>
59 <RunInfo xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" Version="2">
60 <Run Id="{runfolder}" Number="101">
61 <Flowcell>{flowcell}</Flowcell>
62 <Instrument>SN787</Instrument>
65 <Read Number="1" NumCycles="50" IsIndexedRead="N" />
66 <Read Number="2" NumCycles="7" IsIndexedRead="Y" />
68 <FlowcellLayout LaneCount="8" SurfaceCount="2" SwathCount="3" TileCount="8" />
73 path, runfolder = os.path.split(runfolder_dir)
74 runinfo = os.path.join(runfolder_dir, 'RunInfo.xml')
75 stream = open(runinfo, 'w')
76 stream.write(xml.format(runfolder=runfolder, flowcell=flowcell_id))
80 def make_bustard_config132(image_dir):
81 source = os.path.join(TESTDATA_DIR, 'bustard-config132.xml')
82 destination = os.path.join(image_dir, 'config.xml')
83 shutil.copy(source, destination)
85 def make_aligned_config_1_12(aligned_dir):
86 """This is rouglhly equivalent to the old gerald file"""
87 source = os.path.join(TESTDATA_DIR, '1_12', 'aligned_config_1_12.xml')
88 destination = os.path.join(aligned_dir, 'config.xml')
89 shutil.copy(source, destination)
91 def make_unaligned_config_1_12(unaligned_dir):
92 demultiplex_pairs = [ # (src,
94 (os.path.join(TESTDATA_DIR, '1_12', 'demultiplex_1.12.4.2.xml'),
95 os.path.join(unaligned_dir, 'DemultiplexConfig.xml')),
96 (os.path.join(TESTDATA_DIR, '1_12',
97 'demultiplexed_bustard_1.12.4.2.xml'),
98 os.path.join(unaligned_dir, 'DemultiplexedBustardConfig.xml')),
99 (os.path.join(TESTDATA_DIR, '1_12',
100 'demultiplexed_summary_1.12.4.2.xml'),
101 os.path.join(unaligned_dir, 'DemultiplexedBustardSummary.xml')),
103 for src, dest in demultiplex_pairs:
104 shutil.copy(src, dest)
106 def make_unaligned_status_1_12(unaligned_dir, flowcell_id):
107 basecall_status = ['All.htm', 'Demultiplex_Stats.htm', 'IVC.htm']
108 test_data_root = os.path.join(TESTDATA_DIR, '1_12', 'basecall_stats')
109 basecall_stats = os.path.join(unaligned_dir,
110 'Basecall_Stats_{0}'.format(flowcell_id))
111 os.mkdir(basecall_stats)
112 for filename in basecall_status:
113 source = os.path.join(test_data_root, filename)
114 destination = os.path.join(basecall_stats, filename)
115 shutil.copy(source, destination)
117 def make_rta_intensities_1460(data_dir, version='1.4.6.0'):
119 Construct an artificial RTA Intensities parameter file and directory
121 intensities_dir = os.path.join(data_dir, 'Intensities')
122 if not os.path.exists(intensities_dir):
123 os.mkdir(intensities_dir)
125 param_file = os.path.join(TESTDATA_DIR, 'rta_intensities_config.xml')
126 shutil.copy(param_file, os.path.join(intensities_dir, 'config.xml'))
128 return intensities_dir
130 def make_rta_basecalls_1460(intensities_dir):
132 Construct an artificial RTA Intensities parameter file and directory
134 basecalls_dir = os.path.join(intensities_dir, 'BaseCalls')
135 if not os.path.exists(basecalls_dir):
136 os.mkdir(basecalls_dir)
138 param_file = os.path.join(TESTDATA_DIR, 'rta_basecalls_config.xml')
139 shutil.copy(param_file, os.path.join(basecalls_dir, 'config.xml'))
143 def make_rta_intensities_1870(data_dir, version='1.8.70.0'):
145 Construct an artificial RTA Intensities parameter file and directory
147 intensities_dir = os.path.join(data_dir, 'Intensities')
148 if not os.path.exists(intensities_dir):
149 os.mkdir(intensities_dir)
151 param_file = os.path.join(TESTDATA_DIR, 'rta_intensities_config_1870.xml')
152 shutil.copy(param_file, os.path.join(intensities_dir, 'config.xml'))
154 return intensities_dir
156 def make_rta_intensities_1_10(data_dir, version='1.10.36.0'):
158 Construct an artificial RTA Intensities parameter file and directory
160 intensities_dir = os.path.join(data_dir, 'Intensities')
161 if not os.path.exists(intensities_dir):
162 os.mkdir(intensities_dir)
164 param_file = os.path.join(TESTDATA_DIR, 'rta_intensities_config_1.10.xml')
165 shutil.copy(param_file, os.path.join(intensities_dir, 'config.xml'))
167 return intensities_dir
169 def make_rta_intensities_1_12(data_dir, version='1.12.4.2'):
171 Construct an artificial RTA Intensities parameter file and directory
173 intensities_dir = os.path.join(data_dir, 'Intensities')
174 if not os.path.exists(intensities_dir):
175 os.mkdir(intensities_dir)
177 param_file = os.path.join(TESTDATA_DIR, '1_12',
178 'rta_intensities_config_1.12.4.2.xml')
179 shutil.copy(param_file, os.path.join(intensities_dir, 'RTAConfig.xml'))
181 return intensities_dir
183 def make_rta_basecalls_1870(intensities_dir):
185 Construct an artificial RTA Intensities parameter file and directory
187 basecalls_dir = os.path.join(intensities_dir, 'BaseCalls')
188 if not os.path.exists(basecalls_dir):
189 os.mkdir(basecalls_dir)
191 param_file = os.path.join(TESTDATA_DIR, 'rta_basecalls_config_1870.xml')
192 shutil.copy(param_file, os.path.join(basecalls_dir, 'config.xml'))
196 def make_rta_basecalls_1_10(intensities_dir):
198 Construct an artificial RTA Intensities parameter file and directory
200 basecalls_dir = os.path.join(intensities_dir, 'BaseCalls')
201 if not os.path.exists(basecalls_dir):
202 os.mkdir(basecalls_dir)
204 make_qseqs(basecalls_dir, basecall_info=ABXX_BASE_CALL_INFO)
205 param_file = os.path.join(TESTDATA_DIR, 'rta_basecalls_config_1.10.xml')
206 shutil.copy(param_file, os.path.join(basecalls_dir, 'config.xml'))
210 def make_rta_basecalls_1_12(intensities_dir):
212 Construct an artificial RTA Intensities parameter file and directory
214 basecalls_dir = os.path.join(intensities_dir, 'BaseCalls')
215 if not os.path.exists(basecalls_dir):
216 os.mkdir(basecalls_dir)
218 make_qseqs(basecalls_dir, basecall_info=ABXX_BASE_CALL_INFO)
219 param_file = os.path.join(TESTDATA_DIR, '1_12',
220 'rta_basecalls_config_1.12.4.2.xml')
221 shutil.copy(param_file, os.path.join(basecalls_dir, 'config.xml'))
226 def make_qseqs(bustard_dir, basecall_info=None):
228 Fill gerald directory with qseq files
230 if basecall_info is None:
231 qseq_file = '42BRJAAXX_8_1_0039_qseq.txt'
232 tile_list = TILE_LIST
233 summary_file = '42BRJAAXX_BustardSummary.xml'
235 qseq_file = basecall_info.qseq_file
236 tile_list = basecall_info.tile_list
237 summary_file = basecall_info.basecall_summary
239 # 42BRJ 8 1 0039 happened to be a better than usual tile, in that there
240 # was actually sequence at the start
241 source = os.path.join(TESTDATA_DIR, qseq_file)
242 destdir = bustard_dir
243 if not os.path.isdir(destdir):
246 for lane in LANE_LIST:
247 for tile in tile_list:
248 destination = os.path.join(bustard_dir, 's_%d_1_%04d_qseq.txt' % (lane, tile))
249 shutil.copy(source, destination)
251 make_matrix_dir(bustard_dir)
252 make_phasing_dir(bustard_dir)
254 summary_source = os.path.join(TESTDATA_DIR, summary_file)
255 summary_dest = os.path.join(bustard_dir, 'BustardSummary.xml')
256 shutil.copy(summary_source, summary_dest)
260 def make_scores(gerald_dir, in_temp=True):
262 Fill gerald directory with score temp files
263 will create the directory if it doesn't exist.
265 source = os.path.join(TESTDATA_DIR, 's_1_0001_score.txt')
268 destdir = os.path.join(destdir, 'Temp')
269 if not os.path.isdir(destdir):
272 for lane in LANE_LIST:
273 for tile in TILE_LIST:
274 destination = os.path.join(destdir, 's_%d_%04d_score.txt' % (lane, tile))
275 shutil.copy(source, destination)
279 def make_matrix_dir(bustard_dir):
281 Create several matrix files in <bustard_dir>/Matrix/
285 destdir = os.path.join(bustard_dir, 'Matrix')
286 if not os.path.isdir(destdir):
289 source = os.path.join(TESTDATA_DIR, '42BRJAAXX_8_02_matrix.txt')
290 for lane in LANE_LIST:
291 destination = os.path.join(destdir, 's_%d_02_matrix.txt' % ( lane, ))
292 shutil.copy(source, destination)
294 def make_matrix(matrix_filename):
295 contents = """# Auto-generated frequency response matrix
300 0.77 0.15 -0.04 -0.04
301 0.76 1.02 -0.05 -0.06
302 -0.10 -0.10 1.17 -0.03
303 -0.13 -0.12 0.80 1.27
305 f = open(matrix_filename, 'w')
309 def make_matrix_dir_rta160(bustard_dir):
311 Create several matrix files in <bustard_dir>/Matrix/
313 destdir = os.path.join(bustard_dir, 'Matrix')
314 if not os.path.isdir(destdir):
317 source = os.path.join(TESTDATA_DIR, '61MMFAAXX_4_1_matrix.txt')
318 lane_fragments = [ "_%d" % (l,) for l in LANE_LIST]
319 for fragment in lane_fragments:
320 destination = os.path.join(destdir, 's%s_1_matrix.txt' % ( fragment, ))
321 shutil.copy(source, destination)
323 def make_matrix_dir_rta_1_10(bustard_dir):
324 make_matrix_dir_rta160(bustard_dir)
326 def make_matrix_dir_rta_1_12(bustard_dir):
327 make_matrix_dir_rta160(bustard_dir)
329 def make_phasing_dir(bustard_dir):
331 Create several phasing files in <bustard_dir>/Phasing/
335 destdir = os.path.join(bustard_dir, 'Phasing')
336 if not os.path.isdir(destdir):
339 source = os.path.join(TESTDATA_DIR, '42BRJAAXX_8_01_phasing.xml')
340 for lane in LANE_LIST:
341 destination = os.path.join(destdir, 's_%d_01_phasing.xml' % ( lane, ))
342 shutil.copy(source, destination)
344 def make_phasing_params(bustard_dir):
345 for lane in LANE_LIST:
346 pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane))
347 f = open(pathname, 'w')
348 f.write("""<Parameters>
349 <Phasing>0.009900</Phasing>
350 <Prephasing>0.003500</Prephasing>
355 def make_gerald_config_026(gerald_dir):
356 source = os.path.join(TESTDATA_DIR, 'gerald_config_0.2.6.xml')
357 destination = os.path.join(gerald_dir, 'config.xml')
358 shutil.copy(source, destination)
360 def make_gerald_config_100(gerald_dir):
361 source = os.path.join(TESTDATA_DIR, 'gerald_config_1.0.xml')
362 destination = os.path.join(gerald_dir, 'config.xml')
363 shutil.copy(source, destination)
365 def make_gerald_config_1_7(gerald_dir):
366 """CASAVA 1.7 gerald config"""
367 source = os.path.join(TESTDATA_DIR, 'gerald_config_1.7.xml')
368 destination = os.path.join(gerald_dir, 'config.xml')
369 shutil.copy(source, destination)
371 def make_summary_htm_100(gerald_dir):
372 source = os.path.join(TESTDATA_DIR, 'Summary-pipeline100.htm')
373 destination = os.path.join(gerald_dir, 'Summary.htm')
374 shutil.copy(source, destination)
376 def make_summary_htm_110(gerald_dir):
377 source = os.path.join(TESTDATA_DIR, 'Summary-pipeline110.htm')
378 destination = os.path.join(gerald_dir, 'Summary.htm')
379 shutil.copy(source, destination)
381 def make_summary_paired_htm(gerald_dir):
382 source = os.path.join(TESTDATA_DIR, 'Summary-paired-pipeline110.htm')
383 destination = os.path.join(gerald_dir, 'Summary.htm')
384 shutil.copy(source, destination)
386 def make_summary_ipar130_htm(gerald_dir):
387 source = os.path.join(TESTDATA_DIR, 'Summary-ipar130.htm')
388 destination = os.path.join(gerald_dir, 'Summary.htm')
389 shutil.copy(source, destination)
391 def make_summary_rta160_xml(gerald_dir):
392 source = os.path.join(TESTDATA_DIR, 'Summary-rta160.xml')
393 destination = os.path.join(gerald_dir, 'Summary.xml')
394 shutil.copy(source, destination)
397 def make_summary_casava1_7_xml(gerald_dir):
398 source = os.path.join(TESTDATA_DIR, 'Summary-casava1.7.xml')
399 destination = os.path.join(gerald_dir, 'Summary.xml')
400 shutil.copy(source, destination)
402 def make_status_rta1_12(datadir):
403 sourcedir = os.path.join(TESTDATA_DIR, '1_12')
404 status_htm = os.path.join(sourcedir, 'Status.htm')
405 destination = os.path.join(datadir, 'Status.htm')
406 shutil.copy(status_htm, destination)
408 status_dir = os.path.join(datadir, 'Status_Files')
409 status_source_dir = os.path.join(sourcedir, 'Status_Files')
410 shutil.copytree(status_source_dir, status_dir)
412 report_source_dir = os.path.join(sourcedir, 'reports')
413 report_dir = os.path.join(datadir, 'reports')
414 shutil.copytree(report_source_dir, report_dir)
416 def make_eland_results(gerald_dir):
417 eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D.
418 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T
419 >HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0
420 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T
423 pathname = os.path.join(gerald_dir,
424 's_%d_eland_result.txt' % (i,))
425 f = open(pathname, 'w')
426 f.write(eland_result)
429 def make_eland_multi(gerald_dir, paired=False, lane_list=LANE_LIST):
430 eland_multi = [""">HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM
431 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0
432 >HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0
433 >HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1,chr7.fa:22516603F1,chr9.fa:134886204R
434 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample1:55269838R0
435 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample2:55269838R0
436 """, """>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM
437 >HWI-EAS229_60_30DP9AAXX:1:1:1221:788 NNNNNNNNNNNNNNGTGGTATGGCGGTGTCTGGTCGT QC
438 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0
439 >HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0,chr7.fa:22516603F1,chr9.fa:134886204R
440 >HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1
441 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample1:55269838R0
442 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample2:55269838R0
447 pathname = os.path.join(gerald_dir,
448 's_%d_%d_eland_multi.txt' % (i,e))
449 f = open(pathname, 'w')
450 f.write(eland_multi[e-1])
454 pathname = os.path.join(gerald_dir,
455 's_%d_eland_multi.txt' % (i,))
456 f = open(pathname, 'w')
457 f.write(eland_multi[0])
460 def make_eland_export(gerald_dir, paired=False, lane_list=LANE_LIST):
461 source = os.path.join(TESTDATA_DIR, 'casava_1.7_export.txt')
464 destination = os.path.join(gerald_dir,
465 's_%d_export.txt' % (i,))
466 shutil.copy(source, destination)
469 def make_scarf(gerald_dir, lane_list=LANE_LIST):
470 seq = """HWI-EAS229_92_30VNBAAXX:1:1:0:161:NCAATTACACGACGCTAGCCCTAAAGCTATTTCGAGG:E[aaaabb^a\a_^^a[S`ba_WZUXaaaaaaUKPER
471 HWI-EAS229_92_30VNBAAXX:1:1:0:447:NAGATGCGCATTTGAAGTAGGAGCAAAAGATCAAGGT:EUabaab^baabaaaaaaaa^^Uaaaaa\aaaa__`a
472 HWI-EAS229_92_30VNBAAXX:1:1:0:1210:NATAGCCTCTATAGAAGCCACTATTATTTTTTTCTTA:EUa`]`baaaaa^XQU^a`S``S_`J_aaaaaabb^V
473 HWI-EAS229_92_30VNBAAXX:1:1:0:1867:NTGGAGCAGATATAAAAACAGATGGTGACGTTGAAGT:E[^UaaaUaba^aaa^aa^XV\baaLaLaaaaQVXV^
474 HWI-EAS229_92_30VNBAAXX:1:1:0:1898:NAGCTCGTGTCGTGAGATGTTAGGTTAAGTCCTGCAA:EK_aaaaaaaaaaaUZaaZaXM[aaaXSM\aaZ]URE
477 pathname = os.path.join(gerald_dir, 's_%d_sequence.txt' %(l,))
478 f = open(pathname,'w')
482 def make_fastq(gerald_dir, lane_list=LANE_LIST):
483 seq = """@HWI-EAS229:1:2:182:712#0/1
484 AAAAAAAAAAAAAAAAAAAAANAAAAAAAAAAAAAAA
485 +HWI-EAS229:1:2:182:712#0/1
486 \\bab_bbaabbababbaaa]]D]bb_baabbab\baa
487 @HWI-EAS229:1:2:198:621#0/1
488 CCCCCCCCCCCCCCCCCCCCCNCCCCCCCCCCCCCCC
489 +HWI-EAS229:1:2:198:621#0/1
490 [aaaaaaa`_`aaaaaaa[`ZDZaaaaaaaaaaaaaa
491 @HWI-EAS229:1:2:209:1321#0/1
492 AAAAAAAAAAAAAAAAAAAAANAAAAAAAAAAAAAAA
493 +HWI-EAS229:1:2:209:1321#0/1
494 _bbbbbaaababaabbbbab]D]aaaaaaaaaaaaaa
497 pathname = os.path.join(gerald_dir, 's_%d_sequence.txt' %(l,))
498 f = open(pathname,'w')
502 UNALIGNED_READS = [1,2]
503 UNALIGNED_SAMPLES = [ (1, UNALIGNED_READS, '11111', None, None),
504 (2, UNALIGNED_READS, '11112', None, None),
505 (3, UNALIGNED_READS, '11113', 1, 'ATCACG'),
506 (3, UNALIGNED_READS, '11113', 2, 'CGATGT'),
507 (3, UNALIGNED_READS, '11113', 3, 'TTAGGC'),
508 (4, UNALIGNED_READS, '11114', 6, 'GCCAAT'),
509 (5, UNALIGNED_READS, '11115', 1, 'ATCACG'),
510 (5, UNALIGNED_READS, '11116', 7, 'ACTTGA'),
511 (5, UNALIGNED_READS, '11117', 9, 'GATCAG'),
512 (6, UNALIGNED_READS, '11118', 1, 'ATCACG'),
513 (7, UNALIGNED_READS, '11119', 2, 'CGATGT'),
514 (8, UNALIGNED_READS, '11120', 3, 'TTAGGC'),
515 (1, UNALIGNED_READS, None, None, None),
516 (2, UNALIGNED_READS, None, None, None),
517 (3, UNALIGNED_READS, None, None, None),
518 (4, UNALIGNED_READS, None, None, None),
519 (5, UNALIGNED_READS, None, None, None)]
522 def make_aligned_eland_export(aligned_dir, flowcell_id):
523 summary_source = os.path.join(TESTDATA_DIR, 'sample_summary_1_12.htm')
524 for lane, read, project_id, index_id, index_seq in UNALIGNED_SAMPLES:
525 paths = DemultiplexedPaths(aligned_dir,
531 paths.make_sample_dirs()
532 paths.make_summary_dirs()
533 summary_dest = os.path.join(paths.summary_dir, 'Sample_Summary.htm')
534 shutil.copy(summary_source, summary_dest)
536 body = get_aligned_sample_export(lane, index_seq)
537 for split in ['001','002']:
538 for read in UNALIGNED_READS:
539 suffix = 'R{0}_{1}_export.txt.gz'.format(read, split)
540 pathname = paths.make_test_filename(suffix)
541 stream = gzip.open(pathname, 'w')
546 def make_unaligned_fastqs_1_12(unaligned_dir, flowcell_id):
547 """Create a default mix of unaligned sample files
549 for lane, read, name, index_id, index in UNALIGNED_SAMPLES:
550 make_unaligned_fastq_sample_1_12(unaligned_dir,
558 def make_unaligned_fastq_sample_1_12(unaligned_dir,
566 paths = DemultiplexedPaths(unaligned_dir,
572 paths.make_sample_dirs()
574 sample_seq = get_unaligned_sample_fastq_data(flowcell_id, lane, index_seq)
575 for split in ['001','002']:
577 suffix = 'R{0}_{1}.fastq.gz'.format(read, split)
578 pathname = paths.make_test_filename(suffix)
579 stream = gzip.open(pathname, 'w')
580 stream.write(sample_seq)
583 sheetname = os.path.join(paths.sample_dir, 'SampleSheet.csv')
584 stream = open(sheetname, 'w')
585 stream.write('FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleProject'+os.linesep)
586 template = '{flowcell},{lane},{id},mm9,{index},Sample #{id},N,PR_indexing,Operator,{sample_project}'+os.linesep
587 stream.write(template.format(flowcell=flowcell_id,
590 index=paths.index_seq,
591 sample_project=paths.sample_project))
595 class DemultiplexedPaths(object):
596 def __init__(self, basedir, flowcell_id, lane, project_id, index_id, index_seq):
597 if lane not in LANE_LIST:
598 raise ValueError("Invalid lane ID: {0}".format(lane))
599 self.basedir = basedir
600 self.flowcell_id = flowcell_id
603 if project_id is None:
606 self.sample_id = 'lane{0}'.format(lane)
607 self.sample_project = 'Undetermined_indices'
608 self.rootname = 'lane{lane}_Undetermined_L00{lane}_'.format(
610 self.project_dir = 'Undetermined_indices'
611 self.sample_dir = 'Sample_lane{lane}'.format(lane=lane)
612 elif index_seq is None:
614 self.sample_id = project_id
615 self.sample_project = '{project_id}'.format(project_id=project_id)
616 self.rootname = '{project_id}_NoIndex_L00{lane}_'.format(
617 project_id=project_id,
619 self.project_dir = 'Project_' + self.sample_project
620 self.sample_dir = 'Sample_{project_id}'.format(
621 project_id=project_id)
623 self.index_seq = index_seq
624 self.sample_id = project_id
625 self.sample_project = '{project_id}_Index{index_id}'.format(
626 project_id=project_id,
628 self.rootname = '{project_id}_{index}_L00{lane}_'.format(
629 project_id=project_id,
632 self.project_dir = 'Project_' + self.sample_project
633 self.sample_dir = 'Sample_{project_id}'.format(
634 project_id=project_id)
636 self.project_dir = os.path.join(self.basedir, self.project_dir)
637 self.sample_dir = os.path.join(self.project_dir, self.sample_dir)
638 self.summary_dir = 'Summary_Stats_{0}'.format(self.flowcell_id)
639 self.summary_dir = os.path.join(self.project_dir, self.summary_dir)
642 def make_sample_dirs(self):
643 if not os.path.isdir(self.project_dir):
644 os.mkdir(self.project_dir)
645 if not os.path.isdir(self.sample_dir):
646 os.mkdir(self.sample_dir)
648 def make_summary_dirs(self):
649 if not os.path.isdir(self.summary_dir):
650 os.mkdir(self.summary_dir)
652 def make_test_filename(self, suffix):
653 filename = self.rootname + suffix
654 pathname = os.path.join(self.sample_dir, filename)
658 print('index seq: {0}'.format(self.index_seq))
660 print('project dir: {0}'.format(self.project_dir))
661 print('sample dir: {0}'.format(self.sample_dir))
662 print('rootname: {0}'.format(self.rootname))
663 print('path: {0}'.format(
664 os.path.join(self.project_dir,
666 self.rootname+'R1_001.fastq.gz')))
669 def get_unaligned_sample_fastq_data(flowcell_id, lane, index_seq):
670 seq = """@HWI-ST0787:101:{flowcell}:{lane}:1101:2416:3469 1:Y:0:{index}
671 TCCTTCATTCCACCGGAGTCTGTGGAATTCTCGGGTGCCAAGGAACTCCA
673 CCCFFFFFHHHHHJJJJJJJJJIJJJJJJJJJJJJJJJJIIJJIIJJJJJ
674 @HWI-ST0787:101:{flowcell}:{lane}:1101:2677:3293 1:Y:0:{index}
675 TGGAAATCCATTGGGGTTTCCCCTGGAATTCTCGGGTGCCAAGGAACTCC
677 @CCFF3BDHHHHHIIIIIHHIIIDIIIGIIIEGIIIIIIIIIIIIIIIHH
678 @HWI-ST0787:101:{flowcell}:{lane}:1101:2616:3297 1:Y:0:{index}
679 TAATACTGCCGGGTAATGATGGCTGGAATTCTCGGGTGCCAAGGAACTCC
681 CCCFFFFFHHHHHCGHJJJJJJJJJJJJJJJJJIIJJJJJJJJJIHJJJI
682 @HWI-ST0787:101:{flowcell}:{lane}:1101:2545:3319 1:N:0:{index}
683 TCCTTCATTCCACCGGAGTCTGCTGGAATTCTCGGGTGCCAAGGAACTCC
685 CCCFFFFFHHHFHJGIGHIJHIIGHIGIGIGEHFIJJJIHIJHJIIJJIH
686 """.format(flowcell=flowcell_id, lane=lane, index=index_seq)
689 def get_aligned_sample_export(lane, index_seq):
690 body = """HWI-ST0787\t102\t{lane}\t1101\t1207\t1993\t{index}\t1\tAANGGATTCGATCCGGCTTAAGAGATGAAAACCGAAAGGGCCGACCGAA\taaBS`ccceg[`ae[dRR_[[SPPPP__ececfYYWaegh^\\ZLLY\\X`\tNM\t\t\t\t\t\t
691 HWI-ST0787\t102\t{lane}\t1101\t1478\t1997\t{index}\t1\tCAAGAACCCCGGGGGGGGGGGGGCAGAGAGGGGGAATTTTTTTTTTGTT\tBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\tNM\t\t\t\t\t\t\t\t\t\t\tN
692 HWI-ST0787\t102\t{lane}\t1101\t1625\t1994\t{index}\t1\tAANAATGCTACAGAGACAAAACAAAACTGATATGAAAGTTGAGAATAAA\tB^BS\cccgegg[Q[QQQ[`egdgffbeggfgh^^YcfgfhXaHY^O^c\tchr9.fa\t67717938\tR\t99\t72
693 HWI-ST0787\t102\t{lane}\t1101\t1625\t1994\t{index}\t1\tAANAATGCTACAGAGACAAAACAAAACTGATATGAAAGTTGAGAATAAA\tB^BS\cccgegg[Q[QQQ[`egdgffbeggfgh^^YcfgfhXaHY^O^c\t3:4:3\t\t\t\t\t\t\t\t\t\t\tY
694 """.format(lane=lane, index=index_seq)
697 def print_ls_tree(root):
698 """List tree contents, useful for debugging.
700 for dirpath, dirnames, filenames in os.walk(root):
701 for filename in filenames:
702 print(os.path.join(dirpath, filename))
705 class BaseCallInfo(object):
706 """Provide customization for how to setup the base call mock data
708 def __init__(self, qseq_file, tile_list, basecall_summary):
709 self.qseq_file = qseq_file
710 self.tile_list = tile_list
711 self.basecall_summary = basecall_summary
713 # First generation HiSeq Flowcell
714 ABXX_BASE_CALL_INFO = BaseCallInfo(
715 qseq_file='AA01CCABXX_8_2_2207_qseq.txt',
716 tile_list = HISEQ_TILE_LIST,
717 basecall_summary = 'AA01CCABXX_BustardSummary.xml')