2 Create simulated solexa/illumina runfolders for testing
# Directory containing this module; the canned fixture files live in ./testdata
TEST_CODE_DIR = os.path.dirname(__file__)
TESTDATA_DIR = os.path.join(TEST_CODE_DIR, 'testdata')

# Lanes 1..8 and tiles 1..100 used when fabricating per-lane/per-tile files
LANE_LIST = range(1, 9)
TILE_LIST = range(1, 101)

# 32 tile ids: 1101-1108, 1201-1208, 2101-2108, 2201-2208
# (generator expression so no loop variable leaks into the module namespace)
HISEQ_TILE_LIST = list(
    1000 * thousands + 100 * hundreds + unit
    for thousands in (1, 2)
    for hundreds in (1, 2)
    for unit in range(1, 9)
)
def make_firecrest_dir(data_dir, version="1.9.2", start=1, stop=37):
    """
    Create a Firecrest directory inside data_dir

    The directory is named
    C<start>-<stop>_Firecrest<version>_12-04-2008_diane.  os.mkdir is used
    directly, so an already existing directory raises OSError.
    """
    dir_name = 'C%d-%d_Firecrest%s_12-04-2008_diane' % (start, stop, version)
    firecrest_dir = os.path.join(data_dir, dir_name)
    os.mkdir(firecrest_dir)
    # NOTE(review): the tail of this function is elided in the reviewed
    # listing; the return mirrors the make_rta_intensities_* helpers - confirm
    return firecrest_dir
def make_ipar_dir(data_dir, version='1.01'):
    """
    Construct an artificial ipar parameter file and directory

    Copies the IPAR1.01.params fixture to <data_dir>/.params (the same
    fixture is used whatever version is requested) and makes sure
    <data_dir>/IPAR_<version> exists.
    """
    shutil.copy(os.path.join(TESTDATA_DIR, 'IPAR1.01.params'),
                os.path.join(data_dir, '.params'))

    ipar_dir = os.path.join(data_dir, 'IPAR_%s' % (version,))
    if not os.path.exists(ipar_dir):
        os.mkdir(ipar_dir)
    # NOTE(review): return inferred from the sibling make_rta_intensities_*
    # helpers; the tail is elided in the reviewed listing - confirm
    return ipar_dir
36 def make_flowcell_id(runfolder_dir, flowcell_id=None):
37 if flowcell_id is None:
38 flowcell_id = '207BTAAXY'
40 config = """<?xml version="1.0"?>
43 </FlowcellId>""" % (flowcell_id,)
44 config_dir = os.path.join(runfolder_dir, 'Config')
46 if not os.path.exists(config_dir):
48 pathname = os.path.join(config_dir, 'FlowcellId.xml')
49 f = open(pathname,'w')
def make_bustard_config132(image_dir):
    """Copy the bustard-config132.xml fixture to <image_dir>/config.xml"""
    shutil.copy(os.path.join(TESTDATA_DIR, 'bustard-config132.xml'),
                os.path.join(image_dir, 'config.xml'))
def make_aligned_config_1_12(aligned_dir):
    """This is roughly equivalent to the old gerald file

    Copies the aligned_config_1_12.xml fixture to <aligned_dir>/config.xml.
    """
    fixture = os.path.join(TESTDATA_DIR, 'aligned_config_1_12.xml')
    shutil.copy(fixture, os.path.join(aligned_dir, 'config.xml'))
def make_unaligned_config_1_12(unaligned_dir):
    """
    Copy the demultiplexing fixture files into unaligned_dir

    Each fixture is written under its own name.  Previously all three were
    copied onto DemultiplexConfig.xml, so only the last copy survived and
    the other two files were never created.
    """
    fixtures = [  # (fixture name in TESTDATA_DIR, file name in unaligned_dir)
        ('demultiplex_1.12.4.2.xml',
         'DemultiplexConfig.xml'),
        ('demultiplexed_bustard_1.12.4.2.xml',
         'DemultiplexedBustardConfig.xml'),
        ('demultiplexed_summary_1.12.4.2.xml',
         'DemultiplexedBustardSummary.xml'),
    ]
    for src_name, dest_name in fixtures:
        shutil.copy(os.path.join(TESTDATA_DIR, src_name),
                    os.path.join(unaligned_dir, dest_name))
def make_rta_intensities_1460(data_dir, version='1.4.6.0'):
    """
    Construct an artificial RTA Intensities parameter file and directory

    Ensures <data_dir>/Intensities exists, copies the
    rta_intensities_config.xml fixture into it as config.xml and returns
    the directory path.  The version argument is accepted but not used.
    """
    target = os.path.join(data_dir, 'Intensities')
    if not os.path.exists(target):
        os.mkdir(target)

    shutil.copy(os.path.join(TESTDATA_DIR, 'rta_intensities_config.xml'),
                os.path.join(target, 'config.xml'))
    return target
def make_rta_basecalls_1460(intensities_dir):
    """
    Construct an artificial RTA BaseCalls parameter file and directory

    Ensures <intensities_dir>/BaseCalls exists and copies the
    rta_basecalls_config.xml fixture into it as config.xml.
    """
    target = os.path.join(intensities_dir, 'BaseCalls')
    if not os.path.exists(target):
        os.mkdir(target)

    shutil.copy(os.path.join(TESTDATA_DIR, 'rta_basecalls_config.xml'),
                os.path.join(target, 'config.xml'))
    # NOTE(review): return inferred from the make_rta_intensities_* helpers;
    # the tail is elided in the reviewed listing - confirm
    return target
def make_rta_intensities_1870(data_dir, version='1.8.70.0'):
    """
    Construct an artificial RTA Intensities parameter file and directory

    Ensures <data_dir>/Intensities exists, copies the
    rta_intensities_config_1870.xml fixture into it as config.xml and
    returns the directory path.  The version argument is not used.
    """
    target = os.path.join(data_dir, 'Intensities')
    if not os.path.exists(target):
        os.mkdir(target)

    fixture = os.path.join(TESTDATA_DIR, 'rta_intensities_config_1870.xml')
    shutil.copy(fixture, os.path.join(target, 'config.xml'))
    return target
def make_rta_intensities_1_10(data_dir, version='1.10.36.0'):
    """
    Construct an artificial RTA Intensities parameter file and directory

    Ensures <data_dir>/Intensities exists, copies the
    rta_intensities_config_1.10.xml fixture into it as config.xml and
    returns the directory path.  The version argument is not used.
    """
    target = os.path.join(data_dir, 'Intensities')
    if not os.path.exists(target):
        os.mkdir(target)

    fixture = os.path.join(TESTDATA_DIR, 'rta_intensities_config_1.10.xml')
    shutil.copy(fixture, os.path.join(target, 'config.xml'))
    return target
def make_rta_intensities_1_12(data_dir, version='1.12.4.2'):
    """
    Construct an artificial RTA Intensities parameter file and directory

    Ensures <data_dir>/Intensities exists, copies the
    rta_intensities_config_1.12.4.2.xml fixture into it as RTAConfig.xml
    (not config.xml) and returns the directory path.  The version argument
    is not used.
    """
    target = os.path.join(data_dir, 'Intensities')
    if not os.path.exists(target):
        os.mkdir(target)

    fixture = os.path.join(TESTDATA_DIR, 'rta_intensities_config_1.12.4.2.xml')
    shutil.copy(fixture, os.path.join(target, 'RTAConfig.xml'))
    return target
def make_rta_basecalls_1870(intensities_dir):
    """
    Construct an artificial RTA BaseCalls parameter file and directory

    Ensures <intensities_dir>/BaseCalls exists and copies the
    rta_basecalls_config_1870.xml fixture into it as config.xml.
    """
    target = os.path.join(intensities_dir, 'BaseCalls')
    if not os.path.exists(target):
        os.mkdir(target)

    shutil.copy(os.path.join(TESTDATA_DIR, 'rta_basecalls_config_1870.xml'),
                os.path.join(target, 'config.xml'))
    # NOTE(review): return inferred from the make_rta_intensities_* helpers;
    # the tail is elided in the reviewed listing - confirm
    return target
def make_rta_basecalls_1_10(intensities_dir):
    """
    Construct an artificial RTA BaseCalls parameter file and directory

    Ensures <intensities_dir>/BaseCalls exists, fills it through
    make_qseqs using ABXX_BASE_CALL_INFO, then copies the
    rta_basecalls_config_1.10.xml fixture into it as config.xml.
    """
    target = os.path.join(intensities_dir, 'BaseCalls')
    if not os.path.exists(target):
        os.mkdir(target)

    make_qseqs(target, basecall_info=ABXX_BASE_CALL_INFO)
    shutil.copy(os.path.join(TESTDATA_DIR, 'rta_basecalls_config_1.10.xml'),
                os.path.join(target, 'config.xml'))
    # NOTE(review): return inferred from the make_rta_intensities_* helpers;
    # the tail is elided in the reviewed listing - confirm
    return target
def make_rta_basecalls_1_12(intensities_dir):
    """
    Construct an artificial RTA BaseCalls parameter file and directory

    Ensures <intensities_dir>/BaseCalls exists, fills it through
    make_qseqs using ABXX_BASE_CALL_INFO, then copies the
    rta_basecalls_config_1.12.4.2.xml fixture into it as config.xml.
    """
    target = os.path.join(intensities_dir, 'BaseCalls')
    if not os.path.exists(target):
        os.mkdir(target)

    make_qseqs(target, basecall_info=ABXX_BASE_CALL_INFO)
    shutil.copy(os.path.join(TESTDATA_DIR, 'rta_basecalls_config_1.12.4.2.xml'),
                os.path.join(target, 'config.xml'))
    # NOTE(review): return inferred from the make_rta_intensities_* helpers;
    # the tail is elided in the reviewed listing - confirm
    return target
def make_qseqs(bustard_dir, basecall_info=None):
    """
    Fill a bustard directory with qseq files

    One copy of the qseq fixture is written per lane in LANE_LIST and per
    tile, followed by the Matrix and Phasing directories and
    BustardSummary.xml.  Without basecall_info the 42BRJAAXX fixtures and
    TILE_LIST are used; otherwise the fixture names and the tile list are
    read from the basecall_info attributes.
    """
    if basecall_info is not None:
        qseq_file = basecall_info.qseq_file
        tile_list = basecall_info.tile_list
        summary_file = basecall_info.basecall_summary
    else:
        qseq_file = '42BRJAAXX_8_1_0039_qseq.txt'
        tile_list = TILE_LIST
        summary_file = '42BRJAAXX_BustardSummary.xml'

    # 42BRJ 8 1 0039 happened to be a better than usual tile, in that there
    # was actually sequence at the start
    source = os.path.join(TESTDATA_DIR, qseq_file)
    if not os.path.isdir(bustard_dir):
        os.mkdir(bustard_dir)

    for lane in LANE_LIST:
        for tile in tile_list:
            qseq_name = 's_%d_1_%04d_qseq.txt' % (lane, tile)
            shutil.copy(source, os.path.join(bustard_dir, qseq_name))

    make_matrix_dir(bustard_dir)
    make_phasing_dir(bustard_dir)

    shutil.copy(os.path.join(TESTDATA_DIR, summary_file),
                os.path.join(bustard_dir, 'BustardSummary.xml'))
    # NOTE(review): the tail of this function is elided in the reviewed
    # listing; returning the directory is an assumption - confirm
    return bustard_dir
def make_scores(gerald_dir, in_temp=True):
    """
    Fill gerald directory with score temp files
    will create the directory if it doesn't exist.

    With in_temp set the files go to <gerald_dir>/Temp, otherwise directly
    into gerald_dir.  One copy of the s_1_0001_score.txt fixture is written
    for every lane in LANE_LIST and tile in TILE_LIST.
    """
    source = os.path.join(TESTDATA_DIR, 's_1_0001_score.txt')
    destdir = os.path.join(gerald_dir, 'Temp') if in_temp else gerald_dir
    if not os.path.isdir(destdir):
        os.mkdir(destdir)

    for lane in LANE_LIST:
        for tile in TILE_LIST:
            score_name = 's_%d_%04d_score.txt' % (lane, tile)
            shutil.copy(source, os.path.join(destdir, score_name))
    # NOTE(review): the tail of this function is elided in the reviewed
    # listing; returning the directory is an assumption - confirm
    return destdir
def make_matrix_dir(bustard_dir):
    """
    Create several matrix files in <bustard_dir>/Matrix/

    The 42BRJAAXX_8_02_matrix.txt fixture is copied once per lane in
    LANE_LIST as s_<lane>_02_matrix.txt; the directory is created when it
    is missing.
    """
    destdir = os.path.join(bustard_dir, 'Matrix')
    if not os.path.isdir(destdir):
        os.mkdir(destdir)

    source = os.path.join(TESTDATA_DIR, '42BRJAAXX_8_02_matrix.txt')
    for lane in LANE_LIST:
        matrix_name = 's_%d_02_matrix.txt' % (lane,)
        shutil.copy(source, os.path.join(destdir, matrix_name))
252 def make_matrix(matrix_filename):
253 contents = """# Auto-generated frequency response matrix
258 0.77 0.15 -0.04 -0.04
259 0.76 1.02 -0.05 -0.06
260 -0.10 -0.10 1.17 -0.03
261 -0.13 -0.12 0.80 1.27
263 f = open(matrix_filename, 'w')
def make_matrix_dir_rta160(bustard_dir):
    """
    Create several matrix files in <bustard_dir>/Matrix/

    The 61MMFAAXX_4_1_matrix.txt fixture is copied once per lane in
    LANE_LIST as s_<lane>_1_matrix.txt; the directory is created when it
    is missing.
    """
    destdir = os.path.join(bustard_dir, 'Matrix')
    if not os.path.isdir(destdir):
        os.mkdir(destdir)

    source = os.path.join(TESTDATA_DIR, '61MMFAAXX_4_1_matrix.txt')
    for lane in LANE_LIST:
        # same names as 's%s_1_matrix.txt' applied to the '_<lane>' fragment
        matrix_name = 's_%d_1_matrix.txt' % (lane,)
        shutil.copy(source, os.path.join(destdir, matrix_name))
def make_matrix_dir_rta_1_10(bustard_dir):
    """Delegate to make_matrix_dir_rta160; exactly the same files are produced"""
    make_matrix_dir_rta160(bustard_dir)
def make_matrix_dir_rta_1_12(bustard_dir):
    """Delegate to make_matrix_dir_rta160; exactly the same files are produced"""
    make_matrix_dir_rta160(bustard_dir)
def make_phasing_dir(bustard_dir):
    """
    Create several phasing files in <bustard_dir>/Phasing/

    The 42BRJAAXX_8_01_phasing.xml fixture is copied once per lane in
    LANE_LIST as s_<lane>_01_phasing.xml; the directory is created when it
    is missing.
    """
    destdir = os.path.join(bustard_dir, 'Phasing')
    if not os.path.isdir(destdir):
        os.mkdir(destdir)

    source = os.path.join(TESTDATA_DIR, '42BRJAAXX_8_01_phasing.xml')
    for lane in LANE_LIST:
        phasing_name = 's_%d_01_phasing.xml' % (lane,)
        shutil.copy(source, os.path.join(destdir, phasing_name))
302 def make_phasing_params(bustard_dir):
303 for lane in LANE_LIST:
304 pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane))
305 f = open(pathname, 'w')
306 f.write("""<Parameters>
307 <Phasing>0.009900</Phasing>
308 <Prephasing>0.003500</Prephasing>
def make_gerald_config_026(gerald_dir):
    """Copy the gerald_config_0.2.6.xml fixture to <gerald_dir>/config.xml"""
    shutil.copy(os.path.join(TESTDATA_DIR, 'gerald_config_0.2.6.xml'),
                os.path.join(gerald_dir, 'config.xml'))
def make_gerald_config_100(gerald_dir):
    """Copy the gerald_config_1.0.xml fixture to <gerald_dir>/config.xml"""
    shutil.copy(os.path.join(TESTDATA_DIR, 'gerald_config_1.0.xml'),
                os.path.join(gerald_dir, 'config.xml'))
def make_gerald_config_1_7(gerald_dir):
    """CASAVA 1.7 gerald config

    Copies the gerald_config_1.7.xml fixture to <gerald_dir>/config.xml.
    """
    fixture = os.path.join(TESTDATA_DIR, 'gerald_config_1.7.xml')
    shutil.copy(fixture, os.path.join(gerald_dir, 'config.xml'))
def make_summary_htm_100(gerald_dir):
    """Copy the Summary-pipeline100.htm fixture to <gerald_dir>/Summary.htm"""
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-pipeline100.htm'),
                os.path.join(gerald_dir, 'Summary.htm'))
def make_summary_htm_110(gerald_dir):
    """Copy the Summary-pipeline110.htm fixture to <gerald_dir>/Summary.htm"""
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-pipeline110.htm'),
                os.path.join(gerald_dir, 'Summary.htm'))
def make_summary_paired_htm(gerald_dir):
    """Copy the Summary-paired-pipeline110.htm fixture to <gerald_dir>/Summary.htm"""
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-paired-pipeline110.htm'),
                os.path.join(gerald_dir, 'Summary.htm'))
def make_summary_ipar130_htm(gerald_dir):
    """Copy the Summary-ipar130.htm fixture to <gerald_dir>/Summary.htm"""
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-ipar130.htm'),
                os.path.join(gerald_dir, 'Summary.htm'))
def make_summary_rta160_xml(gerald_dir):
    """Copy the Summary-rta160.xml fixture to <gerald_dir>/Summary.xml"""
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-rta160.xml'),
                os.path.join(gerald_dir, 'Summary.xml'))
def make_summary_casava1_7_xml(gerald_dir):
    """Copy the Summary-casava1.7.xml fixture to <gerald_dir>/Summary.xml"""
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-casava1.7.xml'),
                os.path.join(gerald_dir, 'Summary.xml'))
361 def make_eland_results(gerald_dir):
362 eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D.
363 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T
364 >HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0
365 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T
368 pathname = os.path.join(gerald_dir,
369 's_%d_eland_result.txt' % (i,))
370 f = open(pathname, 'w')
371 f.write(eland_result)
374 def make_eland_multi(gerald_dir, paired=False, lane_list=LANE_LIST):
375 eland_multi = [""">HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM
376 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0
377 >HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0
378 >HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1,chr7.fa:22516603F1,chr9.fa:134886204R
379 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample1:55269838R0
380 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample2:55269838R0
381 """, """>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM
382 >HWI-EAS229_60_30DP9AAXX:1:1:1221:788 NNNNNNNNNNNNNNGTGGTATGGCGGTGTCTGGTCGT QC
383 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0
384 >HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0,chr7.fa:22516603F1,chr9.fa:134886204R
385 >HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1
386 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample1:55269838R0
387 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample2:55269838R0
392 pathname = os.path.join(gerald_dir,
393 's_%d_%d_eland_multi.txt' % (i,e))
394 f = open(pathname, 'w')
395 f.write(eland_multi[e-1])
399 pathname = os.path.join(gerald_dir,
400 's_%d_eland_multi.txt' % (i,))
401 f = open(pathname, 'w')
402 f.write(eland_multi[0])
def make_eland_export(gerald_dir, paired=False, lane_list=LANE_LIST):
    """
    Copy the casava_1.7_export.txt fixture into gerald_dir once per lane

    Files are named s_<lane>_export.txt for each lane in lane_list.
    NOTE(review): the loop header is elided in the reviewed listing and
    paired is not referenced by any visible line - confirm.
    """
    source = os.path.join(TESTDATA_DIR, 'casava_1.7_export.txt')

    for lane in lane_list:
        export_name = 's_%d_export.txt' % (lane,)
        shutil.copy(source, os.path.join(gerald_dir, export_name))
414 def make_scarf(gerald_dir, lane_list=LANE_LIST):
415 seq = """HWI-EAS229_92_30VNBAAXX:1:1:0:161:NCAATTACACGACGCTAGCCCTAAAGCTATTTCGAGG:E[aaaabb^a\a_^^a[S`ba_WZUXaaaaaaUKPER
416 HWI-EAS229_92_30VNBAAXX:1:1:0:447:NAGATGCGCATTTGAAGTAGGAGCAAAAGATCAAGGT:EUabaab^baabaaaaaaaa^^Uaaaaa\aaaa__`a
417 HWI-EAS229_92_30VNBAAXX:1:1:0:1210:NATAGCCTCTATAGAAGCCACTATTATTTTTTTCTTA:EUa`]`baaaaa^XQU^a`S``S_`J_aaaaaabb^V
418 HWI-EAS229_92_30VNBAAXX:1:1:0:1867:NTGGAGCAGATATAAAAACAGATGGTGACGTTGAAGT:E[^UaaaUaba^aaa^aa^XV\baaLaLaaaaQVXV^
419 HWI-EAS229_92_30VNBAAXX:1:1:0:1898:NAGCTCGTGTCGTGAGATGTTAGGTTAAGTCCTGCAA:EK_aaaaaaaaaaaUZaaZaXM[aaaXSM\aaZ]URE
422 pathname = os.path.join(gerald_dir, 's_%d_sequence.txt' %(l,))
423 f = open(pathname,'w')
427 def make_fastq(gerald_dir, lane_list=LANE_LIST):
428 seq = """@HWI-EAS229:1:2:182:712#0/1
429 AAAAAAAAAAAAAAAAAAAAANAAAAAAAAAAAAAAA
430 +HWI-EAS229:1:2:182:712#0/1
431 \\bab_bbaabbababbaaa]]D]bb_baabbab\baa
432 @HWI-EAS229:1:2:198:621#0/1
433 CCCCCCCCCCCCCCCCCCCCCNCCCCCCCCCCCCCCC
434 +HWI-EAS229:1:2:198:621#0/1
435 [aaaaaaa`_`aaaaaaa[`ZDZaaaaaaaaaaaaaa
436 @HWI-EAS229:1:2:209:1321#0/1
437 AAAAAAAAAAAAAAAAAAAAANAAAAAAAAAAAAAAA
438 +HWI-EAS229:1:2:209:1321#0/1
439 _bbbbbaaababaabbbbab]D]aaaaaaaaaaaaaa
442 pathname = os.path.join(gerald_dir, 's_%d_sequence.txt' %(l,))
443 f = open(pathname,'w')
UNALIGNED_READS = [1, 2]

# Each entry is unpacked by the sample builders as
# (lane, reads, project id, index id, index sequence).
# The trailing entries with no project id cover lanes 1-5.
UNALIGNED_SAMPLES = [
    (1, UNALIGNED_READS, '11111', None, None),
    (2, UNALIGNED_READS, '11112', None, None),
    (3, UNALIGNED_READS, '11113', 1, 'ATCACG'),
    (3, UNALIGNED_READS, '11113', 2, 'CGATGT'),
    (3, UNALIGNED_READS, '11113', 3, 'TTAGGC'),
    (4, UNALIGNED_READS, '11114', 6, 'GCCAAT'),
    (5, UNALIGNED_READS, '11115', 1, 'ATCACG'),
    (5, UNALIGNED_READS, '11116', 7, 'ACTTGA'),
    (5, UNALIGNED_READS, '11117', 9, 'GATCAG'),
    (6, UNALIGNED_READS, '11118', 1, 'ATCACG'),
    (7, UNALIGNED_READS, '11119', 2, 'CGATGT'),
    (8, UNALIGNED_READS, '11120', 3, 'TTAGGC'),
] + list(
    (lane_no, UNALIGNED_READS, None, None, None) for lane_no in range(1, 6)
)
467 def make_aligned_eland_export(aligned_dir, flowcell_id):
468 summary_source = os.path.join(TESTDATA_DIR, 'sample_summary_1_12.htm')
469 for lane, read, project_id, index_id, index_seq in UNALIGNED_SAMPLES:
470 paths = DemultiplexedPaths(aligned_dir,
476 paths.make_sample_dirs()
477 paths.make_summary_dirs()
478 summary_dest = os.path.join(paths.summary_dir, 'Sample_Summary.htm')
479 shutil.copy(summary_source, summary_dest)
481 body = get_unaligned_sample_export(lane, index_seq)
482 for split in ['001','002']:
483 for read in UNALIGNED_READS:
484 suffix = 'R{0}_{1}_export.txt.gz'.format(read, split)
485 pathname = paths.make_test_filename(suffix)
486 stream = gzip.open(pathname, 'w')
491 def make_unaligned_fastqs_1_12(unaligned_dir, flowcell_id):
492 """Create a default mix of unaligned sample files
494 for lane, read, name, index_id, index in UNALIGNED_SAMPLES:
495 make_unaligned_fastq_sample_1_12(unaligned_dir,
503 def make_unaligned_fastq_sample_1_12(unaligned_dir,
511 paths = DemultiplexedPaths(unaligned_dir,
517 paths.make_sample_dirs()
519 sample_seq = get_unaligned_sample_fastq_data(flowcell_id, lane, index_seq)
520 for split in ['001','002']:
522 suffix = 'R{0}_{1}.fastq.gz'.format(read, split)
523 pathname = paths.make_test_filename(suffix)
524 stream = gzip.open(pathname, 'w')
525 stream.write(sample_seq)
528 sheetname = os.path.join(paths.sample_dir, 'SampleSheet.csv')
529 stream = open(sheetname, 'w')
530 stream.write('FCID,Lane,SampleID,SampleRef,Index,Description,Control,Recipe,Operator,SampleProject'+os.linesep)
531 template = '{flowcell},{lane},{id},mm9,{index},Sample #{id},N,PR_indexing,Operator,{sample_project}'+os.linesep
532 stream.write(template.format(flowcell=flowcell_id,
535 index=paths.index_seq,
536 sample_project=paths.sample_project))
540 class DemultiplexedPaths(object):
541 def __init__(self, basedir, flowcell_id, lane, project_id, index_id, index_seq):
542 if lane not in LANE_LIST:
543 raise ValueError("Invalid lane ID: {0}".format(lane))
544 self.basedir = basedir
545 self.flowcell_id = flowcell_id
548 if project_id is None:
551 self.sample_id = 'lane{0}'.format(lane)
552 self.sample_project = 'Undetermined_indices'
553 self.rootname = 'lane{lane}_Undetermined_L00{lane}_'.format(
555 self.project_dir = 'Undetermined_indices'
556 self.sample_dir = 'Sample_lane{lane}'.format(lane=lane)
557 elif index_seq is None:
559 self.sample_id = project_id
560 self.sample_project = '{project_id}'.format(project_id=project_id)
561 self.rootname = '{project_id}_NoIndex_L00{lane}_'.format(
562 project_id=project_id,
564 self.project_dir = 'Project_' + self.sample_project
565 self.sample_dir = 'Sample_{project_id}'.format(
566 project_id=project_id)
568 self.index_seq = index_seq
569 self.sample_id = project_id
570 self.sample_project = '{project_id}_Index{index_id}'.format(
571 project_id=project_id,
573 self.rootname = '{project_id}_{index}_L00{lane}_'.format(
574 project_id=project_id,
577 self.project_dir = 'Project_' + self.sample_project
578 self.sample_dir = 'Sample_{project_id}'.format(
579 project_id=project_id)
581 self.project_dir = os.path.join(self.basedir, self.project_dir)
582 self.sample_dir = os.path.join(self.project_dir, self.sample_dir)
583 self.summary_dir = 'Summary_Stats_{0}'.format(self.flowcell_id)
584 self.summary_dir = os.path.join(self.project_dir, self.summary_dir)
585 print "HI:", self.summary_dir
def make_sample_dirs(self):
    """Create self.project_dir and then self.sample_dir, skipping any that already exist"""
    for needed in (self.project_dir, self.sample_dir):
        if not os.path.isdir(needed):
            os.mkdir(needed)
def make_summary_dirs(self):
    """Create self.summary_dir unless it already exists

    The leftover "HI:" debug print was removed: it only added noise to
    test output and was a Python 2 print statement.
    """
    if not os.path.isdir(self.summary_dir):
        os.mkdir(self.summary_dir)
def make_test_filename(self, suffix):
    """Return the path of the file named self.rootname + suffix inside self.sample_dir"""
    return os.path.join(self.sample_dir, self.rootname + suffix)
604 print ('index seq: {0}'.format(self.index_seq))
606 print ('project dir: {0}'.format(self.project_dir))
607 print ('sample dir: {0}'.format(self.sample_dir))
608 print ('rootname: {0}'.format(self.rootname))
609 print ('path: {0}'.format(
610 os.path.join(self.project_dir,
612 self.rootname+'R1_001.fastq.gz')))
615 def get_unaligned_sample_fastq_data(flowcell_id, lane, index_seq):
616 seq = """@HWI-ST0787:101:{flowcell}:{lane}:1101:2416:3469 1:Y:0:{index}
617 TCCTTCATTCCACCGGAGTCTGTGGAATTCTCGGGTGCCAAGGAACTCCA
619 CCCFFFFFHHHHHJJJJJJJJJIJJJJJJJJJJJJJJJJIIJJIIJJJJJ
620 @HWI-ST0787:101:{flowcell}:{lane}:1101:2677:3293 1:Y:0:{index}
621 TGGAAATCCATTGGGGTTTCCCCTGGAATTCTCGGGTGCCAAGGAACTCC
623 @CCFF3BDHHHHHIIIIIHHIIIDIIIGIIIEGIIIIIIIIIIIIIIIHH
624 @HWI-ST0787:101:{flowcell}:{lane}:1101:2616:3297 1:Y:0:{index}
625 TAATACTGCCGGGTAATGATGGCTGGAATTCTCGGGTGCCAAGGAACTCC
627 CCCFFFFFHHHHHCGHJJJJJJJJJJJJJJJJJIIJJJJJJJJJIHJJJI
628 @HWI-ST0787:101:{flowcell}:{lane}:1101:2545:3319 1:N:0:{index}
629 TCCTTCATTCCACCGGAGTCTGCTGGAATTCTCGGGTGCCAAGGAACTCC
631 CCCFFFFFHHHFHJGIGHIJHIIGHIGIGIGEHFIJJJIHIJHJIIJJIH
632 """.format(flowcell=flowcell_id, lane=lane, index=index_seq)
635 def get_unaligned_sample_export(lane, index_seq):
636 body = """HWI-ST0787\t102\t{lane}\t1101\t1207\t1993\t{index}\t1\tAANGGATTCGATCCGGCTTAAGAGATGAAAACCGAAAGGGCCGACCGAA\taaBS`ccceg[`ae[dRR_[[SPPPP__ececfYYWaegh^\\ZLLY\\X`\tNM\t\t\t\t\t\t
637 HWI-ST0787\t102 {lane} 1101 1478 1997 {index} 1 CAAGAACCCCGGGGGGGGGGGGGCAGAGAGGGGGAATTTTTTTTTTGTT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB NM N
638 HWI-ST0787 102 {lane} 1101 1625 1994 {index} 1 AANAATGCTACAGAGACAAAACAAAACTGATATGAAAGTTGAGAATAAA \^BS\cccgegg[Q[QQQ[`egdgffbeggfgh^^YcfgfhXaHY^O^c chrII.fa
639 """.format(lane=lane, index=index_seq)
643 for dirpath, dirnames, filenames in os.walk(root):
644 for filename in filenames:
645 print os.path.join(dirpath, filename)
class BaseCallInfo(object):
    """Provide customization for how to setup the base call mock data

    Plain value holder; make_qseqs reads these attributes:
      qseq_file        -- name of the qseq fixture file
      tile_list        -- tiles to fabricate a qseq file for
      basecall_summary -- name of the Bustard summary fixture file
    """
    def __init__(self, qseq_file, tile_list, basecall_summary):
        (self.qseq_file,
         self.tile_list,
         self.basecall_summary) = (qseq_file, tile_list, basecall_summary)
# First generation HiSeq Flowcell
# (positional order: qseq fixture, tile list, Bustard summary fixture)
ABXX_BASE_CALL_INFO = BaseCallInfo(
    'AA01CCABXX_8_2_2207_qseq.txt',
    HISEQ_TILE_LIST,
    'AA01CCABXX_BustardSummary.xml',
)