53c7301483eae6b009a3dd92b4af8c29d94c260b
[htsworkflow.git] / htsworkflow / pipelines / test / simulate_runfolder.py
1 """
2 Create simulated solexa/illumina runfolders for testing
3 """
4
5 import os
6 import shutil
7
# Directory containing this module, and the fixture directory that sits beside it.
TEST_CODE_DIR = os.path.dirname(__file__)
TESTDATA_DIR = os.path.join(TEST_CODE_DIR, 'testdata')
10  
def make_firecrest_dir(data_dir, version="1.9.2", start=1, stop=37):
    """
    Create an empty Firecrest-style directory inside data_dir.

    The directory is named 'C<start>-<stop>_Firecrest<version>_12-04-2008_diane'.
    os.mkdir is used directly, so an already existing directory (or a
    missing data_dir) raises OSError.

    Returns the path of the newly created directory.
    """
    dir_name = 'C%d-%d_Firecrest%s_12-04-2008_diane' % (start, stop, version)
    firecrest_dir = os.path.join(data_dir, dir_name)
    os.mkdir(firecrest_dir)
    return firecrest_dir
17     
def make_ipar_dir(data_dir):
    """
    Build an artificial IPAR parameter file and directory inside data_dir.

    Copies the 'IPAR1.01.params' fixture from TESTDATA_DIR to
    data_dir/.params, then makes sure data_dir/IPAR_1.01 exists.

    Returns the path of the IPAR_1.01 directory.
    """
    params_source = os.path.join(TESTDATA_DIR, 'IPAR1.01.params')
    params_target = os.path.join(data_dir, '.params')
    shutil.copy(params_source, params_target)

    ipar_dir = os.path.join(data_dir, 'IPAR_1.01')
    if os.path.exists(ipar_dir):
        # Already present; leave it untouched.
        return ipar_dir
    os.mkdir(ipar_dir)
    return ipar_dir
29
def make_flowcell_id(runfolder_dir, flowcell_id=None):
    """
    Write a Config/FlowcellId.xml file below runfolder_dir.

    runfolder_dir -- existing directory that receives the 'Config'
                     subdirectory (created here if it is missing).
    flowcell_id   -- text placed in the <Text> element; defaults to
                     '207BTAAXY' when None.

    The id is inserted verbatim (no XML escaping). Any existing
    FlowcellId.xml is overwritten. Returns None.
    """
    if flowcell_id is None:
        flowcell_id = '207BTAAXY'

    config = """<?xml version="1.0"?>
<FlowcellId>
  <Text>%s</Text>
</FlowcellId>""" % (flowcell_id,)

    config_dir = os.path.join(runfolder_dir, 'Config')
    if not os.path.exists(config_dir):
        os.mkdir(config_dir)

    pathname = os.path.join(config_dir, 'FlowcellId.xml')
    # The context manager closes the file even if write() raises.
    with open(pathname, 'w') as stream:
        stream.write(config)
46
def make_matrix(matrix_filename):
    """
    Write a fixed frequency response matrix to matrix_filename.

    The file holds a comment line, four '> <base>' header lines
    (A, C, G, T) and four rows of four numbers. An existing file is
    overwritten. Returns None.
    """
    contents = """# Auto-generated frequency response matrix
> A
> C
> G
> T
0.77 0.15 -0.04 -0.04
0.76 1.02 -0.05 -0.06
-0.10 -0.10 1.17 -0.03
-0.13 -0.12 0.80 1.27
"""
    # The context manager closes the file even if write() raises.
    with open(matrix_filename, 'w') as stream:
        stream.write(contents)
61
def make_phasing_params(bustard_dir):
    """
    Write params1.xml .. params8.xml into bustard_dir.

    Every file receives the same <Parameters> document with fixed
    Phasing (0.009900) and Prephasing (0.003500) values. Existing
    files are overwritten. Returns None.
    """
    # Identical for every lane, so build it once outside the loop.
    contents = """<Parameters>
  <Phasing>0.009900</Phasing>
  <Prephasing>0.003500</Prephasing>
</Parameters>
"""
    for lane in range(1, 9):
        pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane,))
        # The context manager closes the file even if write() raises.
        with open(pathname, 'w') as stream:
            stream.write(contents)
72
def make_gerald_config_026(gerald_dir):
    """
    Copy the 'gerald_config_0.2.6.xml' fixture to gerald_dir/config.xml.
    """
    shutil.copy(os.path.join(TESTDATA_DIR, 'gerald_config_0.2.6.xml'),
                os.path.join(gerald_dir, 'config.xml'))
77
def make_gerald_config_100(gerald_dir):
    """
    Copy the 'gerald_config_1.0.xml' fixture to gerald_dir/config.xml.
    """
    shutil.copy(os.path.join(TESTDATA_DIR, 'gerald_config_1.0.xml'),
                os.path.join(gerald_dir, 'config.xml'))
82
def make_summary_htm_100(gerald_dir):
    """
    Copy the 'Summary-pipeline100.htm' fixture to gerald_dir/Summary.htm.
    """
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-pipeline100.htm'),
                os.path.join(gerald_dir, 'Summary.htm'))
87
def make_summary_htm_110(gerald_dir):
    """
    Copy the 'Summary-pipeline110.htm' fixture to gerald_dir/Summary.htm.
    """
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-pipeline110.htm'),
                os.path.join(gerald_dir, 'Summary.htm'))
92
def make_summary_paired_htm(gerald_dir):
    """
    Copy the 'Summary-paired-pipeline110.htm' fixture to gerald_dir/Summary.htm.
    """
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-paired-pipeline110.htm'),
                os.path.join(gerald_dir, 'Summary.htm'))
97
def make_summary_ipar130_htm(gerald_dir):
    """
    Copy the 'Summary-ipar130.htm' fixture to gerald_dir/Summary.htm.
    """
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-ipar130.htm'),
                os.path.join(gerald_dir, 'Summary.htm'))
102
def make_eland_results(gerald_dir):
    """
    Write s_1_eland_result.txt .. s_8_eland_result.txt into gerald_dir.

    Every file receives the same four sample records. Existing files
    are overwritten. Returns None.
    """
    eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759    ACATAGNCACAGACATAAACATAGACATAGAC U0      1       1       3       chrUextra.fa    28189829        R       D.
>HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA  U1      0       1       0       chr2L.fa        8796855 R       DD      24T
>HWI-EAS229_24_207BTAAXX:1:7:776:582    AGCTCANCCGATCGAAAACCTCNCCAAGCAAT        NM      0       0       0
>HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA        U1      0       1       0       Lambda.fa        8796855 R       DD      24T
"""
    for lane in range(1, 9):
        pathname = os.path.join(gerald_dir,
                                's_%d_eland_result.txt' % (lane,))
        # The context manager closes the file even if write() raises.
        with open(pathname, 'w') as stream:
            stream.write(eland_result)
115
def make_eland_multi(gerald_dir, paired=False):
    """
    Write sample eland multi files for eight lanes into gerald_dir.

    paired=False -- writes s_<lane>_eland_multi.txt for lanes 1..8, each
                    holding the first sample text.
    paired=True  -- writes s_<lane>_<end>_eland_multi.txt for ends 1 and 2
                    and lanes 1..8; end 1 gets the first sample text and
                    end 2 the second.

    Existing files are overwritten. Returns None.
    """
    eland_multi = [""">HWI-EAS229_60_30DP9AAXX:1:1:1221:788   AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT      NM
>HWI-EAS229_60_30DP9AAXX:1:1:931:747    AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA   1:0:2   chr5.fa:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:1121:379   AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG   2:1:0   chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0
>HWI-EAS229_60_30DP9AAXX:1:1:892:1155   ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT   0:9:10  chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1,chr7.fa:22516603F1,chr9.fa:134886204R
>HWI-EAS229_60_30DP9AAXX:1:1:931:747    AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA   1:0:0   spike.fa/sample1:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:931:747    AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA   1:0:0   spike.fa/sample2:55269838R0
""", """>HWI-EAS229_60_30DP9AAXX:1:1:1221:788   AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT      NM
>HWI-EAS229_60_30DP9AAXX:1:1:1221:788   NNNNNNNNNNNNNNGTGGTATGGCGGTGTCTGGTCGT     QC 
>HWI-EAS229_60_30DP9AAXX:1:1:931:747    AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA   1:0:2   chr5.fa:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:1121:379   AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG   2:1:0   chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0,chr7.fa:22516603F1,chr9.fa:134886204R
>HWI-EAS229_60_30DP9AAXX:1:1:892:1155   ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT   0:9:10  chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1
>HWI-EAS229_60_30DP9AAXX:1:1:931:747    AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA   1:0:0   spike.fa/sample1:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:931:747    AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA   1:0:0   spike.fa/sample2:55269838R0
"""]
    lanes = range(1, 9)
    if paired:
        # One file per (end, lane); end N uses the N-th sample text.
        outputs = [('s_%d_%d_eland_multi.txt' % (lane, end), eland_multi[end - 1])
                   for end in (1, 2)
                   for lane in lanes]
    else:
        outputs = [('s_%d_eland_multi.txt' % (lane,), eland_multi[0])
                   for lane in lanes]

    for filename, contents in outputs:
        # The context manager closes the file even if write() raises.
        with open(os.path.join(gerald_dir, filename), 'w') as stream:
            stream.write(contents)