Make it easier to run srf2named_fastq.py
[htsworkflow.git] / scripts / srf2named_fastq.py
1 #!/usr/bin/env python
2 from optparse import OptionParser
3 import os
4 from subprocess import Popen, PIPE
5 import sys
6
7 from htsworkflow.util.opener import autoopen
8
9
def main(cmdline=None):
    """
    Command line entry point: rename/split reads from one input file.

    cmdline -- list of command line arguments (without the program name);
               handed to the option parser's parse_args().

    Exactly one positional argument, the input file, is required; anything
    else is reported through parser.error().  When --single is given the
    reads are copied to that one file, otherwise every read is split into
    the --left and --right files.

    Returns 0 once the conversion has finished.
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)

    if len(args) != 1:
        parser.error("Requires one argument")

    if opts.flowcell is not None:
        header = "%s_" % (opts.flowcell,)
    else:
        header = ''

    # Every output stream we manage to open is remembered here so that it
    # is flushed and closed even when the conversion fails part way through.
    outputs = []
    try:
        if opts.single:
            left = open_write(opts.single, opts.force)
            outputs.append(left)
        else:
            left = open_write(opts.left, opts.force)
            outputs.append(left)
            right = open_write(opts.right, opts.force)
            outputs.append(right)

        # open the srf, fastq, or compressed fastq
        if is_srf(args[0]):
            source = srf_open(args[0], opts.cnf1)
        else:
            source = autoopen(args[0])

        if opts.single:
            convert_single_to_fastq(source, left, header)
        else:
            convert_single_to_two_fastq(source, left, right, opts.mid, header)
    finally:
        for stream in outputs:
            stream.close()

    return 0
40
def make_parser():
    """
    Build the OptionParser describing this script's command line.

    The options are declared in a table and registered in order, so the
    generated --help output lists them in the order given below.
    """
    usage = ("%prog: [options] file\n"
             "\n"
             "file can be either a fastq file or a srf file.\n"
             "You can also force the flowcell ID to be added to the header.")

    # (option strings, keyword arguments for add_option)
    option_table = [
        (('-c', '--cnf1'),
         {'default': False, 'action': "store_true",
          'help': "pass -c to srf2fastq, needed for calibrated quality values"}),
        (('--force',),
         {'default': False, 'action': "store_true",
          'help': "overwrite existing files."}),
        (('--flowcell',),
         {'default': None,
          'help': "add flowcell id header to sequence"}),
        (('-l', '--left'),
         {'default': "r1.fastq",
          'help': 'left side filename'}),
        (('-r', '--right'),
         {'default': "r2.fastq",
          'help': 'right side filename'}),
        (('-m', '--mid'),
         {'default': None,
          'help': 'actual sequence mid point'}),
        (('-s', '--single'),
         {'default': None,
          'help': "single fastq target name"}),
    ]

    parser = OptionParser(usage)
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)
    return parser
62
63
def srf_open(filename, cnf1=False):
    """
    Run srf2fastq on filename and return the pipe carrying its output.

    filename -- path handed to srf2fastq as its final argument
    cnf1     -- when true, '-c' is passed to srf2fastq ahead of the filename

    Returns the stdout stream of the spawned srf2fastq process.
    """
    extra_flags = ['-c'] if cnf1 else []
    command = ['srf2fastq'] + extra_flags + [filename]

    return Popen(command, stdout=PIPE).stdout
76     
77
def convert_single_to_fastq(instream, target1, header=''):
    """
    Copy a FASTQ stream to target1, inserting header into every read name.

    instream -- iterable yielding the text lines of a FASTQ stream
    target1  -- writable stream receiving the renamed records
    header   -- text inserted directly after the '@' of each read name

    The position inside the current record is tracked rather than looking
    only at the first character of each line: '@' and '+' are legal
    quality characters, so a quality line may start with either of them
    and must not be mistaken for a read name or a separator.
    """
    want_name, in_sequence, in_quality = range(3)
    state = want_name
    seq_len = 0
    qual_len = 0

    for line in instream:
        # sequence header
        if state == want_name and line.startswith('@'):
            # '\n' instead of os.linesep: a stream opened in text mode does
            # its own newline translation, so os.linesep would be doubled
            # up on platforms where it is not '\n'.
            target1.write('@' + header + line.strip()[1:] + '\n')
            state = in_sequence
            seq_len = 0
            continue

        # separator, sequence and quality lines are copied through untouched
        target1.write(line)

        if state == in_sequence:
            if line.startswith('+'):
                qual_len = 0
                # a record without any bases has no quality data to wait for
                state = in_quality if seq_len > 0 else want_name
            else:
                seq_len += len(line.strip())
        elif state == in_quality:
            qual_len += len(line.strip())
            # the record ends once there is one quality value per base
            if qual_len >= seq_len:
                state = want_name
94         
def convert_single_to_two_fastq(instream, target1, target2, mid=None, header=''):
    """
    Split every read of a FASTQ stream into two mates.

    instream -- iterable yielding the text lines of a FASTQ stream
    target1  -- writable stream for the first part of each read; read
                names get a '/1' suffix
    target2  -- writable stream for the remainder of each read; read
                names get a '/2' suffix
    mid      -- index at which sequence and quality lines are cut; anything
                int() accepts.  When None, half the length of the first
                data line seen is used for the whole stream.
    header   -- text inserted directly after the '@' of each read name

    The position inside the current record is tracked rather than looking
    only at the first character of each line: '@' and '+' are legal
    quality characters, so a quality line may start with either of them.
    """
    if mid is not None:
        mid = int(mid)

    want_name, in_sequence, in_quality = range(3)
    state = want_name
    seq_len = 0
    qual_len = 0

    for line in instream:
        # sequence header
        if state == want_name and line.startswith('@'):
            name = line.strip()[1:]
            # '\n' instead of os.linesep: a stream opened in text mode does
            # its own newline translation.
            target1.write('@' + header + name + '/1\n')
            target2.write('@' + header + name + '/2\n')
            state = in_sequence
            seq_len = 0
            continue

        # quality header
        if state == in_sequence and line.startswith('+'):
            target1.write(line)
            target2.write(line)
            qual_len = 0
            # a record without any bases has no quality data to wait for
            state = in_quality if seq_len > 0 else want_name
            continue

        # sequence or quality data
        data = line.strip()
        if mid is None:
            # floor division keeps the cut point an integer on Python 3 too
            mid = len(data) // 2
        target1.write(data[:mid] + '\n')
        target2.write(data[mid:] + '\n')

        if state == in_sequence:
            seq_len += len(data)
        elif state == in_quality:
            qual_len += len(data)
            # the record ends once there is one quality value per base
            if qual_len >= seq_len:
                state = want_name
128
def is_srf(filename):
    """
    Check filename to see if it is likely to be a SRF file

    Returns True when the first four bytes of the file are "SSRF".

    The file is read in binary mode: the candidates are binary SRF files or
    possibly compressed fastq files, and decoding their leading bytes as
    text can fail before the comparison is ever made.
    """
    with open(filename, 'rb') as stream:
        magic = stream.read(4)
    return magic == b"SSRF"
137
def open_write(filename, force=False):
    """
    Return filename opened for writing in text mode.

    filename -- path of the file to create
    force    -- when true an existing file is silently overwritten

    Raises RuntimeError if the path already exists and force is false.
    """
    refuse_overwrite = not force
    if refuse_overwrite and os.path.exists(filename):
        raise RuntimeError("%s exists" % (filename,))

    return open(filename, 'w')
147
def foo():
    """
    Derive '<base>_r1.fastq' / '<base>_r2.fastq' names from ``filename``,
    refuse to continue if either exists, then open the input and both
    targets.

    NOTE(review): this looks like leftover scratch code -- nothing in this
    file as shown calls it.  ``filename`` is neither a parameter nor a name
    assigned at module level here, so a call would raise NameError unless
    it is defined elsewhere; confirm, and consider removing the function.
    """
    # split "dir/name.ext" into its directory, base name and extension
    path, name = os.path.split(filename)
    base, ext = os.path.splitext(name)

    # target names carry no directory part, so they are relative paths
    target1_name = base + '_r1.fastq'
    target2_name = base + '_r2.fastq'

    # never overwrite an existing target
    for target_name in [target1_name, target2_name]:
        print 'target name', target_name
        if os.path.exists(target_name):
            raise RuntimeError("%s exists" % (target_name,))

    # NOTE(review): the three streams are opened but never used, returned
    # or closed within this function.
    instream = open(filename,'r')
    target1 = open(target1_name,'w')
    target2 = open(target2_name,'w')
163
164
165
# Script entry point: run main() on the command line arguments (program
# name stripped) and use its return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)