2 from optparse import OptionParser
4 from subprocess import Popen, PIPE
7 from htsworkflow.util.opener import autoopen
def main(cmdline=None):
    """
    Convert one srf or fastq file into one or two fastq files.

    cmdline is the argument list without the program name; when it is
    None optparse falls back to sys.argv[1:].

    With --single every read goes to one file, otherwise the reads are
    split between the --left and --right files.

    Returns 0 so the result can be passed straight to sys.exit().
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)

    # parser.error() prints the usage message and exits
    if len(args) != 1:
        parser.error("Requires one argument")

    if opts.flowcell is not None:
        header = "%s_" % (opts.flowcell,)
    else:
        header = ''

    left = None
    right = None
    try:
        if opts.single:
            left = open_write(opts.single, opts.force)
        else:
            left = open_write(opts.left, opts.force)
            right = open_write(opts.right, opts.force)

        # open the srf, fastq, or compressed fastq
        if is_srf(args[0]):
            source = srf_open(args[0], opts.cnf1)
        else:
            source = autoopen(args[0])

        if opts.single:
            convert_single_to_fastq(source, left, header)
        else:
            convert_single_to_two_fastq(source, left, right, opts.mid, header)
    finally:
        # make sure buffered output reaches disk even if conversion fails
        for stream in (left, right):
            if stream is not None:
                stream.close()

    return 0
def make_parser():
    """
    Build the command line parser for this script.

    Returns an OptionParser that understands --cnf1, --force,
    --flowcell, --left, --right, --mid and --single.
    """
    parser = OptionParser("""%prog: [options] file

file can be either a fastq file or a srf file.
You can also force the flowcell ID to be added to the header.""")
    parser.add_option('-c', '--cnf1', default=False, action="store_true",
                      help="pass -c to srf2fastq, needed for calibrated quality values")
    parser.add_option('--force', default=False, action="store_true",
                      help="overwrite existing files.")
    parser.add_option('--flowcell', default=None,
                      help="add flowcell id header to sequence")
    parser.add_option('-l', '--left', default="r1.fastq",
                      help='left side filename')
    parser.add_option('-r', '--right', default="r2.fastq",
                      help='right side filename')
    # --mid stays a string here; no type= is given to optparse
    parser.add_option('-m', '--mid', default=None,
                      help='actual sequence mid point')
    parser.add_option('-s', '--single', default=None,
                      help="single fastq target name")
    return parser
def srf_open(filename, cnf1=False):
    """
    Make a stream from srf file using srf2fastq

    filename is the srf file handed to srf2fastq.
    cnf1 adds the -c flag (calibrated quality values) when true.

    Returns the child's stdout pipe.
    """
    # NOTE(review): the command assembly and the return value were not
    # visible in the reviewed listing; they are rebuilt from the docstring
    # and the --cnf1 help text -- confirm against the full file.
    cmd = ['srf2fastq']
    if cnf1:
        cmd.append('-c')
    cmd.append(filename)

    # argument list, no shell: filename is never interpreted by a shell
    p = Popen(cmd, stdout=PIPE)
    return p.stdout
# convert_single_to_fastq writes text to target1; header defaults to ''.
# NOTE(review): this listing skips from 78 to 85 and from 86 to 91, so where
# `line` comes from (presumably iteration over instream) and how `header` is
# used are not visible here -- confirm against the full file.
78 def convert_single_to_fastq(instream, target1, header=''):
# Emit the current line minus its first character, then a line separator.
# Presumably this is a read-name line with its leading '@' dropped -- TODO confirm.
85 target1.write(line[1:])
86 target1.write(os.linesep)
# NOTE(review): the branch this comment introduces is not shown in the listing.
91 # sequence or quality data
# convert_single_to_two_fastq writes to two targets, target1 and target2.
# NOTE(review): lines are missing from this listing (95 -> 104, 113 -> 119,
# 119 -> 124), so the loop over instream and the conditions selecting each
# group of writes below are not visible -- confirm against the full file.
95 def convert_single_to_two_fastq(instream, target1, target2, mid=None, header=''):
# target1 receives `header`, then the line minus its first character,
# then a line separator.
104 target1.write(header)
105 target1.write(line[1:])
107 target1.write(os.linesep)
# target2 receives the same header and line text as target1.
110 target2.write(header)
111 target2.write(line[1:])
113 target2.write(os.linesep)
119 # sequence or quality data
# Characters before index `mid` go to target1, the remainder to target2.
# NOTE(review): mid defaults to None, and slicing with None would send the
# whole line to both targets; presumably mid is resolved to an integer in
# lines not shown here -- TODO confirm.
124 target1.write(line[:mid])
125 target1.write(os.linesep)
126 target2.write(line[mid:])
127 target2.write(os.linesep)
def is_srf(filename):
    """
    Check filename to see if it is likely to be a SRF file

    Only the first four bytes are read and compared with the "SSRF"
    magic string, so this is a quick sniff rather than a validation.

    Returns True when the file starts with SSRF, False otherwise.
    """
    # binary mode: the bytes after the magic need not be decodable text,
    # and the with-block closes the handle even if read() raises
    with open(filename, 'rb') as stream:
        header = stream.read(4)
    return header == b"SSRF"
def open_write(filename, force=False):
    """
    Open a file, but throw an exception if it already exists

    filename is the path to open for writing (text mode, truncating).
    force skips the existence check so an existing file is overwritten.

    Raises RuntimeError("<filename> exists") when the file is already
    present and force is false.
    """
    # the check and the open are separate steps, so this is a guard
    # against accidents rather than an atomic create
    if not force and os.path.exists(filename):
        raise RuntimeError("%s exists" % (filename,))

    return open(filename, 'w')
# NOTE(review): no `def` line is visible for the statements below (the
# listing skips from 146 to 149), so the enclosing function and the origin
# of `filename` are unknown -- confirm against the full file.
# Split the input path into directory and file name, then drop the extension.
149 path, name = os.path.split(filename)
150 base, ext = os.path.splitext(name)
# Target names are built from the base name only, without the directory part.
152 target1_name = base + '_r1.fastq'
153 target2_name = base + '_r2.fastq'
# Raise if either target file already exists.
155 for target_name in [target1_name, target2_name]:
# Python 2 print statement.
156 print 'target name', target_name
157 if os.path.exists(target_name):
158 raise RuntimeError("%s exists" % (target_name,))
# Open the input for reading and both targets for writing, all in text mode.
# NOTE(review): how these streams are used afterwards is not shown here.
160 instream = open(filename,'r')
161 target1 = open(target1_name,'w')
162 target2 = open(target2_name,'w')
# Script entry point: hand main()'s return value to the shell as exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))