3 from optparse import OptionParser
5 from subprocess import Popen, PIPE
8 from htsworkflow.util.opener import autoopen
# Command-line entry point: parses the options, opens the output file(s) and
# the input stream, then hands them to one of the two converters.
# NOTE(review): the embedded line numbers in this listing skip values
# (13 -> 16, 16 -> 19, 24 -> 29, ...) and the indentation is gone, so the
# conditionals that select between the alternatives below are not visible
# here -- TODO restore them from a pristine copy of the file.
11 def main(cmdline=None):
12 parser = make_parser()
13 opts, args = parser.parse_args(cmdline)
# presumably guarded by a check on the number of positional arguments -- TODO confirm
16 parser.error("Requires one argument")
# two alternative log levels; the selecting condition is not visible
# (presumably the -v/--verbose option) -- TODO confirm
19 logging.basicConfig(level=logging.INFO)
21 logging.basicConfig(level=logging.WARN)
# when a flowcell id was supplied, build the "<flowcell>_" prefix handed to the converters
23 if opts.flowcell is not None:
24 header = "%s_" % (opts.flowcell,)
# output targets: either the one --single file, or the --left/--right pair;
# open_write receives opts.force for each of them
29 left = open_write(opts.single, opts.force)
31 left = open_write(opts.left, opts.force)
32 right = open_write(opts.right, opts.force)
34 # open the srf, fastq, or compressed fastq
# two alternative ways of opening the first positional argument; the test that
# chooses between them is not visible (presumably is_srf) -- TODO confirm
36 source = srf_open(args[0], opts.cnf1)
38 source = autoopen(args[0])
# one-output conversion versus the two-output split (which also receives opts.mid)
41 convert_single_to_fastq(source, left, header)
43 convert_single_to_two_fastq(source, left, right, opts.mid, header)
# Construction of the OptionParser used by main(), which obtains it by
# calling make_parser().
# NOTE(review): the enclosing `def` line and any closing `return` are not
# present in this listing (the embedded numbering jumps from 43 to 48 and from
# 68 to 72) -- TODO restore them from a pristine copy of the file.
48 parser = OptionParser("""%prog: [options] file
50 file can be either a fastq file or a srf file.
51 You can also force the flowcell ID to be added to the header.""")
# -c/--cnf1 is a boolean flag; main() forwards its value to srf_open()
52 parser.add_option('-c','--cnf1',default=False, action="store_true",
53 help="pass -c to srf2fastq, needed for calibrated quality values"
55 parser.add_option('--force', default=False, action="store_true",
56 help="overwrite existing files.")
57 parser.add_option('--flowcell', default=None,
58 help="add flowcell id header to sequence")
59 parser.add_option('-l','--left', default="r1.fastq",
60 help='left side filename')
61 parser.add_option('-r','--right', default="r2.fastq",
62 help='right side filename')
# NOTE(review): no type= is given, so optparse stores --mid as a string, while
# convert_single_to_two_fastq uses mid as a slice index (line[:mid]); confirm
# that it is converted to an int on a line not shown in this listing.
63 parser.add_option('-m','--mid', default=None,
64 help='actual sequence mid point')
65 parser.add_option('-s','--single', default=None,
66 help="single fastq target name")
67 parser.add_option('-v', '--verbose', default=False, action="store_true",
68 help="show information about what we're doing.")
# Runs an external command on an SRF file so that its output can be read as a
# stream (per the original docstring text on the line numbered 74).
# NOTE(review): the docstring delimiters, the construction of `cmd` and the
# return statement are not present in this listing (numbering jumps 74 -> 82
# and stops at 83). Presumably `cmd` is the srf2fastq argument list, with -c
# added when cnf1 is true, and the pipe's stdout is returned -- TODO confirm.
72 def srf_open(filename, cnf1=False):
74 Make a stream from srf file using srf2fastq
# log the space-joined command at INFO level before running it
82 logging.info('srf command: %s' % (" ".join(cmd),))
# start the command with its standard output connected to a pipe
83 p = Popen(cmd, stdout=PIPE)
# Writes converted output to target1; main() calls this when producing a
# single output file.
# NOTE(review): only fragments of the body survive in this listing (numbering
# jumps 87 -> 94 -> 100). The loop over instream, the tests that classify each
# line, and any use of `header` are not visible -- TODO restore from a
# pristine copy.
87 def convert_single_to_fastq(instream, target1, header=''):
# emit the line without its first character, then the platform line separator
94 target1.write(line[1:])
95 target1.write(os.linesep)
100 # sequence or quality data
# Splits the lines read from instream across two outputs, target1 and target2.
# On the visible lines, some lines go to a target as `header` followed by the
# line minus its first character, and sequence/quality lines are cut at index
# `mid`: the part before mid goes to target1, the rest to target2.
# NOTE(review): the tests that classify each line, and whatever happens when
# mid is None, are not present in this listing (numbering jumps 108 -> 113,
# 122 -> 128 -> 133) -- TODO restore from a pristine copy.
104 def convert_single_to_two_fastq(instream, target1, target2, mid=None, header=''):
# walk the input one line at a time
108 for line in instream:
# target1: caller-supplied prefix, then the line without its first character
113 target1.write(header)
114 target1.write(line[1:])
116 target1.write(os.linesep)
# target2: same prefix and same line content as target1 on the visible lines
119 target2.write(header)
120 target2.write(line[1:])
122 target2.write(os.linesep)
128 # sequence or quality data
# first part of the line (up to index mid) goes to target1 ...
133 target1.write(line[:mid])
134 target1.write(os.linesep)
# ... and the remainder (from index mid on) goes to target2
135 target2.write(line[mid:])
136 target2.write(os.linesep)
def is_srf(filename):
    """
    Check filename to see if it is likely to be a SRF file

    The first four bytes of the file are compared with the ``SSRF``
    signature.

    filename is the path of the file to inspect.

    Returns True when the file starts with the signature and False
    otherwise, including when the file is shorter than four bytes.
    Errors from opening or reading the file propagate to the caller.
    """
    # Binary mode: the file being probed may be arbitrary binary data (for
    # example a compressed fastq), and decoding it as text can fail before
    # the signature is ever compared.
    # NOTE(review): the statement that reads the signature was not visible
    # in the damaged listing; a four-byte read is what the comparison with
    # "SSRF" implies -- confirm against a pristine copy of the file.
    with open(filename, 'rb') as stream:
        magic = stream.read(4)
    return magic == b"SSRF"
def open_write(filename, force=False):
    """
    Open a file, but throw an exception if it already exists

    filename is the path to open for writing.
    force, when true, skips the existence check so that an existing file
    is truncated and overwritten.

    Returns the file object opened in 'w' mode.
    Raises RuntimeError("<filename> exists") when the file is already
    present and force is false.
    """
    # Only refuse to overwrite when the caller has not asked for it; the
    # damaged listing showed the existence check without any use of `force`.
    if not force and os.path.exists(filename):
        raise RuntimeError("%s exists" % (filename,))

    return open(filename, 'w')
# Body of a function whose `def` line is not present in this listing (the
# embedded numbering jumps from 155 to 158), so its name and signature are
# unknown -- TODO restore from a pristine copy. It reads a variable named
# `filename`.
# split the input path into directory and file name, then drop the extension
158 path, name = os.path.split(filename)
159 base, ext = os.path.splitext(name)
# output names are built from the bare base name; `path` is not used in them
# on the visible lines, so they are relative to the current directory
161 target1_name = base + '_r1.fastq'
162 target2_name = base + '_r2.fastq'
# refuse to continue if either output file already exists
164 for target_name in [target1_name, target2_name]:
# Python 2 print statement
165 print 'target name', target_name
166 if os.path.exists(target_name):
167 raise RuntimeError("%s exists" % (target_name,))
# open the input for reading and both outputs for writing
169 instream = open(filename,'r')
170 target1 = open(target1_name,'w')
171 target2 = open(target2_name,'w')
# Script entry point: run main() on the command-line arguments (without the
# program name) and use its return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)