5 from optparse import OptionParser
10 def qseq2fastq(destination, qseqs, trim=None, pf=False):
14 record = line.strip().split('\t')
15 machine_name = record[0]
16 run_number = record[1]
17 lane_number = record[2]
23 sequence = record[8].replace('.','N')
24 # Illumina scores are Phred + 64
25 # Fastq scores are Phred + 33
26 # the following code grabs the string, converts to short ints and
27 # subtracts 31 (64-33) to convert between the two score formats.
28 # The numpy solution is twice as fast as some of my other
29 # ideas for the conversion.
30 # sorry about the ugliness in changing from character, to 8-bit int
31 # and back to a character array
32 quality = numpy.asarray(record[9],'c')
33 quality.dtype = numpy.uint8
35 quality.dtype = '|S1' # I'd like to know what the real numpy char type is
39 pass_qc_msg = " pf=%s" % (pass_qc)
43 destination.write('@%s_%s:%s:%s:%s:%s/%s%s%s' % ( \
53 destination.write(sequence[trim])
54 destination.write(os.linesep)
55 destination.write('+')
56 destination.write(os.linesep)
57 destination.write(quality[trim].tostring())
58 destination.write(os.linesep)
def file_generator(pattern_list):
    """Yield an open, readable file object for every file matching the patterns.

    Each pattern in ``pattern_list`` is expanded with ``glob`` and the matches
    are visited in sorted order, so the sequence of files is reproducible
    from run to run instead of depending on directory listing order.

    A yielded file is only valid until the next one is requested: it is
    closed as soon as the consumer advances the generator (or the generator
    itself is closed), so open handles do not pile up when a pattern matches
    many files.
    """
    for pattern in pattern_list:
        # glob() returns matches in arbitrary order; sort for stable output.
        for filename in sorted(glob(pattern)):
            with open(filename, 'r') as stream:
                yield stream
def tarfile_generator(tarfilename):
    """Yield a readable file object for each data-bearing member of a tar file.

    The archive is opened in streaming mode (``'r|*'``, compression
    auto-detected), so members are produced in archive order and each
    yielded object must be read before the next one is requested.

    Members for which ``extractfile`` returns ``None`` (for example
    directories) are skipped rather than handed to the caller, and the
    archive is closed once iteration finishes or the generator is closed.
    """
    with tarfile.open(tarfilename, 'r|*') as archive:
        for tarinfo in archive:
            member = archive.extractfile(tarinfo)
            # Nothing to read for members that carry no file data.
            if member is None:
                continue
            yield member
71 usage = "%prog: [options] *_qseq.txt"
72 parser = OptionParser(usage)
73 parser.add_option('-i', '--infile', default=None,
74 help='source tar file (if reading from an archive instead of a directory)')
75 parser.add_option('-o', '--output', help='output fastq file', default=None)
76 parser.add_option('-s', '--slice',
77 help='specify python slice, e.g. 0:75, 0:-1',
79 parser.add_option('--pf', help="report pass filter flag", default=False,
83 def parse_slice(slice_text):
84 if slice_text is None or len(slice_text) == 0:
88 for element in slice_text.split(':'):
92 element = int(element)
93 slice_data.append(element)
95 return slice(*slice_data)
98 def main(cmdline=None):
99 parser = make_parser()
100 opts, args = parser.parse_args(cmdline)
102 if opts.infile is not None:
103 qseq_generator = tarfile_generator(opts.infile)
105 qseq_generator = file_generator(args)
107 if opts.output is not None:
108 dest = open(opts.output, 'w')
112 subseq = parse_slice(opts.slice)
114 qseq2fastq(dest, qseq_generator, subseq, opts.pf)
118 if __name__ == "__main__":