5 from optparse import OptionParser
10 class Qseq2Fastq(object):
12 Convert qseq files to fastq (or fasta) files.
14 def __init__(self, sources, pass_destination, nopass_destination=None):
15 self.sources = sources
16 self.pass_destination = pass_destination
17 if nopass_destination is not None:
18 self.nopass_destination = nopass_destination
20 self.nopass_destination = pass_destination
24 self.trim = slice(None)
25 self.reportFilter = False
27 def _format_flowcell_id(self):
29 Return formatted flowcell ID
31 return self.flowcell_id+"_"
33 def _convert_illumina_quality(self, illumina_quality):
35 Convert an Illumina ASCII encoded quality score to a Phred ASCII quality score.
37 # Illumina scores are Phred + 64
38 # Fastq scores are Phread + 33
39 # the following code grabs the string, converts to short ints and
40 # subtracts 31 (64-33) to convert between the two score formats.
41 # The numpy solution is twice as fast as some of my other
42 # ideas for the conversion.
43 # sorry about the uglyness in changing from character, to 8-bit int
44 # and back to a character array
45 quality = numpy.asarray(illumina_quality,'c')
46 quality.dtype = numpy.uint8
48 quality.dtype = '|S1' # I'd like to know what the real numpy char type is
57 header_template += self._format_flowcell_id() + '%s_%s:%s:%s:%s:%s/%s%s%s'
59 for qstream in self.sources:
62 record = line.rstrip().split('\t')
63 machine_name = record[0]
64 run_number = record[1]
65 lane_number = record[2]
71 sequence = record[8].replace('.','N')
72 quality = self._convert_illumina_quality(record[9])
74 # add pass qc filter if we want it
75 pass_qc = int(record[10])
77 pass_qc_msg = " pf=%s" % (pass_qc)
81 header = header_template % ( \
93 # if we passed the filter write to the "good" file
95 destination = self.pass_destination
97 destination = self.nopass_destination
99 destination.write(header)
100 destination.write(sequence[self.trim])
101 destination.write(os.linesep)
103 destination.write('+')
104 destination.write(os.linesep)
105 destination.write(quality[self.trim].tostring())
106 destination.write(os.linesep)
108 def file_generator(pattern_list):
109 for pattern in pattern_list:
110 for filename in glob(pattern):
111 yield open(filename,'r')
113 def tarfile_generator(tarfilename):
114 archive = tarfile.open(tarfilename,'r|*')
115 for tarinfo in archive:
116 yield archive.extractfile(tarinfo)
119 def parse_slice(slice_text):
120 if slice_text is None or len(slice_text) == 0:
124 for element in slice_text.split(':'):
125 if len(element) == 0:
128 element = int(element)
129 slice_data.append(element)
131 return slice(*slice_data)
134 usage = "%prog: [options] *_qseq.txt"
135 parser = OptionParser(usage)
136 parser.add_option('-a', '--fasta', default=False, action="store_true",
137 help="produce fasta files instead of fastq files")
138 parser.add_option('-f', '--flowcell', default='',
139 help="Set flowcell ID for output file")
140 parser.add_option('-i', '--infile', default=None,
141 help='source tar file (if reading from an archive instead of a directory)')
142 parser.add_option('-o', '--output', default=None,
143 help='output fastq file')
144 parser.add_option('-n', '--nopass-output', default=None,
145 help='if provided send files that failed illumina filter '\
146 'to a differentfile')
147 parser.add_option('-s', '--slice',
148 help='specify python slice, e.g. 0:75, 0:-1',
150 parser.add_option('--pf', help="report pass filter flag", default=False,
154 def main(cmdline=None):
155 parser = make_parser()
156 opts, args = parser.parse_args(cmdline)
158 if opts.infile is not None:
159 qseq_generator = tarfile_generator(opts.infile)
161 qseq_generator = file_generator(args)
163 qseq_generator = [sys.stdin]
166 if opts.output is not None:
167 output = open(opts.output, 'w')
171 if opts.nopass_output is not None:
172 nopass_output = open(opts.nopass_output, 'w')
176 qseq_parser = Qseq2Fastq(qseq_generator, output, nopass_output)
177 qseq_parser.fastq = not opts.fasta
178 qseq_parser.flowcell_id = opts.flowcell
179 qseq_parser.trim = parse_slice(opts.slice)
180 qseq_parser.reportFilter = opts.pf
184 if __name__ == "__main__":