Commit patch to not break on spaces.
[bowtie.git] / pat.cpp
1 #include <cmath>
2 #include <iostream>
3 #include <string>
4 #include <stdexcept>
5 #include <seqan/sequence.h>
6 #include <seqan/file.h>
7
8 #include "pat.h"
9 #include "filebuf.h"
10
11 using namespace std;
12 using namespace seqan;
13
14 /**
15  * Parse a single quality string from fb and store qualities in r.
16  * Assume the next character obtained via fb.get() is the first
17  * character of the quality string.  When returning, the next
18  * character returned by fb.peek() or fb.get() should be the first
19  * character of the following line.
20  */
21 int parseQuals(ReadBuf& r,
22                FileBuf& fb,
23                int readLen,
24                int trim3,
25                int trim5,
26                bool intQuals,
27                bool phred64,
28                bool solexa64)
29 {
30         int qualsRead = 0;
31         int c = 0;
32         assert(fb.peek() != '\n' && fb.peek() != '\r');
33         _setBegin (r.qual, (char*)r.qualBuf);
34         _setLength(r.qual, 0);
35         if (intQuals) {
36                 while (c != '\r' && c != '\n' && c != -1) {
37                         bool neg = false;
38                         int num = 0;
39                         while(!isspace(c = fb.peek()) && !fb.eof()) {
40                                 if(c == '-') {
41                                         neg = true;
42                                         assert_eq(num, 0);
43                                 } else {
44                                         if(!isdigit(c)) {
45                                                 char buf[2048];
46                                                 cerr << "Warning: could not parse quality line:" << endl;
47                                                 fb.getPastNewline();
48                                                 cerr << fb.copyLastN(buf);
49                                                 buf[2047] = '\0';
50                                                 cerr << buf;
51                                                 throw 1;
52                                         }
53                                         assert(isdigit(c));
54                                         num *= 10;
55                                         num += (c - '0');
56                                 }
57                                 fb.get();
58                         }
59                         if(neg) num = 0;
60                         // Phred-33 ASCII encode it and add it to the back of the
61                         // quality string
62                         r.qualBuf[qualsRead++] = ('!' + num);
63                         // Skip over next stretch of whitespace
64                         c = fb.peek();
65                         while(c != '\r' && c != '\n' && isspace(c) && !fb.eof()) {
66                                 fb.get();
67                                 c = fb.peek();
68                         }
69                 }
70         } else {
71                 while (c != '\r' && c != '\n' && c != -1) {
72                         c = fb.get();
73                         r.qualBuf[qualsRead++] = charToPhred33(c, solexa64, phred64);
74                         c = fb.peek();
75                         while(c != '\r' && c != '\n' && isspace(c) && !fb.eof()) {
76                                 fb.get();
77                                 c = fb.peek();
78                         }
79                 }
80         }
81         if (qualsRead < readLen-1 ||
82             (qualsRead < readLen && !r.color))
83         {
84                 tooFewQualities(r.name);
85         }
86         qualsRead -= trim3;
87         if(qualsRead <= 0) return 0;
88         int trimmedReadLen = readLen-trim3-trim5;
89         if(trimmedReadLen < 0) trimmedReadLen = 0;
90         if(qualsRead > trimmedReadLen) {
91                 // Shift everybody left
92                 for(int i = 0; i < readLen; i++) {
93                         r.qualBuf[i] = r.qualBuf[i+qualsRead-trimmedReadLen];
94                 }
95         }
96         _setLength(r.qual, trimmedReadLen);
97         while(fb.peek() == '\n' || fb.peek() == '\r') fb.get();
98         return qualsRead;
99 }
100
101 void wrongQualityFormat(const String<char>& read_name) {
102         cerr << "Encountered a space parsing the quality string for read " << read_name << endl
103              << "If this is a FASTQ file with integer (non-ASCII-encoded) qualities, please" << endl
104              << "re-run Bowtie with the --integer-quals option.  If this is a FASTQ file with" << endl
105              << "alternate basecall information, please re-run Bowtie with the --fuzzy option." << endl;
106         throw 1;
107 }
108
109 void tooFewQualities(const String<char>& read_name) {
110         cerr << "Too few quality values for read: " << read_name << endl
111                  << "\tare you sure this is a FASTQ-int file?" << endl;
112         throw 1;
113 }
114
115 void tooManyQualities(const String<char>& read_name) {
116         cerr << "Reads file contained a pattern with more than 1024 quality values." << endl
117                  << "Please truncate reads and quality values and and re-run Bowtie" << endl;
118         throw 1;
119 }
120
121 void tooManySeqChars(const String<char>& read_name) {
122         cerr << "Reads file contained a pattern with more than 1024 sequence characters." << endl
123                  << "Please truncate reads and quality values and and re-run Bowtie." << endl
124                  << "Offending read: " << read_name << endl;
125         throw 1;
126 }
127
128 /**
129  * C++ version char* style "itoa":
130  */
131 char* itoa10(int value, char* result) {
132         // Check that base is valid
133         char* out = result;
134         int quotient = value;
135         do {
136                 *out = "0123456789"[ std::abs( quotient % 10 ) ];
137                 ++out;
138                 quotient /= 10;
139         } while ( quotient );
140
141         // Only apply negative sign for base 10
142         if (value < 0) *out++ = '-';
143         std::reverse( result, out );
144
145         *out = 0; // terminator
146         return out;
147 }