1 // This file is part of the Mussa source distribution.
2 // http://mussa.caltech.edu/
3 // Contact author: Tristan De Buysscher, tristan@caltech.edu
5 // This program and all associated source code files are Copyright (C) 2005
6 // the California Institute of Technology, Pasadena, CA, 91125 USA. It is
7 // under the GNU Public License; please see the included LICENSE.txt
8 // file for more information, or contact Tristan directly.
11 // ----------------------------------------
12 // ---------- flp_seqcomp.cc -----------
13 // ----------------------------------------
19 // Note one recording RC matches. This version of 'seqcomp' records RC matches
20 // as index of the window start in its nonRC form. However, past versions of
21 // the analysis recorded it as the index of the rear of the window, as the
22 // following legacy comment indicates. FR still uses this method
24 // Titus thinks the start of an RC window should be its "rear" in the
25 // normal version of the sequence. He's wrong, of course, and in the
26 // future my followers will annihilate his in the Final Battle, but for
27 // now I'll let him have his way.
29 // note seq2_i is actually the index of the leaving bp, so need to +1
32 FLPs::add(int seq1_i, int seq2_i, int a_score, int i2_offset)
37 if (a_score >= hard_threshold)
39 a_match.index = seq2_i + i2_offset;
40 a_match.score = a_score;
42 (*all_matches)[seq1_i].push_back(a_match);
48 FLPs::seqcomp(string sseq1, string sseq2, bool is_RC)
50 int start_i, seq1_i, seq2_i, win_i; // loop variables
51 int matches; // number of matches in to a window
54 int seq1_win_num = sseq1.size() - window_size + 1;
55 int seq2_win_num = sseq2.size() - window_size + 1;
56 alloc_matches(sseq1.size());
57 assert(seq1_win_num == size());
59 seq1 = (char *) sseq1.c_str();
60 seq2 = (char *) sseq2.c_str();
64 i2_offset = window_size - sseq2.size();
69 // this does the "upper diagonals" of the search
71 // loop thru the start positions for sequence 1
72 for(start_i = 0; start_i < size(); start_i++)
75 // compare initial window
76 for(win_i = 0; win_i < window_size; win_i++)
77 if ((seq1[start_i+win_i] == seq2[win_i]) &&
78 (seq1[start_i+win_i] != 'N')) // N's match nothing **
81 //seq2_i is always 0 for initial win
84 // call inlined function that adds match if it is above threshold
85 add(start_i, seq2_i, matches, i2_offset);
87 // run through rest of seq1 and seq2 with current seq1 offset (ie start_i)
88 // compare the bps leaving and entering the window and modify matches count
90 while( (seq1_i < seq1_win_num-1) && (seq2_i < seq2_win_num-1) )
92 // copmpare the bp leaving the window
93 if ((seq1[seq1_i] == seq2[seq2_i]) && (seq1[seq1_i] != 'N')) {
98 // compare the bp entering the window
99 if ((seq1[seq1_i+window_size] == seq2[seq2_i+window_size]) &&
100 (seq1[seq1_i+window_size] != 'N')) { // N's match nothing
101 matches = matches + 1;
103 add(seq1_i + 1, seq2_i + 1, matches, i2_offset);
105 seq1_i = seq1_i + 1; // increment seq1_i to next window
106 seq2_i = seq2_i + 1; // increment seq2_i to next window
107 } // end of loop thru the current offset sequence
108 } // end of loop thru the start positions of seq1 sequence
111 // this does the "lower diagonals" of the search
113 // loop thru the start positions for sequence 1
114 for(start_i = 1; start_i < seq2_win_num; start_i++)
118 // compare initial window
119 for(win_i = 0; win_i < window_size; win_i++)
120 if ((seq2[start_i+win_i] == seq1[win_i]) &&
121 (seq2[start_i+win_i] != 'N')) // N's match nothing
124 //seq2_i is always start_i for initial window
127 add(0, seq2_i, matches, i2_offset);
129 // run through rest of seq1 and seq2 with current seq1 offset (ie start_i)
130 // compare the bps leaving and entering the window and modify matches count
132 while( (seq1_i < seq1_win_num-1) && (seq2_i < seq2_win_num-1) )
134 // copmpare the bp leaving the window
135 if ((seq1[seq1_i] == seq2[seq2_i]) &&
136 (seq1[seq1_i] != 'N')) // N's match nothing
137 matches = matches - 1;
139 // compare the bp entering the window
140 if ((seq1[seq1_i+window_size] == seq2[seq2_i+window_size]) &&
141 (seq1[seq1_i+window_size] != 'N')) // N's match nothing
142 matches = matches + 1;
144 add(seq1_i + 1, seq2_i + 1, matches, i2_offset);
146 seq1_i = seq1_i + 1; // increment seq1_i to next window
147 seq2_i = seq2_i + 1; // increment seq2_i to next window
148 } // end of loop thru the current offset sequence
149 } // end of loop thru the start positions of seq2 sequence