// ---------- flp_seqcomp.cc -----------
// ----------------------------------------
+#include "mussa_exceptions.hpp"
#include "alg/flp.hpp"
+#include "alg/sequence.hpp"
#include <cassert>
+#include <sstream>
using namespace std;
// Note one recording RC matches. This version of 'seqcomp' records RC matches
{
match a_match;
-
if (a_score >= hard_threshold)
{
a_match.index = seq2_i + i2_offset;
}
}
+void FLPs::validate_sequence(const Sequence& seq) const
+{
+ if (seq.size() < window_size) {
+ ostringstream msg;
+ msg << "Sequence " << seq.get_name() << " of length " << seq.size()
+ << " must be longer than window size " << window_size;
+ throw seqcomp_error(msg.str());
+ }
+}
void
-FLPs::seqcomp(string sseq1, string sseq2, bool is_RC)
+FLPs::seqcomp(const Sequence& sseq1, const Sequence& sseq2, bool is_RC)
{
int start_i, seq1_i, seq2_i, win_i; // loop variables
int matches; // number of matches in to a window
int i2_offset;
- char * seq1, * seq2;
- int seq1_win_num = sseq1.size() - window_size + 1;
- int seq2_win_num = sseq2.size() - window_size + 1;
+ std::string sseq1_str(sseq1.get_sequence());
+ std::string sseq2_str(sseq2.get_sequence());
+ const char *seq1 = sseq1_str.c_str();
+ const char *seq2 = sseq2_str.c_str();
+
+ validate_sequence(sseq1);
+ validate_sequence(sseq2);
+
+ size_type seq1_win_num = sseq1.size() - window_size + 1;
+ size_type seq2_win_num = sseq2.size() - window_size + 1;
alloc_matches(sseq1.size());
- assert(seq1_win_num == size());
-
- seq1 = (char *) sseq1.c_str();
- seq2 = (char *) sseq2.c_str();
-
+ if (seq1_win_num != size()) {
+ ostringstream msg;
+ msg << "Assert failed, seq1_win_num["
+ << seq1_win_num << "] != size[" << size() << "]" << endl;
+ throw mussa_error(msg.str());
+ }
if (is_RC)
i2_offset = window_size - sseq2.size();
else
i2_offset = 0;
+ // I'm not sure why we're -2 instead of -1, but I experimentally
+ // determined that I need to -2 to get seqcomp_i to == seqcomp_end
+ seqcomp_end = size() + seq2_win_num -2;
// this does the "upper diagonals" of the search
// loop thru the start positions for sequence 1
- for(start_i = 0; start_i < size(); start_i++)
+ for(start_i = 0; start_i != size(); ++start_i, ++seqcomp_i)
{
matches = 0;
// compare initial window
} // end of loop thru the current offset sequence
} // end of loop thru the start positions of seq1 sequence
-
// this does the "lower diagonals" of the search
// loop thru the start positions for sequence 1
- for(start_i = 1; start_i < seq2_win_num; start_i++)
+ for(start_i = 1; start_i != seq2_win_num; ++start_i, ++seqcomp_i)
{
matches = 0;
seq2_i = seq2_i + 1; // increment seq2_i to next window
} // end of loop thru the current offset sequence
} // end of loop thru the start positions of seq2 sequence
+
+ if (seqcomp_i != seqcomp_end) {
+ clog << "seqcomp_i " << seqcomp_i << " " << seqcomp_end << endl;
+ //throw mussa_error("internal error with seqcomp seqcomp_i != seqcomp_end");
+ }
+ seqcomp_i = seqcomp_end = FLPs::seqcomp_not_running;
}