From: Diane Trout Date: Sat, 14 Oct 2006 01:05:05 +0000 (+0000) Subject: don't seqcomp sequences that are too small X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=mussa.git;a=commitdiff_plain;h=ba0d9d232948802b2117e5fbd08289d9ebaac4f9 don't seqcomp sequences that are too small ticket:151 seqcomp gets really grumpy when it tries to analyze a sequence that is shorter than the window size. So instead of letting it try to malloc all of your memory, throw a more useful exception. Also I updated the SubanalysisWindow to catch the exception and report it to the user. --- diff --git a/alg/flp.hpp b/alg/flp.hpp index 8e7dbb9..9b56187 100644 --- a/alg/flp.hpp +++ b/alg/flp.hpp @@ -28,6 +28,8 @@ class Sequence; class FLPs { public: + typedef size_t size_type; + FLPs(); FLPs(const FLPs& ); //! Setup a FLP and reserve space for the match lists @@ -90,7 +92,10 @@ public: /*! this is mostly so seqcomp can use operator[] */ void alloc_matches(std::string::size_type len1=0); - + + //! make sure that a sequence is acceptable to seqcomp + void validate_sequence(const Sequence&) const; + //! current loop index int seqcomp_i; //! end seqcomp index (when terminating, seqcomp_i == seqcomp_end. 
diff --git a/alg/flp_seqcomp.cpp b/alg/flp_seqcomp.cpp index 04eca42..f46c404 100644 --- a/alg/flp_seqcomp.cpp +++ b/alg/flp_seqcomp.cpp @@ -45,6 +45,15 @@ FLPs::add(int seq1_i, int seq2_i, int a_score, int i2_offset) } } +void FLPs::validate_sequence(const Sequence& seq) const +{ + if (seq.size() < window_size) { + ostringstream msg; + msg << "Sequence " << seq.get_name() << " of length " << seq.size() + << " must be longer than window size " << window_size; + throw seqcomp_error(msg.str()); + } +} void FLPs::seqcomp(const Sequence& sseq1, const Sequence& sseq2, bool is_RC) @@ -55,8 +64,11 @@ FLPs::seqcomp(const Sequence& sseq1, const Sequence& sseq2, bool is_RC) const char *seq1 = sseq1.c_str(); const char *seq2 = sseq2.c_str(); - int seq1_win_num = sseq1.size() - window_size + 1; - int seq2_win_num = sseq2.size() - window_size + 1; + validate_sequence(sseq1); + validate_sequence(sseq2); + + size_type seq1_win_num = sseq1.size() - window_size + 1; + size_type seq2_win_num = sseq2.size() - window_size + 1; alloc_matches(sseq1.size()); if (seq1_win_num != size()) { ostringstream msg; diff --git a/alg/mussa.cpp b/alg/mussa.cpp index ae21f5b..7b62daf 100644 --- a/alg/mussa.cpp +++ b/alg/mussa.cpp @@ -465,8 +465,7 @@ Mussa::analyze() throw mussa_analysis_error("you need to have at least 2 sequences to " "do an analysis."); } - //cout << "nway ana: seq_num = " << the_seqs.size() << endl; - + seqcomp(); the_paths.setup(window, threshold); nway(); diff --git a/alg/sequence_location.cpp b/alg/sequence_location.cpp index 704cb89..cbce781 100644 --- a/alg/sequence_location.cpp +++ b/alg/sequence_location.cpp @@ -1,4 +1,6 @@ #include "alg/sequence_location.hpp" + +#include SequenceLocation::SequenceLocation( const boost::shared_ptr s, @@ -67,7 +69,7 @@ void SequenceLocation::setCount(int c) int SequenceLocation::getCount() const { - return right - left; + return std::max(right - left, 0); } void SequenceLocation::setRight(int r) diff --git a/alg/test/test_mussa.cpp 
b/alg/test/test_mussa.cpp index 7eb3015..4e0340d 100644 --- a/alg/test/test_mussa.cpp +++ b/alg/test/test_mussa.cpp @@ -475,6 +475,19 @@ BOOST_AUTO_TEST_CASE( three_way_local_alignment ) } } +BOOST_AUTO_TEST_CASE( mussa_window_larger_than_sequence ) +{ + string s0("AGCAGGG"); + string s1("CAGCGGG"); + + Mussa analysis; + analysis.append_sequence(s0); + analysis.append_sequence(s1); + analysis.set_threshold(23); + analysis.set_window(30); + BOOST_CHECK_THROW(analysis.analyze(), seqcomp_error); +} + BOOST_AUTO_TEST_CASE( subanalysis ) { Sequence s1("AATGAAGATTTTAATGCTTTAATTTTGTTTTGTAAACTTCGAATTTCCAAAATTTGAAA"); diff --git a/mussa_exceptions.hpp b/mussa_exceptions.hpp index a78dd4a..9027a07 100644 --- a/mussa_exceptions.hpp +++ b/mussa_exceptions.hpp @@ -79,6 +79,14 @@ public: mussa_error(msg) {}; }; +//! failure running seqcomp +class seqcomp_error : public mussa_analysis_error +{ +public: + explicit seqcomp_error(const std::string& msg) : + mussa_analysis_error(msg) {}; +}; + //! couldn't normalize a motif /* class motif_normalize_error : public mussa_error diff --git a/qui/SubanalysisWindow.cpp b/qui/SubanalysisWindow.cpp index 1ecbcbc..6d5f3ce 100644 --- a/qui/SubanalysisWindow.cpp +++ b/qui/SubanalysisWindow.cpp @@ -4,6 +4,7 @@ #include "mussa_exceptions.hpp" #include "alg/mussa.hpp" +#include #include #include #include @@ -94,13 +95,20 @@ void SubanalysisWindow::run() m->append_sequence(itor->getSelectedSequence()); } - m->set_window(window->value()); - m->set_threshold(threshold->value()); - m->analyze(); - MussaWindow *mw = new MussaWindow(m); - mw->show(); - model.clear(); - hide(); + try { + m->set_window(window->value()); + m->set_threshold(threshold->value()); + m->analyze(); + MussaWindow *mw = new MussaWindow(m); + mw->show(); + model.clear(); + hide(); + } catch(mussa_error e) { + QMessageBox::critical(this, + "Mussa Subanalysis Error", + QString(e.what()), + QMessageBox::Ok, 0, 0); + } } void SubanalysisWindow::modelUpdated(const QModelIndex&, int, int 
)