don't seqcomp sequences that are too small
authorDiane Trout <diane@caltech.edu>
Sat, 14 Oct 2006 01:05:05 +0000 (01:05 +0000)
committerDiane Trout <diane@caltech.edu>
Sat, 14 Oct 2006 01:05:05 +0000 (01:05 +0000)
ticket:151
seqcomp gets really grumpy when it trys to analyze a sequence that
is shorter than the window sequence. So instead of letting it try
to malloc all of your memory, throw a more useful exception.

Also I updated the SubanalysisWindow to catch the exception and
report it to the user.

alg/flp.hpp
alg/flp_seqcomp.cpp
alg/mussa.cpp
alg/sequence_location.cpp
alg/test/test_mussa.cpp
mussa_exceptions.hpp
qui/SubanalysisWindow.cpp

index 8e7dbb9eba1e8318233b6c0f48befef9789858e3..9b561874fbb1572c895fd15cc6542fc0dd52d963 100644 (file)
@@ -28,6 +28,8 @@ class Sequence;
 class FLPs
 {
 public:
+    typedef size_t size_type;
+    
     FLPs();
     FLPs(const FLPs& );
     //! Setup a FLP and reserve space for the match lists
@@ -90,7 +92,10 @@ public:
     /*! this is mostly so seqcomp can use operator[]
      */
     void alloc_matches(std::string::size_type len1=0);
-
+    
+    //! make sure that a sequence is acceptable to seqcomp
+    void validate_sequence(const Sequence&) const;
+    
     //! current loop index
     int seqcomp_i;
     //! end seqcomp index (when terminating, seqcomp_i == seqcomp_end.
index 04eca42dd0711266d3876a505c6e1e6bccedb348..f46c40498d58b4e12025deda0b293e3d0ee60acb 100644 (file)
@@ -45,6 +45,15 @@ FLPs::add(int seq1_i, int seq2_i, int a_score, int i2_offset)
   }
 }
 
+void FLPs::validate_sequence(const Sequence& seq) const
+{
+  if (seq.size() < window_size) {
+    ostringstream msg;
+    msg << "Sequence " << seq.get_name() << " of length " << seq.size()
+        << " must be longer than window size " << window_size;
+    throw seqcomp_error(msg.str());
+  }
+}
 
 void
 FLPs::seqcomp(const Sequence& sseq1, const Sequence& sseq2, bool is_RC)
@@ -55,8 +64,11 @@ FLPs::seqcomp(const Sequence& sseq1, const Sequence& sseq2, bool is_RC)
   const char *seq1 = sseq1.c_str();
   const char *seq2 = sseq2.c_str();
 
-  int seq1_win_num = sseq1.size() - window_size + 1;
-  int seq2_win_num = sseq2.size() - window_size + 1;
+  validate_sequence(sseq1);
+  validate_sequence(sseq2);
+  
+  size_type seq1_win_num = sseq1.size() - window_size + 1;
+  size_type seq2_win_num = sseq2.size() - window_size + 1;
   alloc_matches(sseq1.size());
   if (seq1_win_num != size()) {
     ostringstream msg;
index ae21f5bf736f845ca28a4f5c68d6b320a7687995..7b62dafc174de20a5f6747a6f7f0b470c02200f9 100644 (file)
@@ -465,8 +465,7 @@ Mussa::analyze()
     throw mussa_analysis_error("you need to have at least 2 sequences to "
                                "do an analysis.");
   }
-  //cout << "nway ana: seq_num = " << the_seqs.size() << endl;
-
+  
   seqcomp();
   the_paths.setup(window, threshold);
   nway();
index 704cb89e4469c974b772e684c2ab9daedd368f88..cbce781c520400949f1ea13ac37aad6d8e529a87 100644 (file)
@@ -1,4 +1,6 @@
 #include "alg/sequence_location.hpp"
+
+#include <cstdlib>
     
 SequenceLocation::SequenceLocation(
     const boost::shared_ptr<Sequence> s, 
@@ -67,7 +69,7 @@ void SequenceLocation::setCount(int c)
 
 int SequenceLocation::getCount() const
 {
-  return right - left;
+  return std::max(right - left, 0);
 }
 
 void SequenceLocation::setRight(int r)
index 7eb301572777dc2e9037cfbe4410c3fe6ea416e9..4e0340df08123ebecb66ef2ff4e904f3f88866b3 100644 (file)
@@ -475,6 +475,19 @@ BOOST_AUTO_TEST_CASE( three_way_local_alignment )
   }   
 }
 
+BOOST_AUTO_TEST_CASE( mussa_window_larger_than_sequence )
+{
+  string s0("AGCAGGG");
+  string s1("CAGCGGG");
+  
+  Mussa analysis;
+  analysis.append_sequence(s0);
+  analysis.append_sequence(s1);
+  analysis.set_threshold(23);
+  analysis.set_window(30);
+  BOOST_CHECK_THROW(analysis.analyze(), seqcomp_error);
+}
+
 BOOST_AUTO_TEST_CASE( subanalysis )
 {
   Sequence s1("AATGAAGATTTTAATGCTTTAATTTTGTTTTGTAAACTTCGAATTTCCAAAATTTGAAA");
index a78dd4a51be50067bfee1fda485404dd2d338ffe..9027a079a3b709552cec72272d50834d81476a94 100644 (file)
@@ -79,6 +79,14 @@ public:
     mussa_error(msg) {};
 };
 
+//! failure running seqcomp
+class seqcomp_error : public mussa_analysis_error
+{
+public:
+  explicit seqcomp_error(const std::string& msg) : 
+    mussa_analysis_error(msg) {};
+};
+
 //! couldn't normalize a motif
 /*
 class motif_normalize_error : public mussa_error
index 1ecbcbca28d80b40c7f12b6b63e5c54507716e1e..6d5f3ce99e80277181d02150c7492f22fcb3dba8 100644 (file)
@@ -4,6 +4,7 @@
 #include "mussa_exceptions.hpp"
 #include "alg/mussa.hpp"
 
+#include <QMessageBox>
 #include <QVBoxLayout>
 #include <QHBoxLayout>
 #include <QGridLayout>
@@ -94,13 +95,20 @@ void SubanalysisWindow::run()
     m->append_sequence(itor->getSelectedSequence());
   }
 
-  m->set_window(window->value());
-  m->set_threshold(threshold->value());
-  m->analyze();
-  MussaWindow *mw = new MussaWindow(m);
-  mw->show();
-  model.clear();
-  hide();
+  try {
+    m->set_window(window->value());
+    m->set_threshold(threshold->value());
+    m->analyze();
+    MussaWindow *mw = new MussaWindow(m);
+    mw->show();
+    model.clear();
+    hide();
+  } catch(mussa_error e) {
+    QMessageBox::critical(this,
+                          "Mussa Subanalysis Error",
+                          QString(e.what()),
+                          QMessageBox::Ok, 0, 0);
+  }
 }
 
 void SubanalysisWindow::modelUpdated(const QModelIndex&, int, int )