More progress toward subanalysis
author Diane Trout <diane@caltech.edu>
Wed, 21 Jun 2006 02:11:45 +0000 (02:11 +0000)
committer Diane Trout <diane@caltech.edu>
Wed, 21 Jun 2006 02:11:45 +0000 (02:11 +0000)
With our previous efforts at turning Sequence into a string, the
places where we were comparing a Sequence to a string didn't work so well.
So this patch lets the compiler implicitly convert strings and const char *s
to Sequence.

Additionally, I made the copySelectedTracksAs* functions a bit more generic
by making the base function copySelectedTracks a template function
that returns a list of formatted objects. The two string-style copy
functions, AsFasta and AsString, then needed a bit more code to convert
the list back into a string. But I needed this so I can let users
edit what regions they want for SubanalysisWindows.

And lastly, there's a very simple box for defining a subanalysis that
still needs to be connected to the above copy functions.

14 files changed:
alg/glseqbrowser.cpp
alg/glseqbrowser.hpp
alg/mussa.cpp
alg/mussa.hpp
alg/sequence.cpp
alg/sequence.hpp
alg/test/test_mussa.cpp
alg/test/test_nway.cpp
py/mussa.cpp
qui/CMakeLists.txt
qui/MussaWindow.cpp
qui/MussaWindow.hpp
qui/SubanalysisWindow.cpp [new file with mode: 0644]
qui/SubanalysisWindow.hpp [new file with mode: 0644]

index cab62f67fa7ef8bff9d7ca7ec199d97c6a97bc42..2a159afd05b1690acc95aa111c74e40c797f4687 100644 (file)
@@ -7,6 +7,14 @@
 
 using namespace std;
 
+GlSeqBrowser::SequenceLocation::SequenceLocation(
+    const Sequence& s, 
+    int l, 
+    int c
+) : sequence(s), left(l), count(c)
+{
+}
+
 GlSeqBrowser::GlSeqBrowser()
   : border_width(25),
     cur_ortho(400.0, 0.0, 600.0, 0.0),
@@ -397,10 +405,11 @@ const set<int>& GlSeqBrowser::selectedPaths() const
 }
 
 //! copy sequence from selected track using formating function
-void GlSeqBrowser::copySelectedTracks(std::string& copy_buffer, 
-                                      format_track formatter)
+template<class Item>
+void GlSeqBrowser::copySelectedTracks(std::list<Item>& result, 
+             Item (*formatter)(const Sequence& s, int left, int right))
 {
-  copy_buffer.clear();
+  result.clear();
 
   for(selected_track_iterator track_i = selected_tracks.begin();
       track_i != selected_tracks.end();
@@ -414,7 +423,7 @@ void GlSeqBrowser::copySelectedTracks(std::string& copy_buffer,
     } else {
       // we should be safe
       const Sequence& seq = track_container[track_index].sequence();
-      copy_buffer += formatter(seq, track_i->left, track_i->right);
+      result.push_back(formatter(seq, track_i->left, track_i->right));
     }
   }
 }
@@ -422,6 +431,7 @@ void GlSeqBrowser::copySelectedTracks(std::string& copy_buffer,
 //! copy sequence from selected tracks as FASTA sequences
 void GlSeqBrowser::copySelectedTracksAsFasta(std::string& copy_buffer)
 {
+  std::list<std::string> result;
   struct AsFasta {
     static string formatter(const Sequence& seq, int left, int right)
     {
@@ -433,31 +443,45 @@ void GlSeqBrowser::copySelectedTracksAsFasta(std::string& copy_buffer)
       return s.str();
     }
   };
-  copySelectedTracks(copy_buffer, AsFasta::formatter);
+  copySelectedTracks(result, AsFasta::formatter);
+  // I wish there was some way to use for_each and bind here
+  for (list<string>::iterator result_i = result.begin();
+       result_i != result.end();
+       ++result_i)
+  {
+    copy_buffer.append(*result_i);
+  }
 }
 
 //! copy sequence from selected tracks as new sequences
-/*
-void GlSeqBrowser::copySelectedTracksAsSequence(std::string& copy_buffer)
+void GlSeqBrowser::copySelectedTracksAsSequences(std::list<Sequence>& result)
 {
   struct AsSequence {
-    static string formatter(const Sequence& seq, int left, int right)
+    static Sequence formatter(const Sequence& seq, int left, int right)
     {
-      stringstream s;
-      s << ">" << seq.get_header() 
-        << "|" << "subregion=" << left << "-" << right+1
-        << std::endl
-        << seq.subseq(left, right-left+1) << std::endl;
-      return s.str();
+      return seq.subseq(left, right-left+1);
+    }
+  };
+  copySelectedTracks(result, AsSequence::formatter);
+}
+
+void GlSeqBrowser::copySelectedTracksAsSeqLocation(
+    std::list<GlSeqBrowser::SequenceLocation>& result)
+{
+  struct AsSeqLocation {
+    static GlSeqBrowser::SequenceLocation 
+           formatter(const Sequence& seq, int left, int right)
+    {
+      return SequenceLocation(seq, left, right);
     }
   };
-  copySelectedTracks(copy_buffer, AsFasta::formatter);
+  copySelectedTracks(result, AsSeqLocation::formatter);
 }
-*/
 
 //! copy sequence from selected tracks as plain sequences
 void GlSeqBrowser::copySelectedTracksAsString(std::string& copy_buffer)
 {
+  std::list<string> result;
   struct AsString {
     static string formatter(const Sequence& seq, int left, int right)
     {
@@ -467,7 +491,15 @@ void GlSeqBrowser::copySelectedTracksAsString(std::string& copy_buffer)
     }
   };
 
-  copySelectedTracks(copy_buffer, AsString::formatter);
+  copySelectedTracks(result, AsString::formatter);
+  // I wish there was some way to use for_each and bind here
+  for (list<string>::iterator result_i = result.begin();
+       result_i != result.end();
+       ++result_i)
+  {
+    copy_buffer.append(*result_i);
+  }
+
 }
 
 void GlSeqBrowser::centerOnPath(const vector<int>& paths)
index 99ddd36580d636c2dfa129bd147a36f27f95ad67..80d80a810403f27106f86088c39a91f23248a3db 100644 (file)
@@ -79,14 +79,26 @@ public:
   void link(const std::vector<int>& path, const std::vector<bool>& isRC, int length);
   //! returns the index of pathids based on order added by link
   const std::set<int>& selectedPaths() const;
-  //! define our function for formating sequence copy
-  typedef std::string format_track(const Sequence& s, int left, int right);
   //! copy sequence from selected track using formating function
-  void copySelectedTracks(std::string& copy_buffer, format_track func);
-  //! copy sequence from selected tracks as plain sequences
-  void copySelectedTracksAsString(std::string& copy_buffer);
+  template<class Item>
+  void copySelectedTracks(std::list<Item>& result, 
+             Item (*format_track)(const Sequence& s, int left, int right));
   //! copy sequence from selected tracks as FASTA sequences
   void copySelectedTracksAsFasta(std::string& copy_buffer);
+  //! copy sequence from selected tracks as a list of sequences
+  void copySelectedTracksAsSequences(std::list<Sequence>& result);
+  //! copy sequence from selected tracks as plain sequences
+  void copySelectedTracksAsString(std::string& copy_buffer);
+
+  //! convenience structure for holding selected track segments
+  struct SequenceLocation {
+    const Sequence& sequence;
+    int left;
+    int count;
+    SequenceLocation(const Sequence& s, int l, int c);
+  };
+  //! copy tracks as a sequence and its coordinates
+  void copySelectedTracksAsSeqLocation(std::list<SequenceLocation>& result);
   
   
   //! Provide a logical name for a type discriminator for our glName stack
index f21c3fa03325c3f422ba98f742110cb450c3da3f..4677a6c84e871c0db9cac12dfa0ae20eedf76f18 100644 (file)
@@ -253,12 +253,9 @@ void Mussa::createLocalAlignment(std::list<ConservedPath>::iterator begin,
 }
 
 
-// takes a string and sets it as the next seq 
-void
-Mussa::add_a_seq(string a_seq)
+void Mussa::append_sequence(Sequence a_seq)
 {
-  Sequence aSeq(a_seq);
-  the_seqs.push_back(aSeq);
+  the_seqs.push_back(a_seq);
 }
 
 const vector<Sequence>& 
index 55547ec7cc3d60598b0f85a80da820d533aa15d8..6f7d0a33fb5c4846f6d252e953e9b8c83dad13b0 100644 (file)
@@ -112,7 +112,10 @@ public:
     void nway();
 
     //! appends a string sequence to the list of the_seqs
-    void add_a_seq(std::string a_seq);
+    // void append_sequence(std::string a_seq);
+    //! appends a sequence to the list of the_seqs
+    void append_sequence(Sequence a_seq);
+
     //! Load a sequence from a fasta file and any annotations
     /*! \param[in] seq_file the full path to the fasta file
      *  \param[in] annot_file the full path to an annotation file,
index c99f3a68f7892f10d023360a9d94e79cdb8ea406..56bad887f584252f289c6fa62bb1794d4c35fa22 100644 (file)
@@ -97,10 +97,12 @@ Sequence::~Sequence()
 {
 }
 
+Sequence::Sequence(const char *seq)
+{
+  set_filtered_sequence(seq);
+}
+
 Sequence::Sequence(const std::string& seq) 
- :  std::string(),
-    header(""),
-    species("")
 {
   set_filtered_sequence(seq);
 }
@@ -122,16 +124,11 @@ Sequence &Sequence::operator=(const Sequence& s)
     header = s.header;
     species = s.species;
     annots = s.annots;
+    motif_list = s.motif_list;
   }
   return *this;
 }
 
-Sequence &Sequence::operator=(const std::string& s)
-{
-  set_filtered_sequence(s);
-  return *this;
-}
-
 static void multiplatform_getline(std::istream& in, std::string& line)
 {
   line.clear();
index e2a37d38766df9fa0bf5a5055998e561c39b26bd..858892ecab34a96abaf5c90e4b2e060e9aa7fcc3 100644 (file)
@@ -69,11 +69,11 @@ class Sequence : public std::string
   public:
     Sequence();
     ~Sequence();
+    Sequence(const char* seq);
     Sequence(const std::string& seq);
     Sequence(const Sequence& seq);
     //! assignment to constant sequences
     Sequence &operator=(const Sequence&);
-    Sequence &operator=(const std::string &);
 
     //! set sequence to a (sub)string containing nothing but AGCTN
     void set_filtered_sequence(const std::string& seq, 
index 30bc4a355370860817ed23d7bac167f6be61f9d0..79356abcbe85ee46baddc0d406cefd1f74b0d4d0 100644 (file)
@@ -65,9 +65,9 @@ BOOST_AUTO_TEST_CASE( mussa_sequences )
   std::string s2("TTTTNNNN");
 
   Mussa analysis;
-  analysis.add_a_seq(s0);
-  analysis.add_a_seq(s1);
-  analysis.add_a_seq(s2);
+  analysis.append_sequence(s0);
+  analysis.append_sequence(s1);
+  analysis.append_sequence(s2);
 
   BOOST_CHECK_EQUAL( analysis.sequences().size(), 3 );
   BOOST_CHECK_EQUAL( analysis.sequences()[0], s0);
@@ -143,8 +143,8 @@ BOOST_AUTO_TEST_CASE( mussa_load_motif )
   istringstream test_istream(data);
 
   Mussa m1;
-  m1.add_a_seq("AAAAGGGGTTTT");
-  m1.add_a_seq("GGGCCCCTTGGTT");
+  m1.append_sequence("AAAAGGGGTTTT");
+  m1.append_sequence("GGGCCCCTTGGTT");
   m1.load_motifs(test_istream);
 
   for (vector<Sequence>::const_iterator seq_i = m1.sequences().begin();
@@ -163,8 +163,8 @@ BOOST_AUTO_TEST_CASE( mussa_add_motif )
   colors.push_back(Color(1.0, 0.0, 0.0));
   
   Mussa m1;
-  m1.add_a_seq("AAAAGGGGTTTT");
-  m1.add_a_seq("GGGCCCCTTGGTT");
+  m1.append_sequence("AAAAGGGGTTTT");
+  m1.append_sequence("GGGCCCCTTGGTT");
   m1.add_motifs(motifs, colors);
   int first_size = m1.motifs().size();
   BOOST_CHECK_EQUAL( first_size, 1 );
@@ -212,8 +212,8 @@ BOOST_AUTO_TEST_CASE( local_alignment )
   Sequence seq1(s1);
 
   Mussa analysis;
-  analysis.add_a_seq(s0);
-  analysis.add_a_seq(s1);
+  analysis.append_sequence(s0);
+  analysis.append_sequence(s1);
   analysis.set_threshold(3);
   analysis.set_window(4);
   analysis.analyze();
@@ -245,4 +245,32 @@ BOOST_AUTO_TEST_CASE( local_alignment )
 
 }
 
+BOOST_AUTO_TEST_CASE( subanalysis )
+{
+  Sequence s1("AATGAAGATTTTAATGCTTTAATTTTGTTTTGTAAACTTCGAATTTCCAAAATTTGAAA");
+  Sequence s2("AGGAGCAAGTTCGCTTCATCGAGAATTTTTAATTTTTAGTCAAATTTTCCAATGTCTGA");
+
+  Mussa analysis;
+  analysis.append_sequence(s1);
+  analysis.append_sequence(s2);
+  analysis.set_threshold(8);
+  analysis.set_window(8);
+  analysis.analyze();
+
+  NwayPaths perfect_path = analysis.paths();
+  int perfect_match_count = perfect_path.pathz.size();
+
+  Sequence sub1 = s1.subseq(2, s1.size()-4);
+  Sequence sub2 = s2.subseq(2, s2.size()-4);
+  Mussa subanalysis;
+  subanalysis.append_sequence(sub1);
+  subanalysis.append_sequence(sub2);
+  subanalysis.set_threshold(7);
+  subanalysis.set_window(8);
+  subanalysis.analyze();
+  NwayPaths one_mismatch_path = subanalysis.paths();
+  int one_mismatch_count = one_mismatch_path.pathz.size();
+
+  BOOST_CHECK( perfect_match_count < one_mismatch_count );
+}
 
index 4131f83ab3c1ac65a86b71825f586efd97c62927..22c84dd712a556f7609d8bee64e884c8725f67f9 100644 (file)
@@ -19,9 +19,9 @@ BOOST_AUTO_TEST_CASE( nway_null )
   string s2("TTTTNNNN");
 
   Mussa analysis;
-  analysis.add_a_seq(s0);
-  analysis.add_a_seq(s1);
-  analysis.add_a_seq(s2);
+  analysis.append_sequence(s0);
+  analysis.append_sequence(s1);
+  analysis.append_sequence(s2);
   analysis.set_window(4);
   analysis.set_threshold(3);
   analysis.analyze();
@@ -42,8 +42,8 @@ BOOST_AUTO_TEST_CASE( nway_test )
   Sequence seq1(s1);
 
   Mussa analysis;
-  analysis.add_a_seq(s0);
-  analysis.add_a_seq(s1);
+  analysis.append_sequence(s0);
+  analysis.append_sequence(s1);
   analysis.set_window(4);
   analysis.set_threshold(3);
   analysis.analyze();
@@ -183,9 +183,9 @@ GTTTTAATAAATGCACAATGCTCTCTTCCTGTTCTTC";
 
   // now that we've got some data lets see if it crashes
   Mussa m1;
-  m1.add_a_seq(seq1);
-  m1.add_a_seq(seq2);
-  m1.add_a_seq(seq3);
+  m1.append_sequence(seq1);
+  m1.append_sequence(seq2);
+  m1.append_sequence(seq3);
 
   m1.set_window(10);
   m1.set_threshold(8);
index aecf2b146f928376e7c3073c8f045fbaa2b6fe45..e0efaaeb9dcd8151d6062310d8bb89d6cfa33491 100644 (file)
@@ -26,7 +26,7 @@ void export_mussa()
     .def("analyze", &Mussa::analyze, "Run the analysis")
     .def("paths", &Mussa::paths, py::return_internal_reference<>())
     //.def("sequences", &Mussa::sequences)
-    .def("addSequence", &Mussa::add_a_seq)  
+    .def("addSequence", &Mussa::append_sequence)  
   ;
 
   py::enum_<Mussa::analysis_modes>("analysis_modes")
index b3dd1d8a7759363840ecbd6dfd4c4b1319b1ccc9..9a261781f6e64d6580dab42c3c6ad82ae155de8a 100644 (file)
@@ -19,6 +19,7 @@ SET(MOC_HEADERS
       IntAction.hpp           
       MussaAlignedWindow.hpp  
       MussaWindow.hpp
+      SubanalysisWindow.hpp
       ThresholdWidget.hpp
       ZoomWidget.hpp
       motif_editor/MotifDetail.hpp
@@ -41,6 +42,7 @@ SET(GUI_SOURCES
       IntAction.cpp            
       MussaAlignedWindow.cpp
       MussaWindow.cpp
+      SubanalysisWindow.cpp
       ThresholdWidget.cpp
       ZoomWidget.cpp
       motif_editor/MotifDetail.cpp
index 9a93a9ff13f5bd68e33efe3f60803a2b64f12f9b..de33b9d5dc9660317979afcaf52d66b0536a92f8 100644 (file)
@@ -155,7 +155,7 @@ void MussaWindow::setupActions()
           this, SLOT(createNewAnalysis()));
   createNewAnalysisAction->setIcon(QIcon(":/icons/filenew.png"));
   
-  createSubAnalysisAction = new QAction(tr("Define SubAnalysis"), this);
+  createSubAnalysisAction = new QAction(tr("Add to Subanalysis"), this);
   connect(createSubAnalysisAction, SIGNAL(triggered()), 
           this, SLOT(createSubAnalysis()));
 
@@ -238,7 +238,6 @@ void MussaWindow::setupMainMenu()
   newMenu->addAction(createNewAnalysisAction);
   newMenu->addAction(loadMupaAction);
   newMenu->addAction(loadSavedAnalysisAction);
-  //newMenu->addAction(createSubAnalysisAction);
   newMenu->addSeparator();
   newMenu->addAction(loadMotifListAction);
   newMenu->addAction(saveMotifListAction);
@@ -249,6 +248,7 @@ void MussaWindow::setupMainMenu()
 
   newMenu = menuBar()->addMenu(tr("&Edit"));
   newMenu->addAction(&browser.getCopySelectedSequenceAsFastaAction());
+  newMenu->addAction(createSubAnalysisAction);
  
   newMenu = menuBar()->addMenu(tr("&View"));
   newMenu->addAction(editMotifsAction);
@@ -266,6 +266,7 @@ void MussaWindow::setupMainMenu()
   // add some extra features to the context menu
   QMenu& popupMenu = browser.getPopupMenu();
   popupMenu.addAction(viewMussaAlignmentAction);
+  popupMenu.addAction(createSubAnalysisAction);
 }
 
 void MussaWindow::setupAssistant()
@@ -330,7 +331,9 @@ void MussaWindow::createNewAnalysis()
 
 void MussaWindow::createSubAnalysis()
 {
-  NotImplementedBox();
+  if (not subanalysis_window.isVisible()) {
+    subanalysis_window.show();
+  }
 }
 
 void MussaWindow::editMotifs()
index dfbe89cc7719fbcbd87ff14e2a884f948aa68fcd..0e15a55e78d2855c2efb7fbfc6b650428cf70ad0 100644 (file)
@@ -14,6 +14,7 @@
 #include "qui/motif_editor/MotifEditor.hpp"
 #include "qui/mussa_setup_dialog/MussaSetupDialog.hpp"
 #include "qui/seqbrowser/SequenceBrowserWidget.hpp"
+#include "qui/SubanalysisWindow.hpp"
 #include "qui/ThresholdWidget.hpp"
 #include "qui/ZoomWidget.hpp"
 
@@ -88,6 +89,7 @@ protected:
   std::list<boost::shared_ptr<MussaAlignedWindow> > aligned_windows;
   MotifEditor *motif_editor;
   MussaSetupDialog setup_analysis_dialog;
+  SubanalysisWindow subanalysis_window;
 
   // display our wonderful mussa output
   SequenceBrowserWidget browser;
diff --git a/qui/SubanalysisWindow.cpp b/qui/SubanalysisWindow.cpp
new file mode 100644 (file)
index 0000000..bf32cd6
--- /dev/null
@@ -0,0 +1,27 @@
+#include "qui/SubanalysisWindow.hpp"
+
+#include <QTextEdit>
+#include <QPushButton>
+
+#include <QVBoxLayout>
+#include <QHBoxLayout>
+
+SubanalysisWindow::SubanalysisWindow(QWidget *parent)
+  : QWidget(parent),
+    text(0),
+    ok(0),
+    cancel(0)
+{
+  QHBoxLayout *buttonLayout = new QHBoxLayout();
+  QVBoxLayout *verticalLayout = new QVBoxLayout();
+
+  ok = new QPushButton(tr("&OK"), this);
+  cancel = new QPushButton(tr("Cancel"), this);
+  text = new QTextEdit(this);
+
+  buttonLayout->addWidget(ok);
+  buttonLayout->addWidget(cancel);
+  verticalLayout->addWidget(text);
+  verticalLayout->addLayout(buttonLayout);
+  setLayout(verticalLayout);
+}
diff --git a/qui/SubanalysisWindow.hpp b/qui/SubanalysisWindow.hpp
new file mode 100644 (file)
index 0000000..043c2dc
--- /dev/null
@@ -0,0 +1,21 @@
+#ifndef _SUBANALYSIS_H_
+#define _SUBANALYSIS_H_
+
+#include <QWidget>
+
+class QTextEdit;
+class QPushButton;
+
+class SubanalysisWindow : public QWidget
+{
+  Q_OBJECT 
+
+public: 
+  SubanalysisWindow(QWidget *parent = 0);
+
+private:  
+  QTextEdit *text;
+  QPushButton *ok;
+  QPushButton *cancel;
+};
+#endif