From 19e5c26f240e4a8226b34b4ca4fa688d791fc3bc Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Sat, 20 May 2006 01:16:26 +0000 Subject: [PATCH] ticket:62 fix local alignment This appears to fix the problem where the local alignment was being miscomputed. In implementing this I collapsed ExtendedConservedPath into ConservedPath, and I finished seperating computing the local alignment from drawing said alignment. Once I seperated the two and could actually call the core algorithm I was able to add some (simple) unittesting of the local alignment code, and use some much simpler drawing code. --- alg/conserved_path.cpp | 70 +++------------- alg/conserved_path.hpp | 27 ++----- alg/mussa.cpp | 87 ++++++++++++++++++++ alg/mussa.hpp | 7 ++ alg/nway_entropy.cpp | 2 +- alg/nway_other.cpp | 4 +- alg/nway_paths.cpp | 18 ++--- alg/nway_paths.hpp | 8 +- alg/test/test_conserved_path.cpp | 22 ++--- alg/test/test_mussa.cpp | 77 ++++++++++++++++++ py/conserved_path.cpp | 5 +- qui/MussaAlignedWindow.cpp | 133 +++++++------------------------ qui/MussaAlignedWindow.hpp | 2 +- qui/MussaWindow.cpp | 2 +- 14 files changed, 246 insertions(+), 218 deletions(-) diff --git a/alg/conserved_path.cpp b/alg/conserved_path.cpp index a3778da..c67b454 100644 --- a/alg/conserved_path.cpp +++ b/alg/conserved_path.cpp @@ -6,25 +6,29 @@ using namespace std; ///////////////////// ConservedPath::ConservedPath() - : score(0.0), + : window_size(0), + score(0.0), track_indexes() { } ConservedPath::ConservedPath(const ConservedPath::ConservedPath& other) - : score(other.score), + : window_size(other.window_size), + score(other.score), track_indexes(other.track_indexes) { } -ConservedPath::ConservedPath(double s, ConservedPath::path_type path) - : score(s), +ConservedPath::ConservedPath(size_t win_size, double s, ConservedPath::path_type path) + : window_size(win_size), + score(s), track_indexes(path) { } void ConservedPath::clear() { + window_size = 0; score = 0.0; track_indexes.clear(); } @@ -52,7 +56,8 @@ bool operator!=(const ConservedPath::ConservedPath& a, std::ostream& operator<<(std::ostream& out, const ConservedPath& path) { - out << "::iterator pbegin() { return pathz.begin() ; } std::list::iterator pend() { return pathz.end() ; } size_t path_size() const { return refined_pathz.size(); } - std::list::iterator rpbegin() { return refined_pathz.begin() ; } - std::list::iterator rpend() { return refined_pathz.end() ; } + std::list::iterator rpbegin() { return refined_pathz.begin() ; } + std::list::iterator rpend() { return refined_pathz.end() ; } size_t refined_path_size() const { return refined_pathz.size(); } // these probably shouldn't be public, but lets start // simple std::list pathz; - std::list refined_pathz; + std::list refined_pathz; }; #endif diff --git a/alg/test/test_conserved_path.cpp b/alg/test/test_conserved_path.cpp index 7050a73..f8d670c 100644 --- a/alg/test/test_conserved_path.cpp +++ b/alg/test/test_conserved_path.cpp @@ -22,7 +22,7 @@ BOOST_AUTO_TEST_CASE ( conserved_path_simple ) cp1.track_indexes = path; BOOST_CHECK_EQUAL( cp1.size(), path.size() ); - ConservedPath cp2(18, path); + ConservedPath cp2(20, 18, path); BOOST_CHECK_EQUAL (cp1, cp2); BOOST_CHECK_EQUAL( cp2.size(), path.size() ); @@ -69,7 +69,7 @@ BOOST_AUTO_TEST_CASE ( conserved_path_reverse_compliment ) vector path; path += 3,4,-5,1; // magic from boost assign - ConservedPath cp1(10, path); + ConservedPath cp1(20, 10, path); vector reversed = cp1.reverseComplimented(); BOOST_CHECK_EQUAL( path.size(), reversed.size()); @@ -88,24 +88,24 @@ BOOST_AUTO_TEST_CASE ( extended_conserved_path_simple ) vector path; path += 3,4,5,1; // magic from boost assign - ExtendedConservedPath ecp1; + ConservedPath ecp1; ecp1.window_size = 30; ecp1.score = 18; ecp1.track_indexes = path; BOOST_CHECK_EQUAL ( ecp1.size(), path.size() ); - ExtendedConservedPath ecp2(30, 18.0, path); + ConservedPath ecp2(30, 18.0, path); BOOST_CHECK_EQUAL ( ecp1, ecp2 ); BOOST_CHECK_EQUAL ( ecp2.size(), path.size() ); - ExtendedConservedPath ecp_ne(35, 20.0, path); + ConservedPath ecp_ne(35, 20.0, path); BOOST_CHECK(ecp2 != ecp_ne); ConservedPath cp1; cp1.score = 18; cp1.track_indexes = path; - ExtendedConservedPath ecp3(30, cp1); + ConservedPath ecp3(cp1); BOOST_CHECK_EQUAL( ecp2, ecp3 ); BOOST_CHECK_EQUAL( ecp3.size(), path.size() ); } @@ -123,10 +123,10 @@ BOOST_AUTO_TEST_CASE ( extended_conserved_path_growth ) path_not_next += 4,5,6,5; const int window_size = 4; - ExtendedConservedPath ecp_base(window_size, 20, path_base); - ConservedPath cp_next(25, path_next); - ConservedPath cp_next_reversed(25, path_next_reversed); - ConservedPath cp_not_next(22, path_not_next); + ConservedPath ecp_base(window_size, 20, path_base); + ConservedPath cp_next(window_size, 25, path_next); + ConservedPath cp_next_reversed(window_size, 25, path_next_reversed); + ConservedPath cp_not_next(window_size, 22, path_not_next); BOOST_CHECK(ecp_base.nextTo(cp_next)); BOOST_CHECK(!ecp_base.nextTo(cp_not_next)); @@ -145,7 +145,7 @@ BOOST_AUTO_TEST_CASE ( extended_conserved_normalized ) vector normalized_path; normalized_path += 3,4, 5,9; - ExtendedConservedPath ecp1; + ConservedPath ecp1; ecp1.window_size = 10; ecp1.score = 18; ecp1.track_indexes = path; diff --git a/alg/test/test_mussa.cpp b/alg/test/test_mussa.cpp index da17b22..f215816 100644 --- a/alg/test/test_mussa.cpp +++ b/alg/test/test_mussa.cpp @@ -1,6 +1,10 @@ #include #include namespace fs = boost::filesystem; +#include +#include +#include +namespace assign = boost::assign; #include #include @@ -160,3 +164,76 @@ BOOST_AUTO_TEST_CASE( mussa_add_motif ) m1.add_motifs(motifs, colors); BOOST_CHECK_EQUAL( first_size, m1.motifs().size() ); } + +static void +local_align_test(const vector &seqs, + const list& result, + const list >& reversed) +{ + map > m; + assign::insert(m)('A', assign::list_of('A')('T') ) + ('T', assign::list_of('T')('A') ) + ('G', assign::list_of('G')('C') ) + ('C', assign::list_of('C')('G') ); + list >::const_iterator rc_i = reversed.begin(); + + for(list::const_iterator base_i = result.begin(); + base_i != result.end(); + ++base_i, ++rc_i) + { + // since the reverse compliment flag is relative to the first sequence + // the first one should always be false + BOOST_CHECK_EQUAL( (*rc_i)[0], false ); + const int first_path_basepair_index = (*base_i)[0]; + const int second_path_basepair_index = (*base_i)[1]; + const char first_basepair = seqs[0][first_path_basepair_index]; + const char second_basepair = seqs[1][second_path_basepair_index]; + // get our index into our reverse compliment map m + const int second_compliment_index = (*rc_i)[1]; + // lookup the forward or reverse compliment depending on our rc flag + const char complimented_second = m[second_basepair][second_compliment_index]; + + BOOST_CHECK_EQUAL( first_basepair, complimented_second) ; + } +} + + +BOOST_AUTO_TEST_CASE( local_alignment ) +{ + string s0("GCGCATAT"); + string s1("AAAAAAAT"); + Sequence seq1(s1); + + Mussa analysis; + analysis.add_a_seq(s0); + analysis.add_a_seq(s1); + analysis.analyze(4,3); + NwayPaths npath = analysis.paths(); + list result; + list > reversed; + list::iterator pathz_i = npath.pathz.begin(); + + list selected_paths; + selected_paths.push_back(*pathz_i); + analysis.createLocalAlignment(selected_paths.begin(), + selected_paths.end(), + result, + reversed); + + local_align_test(analysis.sequences(), result, reversed); + + ++pathz_i; + result.clear(); + reversed.clear(); + selected_paths.clear(); + selected_paths.push_back(*pathz_i); + analysis.createLocalAlignment(selected_paths.begin(), + selected_paths.end(), + result, + reversed); + local_align_test(analysis.sequences(), result, reversed); + + +} + + diff --git a/py/conserved_path.cpp b/py/conserved_path.cpp index 65a5d06..0ab7a55 100644 --- a/py/conserved_path.cpp +++ b/py/conserved_path.cpp @@ -13,14 +13,11 @@ void export_conserved_path() ; class_("ConservedPath") + .def_readwrite("window_size", &ConservedPath::window_size) .def_readwrite("score", &ConservedPath::score) .def_readwrite("track_indexes", &ConservedPath::track_indexes) //.add_property("track_indexes", //iterator >()) // range(&ConservedPath::track_indexes.begin(), // &ConservedPath::track_indexes.end())) ; - - class_ >("ExtendedConservedPath") - .def_readwrite("window_size", &ExtendedConservedPath::window_size) - ; } diff --git a/qui/MussaAlignedWindow.cpp b/qui/MussaAlignedWindow.cpp index a65a955..d113f48 100644 --- a/qui/MussaAlignedWindow.cpp +++ b/qui/MussaAlignedWindow.cpp @@ -58,8 +58,8 @@ void MussaAlignedWindow::setSelectedPaths(Mussa &m, const set& sel_paths) { // sets are sorted set::iterator sel_i = sel_paths.begin(); - list::const_iterator path_i = m.paths().refined_pathz.begin(); - list::const_iterator path_end = m.paths().refined_pathz.end(); + list::const_iterator path_i = m.paths().refined_pathz.begin(); + list::const_iterator path_end = m.paths().refined_pathz.end(); size_t path_size = m.paths().refined_pathz.size(); size_t pathid=0; @@ -91,7 +91,7 @@ void MussaAlignedWindow::setupMenus() pick_actions.clear(); view_actions.clear(); - for(vector::iterator pathz_i=selected_paths.begin(); + for(vector::iterator pathz_i=selected_paths.begin(); pathz_i != selected_paths.end(); ++pathz_i) { @@ -144,110 +144,31 @@ void MussaAlignedWindow::update() void MussaAlignedWindow::computeMatchLines() { - const vector& raw_seq = analysis.sequences(); - vector aligned_path; - size_t i2, i3; - int x_start, x_end; - int window_length, win_i; - int rc_1 = 0; - int rc_2 = 0; - vector rc_list; - bool full_match; - vector matched; - int align_counter; - browser.clear_links(); - align_counter = 0; - for(vector::iterator pathz_i=selected_paths.begin(); - pathz_i != selected_paths.end(); - ++pathz_i) + + // filter out conserved paths + list filtered_paths; + vector::iterator path_i = selected_paths.begin(); + list result; + list > reversed; + + for(vector::size_type count = 0; + count != selected_paths.size(); + ++count, ++path_i) { - if (view_paths[align_counter]) - { - ExtendedConservedPath& a_path = *pathz_i; - window_length = a_path.window_size; - // determine which parts of the path are RC relative to first species - rc_list = a_path.reverseComplimented(); - - // loop over each bp in the conserved region for all sequences - for(win_i = 0; win_i < window_length; win_i++) - { - aligned_path.clear(); - // determine which exact base pairs match between the sequences - full_match = true; - for(i2 = 0; i2 < a_path.size()-1; i2++) - { - // assume not rc as most likely, adjust below - rc_1 = 0; - rc_2 = 0; - // no matter the case, any RC node needs adjustments - if (a_path[i2] < 0) - rc_1 = window_length-1; - if (a_path[i2+1] < 0) - rc_2 = window_length-1; - - x_start = (abs(a_path[i2]-rc_1+win_i)); - x_end = (abs(a_path[i2+1]-rc_2+win_i)); - - // RC case handling - // ugh, and xor...only want rc coloring if just one of the nodes is rc - // if both nodes are rc, then they are 'normal' relative to each other - if((rc_list[i2] || rc_list[i2+1] )&&!(rc_list[i2] && rc_list[i2+1])) - { //the hideous rc matching logic - not complex, but annoying - if(!(( (raw_seq[i2][x_start]=='A')&&(raw_seq[i2+1][x_end]=='T')) || - ((raw_seq[i2][x_start]=='T')&&(raw_seq[i2+1][x_end]=='A')) || - ((raw_seq[i2][x_start]=='G')&&(raw_seq[i2+1][x_end]=='C')) || - ((raw_seq[i2][x_start]=='C')&&(raw_seq[i2+1][x_end]=='G'))) ) - full_match = false; - } - else - { - if (!( (raw_seq[i2][x_start] == raw_seq[i2+1][x_end]) && - (raw_seq[i2][x_start] != 'N') && - (raw_seq[i2+1][x_end] != 'N') ) ) - full_match = false; - } - } - - // draw for matches stretching across all sequences - if (full_match) - { - // now can draw the line for each bp in this window that matches - // grrr, need to ask if anyone cares if I switch the seq - // top-bot order... - i3 = 0; - //y_loc = y_min + 5; - for(i2 = 0; i2 < a_path.size()-1; i2++) - { - // assume not rc as most likely, adjust below - rc_1 = 0; - rc_2 = 0; - // no matter the case, any RC node needs adjustments - if (a_path[i2] < 0) - { - rc_1 = window_length; - } - if (a_path[i2] < 0) - { - rc_2 = window_length; - } - - // maybe shouldn't recalc these, but store values from first loop - x_start = (abs((int) (a_path[i2]-rc_1+win_i))); - x_end = (abs((int) (a_path[i2+1]-rc_2+win_i))); - aligned_path.push_back(x_start); - // if we're on the last time through the loop, save x_end too - if (i2 == a_path.size()-2) { - aligned_path.push_back(x_end); - } - } - } - if (aligned_path.size() > 0) { - browser.link(aligned_path, rc_list,1); - } - } - } - align_counter++; + if (view_paths[count]) + filtered_paths.push_back(*path_i); + } + analysis.createLocalAlignment(filtered_paths.begin(), + filtered_paths.end(), + result, + reversed); + + list::const_iterator result_i = result.begin(); + list >::const_iterator reversed_i = reversed.begin(); + for(int i = 0; i != result.size(); ++i, ++result_i, ++reversed_i) + { + // make 1 base long links + browser.link(*result_i, *reversed_i, 1); } } - diff --git a/qui/MussaAlignedWindow.hpp b/qui/MussaAlignedWindow.hpp index eb8be98..ec426fb 100644 --- a/qui/MussaAlignedWindow.hpp +++ b/qui/MussaAlignedWindow.hpp @@ -38,7 +38,7 @@ protected: Mussa& analysis; //std::vector sequences; //const std::set& selected_paths; - std::vector selected_paths; + std::vector selected_paths; std::vector view_paths; SequenceBrowserWidget browser; QMenu pick_align_menu; diff --git a/qui/MussaWindow.cpp b/qui/MussaWindow.cpp index 0c86b7d..b32dcfa 100644 --- a/qui/MussaWindow.cpp +++ b/qui/MussaWindow.cpp @@ -434,7 +434,7 @@ void MussaWindow::updateLinks() bool reversed = false; const NwayPaths& nway = analysis->paths(); - typedef list conserved_paths; + typedef list conserved_paths; typedef conserved_paths::const_iterator const_conserved_paths_itor; for(const_conserved_paths_itor path_itor = nway.refined_pathz.begin(); path_itor != nway.refined_pathz.end(); -- 2.30.2