/////////////////////
ConservedPath::ConservedPath()
- : score(0.0),
+ : window_size(0),
+ score(0.0),
track_indexes()
{
}
ConservedPath::ConservedPath(const ConservedPath::ConservedPath& other)
- : score(other.score),
+ : window_size(other.window_size),
+ score(other.score),
track_indexes(other.track_indexes)
{
}
-ConservedPath::ConservedPath(double s, ConservedPath::path_type path)
- : score(s),
+ConservedPath::ConservedPath(size_t win_size, double s, ConservedPath::path_type path)
+ : window_size(win_size),
+ score(s),
track_indexes(path)
{
}
void ConservedPath::clear()
{
+ window_size = 0;
score = 0.0;
track_indexes.clear();
}
std::ostream& operator<<(std::ostream& out, const ConservedPath& path)
{
- out << "<path score=" << path.score
+ out << "<path win_size" << path.window_size
+ << " score=" << path.score
<< " len=" << path.track_indexes.size();
if (path.track_indexes.size() > 0)
{
++this_itor)
{
if (*this_itor < 0) {
- paths.push_back(-*this_itor);
+ paths.push_back(-(*this_itor)-window_size);
} else {
paths.push_back(*this_itor);
}
return paths;
}
-
-/////////////////////
-ExtendedConservedPath::ExtendedConservedPath()
- : ConservedPath(),
- window_size(0)
-{
-}
-
-ExtendedConservedPath::ExtendedConservedPath(const ExtendedConservedPath& other)
- : ConservedPath(other),
- window_size(other.window_size)
-{
-}
-
-ExtendedConservedPath::ExtendedConservedPath(int win_size, ConservedPath other)
- : ConservedPath(other),
- window_size(win_size)
-{
-}
-
-ExtendedConservedPath::ExtendedConservedPath(int win_size,
- double s,
- ConservedPath::path_type p)
- : ConservedPath(s, p),
- window_size(win_size)
-{
-}
-
-std::vector<ConservedPath::path_element> ExtendedConservedPath::normalizedIndexes() const
-{
- vector<path_element> paths;
- for (ConservedPath::const_iterator this_itor = begin();
- this_itor != end();
- ++this_itor)
- {
- if (*this_itor < 0) {
- paths.push_back(-(*this_itor) - window_size);
- } else {
- paths.push_back((*this_itor));
- }
- }
- return paths;
-}
-
-ExtendedConservedPath& ExtendedConservedPath::extend(int growth)
+ConservedPath& ConservedPath::extend(int growth)
{
window_size += growth;
return *this;
}
-std::ostream& operator<<(std::ostream& out, const ExtendedConservedPath& path)
-{
- out << "<extended_path size=" << path.window_size;
- out << (ConservedPath&)path;
- return out;
-}
-
ConservedPath();
ConservedPath(const ConservedPath& );
- ConservedPath(double score, path_type path);
+ ConservedPath(size_t window_size, double score, path_type path);
//! reset our path to be empty (including setting threshold to 0)
void clear();
std::vector<bool> reverseComplimented() const;
//! return the list of indexes normalized (to the left)
std::vector<path_element> normalizedIndexes() const;
+ //! extend our current path
+ //! (aka increment our window size by growth)
+ ConservedPath& extend(int growth=1);
+ //! window size (really should always be positive
+ size_t window_size;
//! either number of conserved bases or average entropy
double score;
//! offsets into each of our sequences representing the start of our window
std::vector<path_element> track_indexes;
};
-
-struct ExtendedConservedPath : public ConservedPath
-{
- ExtendedConservedPath();
- ExtendedConservedPath(const ExtendedConservedPath& other);
- ExtendedConservedPath(int win_size, ConservedPath conserved_path);
- ExtendedConservedPath(int win_size, double score, std::vector<int> path);
-
- std::vector<ConservedPath::path_element> ExtendedConservedPath::normalizedIndexes() const;
- //! extend our current path
- /*! (aka increment our window size by growth)
- */
- ExtendedConservedPath& extend(int growth=1);
-
- // output some useful information
- friend std::ostream& operator<<(std::ostream&, const ExtendedConservedPath&);
-
- //! size of extended window
- int window_size;
-};
#endif
return the_paths;
}
+//template <class IteratorT>
+//void Mussa::createLocalAlignment(IteratorT begin, IteratorT end)
+void Mussa::createLocalAlignment(std::list<ConservedPath>::iterator begin,
+ std::list<ConservedPath>::iterator end,
+ std::list<ConservedPath::path_type>& result,
+ std::list<std::vector<bool> >& reversed)
+{
+ const vector<Sequence>& raw_seq = the_seqs;
+ ConservedPath::path_type aligned_path;
+ size_t i2, i3;
+ int x_start, x_end;
+ int window_length, win_i;
+ int rc_1 = 0;
+ int rc_2 = 0;
+ vector<bool> rc_list;
+ bool full_match;
+ vector<bool> matched;
+ int align_counter;
+
+ align_counter = 0;
+ for(std::list<ConservedPath>::iterator pathz_i=begin; pathz_i != end; ++pathz_i)
+ {
+ ConservedPath& a_path = *pathz_i;
+ window_length = a_path.window_size;
+ // determine which parts of the path are RC relative to first species
+ rc_list = a_path.reverseComplimented();
+
+ // loop over each bp in the conserved region for all sequences
+ for(win_i = 0; win_i < window_length; win_i++)
+ {
+ aligned_path.clear();
+ // determine which exact base pairs match between the sequences
+ full_match = true;
+ for(i2 = 0; i2 < a_path.size()-1; i2++)
+ {
+ // assume not rc as most likely, adjust below
+ rc_1 = 0;
+ rc_2 = 0;
+ // no matter the case, any RC node needs adjustments
+ if (a_path[i2] < 0)
+ rc_1 = window_length-1;
+ if (a_path[i2+1] < 0)
+ rc_2 = window_length-1;
+
+ x_start = (abs(a_path[i2]-rc_1+win_i));
+ x_end = (abs(a_path[i2+1]-rc_2+win_i));
+
+ // RC case handling
+ // ugh, and xor...only want rc coloring if just one of the nodes is rc
+ // if both nodes are rc, then they are 'normal' relative to each other
+ if((rc_list[i2] || rc_list[i2+1] )&&!(rc_list[i2] && rc_list[i2+1]))
+ { //the hideous rc matching logic - not complex, but annoying
+ if(!(( (raw_seq[i2][x_start]=='A')&&(raw_seq[i2+1][x_end]=='T')) ||
+ ((raw_seq[i2][x_start]=='T')&&(raw_seq[i2+1][x_end]=='A')) ||
+ ((raw_seq[i2][x_start]=='G')&&(raw_seq[i2+1][x_end]=='C')) ||
+ ((raw_seq[i2][x_start]=='C')&&(raw_seq[i2+1][x_end]=='G'))) )
+ {
+ full_match = false;
+ } else {
+ aligned_path.push_back(x_start);
+ }
+ }
+ else
+ { // forward match
+ if (!( (raw_seq[i2][x_start] == raw_seq[i2+1][x_end]) &&
+ (raw_seq[i2][x_start] != 'N') &&
+ (raw_seq[i2+1][x_end] != 'N') ) ) {
+ full_match = false;
+ } else {
+ aligned_path.push_back(x_start);
+ }
+ }
+ }
+ // grab the last part of our path, assuming we matched
+ if (full_match)
+ aligned_path.push_back(x_end);
+
+ if (aligned_path.size() > 0) {
+ result.push_back(aligned_path);
+ reversed.push_back(rc_list);
+ }
+ }
+ align_counter++;
+ }
+}
+
+
// takes a string and sets it as the next seq
void
Mussa::add_a_seq(string a_seq)
//! return the refined paths found by the nway analysis.
const NwayPaths& paths() const;
+ //! given selected_paths, and view_paths, compute per base pair matches
+ //template <class IteratorT>
+ void createLocalAlignment(std::list<ConservedPath>::iterator begin,
+ std::list<ConservedPath>::iterator end,
+ std::list<ConservedPath::path_type>& result,
+ std::list<std::vector<bool> >& reversed);
+
//! run seqcomp and the nway filtering algorithm.
/*!analyze will run seqcomp and then the nway algorithm
* on whatever sequences have been loaded into this mussa instance.
// check entropy <---------------------------------------------------
avg_entropy = path_entropy(path);
if (avg_entropy <= ent_thres)
- pathz.push_back(ConservedPath(avg_entropy, path));
+ pathz.push_back(ConservedPath(win_size, avg_entropy, path));
// now advance the right iterator
not_advanced = true;
// add path that each species iterator is pointing to
set_path_to_cur_sp_itor_track(path, win_i, sp_itor_begin);
- pathz.push_back(ConservedPath(soft_thres, path));
+ pathz.push_back(ConservedPath(win_size, soft_thres, path));
// now advance the right iterator
still_paths = advance_sp_itor_track(sp_itor_begin,
// if the path is transitive, save the path
if (is_transitive_path(path, all_comparisons, soft_thres)) {
- ConservedPath new_path(soft_thres, path);
+ ConservedPath new_path(win_size, soft_thres, path);
pathz.push_back(new_path);
}
NwayPaths::simple_refine()
{
// ext_path remembers the first window set in an extending path
- ExtendedConservedPath ext_path, new_path;
+ ConservedPath ext_path, new_path;
list<ConservedPath>::iterator cur_path, next_path;
list<ConservedPath>::iterator pathz_i;
int win_ext_len = 0;
// only try to extend when pathz isn't empty.
if (pathz_i != pathz.end())
{
- ext_path = ExtendedConservedPath( win_size, *pathz_i);
+ ext_path = *pathz_i;
while(pathz_i != pathz.end())
{
if (not end) {
// reset stuff
win_ext_len = 0;
- ext_path = ExtendedConservedPath( win_size, *next_path);
+ ext_path = *next_path;
}
}
}
void
NwayPaths::add_path(int threshold, vector<int>& loaded_path)
{
- pathz.push_back(ConservedPath(threshold, loaded_path));
+ pathz.push_back(ConservedPath(threshold, 0.0, loaded_path));
}
void
NwayPaths::save(fs::path save_file_path)
{
fs::fstream save_file;
- list<ExtendedConservedPath >::iterator path_i, paths_end;
+ list<ConservedPath >::iterator path_i, paths_end;
save_file.open(save_file_path, ios::out);
//paths_end = pathz.end();
while (path_i != paths_end)
{
- ExtendedConservedPath& a_path = *path_i;
+ ConservedPath& a_path = *path_i;
//cout << a_path.size() << endl;
//first entry is the window length of the windows in the path
save_file << a_path.window_size << ":";
file_data_line = file_data_line.substr(comma_split_i+1);
}
assert (loaded_path.size() == species_num );
- refined_pathz.push_back(ExtendedConservedPath(atoi(path_width.c_str()),
- threshold,
- loaded_path));
+ refined_pathz.push_back(ConservedPath(atoi(path_width.c_str()),
+ threshold,
+ loaded_path));
}
getline(load_file,file_data_line);
}
friend class SeqView;
protected:
int threshold;
- int win_size;
+ size_t win_size;
int soft_thres;
double ent_thres;
std::list<ConservedPath>::iterator pbegin() { return pathz.begin() ; }
std::list<ConservedPath>::iterator pend() { return pathz.end() ; }
size_t path_size() const { return refined_pathz.size(); }
- std::list<ExtendedConservedPath>::iterator rpbegin() { return refined_pathz.begin() ; }
- std::list<ExtendedConservedPath>::iterator rpend() { return refined_pathz.end() ; }
+ std::list<ConservedPath>::iterator rpbegin() { return refined_pathz.begin() ; }
+ std::list<ConservedPath>::iterator rpend() { return refined_pathz.end() ; }
size_t refined_path_size() const { return refined_pathz.size(); }
// these probably shouldn't be public, but lets start
// simple
std::list<ConservedPath> pathz;
- std::list<ExtendedConservedPath > refined_pathz;
+ std::list<ConservedPath > refined_pathz;
};
#endif
cp1.track_indexes = path;
BOOST_CHECK_EQUAL( cp1.size(), path.size() );
- ConservedPath cp2(18, path);
+ ConservedPath cp2(20, 18, path);
BOOST_CHECK_EQUAL (cp1, cp2);
BOOST_CHECK_EQUAL( cp2.size(), path.size() );
vector<int> path;
path += 3,4,-5,1; // magic from boost assign
- ConservedPath cp1(10, path);
+ ConservedPath cp1(20, 10, path);
vector<bool> reversed = cp1.reverseComplimented();
BOOST_CHECK_EQUAL( path.size(), reversed.size());
vector<int> path;
path += 3,4,5,1; // magic from boost assign
- ExtendedConservedPath ecp1;
+ ConservedPath ecp1;
ecp1.window_size = 30;
ecp1.score = 18;
ecp1.track_indexes = path;
BOOST_CHECK_EQUAL ( ecp1.size(), path.size() );
- ExtendedConservedPath ecp2(30, 18.0, path);
+ ConservedPath ecp2(30, 18.0, path);
BOOST_CHECK_EQUAL ( ecp1, ecp2 );
BOOST_CHECK_EQUAL ( ecp2.size(), path.size() );
- ExtendedConservedPath ecp_ne(35, 20.0, path);
+ ConservedPath ecp_ne(35, 20.0, path);
BOOST_CHECK(ecp2 != ecp_ne);
ConservedPath cp1;
cp1.score = 18;
cp1.track_indexes = path;
- ExtendedConservedPath ecp3(30, cp1);
+ ConservedPath ecp3(cp1);
BOOST_CHECK_EQUAL( ecp2, ecp3 );
BOOST_CHECK_EQUAL( ecp3.size(), path.size() );
}
path_not_next += 4,5,6,5;
const int window_size = 4;
- ExtendedConservedPath ecp_base(window_size, 20, path_base);
- ConservedPath cp_next(25, path_next);
- ConservedPath cp_next_reversed(25, path_next_reversed);
- ConservedPath cp_not_next(22, path_not_next);
+ ConservedPath ecp_base(window_size, 20, path_base);
+ ConservedPath cp_next(window_size, 25, path_next);
+ ConservedPath cp_next_reversed(window_size, 25, path_next_reversed);
+ ConservedPath cp_not_next(window_size, 22, path_not_next);
BOOST_CHECK(ecp_base.nextTo(cp_next));
BOOST_CHECK(!ecp_base.nextTo(cp_not_next));
vector<int> normalized_path;
normalized_path += 3,4, 5,9;
- ExtendedConservedPath ecp1;
+ ConservedPath ecp1;
ecp1.window_size = 10;
ecp1.score = 18;
ecp1.track_indexes = path;
#include <boost/test/auto_unit_test.hpp>
#include <boost/filesystem/operations.hpp>
namespace fs = boost::filesystem;
+#include <boost/assign/list_of.hpp>
+#include <boost/assign/list_inserter.hpp>
+#include <boost/assign.hpp>
+namespace assign = boost::assign;
#include <string>
#include <sstream>
m1.add_motifs(motifs, colors);
BOOST_CHECK_EQUAL( first_size, m1.motifs().size() );
}
+
+static void
+local_align_test(const vector<Sequence> &seqs,
+ const list<ConservedPath::path_type>& result,
+ const list<vector<bool> >& reversed)
+{
+ map<char, vector <char> > m;
+ assign::insert(m)('A', assign::list_of('A')('T') )
+ ('T', assign::list_of('T')('A') )
+ ('G', assign::list_of('G')('C') )
+ ('C', assign::list_of('C')('G') );
+ list<vector<bool> >::const_iterator rc_i = reversed.begin();
+
+ for(list<ConservedPath::path_type>::const_iterator base_i = result.begin();
+ base_i != result.end();
+ ++base_i, ++rc_i)
+ {
+ // since the reverse compliment flag is relative to the first sequence
+ // the first one should always be false
+ BOOST_CHECK_EQUAL( (*rc_i)[0], false );
+ const int first_path_basepair_index = (*base_i)[0];
+ const int second_path_basepair_index = (*base_i)[1];
+ const char first_basepair = seqs[0][first_path_basepair_index];
+ const char second_basepair = seqs[1][second_path_basepair_index];
+ // get our index into our reverse compliment map m
+ const int second_compliment_index = (*rc_i)[1];
+ // lookup the forward or reverse compliment depending on our rc flag
+ const char complimented_second = m[second_basepair][second_compliment_index];
+
+ BOOST_CHECK_EQUAL( first_basepair, complimented_second) ;
+ }
+}
+
+
+BOOST_AUTO_TEST_CASE( local_alignment )
+{
+ string s0("GCGCATAT");
+ string s1("AAAAAAAT");
+ Sequence seq1(s1);
+
+ Mussa analysis;
+ analysis.add_a_seq(s0);
+ analysis.add_a_seq(s1);
+ analysis.analyze(4,3);
+ NwayPaths npath = analysis.paths();
+ list<ConservedPath::path_type> result;
+ list<vector<bool> > reversed;
+ list<ConservedPath>::iterator pathz_i = npath.pathz.begin();
+
+ list<ConservedPath> selected_paths;
+ selected_paths.push_back(*pathz_i);
+ analysis.createLocalAlignment(selected_paths.begin(),
+ selected_paths.end(),
+ result,
+ reversed);
+
+ local_align_test(analysis.sequences(), result, reversed);
+
+ ++pathz_i;
+ result.clear();
+ reversed.clear();
+ selected_paths.clear();
+ selected_paths.push_back(*pathz_i);
+ analysis.createLocalAlignment(selected_paths.begin(),
+ selected_paths.end(),
+ result,
+ reversed);
+ local_align_test(analysis.sequences(), result, reversed);
+
+
+}
+
+
;
class_<ConservedPath>("ConservedPath")
+ .def_readwrite("window_size", &ConservedPath::window_size)
.def_readwrite("score", &ConservedPath::score)
.def_readwrite("track_indexes", &ConservedPath::track_indexes)
//.add_property("track_indexes", //iterator<std::vector<int> >())
// range(&ConservedPath::track_indexes.begin(),
// &ConservedPath::track_indexes.end()))
;
-
- class_<ExtendedConservedPath, bases<ConservedPath> >("ExtendedConservedPath")
- .def_readwrite("window_size", &ExtendedConservedPath::window_size)
- ;
}
{
// sets are sorted
set<int>::iterator sel_i = sel_paths.begin();
- list<ExtendedConservedPath>::const_iterator path_i = m.paths().refined_pathz.begin();
- list<ExtendedConservedPath>::const_iterator path_end = m.paths().refined_pathz.end();
+ list<ConservedPath>::const_iterator path_i = m.paths().refined_pathz.begin();
+ list<ConservedPath>::const_iterator path_end = m.paths().refined_pathz.end();
size_t path_size = m.paths().refined_pathz.size();
size_t pathid=0;
pick_actions.clear();
view_actions.clear();
- for(vector<ExtendedConservedPath >::iterator pathz_i=selected_paths.begin();
+ for(vector<ConservedPath >::iterator pathz_i=selected_paths.begin();
pathz_i != selected_paths.end();
++pathz_i)
{
void MussaAlignedWindow::computeMatchLines()
{
- const vector<Sequence>& raw_seq = analysis.sequences();
- vector<int> aligned_path;
- size_t i2, i3;
- int x_start, x_end;
- int window_length, win_i;
- int rc_1 = 0;
- int rc_2 = 0;
- vector<bool> rc_list;
- bool full_match;
- vector<bool> matched;
- int align_counter;
-
browser.clear_links();
- align_counter = 0;
- for(vector<ExtendedConservedPath >::iterator pathz_i=selected_paths.begin();
- pathz_i != selected_paths.end();
- ++pathz_i)
+
+ // filter out conserved paths
+ list<ConservedPath> filtered_paths;
+ vector<ConservedPath>::iterator path_i = selected_paths.begin();
+ list<ConservedPath::path_type> result;
+ list<vector<bool> > reversed;
+
+ for(vector<ConservedPath>::size_type count = 0;
+ count != selected_paths.size();
+ ++count, ++path_i)
{
- if (view_paths[align_counter])
- {
- ExtendedConservedPath& a_path = *pathz_i;
- window_length = a_path.window_size;
- // determine which parts of the path are RC relative to first species
- rc_list = a_path.reverseComplimented();
-
- // loop over each bp in the conserved region for all sequences
- for(win_i = 0; win_i < window_length; win_i++)
- {
- aligned_path.clear();
- // determine which exact base pairs match between the sequences
- full_match = true;
- for(i2 = 0; i2 < a_path.size()-1; i2++)
- {
- // assume not rc as most likely, adjust below
- rc_1 = 0;
- rc_2 = 0;
- // no matter the case, any RC node needs adjustments
- if (a_path[i2] < 0)
- rc_1 = window_length-1;
- if (a_path[i2+1] < 0)
- rc_2 = window_length-1;
-
- x_start = (abs(a_path[i2]-rc_1+win_i));
- x_end = (abs(a_path[i2+1]-rc_2+win_i));
-
- // RC case handling
- // ugh, and xor...only want rc coloring if just one of the nodes is rc
- // if both nodes are rc, then they are 'normal' relative to each other
- if((rc_list[i2] || rc_list[i2+1] )&&!(rc_list[i2] && rc_list[i2+1]))
- { //the hideous rc matching logic - not complex, but annoying
- if(!(( (raw_seq[i2][x_start]=='A')&&(raw_seq[i2+1][x_end]=='T')) ||
- ((raw_seq[i2][x_start]=='T')&&(raw_seq[i2+1][x_end]=='A')) ||
- ((raw_seq[i2][x_start]=='G')&&(raw_seq[i2+1][x_end]=='C')) ||
- ((raw_seq[i2][x_start]=='C')&&(raw_seq[i2+1][x_end]=='G'))) )
- full_match = false;
- }
- else
- {
- if (!( (raw_seq[i2][x_start] == raw_seq[i2+1][x_end]) &&
- (raw_seq[i2][x_start] != 'N') &&
- (raw_seq[i2+1][x_end] != 'N') ) )
- full_match = false;
- }
- }
-
- // draw for matches stretching across all sequences
- if (full_match)
- {
- // now can draw the line for each bp in this window that matches
- // grrr, need to ask if anyone cares if I switch the seq
- // top-bot order...
- i3 = 0;
- //y_loc = y_min + 5;
- for(i2 = 0; i2 < a_path.size()-1; i2++)
- {
- // assume not rc as most likely, adjust below
- rc_1 = 0;
- rc_2 = 0;
- // no matter the case, any RC node needs adjustments
- if (a_path[i2] < 0)
- {
- rc_1 = window_length;
- }
- if (a_path[i2] < 0)
- {
- rc_2 = window_length;
- }
-
- // maybe shouldn't recalc these, but store values from first loop
- x_start = (abs((int) (a_path[i2]-rc_1+win_i)));
- x_end = (abs((int) (a_path[i2+1]-rc_2+win_i)));
- aligned_path.push_back(x_start);
- // if we're on the last time through the loop, save x_end too
- if (i2 == a_path.size()-2) {
- aligned_path.push_back(x_end);
- }
- }
- }
- if (aligned_path.size() > 0) {
- browser.link(aligned_path, rc_list,1);
- }
- }
- }
- align_counter++;
+ if (view_paths[count])
+ filtered_paths.push_back(*path_i);
+ }
+ analysis.createLocalAlignment(filtered_paths.begin(),
+ filtered_paths.end(),
+ result,
+ reversed);
+
+ list<ConservedPath::path_type>::const_iterator result_i = result.begin();
+ list<vector<bool> >::const_iterator reversed_i = reversed.begin();
+ for(int i = 0; i != result.size(); ++i, ++result_i, ++reversed_i)
+ {
+ // make 1 base long links
+ browser.link(*result_i, *reversed_i, 1);
}
}
-
Mussa& analysis;
//std::vector<Sequence> sequences;
//const std::set<int>& selected_paths;
- std::vector<ExtendedConservedPath> selected_paths;
+ std::vector<ConservedPath> selected_paths;
std::vector<bool> view_paths;
SequenceBrowserWidget browser;
QMenu pick_align_menu;
bool reversed = false;
const NwayPaths& nway = analysis->paths();
- typedef list<ExtendedConservedPath> conserved_paths;
+ typedef list<ConservedPath> conserved_paths;
typedef conserved_paths::const_iterator const_conserved_paths_itor;
for(const_conserved_paths_itor path_itor = nway.refined_pathz.begin();
path_itor != nway.refined_pathz.end();