--- /dev/null
+#include "alg/conserved_path.h"
+#include <stdexcept>
+
+using namespace std;
+
+/////////////////////
+
+ConservedPath::ConservedPath()
+ : score(0.0),
+ track_indexes()
+{
+}
+
+ConservedPath::ConservedPath(const ConservedPath::ConservedPath& other)
+ : score(other.score),
+ track_indexes(other.track_indexes)
+{
+}
+
+ConservedPath::ConservedPath(double s, ConservedPath::path_type path)
+ : score(s),
+ track_indexes(path)
+{
+}
+
+void ConservedPath::clear()
+{
+ score = 0.0;
+ track_indexes.clear();
+}
+
+ConservedPath::path_element&
+ConservedPath::operator[](ConservedPath::size_type index)
+{
+ return track_indexes[index];
+}
+
+bool operator==(const ConservedPath::ConservedPath& a,
+ const ConservedPath::ConservedPath& b)
+{
+ // this isn't good as score is now a double
+ return ( (a.score == b.score)
+ and (a.track_indexes.size() == b.track_indexes.size())
+ and (a.track_indexes == b.track_indexes));
+}
+
+bool operator!=(const ConservedPath::ConservedPath& a,
+ const ConservedPath::ConservedPath& b)
+{
+ return not (a == b);
+}
+
+std::ostream& operator<<(std::ostream& out, const ConservedPath& path)
+{
+ out << "<path score=" << path.score
+ << " len=" << path.track_indexes.size();
+ if (path.track_indexes.size() > 0)
+ {
+ // print the first element
+ ConservedPath::const_iterator itor = path.begin();
+ out << ">" << *itor;
+ // print the remaining elements
+ for (;
+ itor != path.end();
+ ++itor)
+ {
+ out << ", " << *itor;
+ }
+ out << "</path>";
+ }
+ else
+ {
+ // if we had no elements just close the block
+ out << "/>";
+ }
+}
+
+void ConservedPath::push_back(const ConservedPath::path_element& element)
+{
+ track_indexes.push_back(element);
+}
+
+void ConservedPath::pop_back()
+{
+ track_indexes.pop_back();
+}
+
+ConservedPath::size_type ConservedPath::size() const
+{
+ return track_indexes.size();
+}
+
+ConservedPath::iterator ConservedPath::begin()
+{
+ return track_indexes.begin();
+}
+
+ConservedPath::const_iterator ConservedPath::begin() const
+{
+ return track_indexes.begin();
+}
+
+ConservedPath::iterator ConservedPath::end()
+{
+ return track_indexes.end();
+}
+
+ConservedPath::const_iterator ConservedPath::end() const
+{
+ return track_indexes.end();
+}
+
+bool ConservedPath::nextTo(const ConservedPath& next) const
+{
+ if (size() != next.size() ) {
+ throw runtime_error("paths must be the same length");
+ }
+ ConservedPath::const_iterator this_itor = begin();
+ ConservedPath::const_iterator next_itor = next.begin();
+ for (; this_itor != end(); ++this_itor, ++next_itor)
+ {
+ if ( (*this_itor + 1) != *next_itor )
+ return false;
+ }
+ return true;
+}
+
+/////////////////////
+ExtendedConservedPath::ExtendedConservedPath()
+ : window_size(0)
+{
+}
+
+ExtendedConservedPath::ExtendedConservedPath(const ExtendedConservedPath& other)
+ : ConservedPath(other),
+ window_size(other.window_size)
+{
+}
+
+ExtendedConservedPath::ExtendedConservedPath(int win_size, ConservedPath other)
+ : ConservedPath(other),
+ window_size(win_size)
+{
+}
+
+ExtendedConservedPath::ExtendedConservedPath(int win_size,
+ double s,
+ ConservedPath::path_type p)
+ : ConservedPath(s, p),
+ window_size(win_size)
+{
+}
+
+ExtendedConservedPath& ExtendedConservedPath::extend(int growth)
+{
+ window_size += growth;
+
+ // What does this actually do? Tristan's code was doing this operation
+ // but I don't understand how it properly adjusts reverse compliment
+ // windows?
+ for(ConservedPath::iterator path_i = begin();
+ path_i != end();
+ ++path_i)
+ {
+ if (*path_i < 0)
+ *path_i += growth;
+ }
+ return *this;
+}
+
+
--- /dev/null
+#ifndef _CONSERVED_PATH_H
+#define _CONSERVED_PATH_H
+#include <vector>
+#include <ostream>
+
+struct ConservedPath
+{
+ typedef int path_element;
+ typedef std::vector<path_element> path_type;
+ typedef path_type::iterator iterator;
+ typedef path_type::const_iterator const_iterator;
+ typedef path_type::size_type size_type;
+
+ ConservedPath();
+ ConservedPath(const ConservedPath& );
+ ConservedPath(double score, path_type path);
+
+ //! reset our path to be empty (including setting threshold to 0)
+ void clear();
+ path_element& operator[](size_type index);
+ void push_back(const path_element& );
+ void pop_back();
+
+ size_type size() const;
+ iterator begin();
+ const_iterator begin() const;
+ iterator end();
+ const_iterator end() const;
+
+ friend bool operator==(const ConservedPath& a, const ConservedPath &b);
+ friend bool operator!=(const ConservedPath& a, const ConservedPath &b);
+ friend std::ostream& operator<<(std::ostream&, const ConservedPath&);
+
+ //! return true if all elements of the path are "next to" our current path.
+ /*! Next to is defined as being window index + 1
+ * that definition may not properly track reverse compliment
+ */
+ bool nextTo(const ConservedPath& next) const;
+
+ //! either number of conserved bases or average entropy
+ double score;
+ //! offsets into each of our sequences representing the start of our window
+ std::vector<path_element> track_indexes;
+};
+
+struct ExtendedConservedPath : public ConservedPath
+{
+ ExtendedConservedPath();
+ ExtendedConservedPath(const ExtendedConservedPath& other);
+ ExtendedConservedPath(int win_size, ConservedPath conserved_path);
+ ExtendedConservedPath(int win_size, double score, std::vector<int> path);
+
+ //! extend our current path
+ /*! (aka increment our window size by growth)
+ */
+ ExtendedConservedPath& extend(int growth=1);
+
+ //! size of extended window
+ int window_size;
+};
+#endif
using namespace std;
+bool operator==(const FLPs::match& a, const FLPs::match& b)
+{
+ return (a.index == b.index && a.score == b.score);
+}
+
+ostream &operator<<(ostream& out, const FLPs::match& m)
+{
+ out << "(" << m.score << ", " << m.index << ")";
+ return out;
+}
+
FLPs::FLPs() :
window_size(0),
hard_threshold(0),
// so we shouldn't reallocate ourselves.
return;
}
- all_matches = new std::vector<list<match> >;
+ all_matches = new std::vector<list<FLPs::match> >;
- list<match> empty_match_list;
- std::vector<list<match> >::size_type window_count;
+ list<FLPs::match> empty_match_list;
+ std::vector<list<FLPs::match> >::size_type window_count;
if (len1 > 0) {
// we actually know how much to preallocate
window_count = len1 - window_size+1;
window_count = 0;
}
- for (std::vector<list<match> >::size_type i=0; i < window_count; ++i) {
+ for (std::vector<list<FLPs::match> >::size_type i=0; i < window_count; ++i) {
all_matches->push_back(empty_match_list);
}
assert (all_matches->size() == window_count);
}
*/
+list<FLPs::match>
+FLPs::matches(int index) const
+{
+ if (all_matches == 0)
+ throw runtime_error("please call FLPs.seqcomp first");
+
+ list<FLPs::match>::iterator list_i, list_end;
+
+ index = abs(index);
+ list_i = (*all_matches)[index].begin();
+ list_end = (*all_matches)[index].end();
+ list<FLPs::match> these_matches(list_i, list_end);
+ return these_matches;
+}
+
list<int>
-FLPs::matches(int index)
+FLPs::match_locations(int index) const
{
if (all_matches == 0)
throw runtime_error("please call FLPs.seqcomp first");
list<int> these_matches;
- list<match>::iterator list_i, list_end;
+ list<FLPs::match>::iterator list_i, list_end;
index = abs(index);
list_i = (*all_matches)[index].begin();
throw runtime_error("please call FLPs.seqcomp first");
list<int> thres_matches;
- list<match>::iterator list_i, list_end;
+ list<FLPs::match>::iterator list_i, list_end;
index = abs(index);
list_i = (*all_matches)[index].begin();
save_file << "<Seqcomp win=" << window_size
<< " thres=" << hard_threshold << endl;
- for(vector<list<match> >::const_iterator win_citor = all_matches->begin();
+ for(vector<list<FLPs::match> >::const_iterator win_citor = all_matches->begin();
win_citor != all_matches->end();
++win_citor)
{
if (!win_citor->empty())
{
- for( list<match>::const_iterator match_itor = win_citor->begin();
+ for( list<FLPs::match>::const_iterator match_itor = win_citor->begin();
match_itor != win_citor->end();
++match_itor)
{
match a_match;
string::size_type split_index, comma_index;
bool tag_open = false;
- list<match> a_match_list;
+ list<FLPs::match> a_match_list;
// initialize our all_matches pointer
alloc_matches();
#include <list>
#include <string>
#include <vector>
+#include <iostream>
//! FLP = Fixed Length Pairs (Data)
/*!
*/
class FLPs
{
- private:
- //! the number of base pairs in the sliding window
- int window_size;
- //! the minimum tnumber of base pairs need for this window to be saved.
- int hard_threshold;
-
- struct match
- {
- int index;
- int score;
- };
- //! add a match from location seq1_i to seq2_i with a threshold of a_score
- /*! i2_offset is used to shift the window start when doing a reverse
- * compliment. (This is called by seqcomp() )
- */
- inline void add(int seq1_i, int seq2_i, int a_score, int i2_offset);
- //! this list of matches between the two sequences
- /*! All of the matches are stored here, it appears each window position
- * in sequence 1 gets an entry in the vector, the list contains matches
- * between sequence 1 and sequence 2
- */
- std::vector<std::list<match> > *all_matches;
- //! reserve space for the appropriate number of windows.
- /*! this is mostly so seqcomp can use operator[]
- */
- void alloc_matches(std::string::size_type len1=0);
-
public:
FLPs();
//! Setup a FLP and reserve space for the match lists
* Initialize the all_matches structure with a list of matches
* for enough windows for the size of sequence 1
*/
+
+ struct match
+ {
+ int index;
+ int score;
+
+ friend bool operator==(const match&, const match& );
+ friend std::ostream &operator<<(std::ostream&, const match&);
+ };
+
void setup(int win_size, int hard_thres);
//! compare two sequences and store the list of results in all_matches
void seqcomp(std::string seq1, std::string seq2, bool is_RC);
//bool FLPs::match_less(match *match1, match *match2);
//void FLPs::sort();
//! Return all the matches for a particular window?
- std::list<int> matches(int index);
- //! Return all the matches for a particular window abouve threshold
+ std::list<match> matches(int index) const;
+ //! Return all the match indexes for a particular window?
+ std::list<int> match_locations(int index) const;
+ //! Return all the match indexes for a particular window above a threshold
std::list<int> thres_matches(int index, int thres) const;
//! Return the number of windows stored in this FLPs
/*! (this is should be the same as seq1_len-window_size+1
void save(std::string save_file_path);
//! Load a vector of a lists of FLPs
void load(std::string file_path);
+ private:
+ //! the number of base pairs in the sliding window
+ int window_size;
+ //! the minimum tnumber of base pairs need for this window to be saved.
+ int hard_threshold;
+
+ //! add a match from location seq1_i to seq2_i with a threshold of a_score
+ /*! i2_offset is used to shift the window start when doing a reverse
+ * compliment. (This is called by seqcomp() )
+ */
+ inline void add(int seq1_i, int seq2_i, int a_score, int i2_offset);
+ //! this list of matches between the two sequences
+ /*! All of the matches are stored here, it appears each window position
+ * in sequence 1 gets an entry in the vector, the list contains matches
+ * between sequence 1 and sequence 2
+ */
+ std::vector<std::list<match> > *all_matches;
+ //! reserve space for the appropriate number of windows.
+ /*! this is mostly so seqcomp can use operator[]
+ */
+ void alloc_matches(std::string::size_type len1=0);
+
};
#endif
CURDIR := $(BASEDIR)alg/
-SOURCES.cxx := flp.cxx \
+SOURCES.cxx := conserved_path.cxx \
+ flp.cxx \
flp_seqcomp.cxx \
mussa_class.cxx \
nway_paths.cxx \
# mussa_nway_refine.cxx (broken)
MUSSA_ALG_SRC := $(addprefix $(CURDIR), $(SOURCES.cxx))
+MUSSA_ALG_OBJ := $(MUSSA_ALG_SRC:.cxx=$(OBJEXT))
-SRC += $(MUSSA_ALG_SRC)
-CXXFLAGS +=
-
-MUSSA_ALG_LIB := $(CURDIR)libmussa$(LIBEXT)
-LIBDIRS += -L$(CURDIR)
-
-TARGETLIBS += $(MUSSA_ALG_LIB)
-
-$(MUSSA_ALG_LIB): $(MUSSA_ALG_SRC:.cxx=$(OBJEXT)) $(MUSSA_ALG_SRC:.cxx=.d)
- $(AR) rv $@ $?
+GLSOURCES.cxx := glsequence.cxx
+MUSSA_ALG_GL_SRC := $(addprefix $(CURDIR), $(GLSOURCES.cxx))
+MUSSA_ALG_GL_OBJ := $(MUSSA_ALG_GL_SRC:.cxx=$(OBJEXT))
+SRC += $(MUSSA_ALG_SRC) $(MUSSA_ALG_GL_SRC)
+CXXFLAGS +=
{
int seq_num = the_paths.sequence_count();
- cout << "No BAM\n";
+ cout << "No BAM " << seq_num << endl; ;
a_file_path = file_path_base + ".museq";
file_data_line = file_data_line.substr(space_split_i+1);
}
//cout << endl;
- the_paths.add_path(loaded_path);
+ // FIXME: do we have any information about what the threshold should be?
+ the_paths.add_path(0, loaded_path);
}
save_file.close();
while ( (sp_i < all_comparisons.size()) && (some_matches) )
{
new_nodes.clear();
- new_nodes = all_comparisons[0][sp_i].matches(win_i);
+ new_nodes = all_comparisons[0][sp_i].match_locations(win_i);
if (new_nodes.empty())
some_matches = false;
//cout << "fie\n";
while (still_paths)
{
- // add path that each species iterator is pointing to
- path.clear();
- path.push_back(win_i);
+ // add path that each species iterator is pointing to
+ path.clear();
+ path.push_back(win_i);
- //cout << win_i;
- for (sp_i = 0; sp_i < all_comparisons.size()-1; sp_i++)
- {
- //cout << ", " << *(sp_nodes[sp_i]);
- path.push_back(*(sp_nodes[sp_i]));
- }
- //cout << endl;
+ //cout << win_i;
+ for (sp_i = 0; sp_i < all_comparisons.size()-1; sp_i++)
+ {
+ //cout << ", " << *(sp_nodes[sp_i]);
+ path.push_back(*(sp_nodes[sp_i]));
+ }
+ //cout << endl;
- // check entropy <---------------------------------------------------
- avg_entropy = path_entropy(path);
- if (avg_entropy <= ent_thres)
- pathz.push_back(path);
-
- // now advance the right iterator
- not_advanced = true;
- sp_depth = all_comparisons.size()- 2; // this roves up & down species list
- while ( (not_advanced) && (sp_depth != -1) )
- {
- //cout << "foe\n";
- //cout << sp_depth << ".." << *(sp_nodes[sp_depth]) << endl;
- (sp_nodes[sp_depth])++; // advance iter at current depth
- //cout << sp_depth << ".." << *(sp_nodes[sp_depth]) << endl;
-
- // if we reached the end of the list, reset iter & go up one depth
- if (sp_nodes[sp_depth] == sp_nodes_end[sp_depth])
- {
- //cout << "fum\n";
- sp_nodes[sp_depth] = all_matches[sp_depth].begin();
- sp_depth--;
- //cout << "depth = " << sp_depth << endl;
- }
- else
- not_advanced = false;
- }
-
- if (sp_depth == -1) // jumped up to first species, all paths searched
- still_paths = false;
- else // otherwise just reset to max depth and continue
- sp_depth = all_comparisons.size() - 2;
+ // check entropy <---------------------------------------------------
+ avg_entropy = path_entropy(path);
+ if (avg_entropy <= ent_thres)
+ pathz.push_back(ConservedPath(avg_entropy, path));
+
+ // now advance the right iterator
+ not_advanced = true;
+ sp_depth = all_comparisons.size()- 2; // this roves up & down species list
+ while ( (not_advanced) && (sp_depth != -1) )
+ {
+ //cout << "foe\n";
+ //cout << sp_depth << ".." << *(sp_nodes[sp_depth]) << endl;
+ (sp_nodes[sp_depth])++; // advance iter at current depth
+ //cout << sp_depth << ".." << *(sp_nodes[sp_depth]) << endl;
+
+ // if we reached the end of the list, reset iter & go up one depth
+ if (sp_nodes[sp_depth] == sp_nodes_end[sp_depth])
+ {
+ //cout << "fum\n";
+ sp_nodes[sp_depth] = all_matches[sp_depth].begin();
+ sp_depth--;
+ //cout << "depth = " << sp_depth << endl;
+ }
+ else
+ not_advanced = false;
+ }
+
+ if (sp_depth == -1) // jumped up to first species, all paths searched
+ still_paths = false;
+ else // otherwise just reset to max depth and continue
+ sp_depth = all_comparisons.size() - 2;
}
}
}
// add path that each species iterator is pointing to
set_path_to_cur_sp_itor_track(path, win_i, sp_itor_begin);
- pathz.push_back(path);
+ pathz.push_back(ConservedPath(soft_thres, path));
// now advance the right iterator
still_paths = advance_sp_itor_track(sp_itor_begin,
// if the path is transitive, save the path
if (is_transitive_path(path, all_comparisons, soft_thres))
- pathz.push_back(path);
+ pathz.push_back(ConservedPath(soft_thres, path));
// now advance the right iterator
still_paths = advance_sp_itor_track(sp_itor_begin,
// ---------- mussa_nway.cc -----------
// ----------------------------------------
-#include "nway_paths.hh"
+#include "alg/nway_paths.hh"
+#include "alg/conserved_path.h"
+
#include <fstream>
#include <iostream>
+#include <stdexcept>
using namespace std;
NwayPaths::simple_refine()
{
// ext_path remembers the first window set in an extending path
- vector<int> ext_path, cur_path, next_path, new_path;
- list<vector<int> >::iterator pathz_i;
+ ExtendedConservedPath ext_path, new_path;
+ list<ConservedPath>::iterator cur_path, next_path;
+ list<ConservedPath>::iterator pathz_i;
int win_ext_len = 0;
bool extending, end = false;
cout << "path number is: " << pathz.size() << endl;
pathz_i = pathz.begin();
- ext_path = *pathz_i;
+ ext_path = ExtendedConservedPath( win_size, *pathz_i);
while(pathz_i != pathz.end())
{
// keep track of current path and advance to next path
- cur_path = *pathz_i;
+ cur_path = pathz_i;
++pathz_i;
if (pathz_i == pathz.end())
end = true;
else
- next_path = *pathz_i;
+ next_path = pathz_i;
if (not end)
{
// if node for each seq is equal to the next node+1 then for all
// sequences then we are extending
- extending = true;
- for(vector<int>::size_type i2 = 0; i2 != pathz_i->size(); i2++)
- {
- //cout << cur_path[i2] << " vs " << endl;
- //cout << next_path[i2] << endl;
- if (cur_path[i2] + 1 != next_path[i2])
- extending = false;
- }
+ extending = cur_path->nextTo(*next_path);
}
else
extending = false;
else
{
// add the extend window length as first element and add as refined
+ // now that we have the path to extend save it
new_path = ext_path;
- new_path.insert(new_path.begin(),win_size + win_ext_len);
- for(vector<int>::size_type i2 = 1; i2 <= pathz_i->size(); i2++)
- {
- if (new_path[i2] < 0)
- {
- //cout << "before: " << new_path[i2];
- new_path[i2] += win_ext_len;
- //cout << " after: " << new_path[i2] << endl;
- }
- }
+ new_path.extend(win_ext_len);
refined_pathz.push_back(new_path);
// reset stuff
win_ext_len = 0;
- ext_path = next_path;
+ ext_path = ExtendedConservedPath( win_size, *next_path);
}
}
cout << "r_path number is: " << refined_pathz.size() << endl;
void
-NwayPaths::add_path(vector<int> loaded_path)
+NwayPaths::add_path(int threshold, vector<int>& loaded_path)
+{
+ pathz.push_back(ConservedPath(threshold, loaded_path));
+}
+
+void
+NwayPaths::add_path(ConservedPath loaded_path)
{
pathz.push_back(loaded_path);
}
NwayPaths::save(string save_file_path)
{
fstream save_file;
- list<vector<int> >::iterator path_i, paths_end;
- vector<int> a_path;
+ list<ExtendedConservedPath >::iterator path_i, paths_end;
unsigned int i;
save_file.open(save_file_path.c_str(), ios::out);
//paths_end = pathz.end();
while (path_i != paths_end)
{
- a_path = *path_i;
+ ExtendedConservedPath& a_path = *path_i;
//cout << a_path.size() << endl;
//first entry is the window length of the windows in the path
- save_file << a_path[0] << ":";
- for(i = 1; i <= sequence_count(); ++i)
+ save_file << a_path.window_size << ":";
+ for(i = 0; i != sequence_count(); ++i)
{
save_file << a_path[i];
if (i != sequence_count())
int
NwayPaths::sequence_count()
{
- if (pathz.begin() == pathz.end() )
+ if (refined_pathz.begin() == refined_pathz.end() )
return 0;
else
- return pathz.begin()->size();
+ return refined_pathz.begin()->size();
}
NwayPaths::load(string load_file_path)
{
fstream load_file;
- string file_data_line, header_data, data, path_node, path_length;
+ string file_data_line, header_data, data, path_node, path_width;
int i, space_split_i, equal_split_i, comma_split_i, colon_split_i;
vector<int> loaded_path;
string err_msg;
cout << "BAM!!!!\n";
return err_msg;
}
-
else
{
- // get header data
- // grab mussa tag - discard for now...maybe check in future...
- getline(load_file,file_data_line);
- space_split_i = file_data_line.find(" ");
- file_data_line = file_data_line.substr(space_split_i+1);
- // grab type tag - need in future to distinguish between flp and vlp paths
- space_split_i = file_data_line.find(" ");
- file_data_line = file_data_line.substr(space_split_i+1);
- // get species/seq number
- space_split_i = file_data_line.find(" ");
- header_data = file_data_line.substr(0,space_split_i);
- equal_split_i = header_data.find("=");
- data = file_data_line.substr(equal_split_i+1);
- unsigned int species_num = atoi (data.c_str());
- file_data_line = file_data_line.substr(space_split_i+1);
- // get window size
- space_split_i = file_data_line.find(" ");
- header_data = file_data_line.substr(0,space_split_i);
- equal_split_i = header_data.find("=");
- data = file_data_line.substr(equal_split_i+1);
- win_size = atoi (data.c_str());
- file_data_line = file_data_line.substr(space_split_i+1);
- // get window size
- space_split_i = file_data_line.find(" ");
- header_data = file_data_line.substr(0,space_split_i);
- equal_split_i = header_data.find("=");
- data = file_data_line.substr(equal_split_i+1);
- threshold = atoi (data.c_str());
- file_data_line = file_data_line.substr(space_split_i+1);
-
- cout << "seq_num=" << species_num << " win=" << win_size;
- cout << " thres=" << threshold << endl;
-
- // clear out the current data
- refined_pathz.clear();
-
- int temp;
-
- getline(load_file,file_data_line);
- while ( (!load_file.eof()) && (file_data_line != "</Mussa>") )
- {
- if (file_data_line != "")
+ // get header data
+ // grab mussa tag - discard for now...maybe check in future...
+ getline(load_file,file_data_line);
+ space_split_i = file_data_line.find(" ");
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // grab type tag - need in future to distinguish between flp and vlp paths
+ space_split_i = file_data_line.find(" ");
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // get species/seq number
+ space_split_i = file_data_line.find(" ");
+ header_data = file_data_line.substr(0,space_split_i);
+ equal_split_i = header_data.find("=");
+ data = file_data_line.substr(equal_split_i+1);
+ unsigned int species_num = atoi (data.c_str());
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // get window size
+ space_split_i = file_data_line.find(" ");
+ header_data = file_data_line.substr(0,space_split_i);
+ equal_split_i = header_data.find("=");
+ data = file_data_line.substr(equal_split_i+1);
+ win_size = atoi (data.c_str());
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // get window size
+ space_split_i = file_data_line.find(" ");
+ header_data = file_data_line.substr(0,space_split_i);
+ equal_split_i = header_data.find("=");
+ data = file_data_line.substr(equal_split_i+1);
+ threshold = atoi (data.c_str());
+ file_data_line = file_data_line.substr(space_split_i+1);
+
+ cout << "seq_num=" << species_num << " win=" << win_size;
+ cout << " thres=" << threshold << endl;
+
+ // clear out the current data
+ refined_pathz.clear();
+
+ int temp;
+
+ getline(load_file,file_data_line);
+ while ( (!load_file.eof()) && (file_data_line != "</Mussa>") )
{
- loaded_path.clear();
- colon_split_i = file_data_line.find(":");
- path_length = file_data_line.substr(0,colon_split_i);
- loaded_path.push_back(atoi (path_length.c_str()));
- file_data_line = file_data_line.substr(colon_split_i+1);
- for(i = 0; i < species_num; i++)
+ cout << "fdl: " << file_data_line << endl;
+ if (file_data_line != "")
{
- comma_split_i = file_data_line.find(",");
- path_node = file_data_line.substr(0, comma_split_i);
- temp = atoi (path_node.c_str());
- loaded_path.push_back(temp);
- file_data_line = file_data_line.substr(comma_split_i+1);
+ loaded_path.clear();
+ colon_split_i = file_data_line.find(":");
+ // whats our window size?
+ path_width = file_data_line.substr(0,colon_split_i);
+ file_data_line = file_data_line.substr(colon_split_i+1);
+ for(i = 0; i < species_num; i++)
+ {
+ comma_split_i = file_data_line.find(",");
+ path_node = file_data_line.substr(0, comma_split_i);
+ temp = atoi (path_node.c_str());
+ loaded_path.push_back(temp);
+ file_data_line = file_data_line.substr(comma_split_i+1);
+ }
+ assert (loaded_path.size() == species_num );
+ refined_pathz.push_back(ExtendedConservedPath(atoi(path_width.c_str()),
+ threshold,
+ loaded_path));
}
- refined_pathz.push_back(loaded_path);
+ getline(load_file,file_data_line);
}
- getline(load_file,file_data_line);
- }
-
- load_file.close();
+ load_file.close();
- return "";
+ return "";
}
}
void
-NwayPaths::path_search(vector<vector<FLPs> > all_comparisons, vector<int> path, int depth)
+NwayPaths::path_search(vector<vector<FLPs> > all_comparisons, ConservedPath path, int depth)
{
list<int> new_nodes, trans_check_nodes;
list<int>::iterator new_nodes_i, new_nodes_end;
int i;
bool trans_check_good;
- new_nodes = all_comparisons[depth - 1][depth].matches(path[depth-1]);
+ new_nodes = all_comparisons[depth - 1][depth].match_locations(path[depth-1]);
new_nodes_i = new_nodes.begin();
new_nodes_end = new_nodes.end();
while(new_nodes_i != new_nodes_end)
trans_check_good = true;
for(i = 0; i < depth - 1; i++)
{
- trans_check_nodes = all_comparisons[i][depth].matches(path[i]);
+ trans_check_nodes = all_comparisons[i][depth].match_locations(path[i]);
if ( (trans_check_nodes.end() == find(trans_check_nodes.begin(),
trans_check_nodes.end(),
*new_nodes_i) ) &&
void
NwayPaths::find_paths_r(vector<vector<FLPs> > all_comparisons)
{
- vector<int> path;
+ ConservedPath path;
int win_i, window_num;
list<int> new_nodes;
list<int>::iterator new_nodes_i, new_nodes_end;
{
path.clear();
path.push_back(win_i);
- new_nodes = all_comparisons[0][1].matches(path[0]);
+ new_nodes = all_comparisons[0][1].match_locations(path[0]);
new_nodes_i = new_nodes.begin();
new_nodes_end = new_nodes.end();
//if (new_nodes_i != new_nodes_end)
NwayPaths::save_old(string save_file_path)
{
fstream save_file;
- list<vector<int> >::iterator path_i, paths_end;
- vector<int> a_path;
+ list<ConservedPath >::iterator path_i, paths_end;
int i;
save_file.open(save_file_path.c_str(), ios::app);
paths_end = pathz.end();
while(path_i != paths_end)
{
- a_path = *path_i;
+ ConservedPath& a_path = *path_i;
//cout << a_path.size() << endl;
for(i = 0; i < path_i->size(); ++i)
save_file << i << "," << a_path[i] << " ";
#include <vector>
#include "alg/flp.hh"
+#include "alg/conserved_path.h"
class NwayPaths
{
// old recursive transitive nway ... has issues checking all links?
void find_paths_r(std::vector<std::vector<FLPs> > all_comparisons);
- void path_search(std::vector<std::vector<FLPs> > all_comparisons, std::vector<int> path, int depth);
+ void path_search(std::vector<std::vector<FLPs> > all_comparisons, ConservedPath path, int depth);
void simple_refine();
void save(std::string save_file_path);
std::string load(std::string load_file_path);
- void add_path(std::vector<int> loaded_path);
+ void add_path(int threshold, std::vector<int>& loaded_path);
+ void add_path(ConservedPath loaded_path);
//! how many sequences are in our comparison
int sequence_count();
// these probably shouldn't be public, but lets start
// simple
- std::list<std::vector<int> > pathz;
- std::list<std::vector<int> > refined_pathz;
-
+ std::list<ConservedPath> pathz;
+ std::list<ExtendedConservedPath > refined_pathz;
};
#endif
--- /dev/null
+#include <boost/test/auto_unit_test.hpp>
+#include <boost/assign/std/vector.hpp>
+using namespace boost::assign;
+
+#include <vector>
+using namespace std;
+
+#include "alg/conserved_path.h"
+
+//! write tests for our conserved path types (just to make sure they
+//! don't change on us.
+BOOST_AUTO_TEST_CASE ( conserved_path_simple )
+{
+ vector<int> path;
+ path += 3,4,5,1; // magic from boost assign
+
+ ConservedPath cp1;
+ BOOST_CHECK_EQUAL( cp1.size(), 0 ); //empty
+ BOOST_CHECK( cp1.begin() == cp1.end() );
+
+ cp1.score = 18;
+ cp1.track_indexes = path;
+ BOOST_CHECK_EQUAL( cp1.size(), path.size() );
+
+ ConservedPath cp2(18, path);
+ BOOST_CHECK_EQUAL (cp1, cp2);
+ BOOST_CHECK_EQUAL( cp2.size(), path.size() );
+
+ ConservedPath::iterator cp2_itor;
+ vector<int>::iterator path_itor;
+ for (cp2_itor = cp2.begin(), path_itor = path.begin();
+ cp2_itor != cp2.end() && path_itor != path.end();
+ ++cp2_itor, ++path_itor)
+ {
+ BOOST_CHECK_EQUAL( *cp2_itor, *path_itor );
+ }
+
+ ConservedPath cp3( cp2 );
+ BOOST_CHECK_EQUAL( cp3.size(), cp2.size() );
+ ConservedPath::iterator cp3_itor;
+ for (cp2_itor = cp2.begin(), cp3_itor = cp3.begin();
+ cp2_itor != cp2.end() && cp3_itor != cp3.end();
+ ++cp2_itor, ++cp3_itor)
+ {
+ BOOST_CHECK_EQUAL( *cp2_itor, *cp3_itor );
+ }
+}
+
+//! do vector like operations work?
+BOOST_AUTO_TEST_CASE ( conserved_path_vector )
+{
+ ConservedPath cp;
+ cp.score = 21.0;
+
+ BOOST_CHECK_EQUAL( cp.size(), 0 );
+ cp.push_back( 3 );
+ BOOST_CHECK_EQUAL( cp.size(), 1);
+ BOOST_CHECK_EQUAL( cp[0], 3 );
+ cp.push_back( 4 );
+ BOOST_CHECK_EQUAL( cp.size(), 2);
+ BOOST_CHECK_EQUAL( cp[1], 4 );
+ cp.pop_back();
+ BOOST_CHECK_EQUAL( cp.size(), 1 );
+}
+
+BOOST_AUTO_TEST_CASE ( extended_conserved_path_simple )
+{
+ vector<int> path;
+ path += 3,4,5,1; // magic from boost assign
+
+ ExtendedConservedPath ecp1;
+ ecp1.window_size = 30;
+ ecp1.score = 18;
+ ecp1.track_indexes = path;
+ BOOST_CHECK_EQUAL ( ecp1.size(), path.size() );
+
+ ExtendedConservedPath ecp2(30, 18.0, path);
+ BOOST_CHECK_EQUAL ( ecp1, ecp2 );
+ BOOST_CHECK_EQUAL ( ecp2.size(), path.size() );
+
+ ExtendedConservedPath ecp_ne(35, 20.0, path);
+ BOOST_CHECK(ecp2 != ecp_ne);
+
+ ConservedPath cp1;
+ cp1.score = 18;
+ cp1.track_indexes = path;
+
+ ExtendedConservedPath ecp3(30, cp1);
+ BOOST_CHECK_EQUAL( ecp2, ecp3 );
+ BOOST_CHECK_EQUAL( ecp3.size(), path.size() );
+}
+
+//! Can we grow succesfully?
+BOOST_AUTO_TEST_CASE ( extended_conserved_path_growth )
+{
+ vector<int> path_base;
+ path_base += 3,4,5,1; // magic from boost assign
+ vector<int> path_next;
+ path_next += 4,5,6,2;
+ vector<int> path_next_reversed;
+ path_next_reversed += 4,5,6,-2;
+ vector<int> path_not_next;
+ path_not_next += 4,5,6,5;
+ const int window_size = 4;
+
+ ExtendedConservedPath ecp_base(window_size, 20, path_base);
+ ConservedPath cp_next(25, path_next);
+ ConservedPath cp_next_reversed(25, path_next_reversed);
+ ConservedPath cp_not_next(22, path_not_next);
+
+ BOOST_CHECK(ecp_base.nextTo(cp_next));
+ BOOST_CHECK(!ecp_base.nextTo(cp_not_next));
+ // FIXME: should something like this actually extend?
+ BOOST_CHECK(!ecp_base.nextTo(cp_next_reversed));
+
+ const int growth = 2;
+ ecp_base.extend(growth);
+ BOOST_CHECK_EQUAL( ecp_base.window_size, window_size + growth );
+}
CURDIR := $(BASEDIR)test/
-SOURCES.cxx := test_flp.cxx \
+SOURCES.cxx := test_conserved_path.cxx \
+ test_flp.cxx \
test_glsequence.cxx \
test_main.cxx \
test_mussa.cxx \
../qui/GlSequence.cxx
TESTSRC := $(addprefix $(CURDIR), $(SOURCES.cxx))
-
+TESTOBJ := $(TESTSRC:.cxx=$(OBJEXT))
SRC += $(TESTSRC)
CXXFLAGS +=
TEST := $(BASEDIR)/unittests$(BINEXT)
TARGETBINS += $(TEST)
-$(TEST): $(TESTSRC:.cxx=$(OBJEXT)) $(MUSSA_ALG_LIB)
+$(TEST): $(TESTOBJ) $(MUSSA_ALG_OBJ) $(MUSSA_ALG_GL_OBJ)
g++ $(CXXFLAGS) -lGL -lboost_unit_test_framework -lboost_filesystem -o $@ $^
f.setup(5, 4);
f.seqcomp("AAGGTAAGGT", "AAGGTAAGGT", false);
- for (int i=0; i < f.size(); i++)
+ // are the match_locations correct?
+ for (int i=0; i != f.size(); i++)
{
- std::list<int> window_matches = f.matches(i);
- for (std::list<int>::iterator matches_i = window_matches.begin();
- matches_i != window_matches.end();
- ++matches_i)
+ std::list<int> window_locations = f.match_locations(i);
+ std::list<FLPs::match> window_matches = f.matches(i);
+ std::list<int>::iterator loc_i = window_locations.begin();
+ std::list<FLPs::match>::iterator match_i = window_matches.begin();
+ for (; loc_i != window_locations.end(); ++loc_i, ++match_i)
{
// I'm missing pythons easy lists and in operator here.
- if (i == 0) BOOST_CHECK( *matches_i == 0 || *matches_i == 5);
- else if (i == 1) BOOST_CHECK( *matches_i == 1 || *matches_i == 6);
- else if (i == 2) BOOST_CHECK( *matches_i == 2 );
- else if (i == 3) BOOST_CHECK( *matches_i == 3 );
- else if (i == 4) BOOST_CHECK( *matches_i == 4 );
- else if (i == 6) BOOST_CHECK( *matches_i == 5 || *matches_i == 0);
+ if (i == 0) BOOST_CHECK( *loc_i == 0 || *loc_i == 5);
+ else if (i == 1) BOOST_CHECK( *loc_i == 1 || *loc_i == 6);
+ else if (i == 2) BOOST_CHECK( *loc_i == 2 );
+ else if (i == 3) BOOST_CHECK( *loc_i == 3 );
+ else if (i == 4) BOOST_CHECK( *loc_i == 4 );
+ else if (i == 6) BOOST_CHECK( *loc_i == 5 || *loc_i == 0);
+ BOOST_CHECK_EQUAL( *loc_i, match_i->index );
}
}
}
BOOST_CHECK_EQUAL( f1.size(), f2.size() );
for (int win=0; win < f1.size(); ++win)
{
- std::list<int> f1_matches = f1.matches(win);
- std::list<int> f2_matches = f2.matches(win);
- std::list<int>::const_iterator f1_match_i = f1_matches.begin();
- std::list<int>::const_iterator f2_match_i = f2_matches.begin();
+ std::list<FLPs::match> f1_matches = f1.matches(win);
+ std::list<FLPs::match> f2_matches = f2.matches(win);
+ std::list<FLPs::match>::const_iterator f1_match_i = f1_matches.begin();
+ std::list<FLPs::match>::const_iterator f2_match_i = f2_matches.begin();
for( ;
f1_match_i != f1_matches.end() && f2_match_i != f2_matches.end();
++f1_match_i, ++f2_match_i)
#include <unistd.h>
BOOST_AUTO_TEST_CASE( mussa_load_mupa )
{
- Mussa m;
+ Mussa m1;
chdir( "examples" );
- m.load_mupa_file( "mck3test.mupa" );
- m.analyze(30, 25);
+ m1.load_mupa_file( "mck3test.mupa" );
+ m1.analyze(0, 0);
+ BOOST_CHECK_EQUAL( m1.get_name(), std::string("mck3test") );
+ BOOST_CHECK( m1.size() > 0 );
+
+ Mussa m2;
+ std::string saved_analysis_path("mck3test_w30_t20");
+ m2.load( saved_analysis_path );
chdir( ".." );
+
+ BOOST_CHECK_EQUAL( m2.get_name(), saved_analysis_path );
+ BOOST_CHECK_EQUAL( m1.size(), m2.size() );
}
analysis.analyze(4,3);
NwayPaths npath = analysis.paths();
// there should be no paths for these sequences
- for (std::list<std::vector<int> >::iterator pathz_i = npath.pathz.begin();
+ for (std::list<ConservedPath >::iterator pathz_i = npath.pathz.begin();
pathz_i != npath.pathz.end();
++pathz_i)
{
analysis.add_a_seq(s1);
analysis.analyze(4,3);
NwayPaths npath = analysis.paths();
- for (std::list<std::vector<int> >::iterator pathz_i = npath.pathz.begin();
+ for (std::list<ConservedPath >::iterator pathz_i = npath.pathz.begin();
pathz_i != npath.pathz.end();
++pathz_i)
{
- for( std::vector<int>::iterator path_i = pathz_i->begin();
+ for( ConservedPath::iterator path_i = pathz_i->begin();
path_i != pathz_i->end();
++path_i)
{
}
}
}
+
+
void
ConnView::scale_paths()
{
- vector<int> a_path;
- list<vector<int> >::iterator pathz_i;
+ ExtendedConservedPath a_path;
+ list<ExtendedConservedPath >::iterator pathz_i;
int i2;
for(pathz_i = P->refined_pathz.begin(); pathz_i != P->refined_pathz.end(); ++pathz_i)
{
a_path = *pathz_i;
- for(i2 = 0; i2 <= seq_num; i2++)
+ a_path.window_size = (int)(a_path.window_size / x_scale_factor);
+ for(i2 = 0; i2 != a_path.size(); i2++)
+ {
a_path[i2] = (int) (a_path[i2] / x_scale_factor);
+ }
scaled_pathz.push_back(a_path);
}
}
fl_color(FL_WHITE);
fl_rectf(x(), y(), w(), h());
-
// draw the scale indicators if they are on
// put into own method soon...
int i, div_num, x_loc;
void
ConnView::draw_paths()
{
- list<vector<int> >::iterator i;
+ list<ExtendedConservedPath >::iterator i;
int i2, i3, y_loc, x_loc, x_start, x_end;
- vector<int> a_path;
list<bool>::iterator highlight_i;
int window_size;
bool rc_color;
int path_start, path_end;
- //Sequence a_seq;
-
// determine which paths to highlight
highlight.clear();
for(i = scaled_pathz.begin(); i != scaled_pathz.end(); ++i)
{
- a_path = *i;
+ ExtendedConservedPath& a_path = *i;
// determine if path falls within the selected region and mark it for
// highlighted color
- path_start = abs(a_path[ref_seq_num+1]);
+ path_start = abs(a_path[ref_seq_num]);
path_end = path_start + a_path[0];
if ( ( (path_start >= drag_start-x()) && (path_end <= drag_end-x()) ) ||
( (path_start < drag_start-x()) && (path_end > drag_end-x()) ) ||
highlight_i = highlight.begin();
for(i = scaled_pathz.begin(); i != scaled_pathz.end(); ++i)
{
- a_path = *i;
+ ExtendedConservedPath& a_path = *i;
y_loc = y()+y_pad;
- // get window size to determine line width
- window_size = a_path[0];
+ window_size = a_path.window_size;
// make sure width is at least 1 - might be zero to my slack rounding
if (window_size == 0)
window_size = 1;
if (!(*highlight_i))
- for(i2 = 1; i2 < seq_num; i2++)
+ for(i2 = 0; i2 < a_path.size()-1; i2++)
{
// RC case handling
// ugh, an xor...only want blue if one of the nodes is rc
highlight_i = highlight.begin();
for(i = scaled_pathz.begin(); i != scaled_pathz.end(); ++i)
{
- a_path = *i;
+ ExtendedConservedPath& a_path = *i;
y_loc = y()+y_pad;
// get window size to determine line width
- window_size = a_path[0];
+ window_size = a_path.window_size;
// make sure width is at least 1 - might be zero to my slack rounding
if (window_size == 0)
window_size = 1;
if (*highlight_i)
- for(i2 = 1; i2 < seq_num; i2++)
+ for(i2 = 0; i2 != a_path.size()-1 ; i2++)
{
// RC case handling
// ugh, an xor...only want blue if one of the nodes is rc
void
ConnView::spawnSeq()
{
- list<vector<int> > selected_paths;
- list<vector<int> >::iterator pathz_i;
+ list<ExtendedConservedPath > selected_paths;
+ list<ExtendedConservedPath >::iterator pathz_i;
int i2, i3, y_loc, x_loc, x_start, x_end;
- vector<int> a_path;
list<bool>::iterator highlight_i;
int y_max;
string window_name;
-
if (selected)
{
// make new list of connections that are highlighted
selected_paths.clear();
highlight_i = highlight.begin();
- for(pathz_i = P->refined_pathz.begin(); pathz_i != P->refined_pathz.end(); ++pathz_i)
+ for(pathz_i = P->refined_pathz.begin();
+ pathz_i != P->refined_pathz.end(); ++pathz_i)
{
if (*highlight_i)
{
- a_path = *pathz_i;
+ ExtendedConservedPath& a_path = *pathz_i;
selected_paths.push_back(a_path);
}
++highlight_i;
window_name = "Mussa Sequence: " + analysis_name;
a_seq_win = new SeqWindow(800, y_max, (const char*) window_name.c_str(),
- seq_num,
- S, selected_paths, seq_lens, &some_motifs);
+ seq_num, S, selected_paths, seq_lens,
+ &some_motifs);
}
}
std::list<bool> highlight;
//path data scaled for current view size
- std::list<std::vector<int> > scaled_pathz;
+ std::list<ExtendedConservedPath > scaled_pathz;
std::vector<int> seq_lens;
std::vector<int> seq_scales;
int bar_interval, line_interval;
fl_alert(e.what());
}
}
+ conn_box->redraw();
}
void
SeqView::setup(string name, int sq_num,
vector<Sequence> *some_seqs,
- list<vector<int> > some_paths, vector<int> some_lens,
+ list<ExtendedConservedPath > some_paths, vector<int> some_lens,
vector<motif> *some_motifs)
{
int seq_i;
- list<vector<int> >::iterator pathz_i;
-
+ list<ExtendedConservedPath >::iterator pathz_i;
analysis_name = name;
seq_num = sq_num;
//scroll_pos = 0;
drag_change = 0;
- cout << "waaaaa!\n";
- cout << x() << " " << y() << " " << w() << " " << h() << endl;
+ cout << "waaaaa!\n";
+ cout << x() << " " << y() << " " << w() << " " << h() << endl;
}
void
}
-
-
void
SeqView::draw()
{
fl_color(FL_WHITE);
fl_rectf(x(), y(), w(), h());
-
fl_font(FL_COURIER, 14);
//blatantly stolen from FR2
ch_width = fl_width('A'); // monospaced: all characters are same width
SeqView::draw_match_lines(double ch_width)
{
int i, y_loc;
- vector<int> a_path;
- list<vector<int> >::iterator pathz_i;
+ list<ExtendedConservedPath >::iterator pathz_i;
int i2, i3;
int x_start, y_start, x_end, y_end;
int window_length, win_i;
{
if (show_aligns[align_counter])
{
- a_path = *pathz_i;
- window_length = a_path[0];
-
- // determine which parts of the path are RC relative to first species
- rc_list.clear();
- for(i2 = 1; i2 <= seq_num; i2++)
- {
- if (a_path[i2] < 0)
- rc_list.push_back(true);
- else
- rc_list.push_back(false);
- }
+ ExtendedConservedPath& a_path = *pathz_i;
+ window_length = a_path.window_size;
- // loop over each bp in the conserved region for all sequences
- for(win_i = 0; win_i < window_length; win_i++)
- {
- // determine which exact base pairs match between the sequences
- full_match = true;
- for(i2 = 1; i2 < seq_num; i2++)
+ // determine which parts of the path are RC relative to first species
+ rc_list.clear();
+ for(i2 = 0; i2 < a_path.size()-1; i2++)
{
- // assume not rc as most likely, adjust below
- rc_1 = 0;
- rc_2 = 0;
- // no matter the case, any RC node needs adjustments
if (a_path[i2] < 0)
- rc_1 = window_length-1;
- if (a_path[i2+1] < 0)
- rc_2 = window_length-1;
-
- x_start = (abs(a_path[i2]-rc_1+win_i));
- x_end = (abs(a_path[i2+1]-rc_2+win_i));
-
- // RC case handling
- // ugh, and xor...only want rc coloring if just one of the nodes is rc
- // if both nodes are rc, then they are 'normal' relative to each other
- if ( ( rc_list[i2] || rc_list[i2-1] ) &&
- !(rc_list[i2] && rc_list[i2-1] ) )
- { //the hideous rc matching logic - not complex, but annoying
- if ( !( ( (raw_sequence[i2-1][x_start] == 'A') &&
- (raw_sequence[i2][x_end] == 'T') ) ||
- ( (raw_sequence[i2-1][x_start] == 'T') &&
- (raw_sequence[i2][x_end] == 'A') ) ||
- ( (raw_sequence[i2-1][x_start] == 'G') &&
- (raw_sequence[i2][x_end] == 'C') ) ||
- ( (raw_sequence[i2-1][x_start] == 'C') &&
- (raw_sequence[i2][x_end] == 'G') ) ) )
- full_match = false;
- }
+ rc_list.push_back(true);
else
- {
- if (!( (raw_sequence[i2-1][x_start] == raw_sequence[i2][x_end]) &&
- (raw_sequence[i2-1][x_start] != 'N') &&
- (raw_sequence[i2][x_end] != 'N') ) )
- full_match = false;
- }
+ rc_list.push_back(false);
}
-
- // draw for matches stretching across all sequences
- if (full_match)
+
+ // loop over each bp in the conserved region for all sequences
+ for(win_i = 0; win_i < window_length; win_i++)
{
- fl_line_style(FL_SOLID, 1, NULL);
-
- // now can draw the line for each bp in this window that matches
- // grrr, need to ask if anyone cares if I switch the seq top-bot order...
- i3 = 0;
- y_loc = y_min + 5;
- for(i2 = 1; i2 < seq_num; i2++)
- {
- // assume not rc as most likely, adjust below
- rc_1 = 0;
- rc_2 = 0;
- // this makes the lines start in the middle of the drawn char/bp
- center1 = 0.5;
- center2 = 0.5;
- // no matter the case, any RC node needs adjustments
- if (a_path[i2] < 0)
+ // determine which exact base pairs match between the sequences
+ full_match = true;
+ for(i2 = 0; i2 < a_path.size()-1; i2++)
{
- rc_1 = window_length;
- center1 = -center1;
+ // assume not rc as most likely, adjust below
+ rc_1 = 0;
+ rc_2 = 0;
+ // no matter the case, any RC node needs adjustments
+ if (a_path[i2] < 0)
+ rc_1 = window_length-1;
+ if (a_path[i2+1] < 0)
+ rc_2 = window_length-1;
+
+ x_start = (abs(a_path[i2]-rc_1+win_i));
+ x_end = (abs(a_path[i2+1]-rc_2+win_i));
+
+ // RC case handling
+ // ugh, and xor...only want rc coloring if just one of the nodes is rc
+ // if both nodes are rc, then they are 'normal' relative to each other
+ if ( ( rc_list[i2] || rc_list[i2+1] ) &&
+ !(rc_list[i2] && rc_list[i2+1] ) )
+ { //the hideous rc matching logic - not complex, but annoying
+ if ( !( ( (raw_sequence[i2][x_start] == 'A') &&
+ (raw_sequence[i2+1][x_end] == 'T') ) ||
+ ( (raw_sequence[i2][x_start] == 'T') &&
+ (raw_sequence[i2+1][x_end] == 'A') ) ||
+ ( (raw_sequence[i2][x_start] == 'G') &&
+ (raw_sequence[i2+1][x_end] == 'C') ) ||
+ ( (raw_sequence[i2][x_start] == 'C') &&
+ (raw_sequence[i2+1][x_end] == 'G') ) ) )
+ full_match = false;
+ }
+ else
+ {
+ if (!( (raw_sequence[i2][x_start] == raw_sequence[i2+1][x_end]) &&
+ (raw_sequence[i2][x_start] != 'N') &&
+ (raw_sequence[i2+1][x_end] != 'N') ) )
+ full_match = false;
+ }
}
- if (a_path[i2+1] < 0)
+
+ // draw for matches stretching across all sequences
+ if (full_match)
{
- rc_2 = window_length;
- center2 = -center2;
- }
-
- // set offset based on current alignment for which bp to show
- offset1 = seq_align_offsets[i2-1];
- offset2 = seq_align_offsets[i2];
-
- if ( ( rc_list[i2] || rc_list[i2-1] ) &&
- !(rc_list[i2] && rc_list[i2-1] ) )
- fl_color(FL_BLUE);
- else
- fl_color(FL_RED);
+ fl_line_style(FL_SOLID, 1, NULL);
- // maybe shouldn't recalc these, but store values from first loop
- x_start = (abs((int) (a_path[i2]-rc_1+win_i)));
- x_end = (abs((int) (a_path[i2+1]-rc_2+win_i)));
-
- fl_line( (int)((x_start+center1-offset1+scroll_offset)*ch_width)
- + index_pad, y_loc + 10,
- (int)((x_end+center2-offset2+scroll_offset)*ch_width)
- + index_pad, y_loc+y_seq_incre );
- y_loc += y_seq_incre;
+ // now can draw the line for each bp in this window that matches
+ // grrr, need to ask if anyone cares if I switch the seq
+ // top-bot order...
+ i3 = 0;
+ y_loc = y_min + 5;
+ for(i2 = 0; i2 < a_path.size()-1; i2++)
+ {
+ // assume not rc as most likely, adjust below
+ rc_1 = 0;
+ rc_2 = 0;
+ // this makes the lines start in the middle of the drawn char/bp
+ center1 = 0.5;
+ center2 = 0.5;
+ // no matter the case, any RC node needs adjustments
+ if (a_path[i2] < 0)
+ {
+ rc_1 = window_length;
+ center1 = -center1;
+ }
+ if (a_path[i2] < 0)
+ {
+ rc_2 = window_length;
+ center2 = -center2;
+ }
+
+ // set offset based on current alignment for which bp to show
+ offset1 = seq_align_offsets[i2];
+ offset2 = seq_align_offsets[i2+1];
+
+ if ( ( rc_list[i2] || rc_list[i2+1] ) &&
+ !(rc_list[i2] && rc_list[i2+1] ) )
+ fl_color(FL_BLUE);
+ else
+ fl_color(FL_RED);
+
+ // maybe shouldn't recalc these, but store values from first loop
+ x_start = (abs((int) (a_path[i2]-rc_1+win_i)));
+ x_end = (abs((int) (a_path[i2+1]-rc_2+win_i)));
+
+ fl_line( (int)((x_start+center1-offset1+scroll_offset)*ch_width)
+ + index_pad, y_loc + 10,
+ (int)((x_end+center2-offset2+scroll_offset)*ch_width)
+ + index_pad, y_loc+y_seq_incre );
+ y_loc += y_seq_incre;
+ }
+ }
}
}
- }
- }
align_counter++;
}
}
void
SeqView::align_offsets(int align_num)
{
- list<vector<int> >::iterator pathz_i;
+ list<ExtendedConservedPath >::iterator pathz_i;
int i;
- vector<int> a_path;
int window_length;
cout << "alignment: " << align_num << endl;
}
void setup(std::string name, int sq_num, std::vector<Sequence> *some_seqs,
- std::list<std::vector<int> > some_paths,
+ std::list<ExtendedConservedPath > some_paths,
std::vector<int> some_lens, std::vector<motif> *some_motifs);
void align_offsets(int align_num);
void toggle_align(int align_num);
//this data is passed as pointers to the instantiated classes
std::vector<Sequence> *S;
//list of paths in selection box
- std::list<std::vector<int> > P;
+ std::list<ExtendedConservedPath > P;
std::vector<int> seq_lens;
//pointer to passed motif data
std::vector<motif> *the_motifs;
void
SeqWindow::real_set_align_cb(int which_align)
{
- vector<int> a_path;
- int window_length;
-
-
- a_path = *P.begin();
- window_length = a_path[0];
- cout << window_length << endl;
-
- cout << "fum1\n";
seq_box->align_offsets(which_align);
seq_box->redraw();
}
SeqWindow::SeqWindow(int w, int h, const char* title, int sq_num,
vector<Sequence> *some_seqs,
- list<vector<int> > some_paths,
+ list<ExtendedConservedPath > some_paths,
vector<int> some_lens,
vector<motif> *some_motifs):
Fl_Double_Window(w,h,title)
{
menu_align_data_bundle * some_menu_data;
- vector<int> a_path;
int window_length, align_number;
int i;
ostringstream align_id_ostr;
string align_id_string;
- list<vector<int> >::iterator align_iter;
-
-
+ list<ExtendedConservedPath >::iterator align_iter;
// most of this stuff is here just to pass to SeqView object
// some is needed to setup the window menus
//this data is passed as pointers to the instantiated classes
std::vector<Sequence> *S;
// list of paths in selection box
- std::list<std::vector<int> > P;
+ std::list<ExtendedConservedPath > P;
std::vector<int> seq_lens;
//pointer to passed motif data
std::vector<motif> *the_motifs;
SeqWindow(int w, int h, const char* title, int sq_num,
std::vector<Sequence> *some_seqs,
- std::list<std::vector<int> > some_paths,
+ std::list<ExtendedConservedPath > some_paths,
std::vector<int> some_lens,
std::vector<motif> *some_motifs);
virtual ~SeqWindow(){ std::cout << "dying\n"; }
SubAnalysisWindow.cxx
MUSSA_FLTK_SRC := $(addprefix $(CURDIR), $(SOURCES.cxx))
+MUSSA_FLTK_OBJ := $(MUSSA_FLTK_SRC:.cxx=$(OBJEXT))
SRC += $(MUSSA_FLTK_SRC)
CXXFLAGS +=
-
-MUSSA_FLTK_LIB := $(CURDIR)libmussa_fltk$(LIBEXT)
-LIBDIRS += -L$(CURDIR)
-
-TARGETLIBS += $(MUSSA_FLTK_LIB)
-
-$(MUSSA_FLTK_LIB): $(MUSSA_FLTK_SRC:.cxx=$(OBJEXT))
- $(AR) rv $@ $?
-
MUSSA := $(CURDIR)/mussa$(BINEXT)
TARGETBINS += $(MUSSA)
-$(MUSSA): $(MUSSASRC:.cxx=$(OBJEXT)) $(MUSSA_FLTK_LIB) $(MUSSA_ALG_LIB)
+$(MUSSA): $(MUSSASRC:.cxx=$(OBJEXT)) $(MUSSA_FLTK_OBJ) $(MUSSA_ALG_OBJ)
g++ $(CXXFLAGS) -lfltk -o $@ $^
-
qui/ThresholdWidget.h \
qui/ImageScaler.h \
qui/ImageSaveDialog.h \
+ alg/conserved_path.h \
alg/flp.hh \
alg/mussa_class.hh \
alg/nway_paths.hh \
qui/ThresholdWidget.cxx \
qui/ImageScaler.cxx \
qui/ImageSaveDialog.cxx \
+ alg/conserved_path.cxx \
alg/flp.cxx \
alg/flp_seqcomp.cxx \
alg/mussa_class.cxx \
glBegin(GL_LINES);
glVertex3f(seq_x, seq_y, seq_z);
glVertex3f(seq_x+seq_width, seq_y, seq_z);
- clog << "drawing " << seq_x << " " << seq_y << " " << seq_width
- << std::endl;
+ //clog << "drawing " << seq_x << " " << seq_y << " " << seq_width
+ // << std::endl;
glEnd();
// draw annotations
GLfloat annotation_z = seq_z + 1.0;
glLineWidth(0.5);
vector<GlSequence>::const_iterator track_itor;
- for(list<vector<int> >::const_iterator path_itor = nway.refined_pathz.begin();
+
+ typedef list<ExtendedConservedPath> conserved_paths;
+ typedef conserved_paths::const_iterator const_conserved_paths_itor;
+ for(const_conserved_paths_itor path_itor = nway.refined_pathz.begin();
path_itor != nway.refined_pathz.end();
++path_itor)
{
track_itor = tracks.begin();
- vector<int>::const_iterator sp_itor = path_itor->begin();
// since we were drawing to the start of a window, and opengl lines
// are centered around the two connecting points our lines were slightly
// offset. the idea of window_offset is to adjust them to the
// right for forward compliment or left for reverse compliment
// FIXME: figure out how to unit test these computations
- GLfloat window_offset = (*sp_itor)/2.0;
- sp_itor++;
+ GLfloat window_offset = (path_itor->window_size)/2.0;
glBegin(GL_LINE_STRIP);
- for (;
+ for (vector<int>::const_iterator sp_itor = path_itor->begin();
sp_itor != path_itor->end();
++sp_itor, ++track_itor)
{