1 // This file is part of the Mussa source distribution.
2 // http://mussa.caltech.edu/
3 // Contact author: Tristan De Buysscher, tristan@caltech.edu
5 // This program and all associated source code files are Copyright (C) 2005
6 // the California Institute of Technology, Pasadena, CA, 91125 USA. It is
7 // under the GNU Public License; please see the included LICENSE.txt
8 // file for more information, or contact Tristan directly.
11 #include "nway_paths.hpp"
16 //! dump the matches for a particular window
17 void dump_matches_win(int win_i, vector<list<int> > all_matches)
19 cout << "<win_debug win=" << win_i << ">" << endl;
20 for (vector<list<int> >::const_iterator sp_i = all_matches.begin();
21 sp_i != all_matches.end();
24 for(list<int>::const_iterator debug_i = sp_i->begin();
25 debug_i != sp_i->end();
28 cout << *debug_i << " ";
32 cout << "</win_debug>"<< endl;
35 /*! (re-)initialize all_matches with the list of matches between
36 * species (sequence) 0 and all the other sequences
37 * \return true if there were matches against all the species, false otherwise
40 make_all_seq_win_matches(const vector<vector<FLPs> >& all_comparisons,
41 vector<list<int> >& all_matches,
42 int window_index, int soft_threshold)
45 bool some_matches=true;
47 for(vector<FLPs>::size_type sp_i=1;
48 (sp_i < all_comparisons.size()) && (some_matches); sp_i++ )
52 //new_nodes = all_comparisons[0][sp_i].matches(win_i);
53 new_nodes = all_comparisons[0][sp_i].thres_matches(window_index,
55 if (new_nodes.empty())
58 all_matches.push_back(new_nodes);
60 // we only record matches from seq 0 to another seq, so we should have seq-1
61 // matches if there were enough matches.
62 // if there weren't enough we should have even fewer matches
63 // (this complicated boolean hides all the logic in one assert call,
64 // hopefully allowing the assert to be optimized away)
65 assert(( some_matches && (all_matches.size() == (all_comparisons.size()-1)))
66 ||(!some_matches && (all_matches.size() < (all_comparisons.size()-1))));
70 /*! reset all the speces begin and end iterators to the start and end
71 * of the match pairs stored in all_matches
73 void reset_species_iterators(vector<list<int> >& all_matches,
74 vector<list<int>::iterator>& sp_itor_begin,
75 vector<list<int>::iterator>& sp_itor_end)
77 sp_itor_begin.clear();
79 // set each species list of matches to beginning
80 for (list<int>::size_type sp_i = 0; sp_i < all_matches.size(); sp_i++)
82 sp_itor_begin.push_back(all_matches[sp_i].begin());
83 sp_itor_end.push_back(all_matches[sp_i].end());
87 /*! Set the path vector to be the list of window positions from
88 * the current locations of our species beginning iterators
90 void set_path_to_cur_sp_itor_track(
92 int base_window_index,
93 const vector<list<int>::iterator>& sp_itor_begin)
95 // add path that each species iterator is pointing to
97 path.push_back(base_window_index);
99 for (vector<int>::size_type sp_i = 0; sp_i < sp_itor_begin.size(); sp_i++)
101 path.push_back(*(sp_itor_begin[sp_i]));
105 /*! advance the species iterator track
106 * if we were able to advance the track return true
107 * if we weren't able to advance, we've looked at every track so return false
109 bool advance_sp_itor_track(vector<list<int>::iterator>& sp_itor_begin,
110 const vector<list<int>::iterator>& sp_itor_end,
111 vector<list<int> >& all_matches)
113 bool not_advanced = true;
114 int sp_depth = all_matches.size() - 1; // this roves up & down species list
115 while ( (not_advanced) && (sp_depth != -1) )
117 //cout << sp_depth << ".." << *(sp_itor_begin[sp_depth]) << endl;
118 (sp_itor_begin[sp_depth])++; // advance iter at current depth
119 //cout << sp_depth << ".." << *(sp_itor_begin[sp_depth]) << endl;
121 // if we reached the end of the list, reset iter & go up one depth
122 if (sp_itor_begin[sp_depth] == sp_itor_end[sp_depth])
124 sp_itor_begin[sp_depth] = all_matches[sp_depth].begin();
126 //cout << "depth = " << sp_depth << endl;
129 not_advanced = false;
131 if (sp_depth == -1) // jumped up to first species, all paths searched
138 NwayPaths::radiate_path_search(vector<vector<FLPs> > all_comparisons)
141 int win_i, window_num;
143 vector<list<int> > all_matches;
144 vector<list<int>::iterator> sp_itor_begin(all_comparisons.size());
145 vector<list<int>::iterator> sp_itor_end(all_comparisons.size());
148 window_num = all_comparisons[0][1].size();
150 // loop thru all windows in first species
151 for (win_i = 0; win_i < window_num; win_i++)
153 // if 1st seq does have match to all the others, then make all possible
154 // paths out of all these matches (in all_matches)
155 if(make_all_seq_win_matches(all_comparisons, all_matches, win_i, soft_thres))
157 //debug dump_matches_win(win_i, all_matches);
158 reset_species_iterators(all_matches, sp_itor_begin, sp_itor_end);
163 // add path that each species iterator is pointing to
164 set_path_to_cur_sp_itor_track(path, win_i, sp_itor_begin);
166 pathz.push_back(ConservedPath(win_size, soft_thres, path));
168 // now advance the right iterator
169 still_paths = advance_sp_itor_track(sp_itor_begin,
177 bool is_transitive_path(const vector<int>& path,
178 const vector<vector<FLPs> >& all_comparisons,
179 const int soft_threshold)
182 list<int> trans_check_nodes;
184 for (vector<int>::size_type sp_depth=1; sp_depth != path.size()-1; ++sp_depth)
186 cur_node = path[sp_depth];
187 for(vector<int>::size_type i = sp_depth+1; i != path.size()-1; i++)
190 //trans_check_nodes = all_comparisons[sp_depth][i].matches(cur_node);
192 all_comparisons[sp_depth][i].thres_matches(cur_node, soft_threshold);
194 if ( (trans_check_nodes.end() == find(trans_check_nodes.begin(),
195 trans_check_nodes.end(),
197 &&(trans_check_nodes.end() == find(trans_check_nodes.begin(),
198 trans_check_nodes.end(),
207 NwayPaths::trans_path_search(vector<vector<FLPs> > all_comparisons)
209 if (all_comparisons.size() == 0 or all_comparisons[0].size() == 0)
212 int win_i, window_num;
214 list<int> trans_check_nodes;
215 vector<list<int> > all_matches;
216 vector<list<int>::iterator> sp_itor_begin(all_comparisons.size());
217 vector<list<int>::iterator> sp_itor_end(all_comparisons.size());
219 //cout << "trans: softhres = " << soft_thres;
220 //cout << ", window = " << win_size << ", ";
223 window_num = all_comparisons[0][1].size();
224 //cout << "window number = " << window_num << endl; // just a sanity check
225 //cout << "trans: comparison size= " << all_comparisons.size() << endl;
226 // loop thru all windows in first species
227 for (win_i = 0; win_i != window_num; win_i++)
229 // if 1st seq has a match to all the others for this window,
230 // then make all possible paths out of all these matches (in all_matches)
231 if(make_all_seq_win_matches(all_comparisons, all_matches, win_i,soft_thres))
233 //debug? //dump_matches_win(win_i, all_matches);
234 reset_species_iterators(all_matches, sp_itor_begin, sp_itor_end);
238 // initialize a path to however far each species iterator has been
240 set_path_to_cur_sp_itor_track(path, win_i, sp_itor_begin);
242 // if the path is transitive, save the path
243 if (is_transitive_path(path, all_comparisons, soft_thres)) {
244 ConservedPath new_path(win_size, soft_thres, path);
245 pathz.push_back(new_path);
248 // now advance the right iterator
249 still_paths = advance_sp_itor_track(sp_itor_begin,
254 if ((win_i % 1000) == 0) {
255 emit progress("transitive refinement", win_i, window_num);
258 emit progress("transitive refinement", window_num, window_num);
259 //clog << "pathz=" << pathz.size()
260 // << " all_cmp=" << all_comparisons.size();
261 //if (pathz.begin() != pathz.end())
262 // clog << " path_size=" << pathz.begin()->size();
264 // clog << " path empty";
266 assert ( (pathz.begin() == pathz.end())
267 || (pathz.begin()->size() == 0)
268 || (all_comparisons.size() == pathz.begin()->size()));