1 // This file is part of the Mussa source distribution.
2 // http://mussa.caltech.edu/
3 // Contact author: Tristan De Buysscher, tristan@caltech.edu
5 // This program and all associated source code files are Copyright (C) 2005
6 // the California Institute of Technology, Pasadena, CA, 91125 USA. It is
7 // under the GNU Public License; please see the included LICENSE.txt
8 // file for more information, or contact Tristan directly.
12 #include "alg/nway_paths.hpp"
17 // vars that will be availble to entropy function when in its nway class
18 //vector<char *> c_sequences;
19 //int window = 5, seq_num;
23 NwayPaths::setup_ent(double new_entropy_thres, vector<Sequence> some_seqs)
25 ent_thres = new_entropy_thres;
28 for (vector<string>::size_type i = 0; i < some_seqs.size(); i++)
29 c_sequences.push_back((char *)some_seqs[i].get_sequence().c_str());
34 NwayPaths::path_entropy(vector<int> path)
37 double bp_prob, frac_ent, avg_entropy;
38 vector<double> entropies;
40 int window = win_size;
42 for(i = 0; i < window; i++) // loop thru all bp positions
44 for(i2 = 0; i2 < 5; i2++) // clear old occurences
45 bp_occurences[i2] = 0;
47 //loop thru all sequences at pos i
48 for(vector<int>::size_type seq_i = 0; seq_i != path.size(); seq_i++)
52 switch (c_sequences[seq_i][i+path[seq_i]])
70 cout << "error, nonAGCTN on seq: " << seq_i << " at index: ";
71 cout << i+path[seq_i] << endl;
76 switch (c_sequences[seq_i][-1 * (i + path[seq_i] - window + 1) ])
94 cout << "error, nonAGCTN on seq: " << seq_i << " at index: ";
95 cout << (i + path[seq_i] - window + 1) << endl;
100 entropies.push_back(0.000);
101 for(i2 = 0; i2 < 5; i2++) //calculate entropies
103 bp_prob = (double) bp_occurences[i2] / (double) path.size();
104 //cout << bp_prob << "::";
105 if (bp_prob == 0.0000)
108 frac_ent = bp_prob * ( log10(bp_prob) / log10(2.0) );
109 //cout << frac_ent << " ";
110 entropies[i] += frac_ent;
116 for(i = 0; i < window; i++)
118 avg_entropy += entropies[i];
119 //cout << entropies[i] << endl;
122 avg_entropy = -1.00 * (avg_entropy / (double) window);
123 //cout << "average entropy: " << avg_entropy << endl;
128 NwayPaths::entropy_path_search(vector<vector<FLPs> > all_comparisons)
132 int win_i, sp_depth, window_num;
134 bool some_matches, still_paths, not_advanced;
135 list<int> new_nodes, trans_check_nodes;
136 vector<list<int> > all_matches;
137 vector<list<int>::iterator> sp_nodes, sp_nodes_end;
138 list<int>::iterator debug_i;
142 window_num = all_comparisons[0][1].size();
143 cout << window_num << endl; // just a sanity check
145 sp_nodes.reserve(all_comparisons.size());
146 sp_nodes_end.reserve(all_comparisons.size());
148 // loop thru all windows in first species
149 for (win_i = 0; win_i < window_num; win_i++)
151 // first we see if the first seq has matches to all other seqs at this win
155 while ( (sp_i < all_comparisons.size()) && (some_matches) )
158 new_nodes = all_comparisons[0][sp_i].match_locations(win_i);
159 if (new_nodes.empty())
160 some_matches = false;
162 all_matches.push_back(new_nodes);
167 // if 1st seq does match to all others, make all possible paths
168 // out of all these matches
172 sp_nodes_end.clear();
173 // set each species list of matches to beginning
174 for (sp_i = 0; sp_i < all_comparisons.size()-1; sp_i++)
176 sp_nodes.push_back(all_matches[sp_i].begin());
177 sp_nodes_end.push_back(all_matches[sp_i].end());
184 // add path that each species iterator is pointing to
186 path.push_back(win_i);
189 for (sp_i = 0; sp_i < all_comparisons.size()-1; sp_i++)
191 //cout << ", " << *(sp_nodes[sp_i]);
192 path.push_back(*(sp_nodes[sp_i]));
196 // check entropy <---------------------------------------------------
197 avg_entropy = path_entropy(path);
198 if (avg_entropy <= ent_thres)
199 pathz.push_back(ConservedPath(win_size, avg_entropy, path));
201 // now advance the right iterator
203 sp_depth = all_comparisons.size()- 2; // this roves up & down species list
204 while ( (not_advanced) && (sp_depth != -1) )
207 //cout << sp_depth << ".." << *(sp_nodes[sp_depth]) << endl;
208 (sp_nodes[sp_depth])++; // advance iter at current depth
209 //cout << sp_depth << ".." << *(sp_nodes[sp_depth]) << endl;
211 // if we reached the end of the list, reset iter & go up one depth
212 if (sp_nodes[sp_depth] == sp_nodes_end[sp_depth])
215 sp_nodes[sp_depth] = all_matches[sp_depth].begin();
217 //cout << "depth = " << sp_depth << endl;
220 not_advanced = false;
223 if (sp_depth == -1) // jumped up to first species, all paths searched
225 else // otherwise just reset to max depth and continue
226 sp_depth = all_comparisons.size() - 2;
233 //initial coding testing
235 int main(int argc, char **argv)
237 vector<string> sequences;
238 vector<int> i_starts;
243 sequences.push_back("AAAAA");
244 sequences.push_back("AAAAT");
245 sequences.push_back("AATTG");
246 sequences.push_back("ATTGC");
252 for(seq_i = 0; seq_i < seq_num; seq_i++) //loop thru all sequences at pos i
254 c_sequences.push_back((char *)sequences[seq_i].c_str());
255 i_starts.push_back(0);
258 avg_entropy = Window_Entropy(i_starts);
260 cout << "average entropy: " << avg_entropy << endl;