12 #include "mussa_nway.hh"
15 // vars that will be availble to entropy function when in its nway class
16 //vector<char *> c_sequences;
17 //int window = 5, seq_num;
21 Nway_Paths::setup_ent(double new_entropy_thres, vector<string> some_Seqs)
25 ent_thres = new_entropy_thres;
28 for (i = 0; i < species_num; i++)
29 c_sequences.push_back((char *)some_Seqs[i].c_str());
34 Nway_Paths::path_entropy(vector<int> path)
37 double bp_prob, frac_ent, avg_entropy;
38 vector<double> entropies;
40 int window = win_size;
41 int seq_num = species_num;
44 for(i = 0; i < window; i++) // loop thru all bp positions
46 for(i2 = 0; i2 < 5; i2++) // clear old occurences
47 bp_occurences[i2] = 0;
49 for(seq_i = 0; seq_i < seq_num; seq_i++) //loop thru all sequences at pos i
51 switch (c_sequences[seq_i][i+path[seq_i]])
69 cout << "error, nonAGCTN on seq: " << seq_i << " at index: ";
70 cout << i+path[seq_i] << endl;
73 switch (c_sequences[seq_i][-1 * (i + path[seq_i] - window + 1) ])
91 cout << "error, nonAGCTN on seq: " << seq_i << " at index: ";
92 cout << (i + path[seq_i] - window + 1) << endl;
96 entropies.push_back(0.000);
97 for(i2 = 0; i2 < 5; i2++) //calculate entropies
99 bp_prob = (double) bp_occurences[i2] / (double) seq_num;
100 //cout << bp_prob << "::";
101 if (bp_prob == 0.0000)
104 frac_ent = bp_prob * ( log10(bp_prob) / log10(2.0) );
105 //cout << frac_ent << " ";
106 entropies[i] += frac_ent;
112 for(i = 0; i < window; i++)
114 avg_entropy += entropies[i];
115 //cout << entropies[i] << endl;
118 avg_entropy = -1.00 * (avg_entropy / (double) window);
119 //cout << "average entropy: " << avg_entropy << endl;
128 Nway_Paths::entropy_path_search(vector<vector<FLPs> > all_comparisons)
132 int win_i, sp_i, sp_depth, window_num, i, cur_node;
133 bool some_matches, still_paths, not_advanced, trans_good;
134 list<int> new_nodes, trans_check_nodes;
135 vector<list<int> > all_matches;
136 vector<list<int>::iterator> sp_nodes, sp_nodes_end;
137 list<int>::iterator debug_i;
141 window_num = all_comparisons[0][1].win_num();
142 cout << window_num << endl; // just a sanity check
144 sp_nodes.reserve(species_num);
145 sp_nodes_end.reserve(species_num);
147 // loop thru all windows in first species
148 for (win_i = 0; win_i < window_num; win_i++)
150 // first we see if the first seq has matches to all other seqs at this win
154 while ( (sp_i < species_num) && (some_matches) )
157 new_nodes = all_comparisons[0][sp_i].matches(win_i);
158 if (new_nodes.empty())
159 some_matches = false;
161 all_matches.push_back(new_nodes);
166 // if 1st seq does match to all others, make all possible paths
167 // out of all these matches
171 sp_nodes_end.clear();
172 // set each species list of matches to beginning
173 for (sp_i = 0; sp_i < species_num-1; sp_i++)
175 sp_nodes.push_back(all_matches[sp_i].begin());
176 sp_nodes_end.push_back(all_matches[sp_i].end());
183 // add path that each species iterator is pointing to
185 path.push_back(win_i);
188 for (sp_i = 0; sp_i < species_num-1; sp_i++)
190 //cout << ", " << *(sp_nodes[sp_i]);
191 path.push_back(*(sp_nodes[sp_i]));
195 // check entropy <---------------------------------------------------
196 avg_entropy = path_entropy(path);
197 if (avg_entropy <= ent_thres)
198 pathz.push_back(path);
200 // now advance the right iterator
202 sp_depth = species_num - 2; // this roves up & down species list
203 while ( (not_advanced) && (sp_depth != -1) )
206 //cout << sp_depth << ".." << *(sp_nodes[sp_depth]) << endl;
207 (sp_nodes[sp_depth])++; // advance iter at current depth
208 //cout << sp_depth << ".." << *(sp_nodes[sp_depth]) << endl;
210 // if we reached the end of the list, reset iter & go up one depth
211 if (sp_nodes[sp_depth] == sp_nodes_end[sp_depth])
214 sp_nodes[sp_depth] = all_matches[sp_depth].begin();
216 //cout << "depth = " << sp_depth << endl;
219 not_advanced = false;
222 if (sp_depth == -1) // jumped up to first species, all paths searched
224 else // otherwise just reset to max depth and continue
225 sp_depth = species_num - 2;
232 //initial coding testing
234 int main(int argc, char **argv)
236 vector<string> sequences;
237 vector<int> i_starts;
242 sequences.push_back("AAAAA");
243 sequences.push_back("AAAAT");
244 sequences.push_back("AATTG");
245 sequences.push_back("ATTGC");
251 for(seq_i = 0; seq_i < seq_num; seq_i++) //loop thru all sequences at pos i
253 c_sequences.push_back((char *)sequences[seq_i].c_str());
254 i_starts.push_back(0);
257 avg_entropy = Window_Entropy(i_starts);
259 cout << "average entropy: " << avg_entropy << endl;