1 // This file is part of the Mussa source distribution.
2 // http://mussa.caltech.edu/
3 // Contact author: Tristan De Buysscher, tristan@caltech.edu
5 // This program and all associated source code files are Copyright (C) 2005
6 // the California Institute of Technology, Pasadena, CA, 91125 USA. It is
7 // under the GNU Public License; please see the included LICENSE.txt
8 // file for more information, or contact Tristan directly.
12 #include "mussa_nway.hh"
17 // vars that will be availble to entropy function when in its nway class
18 //vector<char *> c_sequences;
19 //int window = 5, seq_num;
23 Nway_Paths::setup_ent(double new_entropy_thres, vector<string> some_Seqs)
27 ent_thres = new_entropy_thres;
30 for (i = 0; i < species_num; i++)
31 c_sequences.push_back((char *)some_Seqs[i].c_str());
36 Nway_Paths::path_entropy(vector<int> path)
39 double bp_prob, frac_ent, avg_entropy;
40 vector<double> entropies;
42 int window = win_size;
43 int seq_num = species_num;
46 for(i = 0; i < window; i++) // loop thru all bp positions
48 for(i2 = 0; i2 < 5; i2++) // clear old occurences
49 bp_occurences[i2] = 0;
51 for(seq_i = 0; seq_i < seq_num; seq_i++) //loop thru all sequences at pos i
53 switch (c_sequences[seq_i][i+path[seq_i]])
71 cout << "error, nonAGCTN on seq: " << seq_i << " at index: ";
72 cout << i+path[seq_i] << endl;
75 switch (c_sequences[seq_i][-1 * (i + path[seq_i] - window + 1) ])
93 cout << "error, nonAGCTN on seq: " << seq_i << " at index: ";
94 cout << (i + path[seq_i] - window + 1) << endl;
98 entropies.push_back(0.000);
99 for(i2 = 0; i2 < 5; i2++) //calculate entropies
101 bp_prob = (double) bp_occurences[i2] / (double) seq_num;
102 //cout << bp_prob << "::";
103 if (bp_prob == 0.0000)
106 frac_ent = bp_prob * ( log10(bp_prob) / log10(2.0) );
107 //cout << frac_ent << " ";
108 entropies[i] += frac_ent;
114 for(i = 0; i < window; i++)
116 avg_entropy += entropies[i];
117 //cout << entropies[i] << endl;
120 avg_entropy = -1.00 * (avg_entropy / (double) window);
121 //cout << "average entropy: " << avg_entropy << endl;
130 Nway_Paths::entropy_path_search(vector<vector<FLPs> > all_comparisons)
134 int win_i, sp_i, sp_depth, window_num, i, cur_node;
135 bool some_matches, still_paths, not_advanced, trans_good;
136 list<int> new_nodes, trans_check_nodes;
137 vector<list<int> > all_matches;
138 vector<list<int>::iterator> sp_nodes, sp_nodes_end;
139 list<int>::iterator debug_i;
143 window_num = all_comparisons[0][1].win_num();
144 cout << window_num << endl; // just a sanity check
146 sp_nodes.reserve(species_num);
147 sp_nodes_end.reserve(species_num);
149 // loop thru all windows in first species
150 for (win_i = 0; win_i < window_num; win_i++)
152 // first we see if the first seq has matches to all other seqs at this win
156 while ( (sp_i < species_num) && (some_matches) )
159 new_nodes = all_comparisons[0][sp_i].matches(win_i);
160 if (new_nodes.empty())
161 some_matches = false;
163 all_matches.push_back(new_nodes);
168 // if 1st seq does match to all others, make all possible paths
169 // out of all these matches
173 sp_nodes_end.clear();
174 // set each species list of matches to beginning
175 for (sp_i = 0; sp_i < species_num-1; sp_i++)
177 sp_nodes.push_back(all_matches[sp_i].begin());
178 sp_nodes_end.push_back(all_matches[sp_i].end());
185 // add path that each species iterator is pointing to
187 path.push_back(win_i);
190 for (sp_i = 0; sp_i < species_num-1; sp_i++)
192 //cout << ", " << *(sp_nodes[sp_i]);
193 path.push_back(*(sp_nodes[sp_i]));
197 // check entropy <---------------------------------------------------
198 avg_entropy = path_entropy(path);
199 if (avg_entropy <= ent_thres)
200 pathz.push_back(path);
202 // now advance the right iterator
204 sp_depth = species_num - 2; // this roves up & down species list
205 while ( (not_advanced) && (sp_depth != -1) )
208 //cout << sp_depth << ".." << *(sp_nodes[sp_depth]) << endl;
209 (sp_nodes[sp_depth])++; // advance iter at current depth
210 //cout << sp_depth << ".." << *(sp_nodes[sp_depth]) << endl;
212 // if we reached the end of the list, reset iter & go up one depth
213 if (sp_nodes[sp_depth] == sp_nodes_end[sp_depth])
216 sp_nodes[sp_depth] = all_matches[sp_depth].begin();
218 //cout << "depth = " << sp_depth << endl;
221 not_advanced = false;
224 if (sp_depth == -1) // jumped up to first species, all paths searched
226 else // otherwise just reset to max depth and continue
227 sp_depth = species_num - 2;
234 //initial coding testing
236 int main(int argc, char **argv)
238 vector<string> sequences;
239 vector<int> i_starts;
244 sequences.push_back("AAAAA");
245 sequences.push_back("AAAAT");
246 sequences.push_back("AATTG");
247 sequences.push_back("ATTGC");
253 for(seq_i = 0; seq_i < seq_num; seq_i++) //loop thru all sequences at pos i
255 c_sequences.push_back((char *)sequences[seq_i].c_str());
256 i_starts.push_back(0);
259 avg_entropy = Window_Entropy(i_starts);
261 cout << "average entropy: " << avg_entropy << endl;