1 // This file is part of the Mussa source distribution.
2 // http://mussa.caltech.edu/
3 // Contact author: Tristan De Buysscher, tristan@caltech.edu
5 // This program and all associated source code files are Copyright (C) 2005
6 // the California Institute of Technology, Pasadena, CA, 91125 USA. It is
7 // under the GNU Public License; please see the included LICENSE.txt
8 // file for more information, or contact Tristan directly.
22 #include "mussa_nway.hh"
25 // vars that will be availble to entropy function when in its nway class
26 //vector<char *> c_sequences;
27 //int window = 5, seq_num;
31 Nway_Paths::setup_ent(double new_entropy_thres, vector<string> some_Seqs)
35 ent_thres = new_entropy_thres;
38 for (i = 0; i < species_num; i++)
39 c_sequences.push_back((char *)some_Seqs[i].c_str());
44 Nway_Paths::path_entropy(vector<int> path)
47 double bp_prob, frac_ent, avg_entropy;
48 vector<double> entropies;
50 int window = win_size;
51 int seq_num = species_num;
54 for(i = 0; i < window; i++) // loop thru all bp positions
56 for(i2 = 0; i2 < 5; i2++) // clear old occurences
57 bp_occurences[i2] = 0;
59 for(seq_i = 0; seq_i < seq_num; seq_i++) //loop thru all sequences at pos i
61 switch (c_sequences[seq_i][i+path[seq_i]])
79 cout << "error, nonAGCTN on seq: " << seq_i << " at index: ";
80 cout << i+path[seq_i] << endl;
83 switch (c_sequences[seq_i][-1 * (i + path[seq_i] - window + 1) ])
101 cout << "error, nonAGCTN on seq: " << seq_i << " at index: ";
102 cout << (i + path[seq_i] - window + 1) << endl;
106 entropies.push_back(0.000);
107 for(i2 = 0; i2 < 5; i2++) //calculate entropies
109 bp_prob = (double) bp_occurences[i2] / (double) seq_num;
110 //cout << bp_prob << "::";
111 if (bp_prob == 0.0000)
114 frac_ent = bp_prob * ( log10(bp_prob) / log10(2.0) );
115 //cout << frac_ent << " ";
116 entropies[i] += frac_ent;
122 for(i = 0; i < window; i++)
124 avg_entropy += entropies[i];
125 //cout << entropies[i] << endl;
128 avg_entropy = -1.00 * (avg_entropy / (double) window);
129 //cout << "average entropy: " << avg_entropy << endl;
138 Nway_Paths::entropy_path_search(vector<vector<FLPs> > all_comparisons)
142 int win_i, sp_i, sp_depth, window_num, i, cur_node;
143 bool some_matches, still_paths, not_advanced, trans_good;
144 list<int> new_nodes, trans_check_nodes;
145 vector<list<int> > all_matches;
146 vector<list<int>::iterator> sp_nodes, sp_nodes_end;
147 list<int>::iterator debug_i;
151 window_num = all_comparisons[0][1].win_num();
152 cout << window_num << endl; // just a sanity check
154 sp_nodes.reserve(species_num);
155 sp_nodes_end.reserve(species_num);
157 // loop thru all windows in first species
158 for (win_i = 0; win_i < window_num; win_i++)
160 // first we see if the first seq has matches to all other seqs at this win
164 while ( (sp_i < species_num) && (some_matches) )
167 new_nodes = all_comparisons[0][sp_i].matches(win_i);
168 if (new_nodes.empty())
169 some_matches = false;
171 all_matches.push_back(new_nodes);
176 // if 1st seq does match to all others, make all possible paths
177 // out of all these matches
181 sp_nodes_end.clear();
182 // set each species list of matches to beginning
183 for (sp_i = 0; sp_i < species_num-1; sp_i++)
185 sp_nodes.push_back(all_matches[sp_i].begin());
186 sp_nodes_end.push_back(all_matches[sp_i].end());
193 // add path that each species iterator is pointing to
195 path.push_back(win_i);
198 for (sp_i = 0; sp_i < species_num-1; sp_i++)
200 //cout << ", " << *(sp_nodes[sp_i]);
201 path.push_back(*(sp_nodes[sp_i]));
205 // check entropy <---------------------------------------------------
206 avg_entropy = path_entropy(path);
207 if (avg_entropy <= ent_thres)
208 pathz.push_back(path);
210 // now advance the right iterator
212 sp_depth = species_num - 2; // this roves up & down species list
213 while ( (not_advanced) && (sp_depth != -1) )
216 //cout << sp_depth << ".." << *(sp_nodes[sp_depth]) << endl;
217 (sp_nodes[sp_depth])++; // advance iter at current depth
218 //cout << sp_depth << ".." << *(sp_nodes[sp_depth]) << endl;
220 // if we reached the end of the list, reset iter & go up one depth
221 if (sp_nodes[sp_depth] == sp_nodes_end[sp_depth])
224 sp_nodes[sp_depth] = all_matches[sp_depth].begin();
226 //cout << "depth = " << sp_depth << endl;
229 not_advanced = false;
232 if (sp_depth == -1) // jumped up to first species, all paths searched
234 else // otherwise just reset to max depth and continue
235 sp_depth = species_num - 2;
242 //initial coding testing
244 int main(int argc, char **argv)
246 vector<string> sequences;
247 vector<int> i_starts;
252 sequences.push_back("AAAAA");
253 sequences.push_back("AAAAT");
254 sequences.push_back("AATTG");
255 sequences.push_back("ATTGC");
261 for(seq_i = 0; seq_i < seq_num; seq_i++) //loop thru all sequences at pos i
263 c_sequences.push_back((char *)sequences[seq_i].c_str());
264 i_starts.push_back(0);
267 avg_entropy = Window_Entropy(i_starts);
269 cout << "average entropy: " << avg_entropy << endl;