1 // This file is part of the Mussa source distribution.
2 // http://mussa.caltech.edu/
3 // Contact author: Tristan De Buysscher, tristan@caltech.edu
5 // This program and all associated source code files are Copyright (C) 2005
6 // the California Institute of Technology, Pasadena, CA, 91125 USA. It is
7 // under the GNU Public License; please see the included LICENSE.txt
8 // file for more information, or contact Tristan directly.
11 // ----------------------------------------
12 // ---------- flp.cc -----------
13 // ----------------------------------------
15 #include "alg/flp.hpp"
25 bool operator==(const FLPs::match& a, const FLPs::match& b)
27 return (a.index == b.index && a.score == b.score);
30 ostream &operator<<(ostream& out, const FLPs::match& m)
32 out << "(" << m.score << ", " << m.index << ")";
44 FLPs::setup(int win_size, int hard_thres)
46 window_size = win_size;
47 hard_threshold = hard_thres;
50 void FLPs::alloc_matches(string::size_type len1)
52 if (all_matches != 0) {
53 // if we're being called again its likely because
54 // seqcomp is being called to do the reverse compliment
55 // so we shouldn't reallocate ourselves.
58 all_matches = new std::vector<list<FLPs::match> >;
60 list<FLPs::match> empty_match_list;
61 std::vector<list<FLPs::match> >::size_type window_count;
63 // we actually know how much to preallocate
64 window_count = len1 - window_size+1;
66 // we have no idea, so don't allocate anything
70 for (std::vector<list<FLPs::match> >::size_type i=0; i < window_count; ++i) {
71 all_matches->push_back(empty_match_list);
73 assert (all_matches->size() == window_count);
80 if (all_matches != 0) {
81 return all_matches->size();
90 FLPs::match_less(match *match1, match *match2)
92 if (match1->score < match2->score)
94 else if ( (match1->score == match2->score) &&
95 (match1->index < match2->index) )
106 for(i = 0; i < seq1_win_num; i++)
107 if (!all_matches[i].empty())
108 all_matches[i].sort(&FLPs::match_less);
113 FLPs::matches(int index) const
115 if (all_matches == 0)
116 throw runtime_error("please call FLPs.seqcomp first");
118 list<FLPs::match>::iterator list_i, list_end;
121 list_i = (*all_matches)[index].begin();
122 list_end = (*all_matches)[index].end();
123 list<FLPs::match> these_matches(list_i, list_end);
124 return these_matches;
128 FLPs::match_locations(int index) const
130 if (all_matches == 0)
131 throw runtime_error("please call FLPs.seqcomp first");
133 list<int> these_matches;
134 list<FLPs::match>::iterator list_i, list_end;
137 list_i = (*all_matches)[index].begin();
138 list_end = (*all_matches)[index].end();
139 while (list_i != list_end)
141 these_matches.push_back(list_i->index);
144 return these_matches;
148 FLPs::thres_matches(int index, int thres) const
150 if (all_matches == 0)
151 throw runtime_error("please call FLPs.seqcomp first");
153 list<int> thres_matches;
154 list<FLPs::match>::iterator list_i, list_end;
157 list_i = (*all_matches)[index].begin();
158 list_end = (*all_matches)[index].end();
159 thres_matches.clear();
161 while (list_i != list_end)
163 if (list_i->score >= thres)
164 thres_matches.push_back(list_i->index);
167 return thres_matches;
171 FLPs::save(string save_file_path)
173 if (all_matches == 0)
174 throw runtime_error("please call FLPs.seqcomp first");
177 save_file.open(save_file_path.c_str(), ios::out);
179 save_file << "<Seqcomp win=" << window_size
180 << " thres=" << hard_threshold << endl;
182 for(vector<list<FLPs::match> >::const_iterator win_citor = all_matches->begin();
183 win_citor != all_matches->end();
186 if (!win_citor->empty())
188 for( list<FLPs::match>::const_iterator match_itor = win_citor->begin();
189 match_itor != win_citor->end();
192 save_file << match_itor->index << "," << match_itor->score << " ";
198 save_file << "</Seqcomp>\n";
204 FLPs::load(string file_path)
207 string file_data, file_data_line, pair_data, index_data, score_data;
209 string::size_type split_index, comma_index;
210 bool tag_open = false;
211 list<FLPs::match> a_match_list;
212 // initialize our all_matches pointer
216 data_file.open(file_path.c_str(), ios::in);
218 getline(data_file,file_data_line);
219 // parse seqcomp open tag and parameters
220 // eg <Seqcomp type=mussa win=30 thres=21>
221 // if parse successful...
224 while ((!data_file.eof()) && tag_open)
226 // intialize list to empty
227 a_match_list.clear();
229 getline(data_file,file_data_line);
230 if (file_data_line == "</Seqcomp>")
234 // parse line of matches
235 else if (file_data_line == "")
237 //cout << "empty line\n";
238 all_matches->push_back(a_match_list);
242 split_index = file_data_line.find(" ");
244 while (split_index != string::npos)
246 pair_data = file_data_line.substr(0,split_index);
247 file_data_line = file_data_line.substr(split_index+1);
248 //cout << "pair_data = " << pair_data << "...";
249 // parse out the 2 pieces of data, index and score of pair match
250 comma_index = pair_data.find(",");
251 index_data = pair_data.substr(0, comma_index);
252 a_match.index = atoi(index_data.c_str() );
253 score_data = pair_data.substr(comma_index+1);
254 a_match.score = atoi(score_data.c_str() );
255 //cout << a_match.index << "," << a_match.score << " ";
257 a_match_list.push_back(a_match);
259 split_index = file_data_line.find(" ");
261 all_matches->push_back(a_match_list);
262 //cout << all_matches->size() << "\n";
265 //cout << "windows in flp = " << all_matches->size() << endl;