1 // This file is part of the Mussa source distribution.
2 // http://mussa.caltech.edu/
3 // Contact author: Tristan De Buysscher, tristan@caltech.edu
5 // This program and all associated source code files are Copyright (C) 2005
6 // the California Institute of Technology, Pasadena, CA, 91125 USA. It is
7 // under the GNU Public License; please see the included LICENSE.txt
8 // file for more information, or contact Tristan directly.
11 // ----------------------------------------
12 // ---------- flp.cc -----------
13 // ----------------------------------------
15 #include <boost/filesystem/operations.hpp>
16 #include <boost/filesystem/fstream.hpp>
17 namespace fs = boost::filesystem;
19 #include "alg/flp.hpp"
26 #include "mussa_exceptions.hpp"
29 bool operator==(const FLPs::match& a, const FLPs::match& b)
31 return (a.index == b.index && a.score == b.score);
34 ostream &operator<<(ostream& out, const FLPs::match& m)
36 out << "(" << m.score << ", " << m.index << ")";
44 seqcomp_i(seqcomp_not_running),
45 seqcomp_end(seqcomp_not_running)
49 FLPs::FLPs(const FLPs& o) :
50 window_size(o.window_size),
51 hard_threshold(o.hard_threshold),
52 all_matches(o.all_matches),
53 seqcomp_i(o.seqcomp_i),
54 seqcomp_end(o.seqcomp_end)
59 FLPs::setup(int win_size, int hard_thres)
61 window_size = win_size;
62 hard_threshold = hard_thres;
65 void FLPs::alloc_matches(string::size_type len1)
67 if (all_matches != 0) {
68 // if we're being called again its likely because
69 // seqcomp is being called to do the reverse compliment
70 // so we shouldn't reallocate ourselves.
73 all_matches = new std::vector<list<FLPs::match> >;
75 list<FLPs::match> empty_match_list;
76 std::vector<list<FLPs::match> >::size_type window_count;
78 // we actually know how much to preallocate
79 window_count = len1 - window_size+1;
81 // we have no idea, so don't allocate anything
85 for (std::vector<list<FLPs::match> >::size_type i=0; i < window_count; ++i) {
86 all_matches->push_back(empty_match_list);
88 assert (all_matches->size() == window_count);
95 if (all_matches != 0) {
96 return all_matches->size();
105 FLPs::match_less(match *match1, match *match2)
107 if (match1->score < match2->score)
109 else if ( (match1->score == match2->score) &&
110 (match1->index < match2->index) )
121 for(i = 0; i < seq1_win_num; i++)
122 if (!all_matches[i].empty())
123 all_matches[i].sort(&FLPs::match_less);
128 FLPs::matches(int index) const
130 if (all_matches == 0)
131 throw runtime_error("please call FLPs.seqcomp first");
133 list<FLPs::match>::iterator list_i, list_end;
136 list_i = (*all_matches)[index].begin();
137 list_end = (*all_matches)[index].end();
138 list<FLPs::match> these_matches(list_i, list_end);
139 return these_matches;
143 FLPs::match_locations(int index) const
145 if (all_matches == 0)
146 throw runtime_error("please call FLPs.seqcomp first");
148 list<int> these_matches;
149 list<FLPs::match>::iterator list_i, list_end;
152 list_i = (*all_matches)[index].begin();
153 list_end = (*all_matches)[index].end();
154 while (list_i != list_end)
156 these_matches.push_back(list_i->index);
159 return these_matches;
163 FLPs::thres_matches(int index, int thres) const
165 if (all_matches == 0)
166 throw runtime_error("please call FLPs.seqcomp first");
168 list<int> thres_matches;
169 list<FLPs::match>::iterator list_i, list_end;
172 list_i = (*all_matches)[index].begin();
173 list_end = (*all_matches)[index].end();
174 thres_matches.clear();
176 while (list_i != list_end)
178 if (list_i->score >= thres)
179 thres_matches.push_back(list_i->index);
182 return thres_matches;
186 FLPs::save(fs::path save_file_path)
188 if (all_matches == 0)
189 throw runtime_error("please call FLPs.seqcomp first");
191 fs::fstream save_file;
192 save_file.open(save_file_path, ios::out);
194 save_file << "<Seqcomp win=" << window_size
195 << " thres=" << hard_threshold << endl;
197 for(vector<list<FLPs::match> >::const_iterator win_citor = all_matches->begin();
198 win_citor != all_matches->end();
201 if (!win_citor->empty())
203 for( list<FLPs::match>::const_iterator match_itor = win_citor->begin();
204 match_itor != win_citor->end();
207 save_file << match_itor->index << "," << match_itor->score << " ";
213 save_file << "</Seqcomp>\n";
219 FLPs::load(fs::path file_path)
221 fs::fstream data_file;
222 string file_data, file_data_line, pair_data, index_data, score_data;
224 string::size_type split_index, comma_index;
225 bool tag_open = false;
226 list<FLPs::match> a_match_list;
227 // initialize our all_matches pointer
230 if (fs::exists(file_path)) {
231 data_file.open(file_path, ios::in);
233 getline(data_file,file_data_line);
234 // parse seqcomp open tag and parameters
235 // eg <Seqcomp type=mussa win=30 thres=21>
236 // if parse successful...
239 while ((data_file.good()) && tag_open)
241 // intialize list to empty
242 a_match_list.clear();
244 getline(data_file,file_data_line);
245 if (file_data_line == "</Seqcomp>")
249 // parse line of matches
250 else if (file_data_line == "")
252 //cout << "empty line\n";
253 all_matches->push_back(a_match_list);
257 split_index = file_data_line.find(" ");
259 while (split_index != string::npos)
261 pair_data = file_data_line.substr(0,split_index);
262 file_data_line = file_data_line.substr(split_index+1);
263 //cout << "pair_data = " << pair_data << "...";
264 // parse out the 2 pieces of data, index and score of pair match
265 comma_index = pair_data.find(",");
266 index_data = pair_data.substr(0, comma_index);
267 a_match.index = atoi(index_data.c_str() );
268 score_data = pair_data.substr(comma_index+1);
269 a_match.score = atoi(score_data.c_str() );
270 //cout << a_match.index << "," << a_match.score << " ";
272 a_match_list.push_back(a_match);
274 split_index = file_data_line.find(" ");
276 all_matches->push_back(a_match_list);
277 //cout << all_matches->size() << "\n";
280 //cout << "windows in flp = " << all_matches->size() << endl;
283 throw mussa_load_error(file_path.string() + "was not found");
288 float FLPs::progress() const
290 if (seqcomp_end == FLPs::seqcomp_not_running) {
291 return FLPs::seqcomp_not_running;
293 return static_cast<float>(seqcomp_i)/static_cast<float>(seqcomp_end);