1 // This file is part of the Mussa source distribution.
2 // http://mussa.caltech.edu/
3 // Contact author: Tristan De Buysscher, tristan@caltech.edu
5 // This program and all associated source code files are Copyright (C) 2005
6 // the California Institute of Technology, Pasadena, CA, 91125 USA. It is
7 // under the GNU Public License; please see the included LICENSE.txt
8 // file for more information, or contact Tristan directly.
11 // ----------------------------------------
12 // ---------- flp.cc -----------
13 // ----------------------------------------
15 #include <boost/filesystem/operations.hpp>
16 #include <boost/filesystem/fstream.hpp>
17 namespace fs = boost::filesystem;
19 #include "alg/flp.hpp"
26 #include "mussa_exceptions.hpp"
29 bool operator==(const FLPs::match& a, const FLPs::match& b)
31 return (a.index == b.index && a.score == b.score);
34 ostream &operator<<(ostream& out, const FLPs::match& m)
36 out << "(" << m.score << ", " << m.index << ")";
47 FLPs::FLPs(const FLPs& o) :
48 window_size(o.window_size),
49 hard_threshold(o.hard_threshold),
50 all_matches(o.all_matches)
55 FLPs::setup(int win_size, int hard_thres)
57 window_size = win_size;
58 hard_threshold = hard_thres;
61 void FLPs::alloc_matches(string::size_type len1)
63 if (all_matches != 0) {
64 // if we're being called again its likely because
65 // seqcomp is being called to do the reverse compliment
66 // so we shouldn't reallocate ourselves.
69 all_matches = new std::vector<list<FLPs::match> >;
71 list<FLPs::match> empty_match_list;
72 std::vector<list<FLPs::match> >::size_type window_count;
74 // we actually know how much to preallocate
75 window_count = len1 - window_size+1;
77 // we have no idea, so don't allocate anything
81 for (std::vector<list<FLPs::match> >::size_type i=0; i < window_count; ++i) {
82 all_matches->push_back(empty_match_list);
84 assert (all_matches->size() == window_count);
91 if (all_matches != 0) {
92 return all_matches->size();
101 FLPs::match_less(match *match1, match *match2)
103 if (match1->score < match2->score)
105 else if ( (match1->score == match2->score) &&
106 (match1->index < match2->index) )
117 for(i = 0; i < seq1_win_num; i++)
118 if (!all_matches[i].empty())
119 all_matches[i].sort(&FLPs::match_less);
124 FLPs::matches(int index) const
126 if (all_matches == 0)
127 throw runtime_error("please call FLPs.seqcomp first");
129 list<FLPs::match>::iterator list_i, list_end;
132 list_i = (*all_matches)[index].begin();
133 list_end = (*all_matches)[index].end();
134 list<FLPs::match> these_matches(list_i, list_end);
135 return these_matches;
139 FLPs::match_locations(int index) const
141 if (all_matches == 0)
142 throw runtime_error("please call FLPs.seqcomp first");
144 list<int> these_matches;
145 list<FLPs::match>::iterator list_i, list_end;
148 list_i = (*all_matches)[index].begin();
149 list_end = (*all_matches)[index].end();
150 while (list_i != list_end)
152 these_matches.push_back(list_i->index);
155 return these_matches;
159 FLPs::thres_matches(int index, int thres) const
161 if (all_matches == 0)
162 throw runtime_error("please call FLPs.seqcomp first");
164 list<int> thres_matches;
165 list<FLPs::match>::iterator list_i, list_end;
168 list_i = (*all_matches)[index].begin();
169 list_end = (*all_matches)[index].end();
170 thres_matches.clear();
172 while (list_i != list_end)
174 if (list_i->score >= thres)
175 thres_matches.push_back(list_i->index);
178 return thres_matches;
182 FLPs::save(fs::path save_file_path)
184 if (all_matches == 0)
185 throw runtime_error("please call FLPs.seqcomp first");
187 fs::fstream save_file;
188 save_file.open(save_file_path, ios::out);
190 save_file << "<Seqcomp win=" << window_size
191 << " thres=" << hard_threshold << endl;
193 for(vector<list<FLPs::match> >::const_iterator win_citor = all_matches->begin();
194 win_citor != all_matches->end();
197 if (!win_citor->empty())
199 for( list<FLPs::match>::const_iterator match_itor = win_citor->begin();
200 match_itor != win_citor->end();
203 save_file << match_itor->index << "," << match_itor->score << " ";
209 save_file << "</Seqcomp>\n";
215 FLPs::load(fs::path file_path)
217 fs::fstream data_file;
218 string file_data, file_data_line, pair_data, index_data, score_data;
220 string::size_type split_index, comma_index;
221 bool tag_open = false;
222 list<FLPs::match> a_match_list;
223 // initialize our all_matches pointer
226 if (fs::exists(file_path)) {
227 data_file.open(file_path, ios::in);
229 getline(data_file,file_data_line);
230 // parse seqcomp open tag and parameters
231 // eg <Seqcomp type=mussa win=30 thres=21>
232 // if parse successful...
235 while ((data_file.good()) && tag_open)
237 // intialize list to empty
238 a_match_list.clear();
240 getline(data_file,file_data_line);
241 if (file_data_line == "</Seqcomp>")
245 // parse line of matches
246 else if (file_data_line == "")
248 //cout << "empty line\n";
249 all_matches->push_back(a_match_list);
253 split_index = file_data_line.find(" ");
255 while (split_index != string::npos)
257 pair_data = file_data_line.substr(0,split_index);
258 file_data_line = file_data_line.substr(split_index+1);
259 //cout << "pair_data = " << pair_data << "...";
260 // parse out the 2 pieces of data, index and score of pair match
261 comma_index = pair_data.find(",");
262 index_data = pair_data.substr(0, comma_index);
263 a_match.index = atoi(index_data.c_str() );
264 score_data = pair_data.substr(comma_index+1);
265 a_match.score = atoi(score_data.c_str() );
266 //cout << a_match.index << "," << a_match.score << " ";
268 a_match_list.push_back(a_match);
270 split_index = file_data_line.find(" ");
272 all_matches->push_back(a_match_list);
273 //cout << all_matches->size() << "\n";
276 //cout << "windows in flp = " << all_matches->size() << endl;
279 throw mussa_load_error(file_path.string() + "was not found");