1 // This file is part of the Mussa source distribution.
2 // http://mussa.caltech.edu/
3 // Contact author: Tristan De Buysscher, tristan@caltech.edu
5 // This program and all associated source code files are Copyright (C) 2005
6 // the California Institute of Technology, Pasadena, CA, 91125 USA. It is
7 // under the GNU Public License; please see the included LICENSE.txt
8 // file for more information, or contact Tristan directly.
11 // ----------------------------------------
12 // ---------- flp.cc -----------
13 // ----------------------------------------
15 #include <boost/filesystem/operations.hpp>
16 #include <boost/filesystem/fstream.hpp>
17 namespace fs = boost::filesystem;
19 #include "alg/flp.hpp"
26 #include "mussa_exceptions.hpp"
29 bool operator==(const FLPs::match& a, const FLPs::match& b)
31 return (a.index == b.index && a.score == b.score);
34 ostream &operator<<(ostream& out, const FLPs::match& m)
36 out << "(" << m.score << ", " << m.index << ")";
48 FLPs::setup(int win_size, int hard_thres)
50 window_size = win_size;
51 hard_threshold = hard_thres;
54 void FLPs::alloc_matches(string::size_type len1)
56 if (all_matches != 0) {
57 // if we're being called again its likely because
58 // seqcomp is being called to do the reverse compliment
59 // so we shouldn't reallocate ourselves.
62 all_matches = new std::vector<list<FLPs::match> >;
64 list<FLPs::match> empty_match_list;
65 std::vector<list<FLPs::match> >::size_type window_count;
67 // we actually know how much to preallocate
68 window_count = len1 - window_size+1;
70 // we have no idea, so don't allocate anything
74 for (std::vector<list<FLPs::match> >::size_type i=0; i < window_count; ++i) {
75 all_matches->push_back(empty_match_list);
77 assert (all_matches->size() == window_count);
84 if (all_matches != 0) {
85 return all_matches->size();
94 FLPs::match_less(match *match1, match *match2)
96 if (match1->score < match2->score)
98 else if ( (match1->score == match2->score) &&
99 (match1->index < match2->index) )
110 for(i = 0; i < seq1_win_num; i++)
111 if (!all_matches[i].empty())
112 all_matches[i].sort(&FLPs::match_less);
117 FLPs::matches(int index) const
119 if (all_matches == 0)
120 throw runtime_error("please call FLPs.seqcomp first");
122 list<FLPs::match>::iterator list_i, list_end;
125 list_i = (*all_matches)[index].begin();
126 list_end = (*all_matches)[index].end();
127 list<FLPs::match> these_matches(list_i, list_end);
128 return these_matches;
132 FLPs::match_locations(int index) const
134 if (all_matches == 0)
135 throw runtime_error("please call FLPs.seqcomp first");
137 list<int> these_matches;
138 list<FLPs::match>::iterator list_i, list_end;
141 list_i = (*all_matches)[index].begin();
142 list_end = (*all_matches)[index].end();
143 while (list_i != list_end)
145 these_matches.push_back(list_i->index);
148 return these_matches;
152 FLPs::thres_matches(int index, int thres) const
154 if (all_matches == 0)
155 throw runtime_error("please call FLPs.seqcomp first");
157 list<int> thres_matches;
158 list<FLPs::match>::iterator list_i, list_end;
161 list_i = (*all_matches)[index].begin();
162 list_end = (*all_matches)[index].end();
163 thres_matches.clear();
165 while (list_i != list_end)
167 if (list_i->score >= thres)
168 thres_matches.push_back(list_i->index);
171 return thres_matches;
175 FLPs::save(fs::path save_file_path)
177 if (all_matches == 0)
178 throw runtime_error("please call FLPs.seqcomp first");
180 fs::fstream save_file;
181 save_file.open(save_file_path, ios::out);
183 save_file << "<Seqcomp win=" << window_size
184 << " thres=" << hard_threshold << endl;
186 for(vector<list<FLPs::match> >::const_iterator win_citor = all_matches->begin();
187 win_citor != all_matches->end();
190 if (!win_citor->empty())
192 for( list<FLPs::match>::const_iterator match_itor = win_citor->begin();
193 match_itor != win_citor->end();
196 save_file << match_itor->index << "," << match_itor->score << " ";
202 save_file << "</Seqcomp>\n";
208 FLPs::load(fs::path file_path)
210 fs::fstream data_file;
211 string file_data, file_data_line, pair_data, index_data, score_data;
213 string::size_type split_index, comma_index;
214 bool tag_open = false;
215 list<FLPs::match> a_match_list;
216 // initialize our all_matches pointer
219 if (fs::exists(file_path)) {
220 data_file.open(file_path, ios::in);
222 getline(data_file,file_data_line);
223 // parse seqcomp open tag and parameters
224 // eg <Seqcomp type=mussa win=30 thres=21>
225 // if parse successful...
228 while ((data_file.good()) && tag_open)
230 // intialize list to empty
231 a_match_list.clear();
233 getline(data_file,file_data_line);
234 if (file_data_line == "</Seqcomp>")
238 // parse line of matches
239 else if (file_data_line == "")
241 //cout << "empty line\n";
242 all_matches->push_back(a_match_list);
246 split_index = file_data_line.find(" ");
248 while (split_index != string::npos)
250 pair_data = file_data_line.substr(0,split_index);
251 file_data_line = file_data_line.substr(split_index+1);
252 //cout << "pair_data = " << pair_data << "...";
253 // parse out the 2 pieces of data, index and score of pair match
254 comma_index = pair_data.find(",");
255 index_data = pair_data.substr(0, comma_index);
256 a_match.index = atoi(index_data.c_str() );
257 score_data = pair_data.substr(comma_index+1);
258 a_match.score = atoi(score_data.c_str() );
259 //cout << a_match.index << "," << a_match.score << " ";
261 a_match_list.push_back(a_match);
263 split_index = file_data_line.find(" ");
265 all_matches->push_back(a_match_list);
266 //cout << all_matches->size() << "\n";
269 //cout << "windows in flp = " << all_matches->size() << endl;
272 throw mussa_load_error(file_path.string() + "was not found");