Update mussa to build on ubuntu 10.04 with qt 4.6.2 +boost 1.40.0.1
[mussa.git] / alg / flp.cpp
index 3076e7b437b24526010adff9262d53d470f388d4..8c425d59666ad73a081b1efc810edb74c1710386 100644 (file)
 //                            ---------- flp.cc  -----------
 //                        ----------------------------------------
 
+#include <boost/filesystem/operations.hpp>
+#include <boost/filesystem/fstream.hpp>
+namespace fs = boost::filesystem;
+
 #include "alg/flp.hpp"
 
-#include <fstream>
 #include <iostream>
 #include <string>
 #include <stdexcept>
 #include <cassert>
 
+#include "mussa_exceptions.hpp"
 using namespace std;
 
 bool operator==(const FLPs::match& a, const FLPs::match& b)
@@ -36,7 +40,18 @@ ostream &operator<<(ostream& out, const FLPs::match& m)
 FLPs::FLPs() :
   window_size(0),
   hard_threshold(0),
-  all_matches(0)
+  all_matches(0),  // starts null; save() throws while this is still 0
+  seqcomp_i(seqcomp_not_running),  // progress numerator starts at the sentinel value
+  seqcomp_end(seqcomp_not_running)  // progress() returns the sentinel while this equals seqcomp_not_running
+{
+}
+
+FLPs::FLPs(const FLPs& o) :  // copy constructor: member-wise copy of o
+  window_size(o.window_size),
+  hard_threshold(o.hard_threshold),
+  all_matches(o.all_matches),  // NOTE(review): copies the pointer/handle, not the match lists -- confirm the member type makes sharing safe
+  seqcomp_i(o.seqcomp_i),  // progress counters are copied as-is
+  seqcomp_end(o.seqcomp_end)
 {
 }
 
@@ -168,13 +183,13 @@ FLPs::thres_matches(int index, int thres) const
 }
 
 void
-FLPs::save(string save_file_path)
+FLPs::save(fs::path save_file_path)  // writes the <Seqcomp ...> output to save_file_path; throws runtime_error if all_matches is still 0
 {
   if (all_matches == 0) 
     throw runtime_error("please call FLPs.seqcomp first");
 
-  fstream save_file;
-  save_file.open(save_file_path.c_str(), ios::out);
+  fs::fstream save_file;  // boost::filesystem stream, so it can be opened from an fs::path directly
+  save_file.open(save_file_path, ios::out);  // NOTE(review): no open-failure check in the lines visible here
 
   save_file << "<Seqcomp win=" << window_size
             << " thres=" << hard_threshold << endl;
@@ -201,9 +216,9 @@ FLPs::save(string save_file_path)
 }
 
 void
-FLPs::load(string file_path)
+FLPs::load(fs::path file_path)
 {
-  fstream data_file;
+  fs::fstream data_file;
   string file_data, file_data_line, pair_data, index_data, score_data;
   match a_match;
   string::size_type split_index, comma_index;
@@ -212,57 +227,69 @@ FLPs::load(string file_path)
   // initialize our all_matches pointer
   alloc_matches();
 
-
-  data_file.open(file_path.c_str(), ios::in);
-
-  getline(data_file,file_data_line);
-  // parse seqcomp open tag and parameters
-  // eg <Seqcomp type=mussa win=30 thres=21>
-  // if parse successful...
-  tag_open = true;
-
-  while ((!data_file.eof()) && tag_open)
-  {
-    // intialize list to empty
-    a_match_list.clear();
+  if (fs::exists(file_path)) {
+    data_file.open(file_path, ios::in);
 
     getline(data_file,file_data_line);
-    if (file_data_line == "</Seqcomp>")
-    {
-      tag_open = false;
-    }
-    // parse line of matches
-    else if (file_data_line == "")
-    {
-      //cout << "empty line\n";
-      all_matches->push_back(a_match_list); 
-    }
-    else
+    // parse seqcomp open tag and parameters
+    // eg <Seqcomp type=mussa win=30 thres=21>
+    // if parse successful...
+    tag_open = true;
+
+    while ((data_file.good()) && tag_open)
     {
-      split_index = file_data_line.find(" ");
-      
-      while (split_index != string::npos)
-      {
-        pair_data = file_data_line.substr(0,split_index); 
-        file_data_line = file_data_line.substr(split_index+1);
-        //cout << "pair_data = " << pair_data << "...";
-        // parse out the 2 pieces of data, index and score of pair match
-        comma_index = pair_data.find(",");
-        index_data = pair_data.substr(0, comma_index);
-        a_match.index = atoi(index_data.c_str() );
-        score_data = pair_data.substr(comma_index+1); 
-        a_match.score = atoi(score_data.c_str() );
-        //cout << a_match.index << "," << a_match.score << " ";
-
-        a_match_list.push_back(a_match);
+      // intialize list to empty
+      a_match_list.clear();
 
+      getline(data_file,file_data_line);
+      if (file_data_line == "</Seqcomp>")
+      {
+        tag_open = false;
+      }
+      // parse line of matches
+      else if (file_data_line == "")
+      {
+        //cout << "empty line\n";
+        all_matches->push_back(a_match_list); 
+      }
+      else
+      {
         split_index = file_data_line.find(" ");
+      
+        while (split_index != string::npos)
+        {
+          pair_data = file_data_line.substr(0,split_index); 
+          file_data_line = file_data_line.substr(split_index+1);
+          //cout << "pair_data = " << pair_data << "...";
+          // parse out the 2 pieces of data, index and score of pair match
+          comma_index = pair_data.find(",");
+          index_data = pair_data.substr(0, comma_index);
+          a_match.index = atoi(index_data.c_str() );
+          score_data = pair_data.substr(comma_index+1); 
+          a_match.score = atoi(score_data.c_str() );
+          //cout << a_match.index << "," << a_match.score << " ";
+
+          a_match_list.push_back(a_match);
+
+          split_index = file_data_line.find(" ");
+        }
+        all_matches->push_back(a_match_list);
+        //cout << all_matches->size() << "\n";
       }
-      all_matches->push_back(a_match_list);
-      //cout << all_matches->size() << "\n";
     }
+    //cout << "windows in flp = " << all_matches->size() << endl;
+    data_file.close();
+  } else {
+    throw mussa_load_error(file_path.string() + "was not found");
   }
-  //cout << "windows in flp = " << all_matches->size() << endl;
-  data_file.close();
+  
 }
 
+float FLPs::progress() const  // returns seqcomp_i / seqcomp_end as a float, or the seqcomp_not_running sentinel
+{
+  if (seqcomp_end == FLPs::seqcomp_not_running) {
+    return FLPs::seqcomp_not_running;  // sentinel is converted to float for the caller
+  } else {
+    return static_cast<float>(seqcomp_i)/static_cast<float>(seqcomp_end);  // NOTE(review): assumes seqcomp_end is non-zero on this path -- confirm
+  }
+}