[project @ 4]
[mussa.git] / mussa_class.cc
index d00f76667c0ea91810a6eac1e748abdbee2fe208..110d6f525984fb3c0d34039552037a77c105007f 100644 (file)
 
 #include "mussa_class.hh"
 
+// Convert an int to its decimal string representation.
+//
+// Zero yields "0"; negative values yield a leading '-' followed by the
+// digits of the magnitude.  (The previous version produced no digits at
+// all for negative input, because a negative remainder matched none of
+// its branches.)
+string
+int_to_str(int an_int)
+{
+  string converted_int;
+  unsigned int magnitude;
+
+  // Take the magnitude in unsigned arithmetic so that INT_MIN, whose
+  // negation does not fit in an int, is handled without overflow.
+  if (an_int < 0)
+    magnitude = 0u - static_cast<unsigned int>(an_int);
+  else
+    magnitude = static_cast<unsigned int>(an_int);
+
+  // do/while so that an input of 0 still emits a single "0" digit
+  do
+  {
+    converted_int.insert(converted_int.begin(),
+                         static_cast<char>('0' + magnitude % 10));
+    magnitude /= 10;
+  } while (magnitude != 0);
+
+  if (an_int < 0)
+    converted_int.insert(converted_int.begin(), '-');
+
+  return converted_int;
+}
+
 
+// Default constructor; its body is empty.
 Mussa::Mussa()
 {
 }
 
+// Parse the command line and record the results in member variables.
+//
+// Recognized arguments (argv[0] is skipped):
+//   -v <name>   store <name> in ana_name;        run mode becomes 'v'
+//   -n <file>   store <file> in para_file_path;  run mode becomes 'n'
+//   -w <int>    set window via atoi and set win_override
+//   -t <int>    set threshold via atoi and set thres_override
+//   <other>     store the argument in para_file_path; run mode becomes 'f'
+//
+// Returns the run mode set by the last mode-setting argument, or '\0' when
+// no argument set one (previously the return value was uninitialized in
+// that case).  An option that is missing its value is reported on cerr and
+// ends parsing, instead of reading past the end of argv.
+char
+Mussa::parse_args(int argc, char **argv)
+{
+  int i;
+  string an_arg;
+  char run_mode = '\0';
+  bool takes_value;
+
+  win_override = false;
+  thres_override = false;
+
+  i = 1;
+  while (i < argc)
+  {
+    an_arg = argv[i];
+    i++;
+
+    // these four options consume the following argument as their value
+    takes_value = (an_arg == "-v") || (an_arg == "-n") ||
+                  (an_arg == "-w") || (an_arg == "-t");
+    if (takes_value && (i >= argc))
+    {
+      cerr << "option " << an_arg << " requires a value" << endl;
+      break;
+    }
+
+    if (an_arg == "-v")
+    {
+      ana_name = argv[i];
+      i++;
+      run_mode = 'v';
+    }
+    else if (an_arg == "-n")
+    {
+      para_file_path = argv[i];
+      i++;
+      run_mode = 'n';
+    }
+    else if (an_arg == "-w")
+    {
+      window = atoi(argv[i]);
+      i++;
+      win_override = true;
+    }
+    else if (an_arg == "-t")
+    {
+      threshold = atoi(argv[i]);
+      i++;
+      thres_override = true;
+      // NOTE(review): looks like leftover debug output; kept so that
+      // what is written to stdout does not change
+      cout << thres_override << endl;
+    }
+    else
+    {
+      para_file_path = an_arg;
+      run_mode = 'f';
+    }
+  }
+  return run_mode;
+}
+
+
 void
-Mussa::setup(char * para_file_path)
+Mussa::setup()
 {
   ifstream para_file;
   string file_data_line;
   string param, value, annot_file;
   int split_index, fasta_index;
+  int sub_seq_start, sub_seq_end;
   bool seq_params, did_seq;
   int bogo;
 
 
   win_append = false;
   thres_append = false;
+  seq_files.clear();
+  fasta_indices.clear();
+  annot_files.clear();
+  sub_seq_starts.clear();
+  sub_seq_ends.clear();
 
-  para_file.open(para_file_path, ios::in);
+  para_file.open(para_file_path.c_str(), ios::in);
 
   getline(para_file,file_data_line);
   split_index = file_data_line.find(" ");
@@ -43,14 +145,22 @@ Mussa::setup(char * para_file_path)
     else if (param == "SEQUENCE_NUM")
       seq_num = atoi(value.c_str());
     else if (param == "WINDOW")
-      window = atoi(value.c_str());
+    {
+      if (!win_override)
+        window = atoi(value.c_str());
+    }
     else if (param == "THRESHOLD")
-      threshold = atoi(value.c_str());
+    {
+      if (!thres_override)
+          threshold = atoi(value.c_str());
+    }
     else if (param == "SEQUENCE")
     {
       seq_files.push_back(value);
       fasta_index = 1;
       annot_file = "";
+      sub_seq_start = 0;
+      sub_seq_end = 0;
       seq_params = true;
 
       while ((!para_file.eof()) && seq_params)
@@ -64,6 +174,13 @@ Mussa::setup(char * para_file_path)
           fasta_index = atoi(value.c_str());
         else if (param == "ANNOTATION")
           annot_file = value;
+        else if (param == "SEQ_START")
+          sub_seq_start = atoi(value.c_str());
+        else if (param == "SEQ_END")
+        {
+          cout << "hey!  " << atoi(value.c_str()) << endl;
+          sub_seq_end = atoi(value.c_str());
+        }
         //ignore empty lines or that start with '#'
         else if ((param == "") || (param == "#")) {}
         else seq_params = false;
@@ -72,6 +189,8 @@ Mussa::setup(char * para_file_path)
 
       fasta_indices.push_back(fasta_index);
       annot_files.push_back(annot_file);
+      sub_seq_starts.push_back(sub_seq_start);
+      sub_seq_ends.push_back(sub_seq_end);
       did_seq = true;
       
     }
@@ -99,29 +218,44 @@ Mussa::setup(char * para_file_path)
   cout << "window = " << window << " threshold = " << threshold << "\n";
 }
 
+//        if (!((param == "") || (param == "#")))
+//          cout << value << " = " << param << endl;
 
+// Build the_Seqs from the lists seq_files, fasta_indices, annot_files,
+// sub_seq_starts and sub_seq_ends, which are walked in lockstep.
+// For each entry: load_fasta() is called with the file name, fasta index
+// and sub-sequence start/end; load_annot() is called only when the
+// annotation file name is non-empty; the sequence is appended to the_Seqs
+// and its header is printed to cout.
+// Only seq_files is checked against its end(); the other four iterators
+// are dereferenced without a check.
 void
 Mussa::get_Seqs()
 {
   list<string>::iterator seq_files_i, annot_files_i;
-  list<int>::iterator fasta_indices_i;
+  list<int>::iterator fasta_indices_i, seq_starts_i, seq_ends_i;
   Sequence aSeq;
 
   seq_files_i = seq_files.begin();
   fasta_indices_i = fasta_indices.begin();
   annot_files_i = annot_files.begin();
+  seq_starts_i = sub_seq_starts.begin();
+  seq_ends_i = sub_seq_ends.begin();
 
-  while ( (seq_files_i != seq_files.end()) &&
+
+  while (seq_files_i != seq_files.end())
+          /* it should be guaranteed that each of the following exist
 &&
           (fasta_indices_i != fasta_indices.end()) &&
-          (annot_files_i != annot_files.end()) )
+          (annot_files_i != annot_files.end())  && 
+          (seq_starts_i != sub_seq_starts.end())  &&
+          (seq_ends_i != sub_seq_ends.end())         )
+          */
   {
-    aSeq.load_fasta(*seq_files_i, *fasta_indices_i);
+    aSeq.load_fasta(*seq_files_i, *fasta_indices_i,*seq_starts_i,*seq_ends_i);
+    if (*annot_files_i != "")
+      aSeq.load_annot(*annot_files_i);
     the_Seqs.push_back(aSeq);
     cout << aSeq.hdr() << endl;
+    //cout << aSeq.seq() << endl;
     aSeq.clear();
     ++seq_files_i;
     ++fasta_indices_i;
     ++annot_files_i;
+    ++seq_starts_i;
+    ++seq_ends_i;
   }
 }
 
@@ -158,28 +292,103 @@ Mussa::seqcomp()
       all_comps[i][i2].setup("m", window, threshold, seq_lens[i],seq_lens[i2]);
       all_comps[i][i2].seqcomp(the_Seqs[i].seq(), the_Seqs[i2].seq(), false);
       all_comps[i][i2].seqcomp(the_Seqs[i].seq(),the_Seqs[i2].rev_comp(),true);
-      save_file_string = "ana";
-      if (i == 0)
-        save_file_string += "0";
-      else if (i == 1)
-        save_file_string += "1";
-      if (i2 == 1)
-        save_file_string += "1";
-      else if (i2 == 2)
-        save_file_string += "2";
 
       all_comps[i][i2].file_save(save_file_string);
     }
 }
 
 
+
+// Run the n-way step on the_paths: setup() with seq_num, window and
+// threshold, then find_paths_r() over all_comps, then simple_refine().
 void
 Mussa::nway()
 {
-  the_paths.setup(seq_num);
+  the_paths.setup(seq_num, window, threshold);
   the_paths.find_paths_r(all_comps);
+  the_paths.simple_refine();
 }
 
+// Write the current analysis to disk.
+//
+// The base file name is ana_name, with "_w<window>" appended when
+// win_append is set and "_t<threshold>" appended when thres_append is set.
+// Two files are produced:
+//   <base>.museq  the first seq_num entries of the_Seqs, each written by
+//                 its own save(), wrapped in <Mussa_Sequence> tags
+//   <base>.muway  written by the_paths.save()
+//
+// If the .museq file cannot be opened, a message is printed on cerr and
+// the sequence section is skipped; the .muway file is still attempted.
+void
+Mussa::save()
+{
+  string save_path_base, save_path;
+  fstream save_file;
+  int i;
+
+  save_path_base = ana_name;
+
+  if (win_append)
+    save_path_base += "_w" + int_to_str(window);
+
+  if (thres_append)
+    save_path_base += "_t" + int_to_str(threshold);
+
+  // save sequence and annots to a special mussa file
+  save_path = save_path_base + ".museq";
+  save_file.open(save_path.c_str(), ios::out);
+  if (!save_file)
+  {
+    // a failed open used to go unnoticed and silently produced no file
+    cerr << "Mussa::save: could not open " << save_path
+         << " for writing" << endl;
+  }
+  else
+  {
+    save_file << "<Mussa_Sequence>" << endl;
+
+    // the one open stream is handed to every sequence in turn
+    for (i = 0; i < seq_num; i++)
+      the_Seqs[i].save(save_file);
+
+    save_file << "</Mussa_Sequence>" << endl;
+    save_file.close();
+  }
+
+  // save nway paths to its mussa save file
+  save_path = save_path_base + ".muway";
+  the_paths.save(save_path);
+}
+
+// Load a saved analysis whose files are named after ana_name.
+//
+// the_paths.load() is called on "<ana_name>.muway" and its return value
+// becomes seq_num.  Then load_museq() is called on "<ana_name>.museq"
+// once for each index 1..seq_num, and every loaded sequence is appended
+// to the_Seqs.
+void
+Mussa::load()
+{
+  Sequence scratch_seq;
+  string file_path;
+
+  file_path = ana_name + ".muway";
+  seq_num = the_paths.load(file_path);
+
+  file_path = ana_name + ".museq";
+
+  // indices passed to load_museq start at 1, not 0
+  for (int seq_index = 1; seq_index <= seq_num; seq_index++)
+  {
+    // reuse one Sequence object, cleared before each load
+    scratch_seq.clear();
+    scratch_seq.load_museq(file_path, seq_index);
+    the_Seqs.push_back(scratch_seq);
+  }
+}
+
+
+
+// In Memorial to Everything that's gone wrong in the last week
+// and Everything that will go wrong in the next 2 weeks 03/02/2004 - Tristan
+// Show the connections view and run the GUI.
+//
+// Creates an Fl_Window titled "Mussa Connections" of size x_max by y_max,
+// creates a ConnView at (0, 0) with the same size, sets the view up with
+// ana_name, seq_num, window and pointers to the_Seqs and the_paths, calls
+// scale_paths() on it, then ends and shows the window and calls Fl::run().
+void
+Mussa::FuckingPieceOfShit(int x_max, int y_max)
+{
+  Fl_Window *top_win = new Fl_Window(x_max, y_max, "Mussa Connections");
+
+  // the view is created after the window and before the window's end()
+  ConnView *view = new ConnView(0, 0, x_max, y_max);
+  view->setup(ana_name, seq_num, window, &the_Seqs, &the_paths);
+  view->scale_paths();
+
+  top_win->end();
+  top_win->show();
+
+  Fl::run();
+}
+
+/*
+      cout << "fee\n";
+      cout << "fie\n";
+      cout << "foe\n";
+      cout << "fum\n";
+*/
+
+
 
 void
 Mussa::save_old()
@@ -211,7 +420,7 @@ Mussa::load_old(char * load_file_path, int s_num)
   Sequence a_seq;
 
   seq_num = s_num;
-  the_paths.setup(seq_num);
+  the_paths.setup(seq_num, 0, 0);
   save_file.open(load_file_path, ios::in);
 
   // currently loads old mussa format
@@ -252,30 +461,3 @@ Mussa::load_old(char * load_file_path, int s_num)
 
   //the_paths.save("tmp.save");
 }
-
-
-// In Memorial to Everything that's gone wrong in the last week
-// and Everything that will go wrong in the next 2 weeks 03/02/2004 - Tristan
-void
-Mussa::FuckingPieceOfShit(int x_max, int y_max)
-{
-
-  Fl_Window *conn_window = new Fl_Window(x_max, y_max, "Mussa Connections");
-  ConnView *conn_box = new ConnView(0, 0, x_max, y_max);
-  conn_box->setup(ana_name, seq_num, window, &the_Seqs, &the_paths);
-  conn_box->scale_paths();
-  conn_box->spawnSeq();
-  conn_window->end();
-  conn_window->show();
-
-  Fl::run();
-}
-
-/*
-      cout << "fee\n";
-      cout << "fie\n";
-      cout << "foe\n";
-      cout << "fum\n";
-*/
-
-