--- /dev/null
+#CFLAGS=-ftemplate-depth-20
+CFLAGS=-O -ftemplate-depth-20
+LDFLAGS=
+
+#all: seqcomp
+
+sequence.o : sequence.cc sequence.hh
+ g++ $(CFLAGS) -c sequence.cc
+
+flp.o : flp.cc flp.hh
+ g++ $(CFLAGS) -c flp.cc
+
+flp_seqcomp.o : flp_seqcomp.cc flp.hh
+ g++ $(CFLAGS) -c flp_seqcomp.cc
+
+seqcomp : seqcomp.cc flp.o flp_seqcomp.o sequence.o
+ g++ $(CFLAGS) -o seqcomp seqcomp.cc flp.o flp_seqcomp.o sequence.o
+
+mussa_nway.o : mussa_nway.cc mussa_nway.hh
+ g++ $(CFLAGS) -c mussa_nway.cc
+
+mussa_class.o : mussa_class.cc mussa_class.hh
+ g++ $(CFLAGS) -c mussa_class.cc
+
+mussa : mussa.cc mussa_class.o mussa_nway.o flp_seqcomp.o flp.o sequence.o
+ g++ $(CFLAGS) -o mussa mussa.cc mussa_class.o mussa_nway.o flp_seqcomp.o flp.o sequence.o
+
+
+#match_list_type.o : modules/match_list_type.c modules/match_list_type.h \
+# modules/seqcomp_defs.h
+# gcc $(CFLAGS) -c modules/match_list_type.c
+
+#match_list_head_type.o : modules/match_list_head_type.c \
+# modules/match_list_head_type.h modules/seqcomp_defs.h
+# gcc $(CFLAGS) -c modules/match_list_head_type.c
+
+#result_type.o : modules/result_type.c modules/result_type.h \
+# modules/seqcomp_defs.h
+# gcc $(CFLAGS) -c modules/result_type.c
+
+#compare.o : modules/compare.c modules/compare.h \
+# modules/seqcomp_defs.h
+# gcc $(CFLAGS) -c modules/compare.c
+
+#seqcomp : seqcomp.c modules/seqcomp_defs.h sequence_type.o match_list_type.o \
+# match_list_head_type.o result_type.o compare.o
+# gcc $(LDFLAGS) -o seqcomp seqcomp.c sequence_type.o match_list_type.o \
+# match_list_head_type.o result_type.o compare.o
--- /dev/null
+// ----------------------------------------
+// ---------- flp.cc -----------
+// ----------------------------------------
+
+#include "flp.hh"
+
+
+FLPs::FLPs()
+{
+}
+
+void
+FLPs::setup(string type, int win_size, int hard_thres, int len1, int len2)
+{
+ list<match> empty_match_list;
+ int i;
+
+ window_size = win_size;
+ hard_threshold = hard_thres;
+ seq1_length = len1;
+ seq2_length = len2;
+ ana_type = type;
+ seq1_win_num = seq1_length - win_size + 1;
+ seq2_win_num = seq2_length - win_size + 1;
+ all_matches.reserve(seq1_win_num);
+
+ empty_match_list.clear();
+ for(i = 0; i < seq1_win_num; i++)
+ all_matches.push_back(empty_match_list);
+}
+
+int
+FLPs::win_num()
+{
+ return seq1_win_num;
+}
+
+/*
+bool
+FLPs::match_less(match *match1, match *match2)
+{
+ if (match1->score < match2->score)
+ return true;
+ else if ( (match1->score == match2->score) &&
+ (match1->index < match2->index) )
+ return true;
+ else
+ return false;
+}
+
+void
+FLPs::sort()
+{
+ int i;
+
+ for(i = 0; i < seq1_win_num; i++)
+ if (!all_matches[i].empty())
+ all_matches[i].sort(&FLPs::match_less);
+}
+*/
+
+list<int>
+FLPs::matches(int index)
+{
+ list<int> these_matches;
+ list<match>::iterator list_i, list_end;
+
+ index = abs(index);
+ list_i = all_matches[index].begin();
+ list_end = all_matches[index].end();
+ //if (list_i == list_end)
+ //cout << "its fuckin empty!!!!";
+ while (list_i != list_end)
+ {
+ these_matches.push_back(list_i->index);
+ //cout << list_i->index << " ";
+ ++list_i;
+ }
+ //cout << endl;
+
+ return these_matches;
+}
+
+
+void
+FLPs::file_save(string save_file_path)
+{
+ fstream save_file;
+ list<match>::iterator match_i, match_list_end;
+ int i;
+
+ save_file.open(save_file_path.c_str(), ios::out);
+
+ save_file << "<Seqcomp type=" << ana_type << " win=" << window_size;
+ save_file << " thres=" << hard_threshold << ">\n";
+
+ for(i = 0; i < seq1_win_num; i++)
+ {
+ if (!all_matches[i].empty())
+ {
+ match_i = all_matches[i].begin();
+ match_list_end = all_matches[i].end();
+ while (match_i != match_list_end)
+ {
+ save_file << match_i->index << "," << match_i->score << " ";
+ ++match_i;
+ }
+ }
+ save_file << endl;
+ }
+
+ save_file << "</Seqcomp>\n";
+
+ save_file.close();
+}
+
+void
+FLPs::file_load(string file_path)
+{
+ fstream data_file;
+ string file_data, file_data_line, index_data, score_data;
+ int type, window, hard_thres, index, score;
+ bool tag_open = false;
+
+
+ data_file.open(file_path.c_str(), ios::in);
+
+ getline(data_file,file_data_line);
+ // parse seqcomp open tag and parameters
+ // eg <Seqcomp type=mussa win=30 thres=21>
+ // if parse successful...
+ tag_open = true;
+
+ while ((!data_file.eof()) && tag_open)
+ {
+ getline(data_file,file_data_line);
+ /*
+ if ( = "</Seqcomp>")
+ tag_open = false;
+ // parse line of matches
+ else
+ {
+ needs stuff...um, code...
+ {
+ index = atoi(index_data);
+ core = atoi(score_data);
+ cout << index << "," << score << " ";
+ data_file >> index_data;
+ {
+ }
+ cout << "\n";
+ */
+ }
+
+ data_file.close();
+}
+
+/*
+ cout << "fee\n";
+ cout << "fie\n";
+ cout << "foe ";
+ cout << "fum\n";
+*/
+
+
--- /dev/null
+// ----------------------------------------
+// ---------- flp.hh -----------
+// ----------------------------------------
+
+#include "sequence.hh"
+
+
+// FLP = Fixed Length Pairs (Data)
+// vector of linked lists of the match type struct
+class FLPs
+{
+ private:
+ int window_size;
+ int hard_threshold;
+ int seq1_length, seq2_length, seq1_win_num, seq2_win_num;
+ string ana_type;
+
+
+ struct match
+ {
+ int index;
+ int score;
+ };
+
+ vector<list<match> > all_matches;
+
+
+ public:
+ FLPs();
+ void setup(string type, int win_size, int hard_thres, int len1, int len2);
+ inline void add(int seq1_i, int seq2_i, int a_score, int i2_offset);
+ void seqcomp(string seq1, string seq2, bool is_RC);
+ //bool FLPs::match_less(match *match1, match *match2);
+ //void FLPs::sort();
+ list<int> matches(int index); //later maybe version with int threshold
+ int win_num();
+ void file_save(string save_file_path);
+ void file_load(string file_path);
+};
--- /dev/null
+// ----------------------------------------
+// ---------- flp_seqcomp.cc -----------
+// ----------------------------------------
+
+#include "flp.hh"
+
+// Titus thinks the start of an RC window should be its "rear" in the
+// normal version of the sequence. He's wrong, of course, and in the
+// future my followers will annihilate his in the Final Battle, but for
+// now I'll let him have his way.
+// note seq2_i is actually the index of the leaving bp, so need to +1
+
+inline void
+FLPs::add(int seq1_i, int seq2_i, int a_score, int i2_offset)
+{
+ match a_match;
+
+
+ if (a_score >= hard_threshold)
+ {
+ a_match.index = seq2_i + i2_offset;
+ a_match.score = a_score;
+
+ all_matches[seq1_i].push_back(a_match);
+ }
+}
+
+
+void
+FLPs::seqcomp(string sseq1, string sseq2, bool is_RC)
+{
+ int start_i, seq1_i, seq2_i, win_i; // loop variables
+ int matches; // number of matches in to a window
+ int i2_offset;
+ char * seq1, * seq2;
+
+
+ seq1 = (char *) sseq1.c_str();
+ seq2 = (char *) sseq2.c_str();
+
+
+ if (is_RC)
+ i2_offset = window_size - seq2_length;
+ else
+ i2_offset = 1;
+
+
+ // loop thru the start positions for sequence 1
+ for(start_i = 0; start_i < seq1_win_num; start_i++)
+ {
+ matches = 0;
+ // compare initial window
+ for(win_i = 0; win_i < window_size; win_i++)
+ if ((seq1[start_i+win_i] == seq2[win_i]) &&
+ (seq1[start_i+win_i] != 'N')) // N's match nothing **
+ matches++;
+
+ //seq2_i is always 0 for initial win
+ seq2_i = 0;
+
+ // call inlined function that adds match if it is above threshold
+ add(start_i, seq2_i, matches, i2_offset);
+
+ // run through rest of seq1 and seq2 with current seq1 offset (ie start_i)
+ // compare the bps leaving and entering the window and modify matches count
+ seq1_i = start_i;
+ while( (seq1_i < seq1_win_num-1) && (seq2_i < seq2_win_num-1) )
+ {
+ // copmpare the bp leaving the window
+ if ((seq1[seq1_i] == seq2[seq2_i]) &&
+ (seq1[seq1_i] != 'N')) // N's match nothing
+ matches = matches -1;
+
+ // compare the bp entering the window
+ if ((seq1[seq1_i+window_size] == seq2[seq2_i+window_size]) &&
+ (seq1[seq1_i+window_size] != 'N')) // N's match nothing
+ matches = matches + 1;
+
+ add(seq1_i + 1, seq2_i, matches, i2_offset);
+
+ seq1_i = seq1_i + 1; // increment seq1_i to next window
+ seq2_i = seq2_i + 1; // increment seq2_i to next window
+ } // end of loop thru the current offset sequence
+ } // end of loop thru the start positions of seq1 sequence
+
+ // loop thru the start positions for sequence 1
+ for(start_i = 1; start_i < seq2_win_num; start_i++)
+ {
+ matches = 0;
+
+ // compare initial window
+ for(win_i = 0; win_i < window_size; win_i++)
+ if ((seq2[start_i+win_i] == seq1[win_i]) &&
+ (seq2[start_i+win_i] != 'N')) // N's match nothing
+ matches++;
+
+ //seq2_i is always start_i for initial window
+ seq2_i = start_i;
+
+ add(0, seq2_i, matches, i2_offset);
+
+ // run through rest of seq1 and seq2 with current seq1 offset (ie start_i)
+ // compare the bps leaving and entering the window and modify matches count
+ seq1_i = 0;
+ while( (seq1_i < seq1_win_num-1) && (seq2_i < seq2_win_num-1) )
+ {
+ // copmpare the bp leaving the window
+ if ((seq1[seq1_i] == seq2[seq2_i]) &&
+ (seq1[seq1_i] != 'N')) // N's match nothing
+ matches = matches - 1;
+
+ // compare the bp entering the window
+ if ((seq1[seq1_i+window_size] == seq2[seq2_i+window_size]) &&
+ (seq1[seq1_i+window_size] != 'N')) // N's match nothing
+ matches = matches + 1;
+
+ add(seq1_i + 1, seq2_i, matches, i2_offset);
+
+ seq1_i = seq1_i + 1; // increment seq1_i to next window
+ seq2_i = seq2_i + 1; // increment seq2_i to next window
+ } // end of loop thru the current offset sequence
+ } // end of loop thru the start positions of seq2 sequence
+}
+
+/*
+ cout << "fee\n";
+ cout << "fie\n";
+ cout << "foe\n";
+ cout << "fum\n";
+*/
--- /dev/null
+#include "mussa_class.hh"
+#include "time.h"
+
+int main(int argc, char **argv)
+{
+ Mussa overlord;
+ char * para_file;
+ time_t t1, t2, begin, end;
+ double setuptime, seqloadtime, seqcomptime, nwaytime, savetime, totaltime;
+
+ begin = time(NULL);
+
+ // need more sophisticated arg reading structure
+ // support -w and -t override options...other stuff??
+ para_file = * ++argv;
+
+ t1 = time(NULL);
+ overlord.setup(para_file);
+ t2 = time(NULL);
+ setuptime = difftime(t2, t1);
+
+
+ cout << "fee\n";
+ t1 = time(NULL);
+ overlord.get_Seqs();
+ t2 = time(NULL);
+ seqloadtime = difftime(t2, t1);
+
+
+ cout << "fie\n";
+ t1 = time(NULL);
+ overlord.seqcomp();
+ t2 = time(NULL);
+ seqcomptime = difftime(t2, t1);
+
+
+ cout << "foe\n";
+ t1 = time(NULL);
+ overlord.nway();
+ t2 = time(NULL);
+ nwaytime = difftime(t2, t1);
+
+
+ cout << "fum\n";
+ t1 = time(NULL);
+ overlord.save();
+ t2 = time(NULL);
+ savetime = difftime(t2, t1);
+
+ end = time(NULL);
+ totaltime = difftime(end, begin);
+
+ cout << "setup\tseqload\tseqcomp\tnway\tsave\ttotal\n";
+ cout << setuptime << "\t";
+ cout << seqloadtime << "\t";
+ cout << seqcomptime << "\t";
+ cout << nwaytime << "\t";
+ cout << savetime << "\t";
+ cout << totaltime << "\n";
+
+}
+
+
+
+
+/*
+ cout << "fee\n";
+ cout << "fie\n";
+ cout << "foe\n";
+ cout << "fum\n";
+*/
--- /dev/null
+// ----------------------------------------
+// ---------- mussa_class.cc -----------
+// ----------------------------------------
+
+#include "mussa_class.hh"
+
+
+Mussa::Mussa()
+{
+}
+
+void
+Mussa::setup(char * para_file_path)
+{
+ ifstream para_file;
+ string file_data_line;
+ string param, value, annot_file;
+ int split_index, fasta_index;
+ bool seq_params, did_seq;
+ int bogo;
+
+
+ win_append = false;
+ thres_append = false;
+
+ para_file.open(para_file_path, ios::in);
+
+ getline(para_file,file_data_line);
+ split_index = file_data_line.find(" ");
+ param = file_data_line.substr(0,split_index);
+ value = file_data_line.substr(split_index+1);
+
+
+ while (!para_file.eof())
+ {
+ did_seq = false;
+ if (param == "ANA_NAME")
+ ana_name = value;
+ else if (param == "APPEND_WIN")
+ win_append = true;
+ else if (param == "APPEND_THRES")
+ thres_append = true;
+ else if (param == "SEQUENCE_NUM")
+ seq_num = atoi(value.c_str());
+ else if (param == "WINDOW")
+ window = atoi(value.c_str());
+ else if (param == "THRESHOLD")
+ threshold = atoi(value.c_str());
+ else if (param == "SEQUENCE")
+ {
+ seq_files.push_back(value);
+ fasta_index = 1;
+ annot_file = "";
+ seq_params = true;
+
+ while ((!para_file.eof()) && seq_params)
+ {
+ getline(para_file,file_data_line);
+ split_index = file_data_line.find(" ");
+ param = file_data_line.substr(0,split_index);
+ value = file_data_line.substr(split_index+1);
+
+ if (param == "FASTA_INDEX")
+ fasta_index = atoi(value.c_str());
+ else if (param == "ANNOTATION")
+ annot_file = value;
+ //ignore empty lines or that start with '#'
+ else if ((param == "") || (param == "#")) {}
+ else seq_params = false;
+ }
+
+
+ fasta_indices.push_back(fasta_index);
+ annot_files.push_back(annot_file);
+ did_seq = true;
+
+ }
+ //ignore empty lines or that start with '#'
+ else if ((param == "") || (param == "#")) {}
+ else
+ {
+ cout << "Illegal/misplaced mussa parameter in file\n";
+ cout << param << "\n";
+ }
+
+ if (!did_seq)
+ {
+ getline(para_file,file_data_line);
+ split_index = file_data_line.find(" ");
+ param = file_data_line.substr(0,split_index);
+ value = file_data_line.substr(split_index+1);
+ did_seq = false;
+ }
+ }
+
+ para_file.close();
+
+ cout << "ana_name = " << ana_name << win_append << win_append << "\n";
+ cout << "window = " << window << " threshold = " << threshold << "\n";
+}
+
+
+void
+Mussa::get_Seqs()
+{
+ list<string>::iterator seq_files_i, annot_files_i;
+ list<int>::iterator fasta_indices_i;
+ Sequence aSeq;
+
+ seq_files_i = seq_files.begin();
+ fasta_indices_i = fasta_indices.begin();
+ annot_files_i = annot_files.begin();
+
+ while ( (seq_files_i != seq_files.end()) &&
+ (fasta_indices_i != fasta_indices.end()) &&
+ (annot_files_i != annot_files.end()) )
+ {
+ aSeq.load_fasta(*seq_files_i, *fasta_indices_i);
+ the_Seqs.push_back(aSeq);
+ cout << aSeq.hdr() << endl;
+ aSeq.clear();
+ ++seq_files_i;
+ ++fasta_indices_i;
+ ++annot_files_i;
+ }
+}
+
+
+void
+Mussa::seqcomp()
+{
+ int i, i2; // loop vars over sequences to analyze
+ vector<int> seq_lens;
+ vector<FLPs> empty_FLP_vector;
+ FLPs dummy_comp;
+ string save_file_string;
+
+ empty_FLP_vector.clear();
+ for(i = 0; i < seq_num; i++)
+ {
+ all_comps.push_back(empty_FLP_vector);
+ for(i2 = 0; i2 < seq_num; i2++)
+ all_comps[i].push_back(dummy_comp);
+ }
+
+ for(i = 0; i < seq_num; i++)
+ seq_lens.push_back(the_Seqs[i].len());
+
+ for(i = 0; i < seq_num; i++)
+ for(i2 = i+1; i2 < seq_num; i2++)
+ {
+ all_comps[i][i2].setup("m", window, threshold, seq_lens[i],seq_lens[i2]);
+ all_comps[i][i2].seqcomp(the_Seqs[i].seq(), the_Seqs[i2].seq(), false);
+ all_comps[i][i2].seqcomp(the_Seqs[i].seq(),the_Seqs[i2].rev_comp(),true);
+ }
+}
+
+
+void
+Mussa::nway()
+{
+ the_paths.setup(seq_num);
+ the_paths.find_paths_r(all_comps);
+}
+
+
+void
+Mussa::save()
+{
+ int i;
+
+ save_file.open(ana_name.c_str(), ios::out);
+
+ for(i = 0; i < seq_num; i++)
+ save_file << the_Seqs[i].seq() << endl;
+
+ save_file << window << endl;
+ save_file.close();
+ //note more complex eventually since ana_name may need to have
+ //window size, threshold and other stuff to modify it...
+ the_paths.save_old(ana_name);
+}
+
+/*
+ cout << "fee\n";
+ cout << "fie\n";
+ cout << "foe\n";
+ cout << "fum\n";
+*/
+
+
--- /dev/null
+// ----------------------------------------
+// ---------- mussa_class.hh -----------
+// ----------------------------------------
+
+#include "mussa_nway.hh"
+
+
+class Mussa
+{
+ private:
+ string ana_name;
+ int seq_num, window, threshold;
+ list<string> seq_files, annot_files;
+ list<int> fasta_indices;
+ bool win_append, thres_append;
+
+ vector<Sequence> the_Seqs;
+ vector<vector<FLPs> > all_comps;
+ Nway_Paths the_paths;
+
+ public:
+ Mussa();
+ void setup(char * para_file_path);
+ void get_Seqs();
+ void seqcomp();
+ void nway();
+ void save();
+
+ fstream save_file;
+};
+
--- /dev/null
+// ----------------------------------------
+// ---------- mussa_nway.cc -----------
+// ----------------------------------------
+
+#include "mussa_nway.hh"
+/*
+ cout << "fee\n";
+ cout << "fie\n";
+ cout << "foe\n";
+ cout << "fum\n";
+*/
+
+
+Nway_Paths::Nway_Paths()
+{
+}
+
+void
+Nway_Paths::setup(int sp_num)
+{
+ species_num = sp_num;
+}
+
+
+void
+Nway_Paths::path_search(vector<vector<FLPs> > all_comparisons, vector<int> path, int depth)
+{
+ list<int> new_nodes, trans_check_nodes;
+ list<int>::iterator new_nodes_i, new_nodes_end;
+ int i;
+ bool trans_check_good;
+
+ new_nodes = all_comparisons[depth - 1][depth].matches(path[depth-1]);
+ new_nodes_i = new_nodes.begin();
+ new_nodes_end = new_nodes.end();
+ while(new_nodes_i != new_nodes_end)
+ {
+ //cout << " * species " << depth << " node: " << *new_nodes_i << endl;
+ // check transitivity with previous nodes in path
+ for(i = 0; i < depth - 1; i++)
+ {
+ trans_check_good = true;
+ trans_check_nodes = all_comparisons[i][depth].matches(path[i]);
+ if (trans_check_nodes.end() == find(trans_check_nodes.begin(),trans_check_nodes.end(), *new_nodes_i) )
+ trans_check_good = false;
+ }
+
+ if (trans_check_good)
+ {
+ path.push_back(*new_nodes_i);
+ if (depth < species_num - 1)
+ path_search(all_comparisons, path, depth + 1);
+ else
+ pathz.push_back(path);
+ path.pop_back();
+ }
+ ++new_nodes_i;
+ }
+}
+// cout << " ****I have the power...\n";
+
+/* use this if I ever get the friggin seqcomp match lists to sort...
+ if (binary_search(trans_check_nodes.begin(), trans_check_nodes.end(),
+ *new_nodes_i))
+*/
+
+void
+Nway_Paths::find_paths_r(vector<vector<FLPs> > all_comparisons)
+{
+ vector<int> path;
+ int win_i, window_num;
+ list<int> new_nodes;
+ list<int>::iterator new_nodes_i, new_nodes_end;
+
+ pathz.clear();
+ window_num = all_comparisons[0][1].win_num();
+ cout << window_num << endl;
+ // loop thru all windows in first species
+ for (win_i = 0; win_i < window_num; win_i++)
+ {
+ path.clear();
+ path.push_back(win_i);
+ new_nodes = all_comparisons[0][1].matches(path[0]);
+ new_nodes_i = new_nodes.begin();
+ new_nodes_end = new_nodes.end();
+ //if (new_nodes_i != new_nodes_end)
+ //cout << "* species 0 node: " << win_i << endl;
+ // cout << "foookin hell\n";
+ path.push_back(0);
+ while(new_nodes_i != new_nodes_end)
+ {
+ //cout << " * species 1 node: " << *new_nodes_i << endl;
+ path[1] = *new_nodes_i;
+ path_search(all_comparisons, path, 2);
+ ++new_nodes_i;
+ }
+ }
+}
+
+void
+Nway_Paths::save(string save_file_path)
+{
+ fstream save_file;
+ list<vector<int> >::iterator path_i, paths_end;
+ vector<int> a_path;
+ int i;
+
+ save_file.open(save_file_path.c_str(), ios::out);
+
+ //save_file << "<Seqcomp type=" << ana_type << " win=" << window_size;
+ //save_file << " thres=" << hard_threshold << ">\n";
+
+ path_i = pathz.begin();
+ paths_end = pathz.end();
+ if (path_i == paths_end)
+ cout << "Arrrrrrgggghhhhhh\n";
+ while(path_i != paths_end)
+ {
+ a_path = *path_i;
+ //cout << a_path.size() << endl;
+ for(i = 0; i < species_num; ++i)
+ save_file << i << "," << a_path[i] << " ";
+ save_file << endl;
+ ++path_i;
+ }
+
+//save_file << "</Mussa>\n";
+ save_file.close();
+}
+
+void
+Nway_Paths::save_old(string save_file_path)
+{
+ fstream save_file;
+ list<vector<int> >::iterator path_i, paths_end;
+ vector<int> a_path;
+ int i;
+
+ save_file.open(save_file_path.c_str(), ios::app);
+
+ path_i = pathz.begin();
+ paths_end = pathz.end();
+ while(path_i != paths_end)
+ {
+ a_path = *path_i;
+ //cout << a_path.size() << endl;
+ for(i = 0; i < species_num; ++i)
+ save_file << i << "," << a_path[i] << " ";
+ save_file << endl;
+ ++path_i;
+ }
+ save_file.close();
+}
+
+
+/*
+void
+Nway_Paths::find_paths(vector<vector<FLPs> > all_comparisons)
+{
+ int win_i, sp_i;
+ vector<list<list<int> > > path_src_tree;
+ list<int> new_nodes;
+ list<int>::iterator node_i, node_end;
+ <list<list<int> > >::iterator branch_i, branch_end;
+
+ pathz.clear();
+ path_src_tree.reserve(species_num - 1);
+
+
+ // loop thru all windows in first species
+ for (win_i = 0; win_i < window_num; win_i++)
+ {
+ // clear the path search tree
+ for(i = 0; i < species_num; i++)
+ path_src_tree[i].clear();
+
+ // top level kept empty even tho implicity has one entry of the first
+ // species at this window - why bother, adds a little speed
+
+ // get connection list for first species, creating a list of nodes
+ // of second species connected to the first species at this window
+ new_nodes = all_comparisons[0][1];
+ path_src_tree[1].push_back(new_nodes);
+
+ // loop thru rest of species for this window to see if any paths of matches
+ // go across all species
+ // if path search tree becomes empty, break out of loop, no reason to search further
+ sp_i = 1;
+ while ((sp_i < species_num) && (path tree not empty))
+ {
+ branch_i = path_src_tree[1].begin();
+ branch_end = path_src_tree[1].end();
+ while (branch_i != branch_end)
+ {
+ node_i = branch_i->begin();
+ node_end = branch_i->end();
+ }
+
+
+ // loop over all current nodes
+ // get connection list for each node
+ // loop over each previous node in list
+ // get those nodes connection list
+ // intersect previous node connections with current
+
+ ++sp_i;
+ }
+
+ // insert any of the paths found into the master list of paths
+
+ // add no paths if tmp_pathz is empty...
+ }
+}
+
+void Nway_Paths::refine()
+{
+}
+
+void Nway_Paths::print()
+{
+ list<list<int> >::iterator pathz_i;
+
+ cout << "printing list of lists\n";
+ for (pathz_i = pathz.begin(); pathz_i != pathz.end(); ++pathz_i)
+ {
+ for (path_i = pathz_i->begin(); path_i != pathz_i->end(); ++path_i)
+ cout << *path_i << " ";
+ cout << endl;
+ }
+}
+*/
--- /dev/null
+// ----------------------------------------
+// ---------- mussa_nway.hh -----------
+// ----------------------------------------
+
+#include "flp.hh"
+
+
+class Nway_Paths
+{
+ private:
+ int species_num;
+ int threshold;
+ int win_size;
+ list<vector<int> > pathz;
+
+
+ public:
+ Nway_Paths();
+ void setup(int sp_num);
+ void find_paths_r(vector<vector<FLPs> > all_comparisons);
+ void path_search(vector<vector<FLPs> > all_comparisons, vector<int> path, int depth);
+ void save(string save_file_path);
+ void save_old(string save_file_path);
+ void load(string load_file_path);
+ void find_paths(vector<vector<FLPs> > all_comparisons);
+ void refine();
+ void print();
+};
--- /dev/null
+#include "flp.hh"
+#include <time.h>
+#include <iomanip.h>
+
+int main(int argc, char **argv)
+{
+ Sequence seqA, seqB;
+ FLPs analysis;
+ string seqA_file, seqB_file, output_file;
+ string ana_type;
+ int window, threshold;
+ int seqA_len, seqB_len;
+
+ time_t t1, t2, begin, end;
+ double setuptime, comp1time, comp2time, sorttime, savetime, totaltime;
+
+ begin = time(NULL);
+
+ cout << "fee fie foe fum" << endl;
+
+ ana_type = * ++argv;
+ seqA_file = * ++argv;
+ seqB_file = * ++argv;
+ window = atoi(* ++argv);
+ threshold = atoi(* ++argv);
+ output_file = * ++argv;
+
+ t1 = time(NULL);
+
+ seqA.load_fasta(seqA_file, 1);
+ seqA_len = seqA.len();
+ //cout << setw(60) << seqA.hdr() << "\n";
+
+ seqB.load_fasta(seqB_file, 1);
+ seqB_len = seqB.len();
+ //cout << seqB.hdr() << "\n";
+
+
+ cout << "Length: Seq A = " << seqA_len;
+ cout << "; Seq B = " << seqB_len << "\n";
+
+ analysis.setup(ana_type, window, threshold, seqA_len, seqB_len);
+
+ t2 = time(NULL);
+ setuptime = difftime(t2, t1);
+
+ t1 = time(NULL);
+ analysis.seqcomp(seqA.seq(), seqB.seq(), false);
+ t2 = time(NULL);
+ comp1time = difftime(t2, t1);
+
+ t1 = time(NULL);
+ analysis.seqcomp(seqA.seq(), seqB.rev_comp(), true);
+ t2 = time(NULL);
+ comp2time = difftime(t2, t1);
+ /*
+ t1 = time(NULL);
+ analysis.sort();
+ t2 = time(NULL);
+ sorttime = difftime(t2, t1);
+ */
+ t1 = time(NULL);
+ analysis.file_save(output_file);
+ t2 = time(NULL);
+ savetime = difftime(t2, t1);
+
+ end = time(NULL);
+ totaltime = difftime(end, begin);
+
+ cout << "setup\tcomp\trc_comp\tsave\ttotal\n";
+ cout << setuptime << "\t";
+ cout << comp1time << "\t";
+ cout << comp2time << "\t";
+ //cout << sorttime << "\t";
+ cout << savetime << "\t";
+ cout << totaltime << "\n";
+}
+
+/*
+ cout << "fee\n";
+ cout << "fie\n";
+ cout << "foe ";
+ cout << "fum\n";
+*/
--- /dev/null
+// ----------------------------------------
+// ---------- sequence.cc -----------
+// ----------------------------------------
+
+#include "sequence.hh"
+
+
+Sequence::Sequence()
+{
+}
+
+
+void
+Sequence::load_fasta(string file_path, int seq_num)
+{
+ fstream data_file;
+ string file_data_line;
+ int header_counter = 0;
+ bool read_seq = true;
+
+
+ data_file.open(file_path.c_str(), ios::in);
+
+
+ // search for the header of the fasta sequence we want
+ while ( (!data_file.eof()) && (header_counter < seq_num) )
+ {
+ getline(data_file,file_data_line);
+ if (file_data_line.substr(0,1) == ">")
+ header_counter++;
+ }
+
+ header = file_data_line.substr(1);
+
+ while ( !data_file.eof() && read_seq )
+ {
+ getline(data_file,file_data_line);
+ if (file_data_line.substr(0,1) == ">")
+ read_seq = false;
+ else sequence += file_data_line;
+ }
+
+ data_file.close();
+
+ transform(sequence.begin(), sequence.end(), sequence.begin(), toupper);
+}
+
+
+void
+Sequence::load_annot(string file_path)
+{
+}
+
+
+int
+Sequence::len()
+{
+ return sequence.length();
+}
+
+
+string
+Sequence::seq()
+{
+ return sequence;
+}
+
+string
+Sequence::rev_comp()
+{
+ string rev_comp;
+ char conversionTable[257];
+ int seq_i, table_i, len;
+
+ len = sequence.length();
+ rev_comp.reserve(len);
+ // make a conversion table
+ // init all parts of conversion table to '~' character
+ // '~' I doubt will ever appear in a sequence file (jeez, I hope)
+ // and may the fleas of 1000 camels infest the genitals of any biologist (and
+ // seven generations of their progeny) who decides to make it mean
+ // something special!!!
+ // PS - double the curse for any smartass non-biologist who tries it as well
+ for(table_i=0; table_i < 256; table_i++)
+ {
+ conversionTable[table_i] = '~';
+ }
+ // add end of string character for printing out table for testing purposes
+ conversionTable[256] = '\0';
+
+ // add in the characters for the bases we want to convert
+ conversionTable['A'] = 'T';
+ conversionTable['T'] = 'A';
+ conversionTable['G'] = 'C';
+ conversionTable['C'] = 'G';
+ conversionTable['N'] = 'N';
+
+ // finally, the actual conversion loop
+ for(seq_i = len - 1; seq_i >= 0; seq_i--)
+ {
+ table_i = (int) sequence[seq_i];
+ rev_comp += conversionTable[table_i];
+ }
+
+ return rev_comp;
+}
+
+
+string
+Sequence::hdr()
+{
+ return header;
+}
+
+/*
+string
+Sequence::species()
+{
+ return species;
+}
+*/
+
+void
+Sequence::clear()
+{
+ sequence = "";
+ length = 0;
+ header = "";
+ species = "";
+ species_num = -4;
+ annots.clear();
+}
--- /dev/null
+// ----------------------------------------
+// ---------- sequence.hh -----------
+// ----------------------------------------
+
+
+#include <iostream>
+#include <list>
+#include <vector>
+#include <string>
+#include <fstream.h>
+#include <stdlib.h>
+#include <algorithm>
+
+// Sequence data class
+
+class Sequence
+{
+ private:
+ string sequence;
+ int length;
+ string header;
+ string species;
+ int species_num;
+
+ struct annot
+ {
+ int start, end;
+ string name, type, color;
+ };
+
+ list<annot> annots;
+
+
+
+
+ public:
+ Sequence();
+ void load_fasta(string file_path, int seq_num);
+ void load_annot(string file_path);
+ string seq();
+ string rev_comp();
+ int len();
+ string hdr();
+ // string species();
+ void clear();
+};