[project @ 4]
authorDiane Trout <diane@caltech.edu>
Fri, 16 Sep 2005 22:45:17 +0000 (22:45 +0000)
committerDiane Trout <diane@caltech.edu>
Fri, 16 Sep 2005 22:45:17 +0000 (22:45 +0000)
mussa_pI.3

21 files changed:
Makefile
annot_test.cc [new file with mode: 0644]
examples/mck3test.mupa [new file with mode: 0644]
examples/mck3test_example.museq [new file with mode: 0644]
examples/mck3test_example.muway [new file with mode: 0644]
examples/seq/human_mck_pro.fa [new file with mode: 0644]
examples/seq/mouse_mck_pro.fa [new file with mode: 0644]
examples/seq/rabbit_mck_pro.fa [new file with mode: 0644]
mussa.cc
mussa_class.cc
mussa_class.hh
mussa_gui_conn.cc
mussa_gui_conn.hh
mussa_gui_seq.cc
mussa_gui_seq.hh
mussa_nway.cc
mussa_nway.hh
mussa_test.cc [new file with mode: 0644]
seqcomp.cc
sequence.cc
sequence.hh

index 580077ab60655408261c27e58920f1c280b2bb7b..7edccd2b458d63a0970848c613012af31dba36e4 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,13 @@
 #CFLAGS=-ftemplate-depth-20
 CFLAGS=-O -Winline -ftemplate-depth-20
+# -ggdb
 LDFLAGS= -lfltk
 
+# this is what's needed on titus's redhat machine
+# tho he may have things setup oddly to accomodate his development setup 
+#LDFLAGS= -lfltk -L/usr/X11R6/lib -lX11
+
+
 #all: seqcomp
 
 sequence.o : sequence.cc sequence.hh
@@ -29,7 +35,12 @@ mussa_class.o : mussa_class.cc mussa_class.hh
        g++ $(CFLAGS) -c mussa_class.cc
 
 mussa : sequence.o flp.o flp_seqcomp.o mussa_nway.o mussa_gui_seq.o mussa_gui_conn.o mussa_class.o mussa.cc  
-       g++ $(CFLAGS) $(LDFLAGS) -o mussa mussa.cc mussa_class.o mussa_gui_conn.o mussa_gui_seq.o mussa_nway.o flp_seqcomp.o flp.o sequence.o
+       g++ $(CFLAGS) -o mussa mussa.cc mussa_class.o mussa_gui_conn.o mussa_gui_seq.o mussa_nway.o flp_seqcomp.o flp.o sequence.o  $(LDFLAGS)
+
+
+
+annot_test : sequence.o annot_test.cc
+       g++ $(CFLAGS) -o annot_test annot_test.cc sequence.o
 
 
 #match_list_type.o : modules/match_list_type.c modules/match_list_type.h \
diff --git a/annot_test.cc b/annot_test.cc
new file mode 100644 (file)
index 0000000..1df5a86
--- /dev/null
@@ -0,0 +1,23 @@
+#include "sequence.hh"
+
+int main(int argc, char **argv) 
+{
+  Sequence seqA;
+  char * seqA_file, * seqA_annot;
+  cout << "fee fie foe fum" << endl;
+
+
+  seqA_file = * ++argv;
+  seqA_annot = * ++argv;
+
+  seqA.load_fasta(seqA_file, 1);
+  seqA.load_annot(seqA_annot);
+  //seqA_len = seqA.len();
+}
+
+/*
+      cout << "fee\n";
+      cout << "fie\n";
+      cout << "foe  ";
+      cout << "fum\n";
+*/
diff --git a/examples/mck3test.mupa b/examples/mck3test.mupa
new file mode 100644 (file)
index 0000000..8ce6a13
--- /dev/null
@@ -0,0 +1,30 @@
+# name of anaylsis directory and stem for associated files
+ANA_NAME mck3test
+
+# if APPEND vars true, a _wXX and/or _tYY added to analysis name
+# where XX = WINDOW and YY = THRESHOLD
+# Highly recommeded with use of command line override of WINDOW or THRESHOLD
+APPEND_WIN true
+APPEND_THRES true
+  
+# how many sequences are being analyzed
+SEQUENCE_NUM 3
+
+# first sequence info
+SEQUENCE seq/mouse_mck_pro.fa
+# ANNOTATION test.annot
+SEQ_START 600
+
+# the second sequence info
+SEQUENCE seq/human_mck_pro.fa
+# ANNOTATION test.annot
+SEQ_START 600
+# SEQ_END 2400
+
+# third sequence info
+SEQUENCE seq/rabbit_mck_pro.fa
+# ANNOTATION test.annot
+
+# analyses parameters: command line args -w -t will override these
+WINDOW 30
+THRESHOLD 20
diff --git a/examples/mck3test_example.museq b/examples/mck3test_example.museq
new file mode 100644 (file)
index 0000000..6184d20
--- /dev/null
@@ -0,0 +1,20 @@
+<Mussa_Sequence>
+<Sequence>
+CCATCCTGGTCTATAGAGAGAGTTCCAGAACAGCCAGGGCTACAGATAAACCCATCTGGAAAAACAAAGTTGAATGACCCAAGAGGGGTTCTCAGAGGGTGGCGTGTGCTCCCTGGCAAGCCTATGACATGGCCGGGGCCTGCCTCTCTCTGCCTCTGACCCTCAGTGGCTCCCATGAACTCCTTGCCCAATGGCATCTTTTTCCTGCGCTCCTTGGGTTATTCCAGTCTCCCCTCAGCATTCCTTCCTCAGGGCCTCGCTCTTCTCTCTGCTCCCTCCTTGCACAGCTGGCTCTGTCCACCTCAGATGTCACAGTGCTCTCTCAGAGGAGGAAGGCACCATGTACCCTCTGTTTCCCAGGTAAGGGTTCAATTTTTAAAAATGGTTTTTTGTTTGTTTGTTTGTTTGTTTGTTTGTTTGTTTTTCAAGACAGGGCTCCTCTGTGTAGTCCTAACTGTCTTGAAACTCCCTCTGTAGACCAGGTCGACCTCGAACTCTTGAAACCTGCCACGGACCACCCAGTCAGGTATGGAGGTCCCTGGAATGAGCGTCCTCGAAGCTAGGTGGGTAAGGGTTCGGCGGTGACAAACAGAAACAAACACAGAGGCAGTTTGAATCTGAGTGTATTTTGCAGCTCTCAAGCAGGGGATTTTATACATAAAAAAAAAAAAAAAAAAAAAACCAAACATTACATCTCTTAGAAACTATATCCAATGAAACAATCACAGATACCAACCAAAACCATTGGGCAGAGTAAAGCACAAAAATCATCCAAGCATTACAACTCTGAAACCATGTATTCAGTGAATCACAAACAGAACAGGTAACATCATTATTAATATAAATCACCAAAATATAACAATTCTAAAAGGATGTATCCAGTGGGGGCTGTCGTCCAAGGCTAGTGGCAGATTTCCAGGAGCAGGTTAGTAAATCTTAACCACTGAACTAACTCTCCAGCCCCATGGTCAATTATTATTTAGCATCTAGTGCCTAATTTTTTTTTATAAATCTTCACTATGTAATTTAAAACTATTTTAATTCTTCCTAATTAAGGCTTTCTTTACCATATACCAAAATTCACCTCCAATGACACACGCGTAGCCATATGAAATTTTATTGTTGGGAAAATTTGTACCTATCATAATAGTTTTGTAAATGATTTAAAAAGCAAAGTGTTAGCCGGGCGTGGTGGCACACGCCTTTAATCCCTGCACTCGGGAGGCAGGGGCAGGAGGATTTCTGAGTTTGAGGCCAGCCTGGTCTACAGAGTGAGTTCCAGGACAGCCAGGGCTACACAGAGAAACCCTGTCTCGAACCCCCCACCCCCCAAAAAAAGCAAAGTGTTGGTTTCCTTGGGGATAAAGTCATGTTAGTGGCCCATCTCTAGGCCCATCTCACCCATTATTCTCGCTTAAGATCTTGGCCTAGGCTACCAGGAACATGTAAATAAGAAAAGGAATAAGAGAAAACAAAACAGAGAGATTGCCATGAGAACTACGGCTCAATATTTTTTCTCTCCGGCGAAGAGTTCCACAACCATCTCCAGGAGGCCTCCACGTTTTGAGGTCAATGGCCTCAGTCTGTGGAACTTGTCACACAGATCTTACTGGAGGTGGTGTGGCAGAAACCCATTCCTTTTAGTGTCTTGGGCTAAAAGTAAAAGGCCCAGAGGAGGCCTTTGCTCATCTGACCATGCTGACAAGGAACACGGGTGCCAGGACAGAGGCTGGACCCCAGGAACACCTTAAACACTTCTTCCCTTCTCCGCCCCCTAGAGCAGGCTCCCCTCACCAGCCTGGGCAGAAATGGGGGAAGATGGAGTGAAGCCATACTGGCTACTCCAGAATCAACAGAGGGAGCCGGGGGCAATACTGGAGAAGCTGGTCTCCCCCCAGGGGCAATCCTGGCACCTCCCAGGCAGAAGAGGAAACTTCCACAGTGCATCTCACTTCCATGAATCCCCTCCTCGGACTCTGAGGTCCTTGGTCACAGCTGAGGTGCAAAAGGCTCCTGTCATATTGTGTCCTGCTCTGGTCTGCCTTCCACAGCTTGGGGGCCACCTAGCCCACCTCTCCCTAGGGATGAGAGCAGCCACTACGGGTCTAGGCTGCCCATGTAAGGAGGCAAGGCCTGGGGACACCCGAGATGCCTGGTTATAATTAACCCAGACATGTGGCTGCCCCCCCCCCCCCAACACCTGCTGCCTGAGCCTCACCCCCACCCCGGTGCCTGGGTCTTAGGCTCTGTACACCATGGAGGAGAAGCTCGCTCTAAAAATAACCCTGTCCCTGGTGGATCCAGGGTGAGGGGCAGGCTGAGGGCGGCCACTTCCCTCAGCCGCAGGTTTGTTTTCCCAAGAATGGTTTTTCTGCTTCTGTAGCTTTTCCTGTCAATTCTGCCATGGTGGAGCAGCCTGCACTGGGCTTCTGGGAGAAACCAAACCGGGTTCTAACCTTTCAGCTACAGTTATTGCCTTTCCTGTAGATGGGCGACTACAGCCCCACCCCCACCCCCGTCTCCTGTATCCTTCCTGGGCCTGGGGATCCTAGGCTTTCACTGGAAATTTCCCCCCAGGTGCTGTAGGCTAGAGTCACGGCTCCCAAGAACAGTGCTTGCCTGGCATGCATGGTTCTGAACCTCCAACTGCAAAAAATGACACATACCTTGACCCTTGGAAGGCTGAGGCAGGGGGATTGCCATGAGTGCAAAGCCAGACTGGGTGGCATAGTTAGACCCTGTCTCAAAAAACCAAAAACAATTAAATAACTAAAGTCAGGCAAGTAATCCTACTCGGGAGACTGAGGCAGAGGGATTGTTACATGTCTGAGGCCAGCCTGGACTACATAGGGTTTCAGGCTAGCCCTGTCTACAGAGTAAGGCCCTATTTCAAAAACACAAACAAAATGGTTCTCCCAGCTGCTAATGCTCACCAGGCAATGAAGCCTGGTGAGCATTAGCAATGAAGGCAATGAAGGAGGGTGCTGGCTACAATCAAGGCTGTGGGGGACTGAGGGCAGGCTGTAACAGGCTTGGGGGCCAGGGCTTATACGTGCCTGGGACTCCCAAAGTATTACTGTTCCATGTTCCCGGCGAAGGGCCAGCTGTCCCCCGCCAGCTAGACTCAGCACTTAGTTTAGGAACCAGTGAGCAAGTCAGCCCTTGGGGCAGCCCATACAAGGCCATGGGGCTGGGCAAGCTGCACGCCTGGGTCCGGGGTGGGCACGGTGCCCGGGCAACGAGCTGAAAGCTCATCTGCTCTCAGGGGCCCCTCCCTGGGGACAGCCCCTCCTGGCTAGTCACACCCTGTAGGCTCCTCTATATAACCCAGGGGCACAGGGGCTGCCCCCGGGTCAC
+</Sequence>
+<Annotations>
+300 600 Glorp Glorptype
+</Annotations>
+<Sequence>
+GGATCCTTCCTCCTTGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACTGCACCTGGCCTATTACCCTTCTCAGGCTCTGGAGTCCATCCTTCTGCTCTGTCTCCCTCAGTTCAATTGTTTTTTGTTTTTTGTTTTTTTTTTAGACACAGTCTCGCTCTGTCACCAAGGCTGGAGTGCAGCAGTGCGATCACAGCTCACCGCAGCCTCACCTCCCAGGCTCAAGTGATCCTCCCATCTCGGCCTCTGAGTAGCTGAGACTATAGGTGTGTCCACATGTCCGGCTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCGCGTTGGCCAGGGTGGTCTTGAACTCCTGAGCTCAAGCAATCCTCCTGCCTCAGCCTCCTTGTTTTGATTTTTAGATCCCACAAATAACTTGTGATGTTTGTCTTTCTATACCTGGTTCATTTAACATTTTCTTTTTCTTTTCTTTTCTTTTTTTTTTTTTTTGTGAGACTGAGTCTTGCTCTGTCACTCAGGCTGGAGGGCAATGGTGCATCTCAGCTCACTGCAACCTCCACCTCCTAGGTTCAAGCAATTCTTATGCCTCAGCCTCCTGGCTAGCTGGGATTACAGGCGTGTGTCACCATGCCAGGCTAATTTTTGTACTTTTAGTAGAGATGGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGGCCTCAAGTGATCCACCCGCCTCCGCCTCTGCCTCCCAAAGTGCTGGGATTACGGGCCTGAGCCACTGTGCCCGGCCCATCTAACATTTTCACTGTCAATCACAATGGGATTAAAACTCCTCCCACAGCCCCTAGGGACCATGGGTCTGCTCCTGTCTCCCCTCCAACCTCATCTTCTTCCTCCCACTCTCTCCTTGGCCCCATCTGCTCCAGTCCCCTGGCCTCCTTCCTGTCTGTCCTCAGATGTGCCCAGCCATTCTCACCTCAGCGCCTTTGCACCTGCTGTTCCCCCCAGAGCCGCACATGGCTGGCTCCCTGTTCTCCTTCAGGTCTCTGCTCAGATGTCATCTTCCCAAAGAGGCCTGCCTCGACCTCCCCTGCTGCTGTGCCGTCCCCTCATCTGTGACCCTCTTGCACTATCACCTCCAGGACGGCGGGGGTTTTGTGTTTTGTTGTAGCCTCAGGAAGTGCCTGATAGATCCCTGTTTCGAGACCAGTTCCATTTGGTTTTCTGGGCCTCAGTTTCCGTAACCGTGAAGGAGACCCTCGGCAATCTGAGCTTGCTGGGAAAGGGCTGGGCCCCATGTAAATATTTCTAAAGCACCCCTCTCCCCTCCCCCCTCAGATCAGGAGTCTGAGGGAGAGGCACAGAGGCTCCCTTTCTCTAAGCCAGTCCTCACCTGCCTAAGAAGATGTGAAGGAGACCCAGGAGACCCTGGGATAGGGAGGAACTCAGAGGGAAGGGACATTCTTTTCTTCGTCGCAATCCTGGGAGCTCCCTGGAGGAGGAGACCCGATCAGCCTGCAATCCTGGCGCGTCCCAGGAGGAGAAAGCGGCTTCCTCTATACTGTACTCTCCTCCACAGAACCCCCCTCTCAGCCCTGGAAGTCCTTGCTCACAGCCGAGGCGCCGAGAGCGCTTGCTCTGCCCAGATCTCGGCGAGTCTGCGCCCGCGCTCTGAACGGCGTCGCTGCCCAGCCCCCTTCCCCGGGAGGTGGGAGCGGCCACCCAGGGCCCCGTGGCTGCCCTTGTAAGGAGGCGAGGCCGAGGACACCCGAGACGCCCGGTTATAATTAACCAGGACACGTGGCGAACCCCCCTCCAACACCTGCCCCCGAACCCCCCCATACCCAGCGCCTCGGGTCTCGGCCTTTGCGGCAGAGGAGACAGCAAAGCGCCCTCTAAAAATAACTCCTTTCCCGGCGACCGAGACCCTCCCTGTCCCCGCACAGCGAAATCTCCCAGTGGCACCGAGGGGGCGAGGGTTAAGTGGGGGGGAGGGTGACCACCGCCTCCCACCCTTGCCCTGAGTTTGAATCTCTCCAACTCAGCCAGCCTCAGTTTCCCCTCCACTCAGTCCCTAGGAGGAAGGGGCGCCCAAGCGGGTTTCTGGGGTTAGACTGCCCTCCATTGCAATTGGTCCTTCTCCCGGCCTCTGCTTCCTCCAGCTCACAGGGTATCTGCTCCTCCTGGAGCCACACCTTGGTTCCCCGAGGTGCCGCTGGGACTCGGGTAGGGGTGAGGGCCCAGGGGCGACAGGGGGAGCCGAGGGCCACAGGAAGGGCTGGTGGCTGAAGGAGACTCAGGGGCCAGGGGACGGTGGCTTCTACGTGCTTGGGACGTTCCCAGCCACCGTCCCATGTTCCCGGCGGGGGCCAGCTGTCCCCACCGCCAGCCCAACTCAGCACTTGGTTAGGGTATCAGCTTGGTGGGGGCGTGAGCCCAGCCCTGGGGCGCTCAGCCCATACAAGGCCATGGGGCTGGGCGCAAAGCATGCCTGGGTTCAGGGTGGGTATGGTGCCGGAGCAGGGAGGTGAGAGGCTCAGCTGCCCTCCAGAACTCCTCCCTGGGGACAACCCCTCCCAGCCAATAGCACAGCCTAGGTCCCCCTATATAAGGCCACGGCTGCTGGCCCTTCCTTTGGGTCAGTGTCACCTCCAGGATACAGACAGCCCCCCTTCAGCCCAGCCCAGCCAGGTACTGCACGGGGCGGGAATCTGGGTGGGGGCCAGAGTAGGGGATTTCTGTGGGTGCTAGAGGCTTGGCTTGGGAAAGGGTCTGTGTGTCACCCCTTGCTCCACCAACATCCTCCTATACAAAGGCAGGTCGGTGCGTGGGAAGGTTGACCCTTGTGTGTCTGGGAGGCCCCTCCATCTGTGAGGCTGCCTGAACCCCCACTGGGACCTGTGATTTCTGCGGCACAG
+</Sequence>
+<Annotations>
+300 600 Glorp Glorptype
+</Annotations>
+<Sequence>
+CTCCCTGAGGGGAGTGCCCCGCTTAGCCCTCAGCCCTGGCACCCACCAGGCAGAGGAGGCAGCTCCCAGAGTCCCCGGCTGAGGGGGCCTGCCCATGTCCTCGCGAGTCGGGCACCCAGTCCCCTTCCCTGGGGGCCTCGGGTGCCTTGTAAGGAGGCAAGGCCCAGGACACCCGAGCTGCCGGCTATAATTAGCCTGGACACGTGGGCCCCCCGCCAGCACCTGCCCTGAGCCCCCCTACCCGCAGCCTTGGGTGCTGGGACTGCCGACAGCAGAGACCGCTCTAAAAATAGCTCCCTTCCTGGACGGCCCCTCCCAGCAGGCCCCCCACCCCCCGCTGAGGCTCACTGCTGGTGTCCCTGCCTGGTGCTCCCTAGGGTGTGCACCCACGGCAGGTGCAGAGGCAGGGGGCACCGCGGGGGGCTGGTAACAGCAGCAGGCCTAGGGGCCAGGGGACGGCGAGCTGTACGTGCCAGGACACCCCCAGCCACTGTCCCGTGCTCCCGGCGGGGGCCAGCTGTCCCCCGCCAGCCCGACTCAGCACTTGCTCTGCACATCGGCCTGGGGCGGCCCATACAAGGGCATGGGGCTGGGCACAAGGCACGCCCGCGTCCAGGTGGGCACTGTGCCCGGGCATGGTGGGGGGAGAGGCTGGGCTGCCTTCCAGGCCCCCTCCCTGGGGCCAGCCCCGCCCAGCCAGTGGCACAGCAGGGTCCCCTACATAAAGCCGGGGCCGCGGGCCCTGGATCTGCCTCAGTGTCGCCTGCAGGACACACGTAGCCCCGCTCAGCACAGCCCAGCCAG
+</Sequence>
+<Annotations>
+300 600 Glorp Glorptype
+</Annotations>
+</Mussa_Sequence>
diff --git a/examples/mck3test_example.muway b/examples/mck3test_example.muway
new file mode 100644 (file)
index 0000000..ed3f1d6
--- /dev/null
@@ -0,0 +1,41 @@
+<Mussa type=flp seq_num=3 win=30 thres=20 >
+31:1895,1497,24
+30:1899,1459,28
+30:1901,1503,30
+30:1901,1461,30
+34:1904,1506,33
+47:2100,1711,130
+30:2123,1733,152
+34:2125,1735,154
+30:2131,1741,160
+30:2133,1743,162
+44:2148,1758,176
+30:2186,1793,208
+40:2261,1868,271
+31:3015,2264,424
+34:3071,2326,485
+31:3085,2339,498
+30:3086,-2346,499
+30:3087,2341,500
+30:3088,-2344,-503
+30:3088,-2344,501
+30:3089,-2343,-502
+30:3089,-2343,502
+30:3090,-2342,-501
+33:3090,-2342,503
+30:3094,-2338,-497
+30:3094,-2338,507
+30:3095,-2337,-496
+30:3095,-2337,508
+30:3096,-2336,-495
+30:3096,-2336,509
+30:3097,-2335,-494
+44:3101,2357,514
+30:3157,2426,556
+30:3158,2542,673
+30:3158,2427,557
+30:3159,2543,674
+64:3159,2428,558
+41:3210,2479,608
+62:3246,2516,647
+</Mussa>
diff --git a/examples/seq/human_mck_pro.fa b/examples/seq/human_mck_pro.fa
new file mode 100644 (file)
index 0000000..0e31f75
--- /dev/null
@@ -0,0 +1,43 @@
+>gi|180579|gb|M21487.1|HUMCKMM1 Human muscle creatine kinase gene (CKMM), 5' flank
+GGATCCTTCCTCCTTGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACTGCACCTGGCCTATTACC
+CTTCTCAGGCTCTGGAGTCCATCCTTCTGCTCTGTCTCCCTCAGTTCAATTGTTTTTTGTTTTTTGTTTT
+TTTTTTAGACACAGTCTCGCTCTGTCACCAAGGCTGGAGTGCAGCAGTGCGATCACAGCTCACCGCAGCC
+TCACCTCCCAGGCTCAAGTGATCCTCCCATCTCGGCCTCTGAGTAGCTGAGACTATAGGTGTGTCCACAT
+GTCCGGCTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCGCGTTGGCCAGGGTGGTCTTGAACTCC
+TGAGCTCAAGCAATCCTCCTGCCTCAGCCTCCTTGTTTTGATTTTTAGATCCCACAAATAACTTGTGATG
+TTTGTCTTTCTATACCTGGTTCATTTAACATTTTCTTTTTCTTTTCTTTTCTTTTTTTTTTTTTTTGTGA
+GACTGAGTCTTGCTCTGTCACTCAGGCTGGAGGGCAATGGTGCATCTCAGCTCACTGCAACCTCCACCTC
+CTAGGTTCAAGCAATTCTTATGCCTCAGCCTCCTGGCTAGCTGGGATTACAGGCGTGTGTCACCATGCCA
+GGCTAATTTTTGTACTTTTAGTAGAGATGGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGGC
+CTCAAGTGATCCACCCGCCTCCGCCTCTGCCTCCCAAAGTGCTGGGATTACGGGCCTGAGCCACTGTGCC
+CGGCCCATCTAACATTTTCACTGTCAATCACAATGGGATTAAAACTCCTCCCACAGCCCCTAGGGACCAT
+GGGTCTGCTCCTGTCTCCCCTCCAACCTCATCTTCTTCCTCCCACTCTCTCCTTGGCCCCATCTGCTCCA
+GTCCCCTGGCCTCCTTCCTGTCTGTCCTCAGATGTGCCCAGCCATTCTCACCTCAGCGCCTTTGCACCTG
+CTGTTCCCCCCAGAGCCGCACATGGCTGGCTCCCTGTTCTCCTTCAGGTCTCTGCTCAGATGTCATCTTC
+CCAAAGAGGCCTGCCTCGACCTCCCCTGCTGCTGTGCCGTCCCCTCATCTGTGACCCTCTTGCACTATCA
+CCTCCAGGACGGCGGGGGTTTTGTGTTTTGTTGTAGCCTCAGGAAGTGCCTGATAGATCCCTGTTTCGAG
+ACCAGTTCCATTTGGTTTTCTGGGCCTCAGTTTCCGTAACCGTGAAGGAGACCCTCGGCAATCTGAGCTT
+GCTGGGAAAGGGCTGGGCCCCATGTAAATATTTCTAAAGCACCCCTCTCCCCTCCCCCCTCAGATCAGGA
+GTCTGAGGGAGAGGCACAGAGGCTCCCTTTCTCTAAGCCAGTCCTCACCTGCCTAAGAAGATGTGAAGGA
+GACCCAGGAGACCCTGGGATAGGGAGGAACTCAGAGGGAAGGGACATTCTTTTCTTCGTCGCAATCCTGG
+GAGCTCCCTGGAGGAGGAGACCCGATCAGCCTGCAATCCTGGCGCGTCCCAGGAGGAGAAAGCGGCTTCC
+TCTATACTGTACTCTCCTCCACAGAACCCCCCTCTCAGCCCTGGAAGTCCTTGCTCACAGCCGAGGCGCC
+GAGAGCGCTTGCTCTGCCCAGATCTCGGCGAGTCTGCGCCCGCGCTCTGAACGGCGTCGCTGCCCAGCCC
+CCTTCCCCGGGAGGTGGGAGCGGCCACCCAGGGCCCCGTGGCTGCCCTTGTAAGGAGGCGAGGCCGAGGA
+CACCCGAGACGCCCGGTTATAATTAACCAGGACACGTGGCGAACCCCCCTCCAACACCTGCCCCCGAACC
+CCCCCATACCCAGCGCCTCGGGTCTCGGCCTTTGCGGCAGAGGAGACAGCAAAGCGCCCTCTAAAAATAA
+CTCCTTTCCCGGCGACCGAGACCCTCCCTGTCCCCGCACAGCGAAATCTCCCAGTGGCACCGAGGGGGCG
+AGGGTTAAGTGGGGGGGAGGGTGACCACCGCCTCCCACCCTTGCCCTGAGTTTGAATCTCTCCAACTCAG
+CCAGCCTCAGTTTCCCCTCCACTCAGTCCCTAGGAGGAAGGGGCGCCCAAGCGGGTTTCTGGGGTTAGAC
+TGCCCTCCATTGCAATTGGTCCTTCTCCCGGCCTCTGCTTCCTCCAGCTCACAGGGTATCTGCTCCTCCT
+GGAGCCACACCTTGGTTCCCCGAGGTGCCGCTGGGACTCGGGTAGGGGTGAGGGCCCAGGGGCGACAGGG
+GGAGCCGAGGGCCACAGGAAGGGCTGGTGGCTGAAGGAGACTCAGGGGCCAGGGGACGGTGGCTTCTACG
+TGCTTGGGACGTTCCCAGCCACCGTCCCATGTTCCCGGCGGGGGCCAGCTGTCCCCACCGCCAGCCCAAC
+TCAGCACTTGGTTAGGGTATCAGCTTGGTGGGGGCGTGAGCCCAGCCCTGGGGCGCTCAGCCCATACAAG
+GCCATGGGGCTGGGCGCAAAGCATGCCTGGGTTCAGGGTGGGTATGGTGCCGGAGCAGGGAGGTGAGAGG
+CTCAGCTGCCCTCCAGAACTCCTCCCTGGGGACAACCCCTCCCAGCCAATAGCACAGCCTAGGTCCCCCT
+ATATAAGGCCACGGCTGCTGGCCCTTCCTTTGGGTCAGTGTCACCTCCAGGATACAGACAGCCCCCCTTC
+AGCCCAGCCCAGCCAGGTACTGCACGGGGCGGGAATCTGGGTGGGGGCCAGAGTAGGGGATTTCTGTGGG
+TGCTAGAGGCTTGGCTTGGGAAAGGGTCTGTGTGTCACCCCTTGCTCCACCAACATCCTCCTATACAAAG
+GCAGGTCGGTGCGTGGGAAGGTTGACCCTTGTGTGTCTGGGAGGCCCCTCCATCTGTGAGGCTGCCTGAA
+CCCCCACTGGGACCTGTGATTTCTGCGGCACAG
\ No newline at end of file
diff --git a/examples/seq/mouse_mck_pro.fa b/examples/seq/mouse_mck_pro.fa
new file mode 100644 (file)
index 0000000..46c2a6a
--- /dev/null
@@ -0,0 +1,49 @@
+>gi|10129974|gb|AF188002.1|AF188002 Mus musculus muscle creatine kinase (Mck) gene, promoter region
+CCATCCTGGTCTATAGAGAGAGTTCCAGAACAGCCAGGGCTACAGATAAACCCATCTGGAAAAACAAAGT
+TGAATGACCCAAGAGGGGTTCTCAGAGGGTGGCGTGTGCTCCCTGGCAAGCCTATGACATGGCCGGGGCC
+TGCCTCTCTCTGCCTCTGACCCTCAGTGGCTCCCATGAACTCCTTGCCCAATGGCATCTTTTTCCTGCGC
+TCCTTGGGTTATTCCAGTCTCCCCTCAGCATTCCTTCCTCAGGGCCTCGCTCTTCTCTCTGCTCCCTCCT
+TGCACAGCTGGCTCTGTCCACCTCAGATGTCACAGTGCTCTCTCAGAGGAGGAAGGCACCATGTACCCTC
+TGTTTCCCAGGTAAGGGTTCAATTTTTAAAAATGGTTTTTTGTTTGTTTGTTTGTTTGTTTGTTTGTTTG
+TTTTTCAAGACAGGGCTCCTCTGTGTAGTCCTAACTGTCTTGAAACTCCCTCTGTAGACCAGGTCGACCT
+CGAACTCTTGAAACCTGCCACGGACCACCCAGTCAGGTATGGAGGTCCCTGGAATGAGCGTCCTCGAAGC
+TAGGTGGGTAAGGGTTCGGCGGTGACAAACAGAAACAAACACAGAGGCAGTTTGAATCTGAGTGTATTTT
+GCAGCTCTCAAGCAGGGGATTTTATACATAAAAAAAAAAAAAAAAAAAAAACCAAACATTACATCTCTTA
+GAAACTATATCCAATGAAACAATCACAGATACCAACCAAAACCATTGGGCAGAGTAAAGCACAAAAATCA
+TCCAAGCATTACAACTCTGAAACCATGTATTCAGTGAATCACAAACAGAACAGGTAACATCATTATTAAT
+ATAAATCACCAAAATATAACAATTCTAAAAGGATGTATCCAGTGGGGGCTGTCGTCCAAGGCTAGTGGCA
+GATTTCCAGGAGCAGGTTAGTAAATCTTAACCACTGAACTAACTCTCCAGCCCCATGGTCAATTATTATT
+TAGCATCTAGTGCCTAATTTTTTTTTATAAATCTTCACTATGTAATTTAAAACTATTTTAATTCTTCCTA
+ATTAAGGCTTTCTTTACCATATACCAAAATTCACCTCCAATGACACACGCGTAGCCATATGAAATTTTAT
+TGTTGGGAAAATTTGTACCTATCATAATAGTTTTGTAAATGATTTAAAAAGCAAAGTGTTAGCCGGGCGT
+GGTGGCACACGCCTTTAATCCCTGCACTCGGGAGGCAGGGGCAGGAGGATTTCTGAGTTTGAGGCCAGCC
+TGGTCTACAGAGTGAGTTCCAGGACAGCCAGGGCTACACAGAGAAACCCTGTCTCGAACCCCCCACCCCC
+CAAAAAAAGCAAAGTGTTGGTTTCCTTGGGGATAAAGTCATGTTAGTGGCCCATCTCTAGGCCCATCTCA
+CCCATTATTCTCGCTTAAGATCTTGGCCTAGGCTACCAGGAACATGTAAATAAGAAAAGGAATAAGAGAA
+AACAAAACAGAGAGATTGCCATGAGAACTACGGCTCAATATTTTTTCTCTCCGGCGAAGAGTTCCACAAC
+CATCTCCAGGAGGCCTCCACGTTTTGAGGTCAATGGCCTCAGTCTGTGGAACTTGTCACACAGATCTTAC
+TGGAGGTGGTGTGGCAGAAACCCATTCCTTTTAGTGTCTTGGGCTAAAAGTAAAAGGCCCAGAGGAGGCC
+TTTGCTCATCTGACCATGCTGACAAGGAACACGGGTGCCAGGACAGAGGCTGGACCCCAGGAACACCTTA
+AACACTTCTTCCCTTCTCCGCCCCCTAGAGCAGGCTCCCCTCACCAGCCTGGGCAGAAATGGGGGAAGAT
+GGAGTGAAGCCATACTGGCTACTCCAGAATCAACAGAGGGAGCCGGGGGCAATACTGGAGAAGCTGGTCT
+CCCCCCAGGGGCAATCCTGGCACCTCCCAGGCAGAAGAGGAAACTTCCACAGTGCATCTCACTTCCATGA
+ATCCCCTCCTCGGACTCTGAGGTCCTTGGTCACAGCTGAGGTGCAAAAGGCTCCTGTCATATTGTGTCCT
+GCTCTGGTCTGCCTTCCACAGCTTGGGGGCCACCTAGCCCACCTCTCCCTAGGGATGAGAGCAGCCACTA
+CGGGTCTAGGCTGCCCATGTAAGGAGGCAAGGCCTGGGGACACCCGAGATGCCTGGTTATAATTAACCCA
+GACATGTGGCTGCCCCCCCCCCCCCAACACCTGCTGCCTGAGCCTCACCCCCACCCCGGTGCCTGGGTCT
+TAGGCTCTGTACACCATGGAGGAGAAGCTCGCTCTAAAAATAACCCTGTCCCTGGTGGATCCAGGGTGAG
+GGGCAGGCTGAGGGCGGCCACTTCCCTCAGCCGCAGGTTTGTTTTCCCAAGAATGGTTTTTCTGCTTCTG
+TAGCTTTTCCTGTCAATTCTGCCATGGTGGAGCAGCCTGCACTGGGCTTCTGGGAGAAACCAAACCGGGT
+TCTAACCTTTCAGCTACAGTTATTGCCTTTCCTGTAGATGGGCGACTACAGCCCCACCCCCACCCCCGTC
+TCCTGTATCCTTCCTGGGCCTGGGGATCCTAGGCTTTCACTGGAAATTTCCCCCCAGGTGCTGTAGGCTA
+GAGTCACGGCTCCCAAGAACAGTGCTTGCCTGGCATGCATGGTTCTGAACCTCCAACTGCAAAAAATGAC
+ACATACCTTGACCCTTGGAAGGCTGAGGCAGGGGGATTGCCATGAGTGCAAAGCCAGACTGGGTGGCATA
+GTTAGACCCTGTCTCAAAAAACCAAAAACAATTAAATAACTAAAGTCAGGCAAGTAATCCTACTCGGGAG
+ACTGAGGCAGAGGGATTGTTACATGTCTGAGGCCAGCCTGGACTACATAGGGTTTCAGGCTAGCCCTGTC
+TACAGAGTAAGGCCCTATTTCAAAAACACAAACAAAATGGTTCTCCCAGCTGCTAATGCTCACCAGGCAA
+TGAAGCCTGGTGAGCATTAGCAATGAAGGCAATGAAGGAGGGTGCTGGCTACAATCAAGGCTGTGGGGGA
+CTGAGGGCAGGCTGTAACAGGCTTGGGGGCCAGGGCTTATACGTGCCTGGGACTCCCAAAGTATTACTGT
+TCCATGTTCCCGGCGAAGGGCCAGCTGTCCCCCGCCAGCTAGACTCAGCACTTAGTTTAGGAACCAGTGA
+GCAAGTCAGCCCTTGGGGCAGCCCATACAAGGCCATGGGGCTGGGCAAGCTGCACGCCTGGGTCCGGGGT
+GGGCACGGTGCCCGGGCAACGAGCTGAAAGCTCATCTGCTCTCAGGGGCCCCTCCCTGGGGACAGCCCCT
+CCTGGCTAGTCACACCCTGTAGGCTCCTCTATATAACCCAGGGGCACAGGGGCTGCCCCCGGGTCAC
\ No newline at end of file
diff --git a/examples/seq/rabbit_mck_pro.fa b/examples/seq/rabbit_mck_pro.fa
new file mode 100644 (file)
index 0000000..171eda6
--- /dev/null
@@ -0,0 +1,13 @@
+>gi|1621|emb|X55146.1|OCMCK1 Rabbit gene for muscle creatine kinase, exon 1
+CTCCCTGAGGGGAGTGCCCCGCTTAGCCCTCAGCCCTGGCACCCACCAGGCAGAGGAGGCAGCTCCCAGA
+GTCCCCGGCTGAGGGGGCCTGCCCATGTCCTCGCGAGTCGGGCACCCAGTCCCCTTCCCTGGGGGCCTCG
+GGTGCCTTGTAAGGAGGCAAGGCCCAGGACACCCGAGCTGCCGGCTATAATTAGCCTGGACACGTGGGCC
+CCCCGCCAGCACCTGCCCTGAGCCCCCCTACCCGCAGCCTTGGGTGCTGGGACTGCCGACAGCAGAGACC
+GCTCTAAAAATAGCTCCCTTCCTGGACGGCCCCTCCCAGCAGGCCCCCCACCCCCCGCTGAGGCTCACTG
+CTGGTGTCCCTGCCTGGTGCTCCCTAGGGTGTGCACCCACGGCAGGTGCAGAGGCAGGGGGCACCGCGGG
+GGGCTGGTAACAGCAGCAGGCCTAGGGGCCAGGGGACGGCGAGCTGTACGTGCCAGGACACCCCCAGCCA
+CTGTCCCGTGCTCCCGGCGGGGGCCAGCTGTCCCCCGCCAGCCCGACTCAGCACTTGCTCTGCACATCGG
+CCTGGGGCGGCCCATACAAGGGCATGGGGCTGGGCACAAGGCACGCCCGCGTCCAGGTGGGCACTGTGCC
+CGGGCATGGTGGGGGGAGAGGCTGGGCTGCCTTCCAGGCCCCCTCCCTGGGGCCAGCCCCGCCCAGCCAG
+TGGCACAGCAGGGTCCCCTACATAAAGCCGGGGCCGCGGGCCCTGGATCTGCCTCAGTGTCGCCTGCAGG
+ACACACGTAGCCCCGCTCAGCACAGCCCAGCCAG
\ No newline at end of file
index af25db2ebb5056887c32d7c29c5e9775481d2726..12c649ae2206dc483e5ff1249d2dd574d0602b12 100644 (file)
--- a/mussa.cc
+++ b/mussa.cc
@@ -1,44 +1,25 @@
 #include "mussa_class.hh"
 #include "time.h"
 
+
 int main(int argc, char **argv) 
 {
   Mussa overlord;
+  char run_mode;
   char * para_file, * ana_file;
-  string  an_arg;
   time_t t1, t2, begin, end;
   double setuptime, seqloadtime, seqcomptime, nwaytime, savetime, totaltime;
-  string run_mode;
   int seq_num;
 
 
   begin = time(NULL);
 
-  // need more sophisticated arg reading structure
-  // support -w and -t override options...other stuff??
-  an_arg = * ++argv;
-
-  if (an_arg == "-v")
-  {
-    seq_num = atoi(* ++argv);
-    ana_file = * ++argv;
-    run_mode = "v";
-  }
-  else
-  {
-    para_file = (char*) an_arg.c_str();
-    run_mode = "f";
-  }
+  run_mode = overlord.parse_args(argc, argv);
 
-  if (run_mode == "v")
-  {
-    cout << "load file = " << ana_file << " seq num = " << seq_num << endl;
-    overlord.load_old(ana_file, seq_num);
-  }
-  else if (run_mode == "f")
+  if ((run_mode == 'f') || (run_mode == 'n'))
   {
     t1 = time(NULL);
-    overlord.setup(para_file);
+    overlord.setup();
     t2 = time(NULL);
     setuptime = difftime(t2, t1);
 
@@ -66,7 +47,7 @@ int main(int argc, char **argv)
 
     cout << "fum\n";
     t1 = time(NULL);
-    overlord.save_old();
+    overlord.save();
     t2 = time(NULL);
     savetime = difftime(t2, t1);
 
@@ -82,8 +63,10 @@ int main(int argc, char **argv)
     cout << totaltime << "\n";
   }
 
+  if  (run_mode == 'v')
+    overlord.load();
 
-  if (run_mode == "v")
+  if  ((run_mode == 'f') || (run_mode == 'v'))
     overlord.FuckingPieceOfShit(1000,500);
 }
 
index d00f76667c0ea91810a6eac1e748abdbee2fe208..110d6f525984fb3c0d34039552037a77c105007f 100644 (file)
 
 #include "mussa_class.hh"
 
+// doesn't do neg ints...
+string
+int_to_str(int an_int)
+{
+  string converted_int;
+  int remainder;
+
+  converted_int = "";
+
+  if (an_int == 0)
+    converted_int = "0";
+
+  while (an_int != 0)
+  {
+    remainder = an_int % 10;
+
+    if (remainder == 0)
+      converted_int = "0" + converted_int;
+    else if (remainder == 1)
+      converted_int = "1" + converted_int;
+    else if (remainder == 2)
+      converted_int = "2" + converted_int;
+    else if (remainder == 3)
+      converted_int = "3" + converted_int;
+    else if (remainder == 4)
+      converted_int = "4" + converted_int;
+    else if (remainder == 5)
+      converted_int = "5" + converted_int;
+    else if (remainder == 6)
+      converted_int = "6" + converted_int;
+    else if (remainder == 7)
+      converted_int = "7" + converted_int;
+    else if (remainder == 8)
+      converted_int = "8" + converted_int;
+    else if (remainder == 9)
+      converted_int = "9" + converted_int;
+
+    an_int = an_int / 10;
+  }
+
+  return converted_int;
+}
+
 
 Mussa::Mussa()
 {
 }
 
+char
+Mussa::parse_args(int argc, char **argv)
+{
+  int i;
+  string an_arg;
+  char run_mode;
+
+  win_override = false;
+  thres_override = false;
+  // minimal arg reading structure, not very robust to errors
+
+
+  i = 1;
+  while (i < argc)
+  {
+    an_arg = * ++argv;
+    i++;
+
+    if (an_arg == "-v")
+    {
+      ana_name = * ++argv;
+      i++;
+      run_mode = 'v';
+    }
+    else if (an_arg == "-n")
+    {
+      para_file_path = * ++argv;
+      i++;
+      run_mode = 'n';
+    }
+    else if (an_arg == "-w")
+    {
+      window = atoi(* ++argv);
+      i++;
+      win_override = true;
+    }
+    else if (an_arg == "-t")
+    {
+      threshold = atoi(* ++argv);
+      i++;
+      thres_override = true;
+      cout << thres_override << endl;
+    }
+    else
+    {
+      para_file_path = an_arg;
+      run_mode = 'f';
+    }
+  }
+  return run_mode;
+}
+
+
 void
-Mussa::setup(char * para_file_path)
+Mussa::setup()
 {
   ifstream para_file;
   string file_data_line;
   string param, value, annot_file;
   int split_index, fasta_index;
+  int sub_seq_start, sub_seq_end;
   bool seq_params, did_seq;
   int bogo;
 
 
   win_append = false;
   thres_append = false;
+  seq_files.clear();
+  fasta_indices.clear();
+  annot_files.clear();
+  sub_seq_starts.clear();
+  sub_seq_ends.clear();
 
-  para_file.open(para_file_path, ios::in);
+  para_file.open(para_file_path.c_str(), ios::in);
 
   getline(para_file,file_data_line);
   split_index = file_data_line.find(" ");
@@ -43,14 +145,22 @@ Mussa::setup(char * para_file_path)
     else if (param == "SEQUENCE_NUM")
       seq_num = atoi(value.c_str());
     else if (param == "WINDOW")
-      window = atoi(value.c_str());
+    {
+      if (!win_override)
+        window = atoi(value.c_str());
+    }
     else if (param == "THRESHOLD")
-      threshold = atoi(value.c_str());
+    {
+      if (!thres_override)
+          threshold = atoi(value.c_str());
+    }
     else if (param == "SEQUENCE")
     {
       seq_files.push_back(value);
       fasta_index = 1;
       annot_file = "";
+      sub_seq_start = 0;
+      sub_seq_end = 0;
       seq_params = true;
 
       while ((!para_file.eof()) && seq_params)
@@ -64,6 +174,13 @@ Mussa::setup(char * para_file_path)
           fasta_index = atoi(value.c_str());
         else if (param == "ANNOTATION")
           annot_file = value;
+        else if (param == "SEQ_START")
+          sub_seq_start = atoi(value.c_str());
+        else if (param == "SEQ_END")
+        {
+          cout << "hey!  " << atoi(value.c_str()) << endl;
+          sub_seq_end = atoi(value.c_str());
+        }
         //ignore empty lines or that start with '#'
         else if ((param == "") || (param == "#")) {}
         else seq_params = false;
@@ -72,6 +189,8 @@ Mussa::setup(char * para_file_path)
 
       fasta_indices.push_back(fasta_index);
       annot_files.push_back(annot_file);
+      sub_seq_starts.push_back(sub_seq_start);
+      sub_seq_ends.push_back(sub_seq_end);
       did_seq = true;
       
     }
@@ -99,29 +218,44 @@ Mussa::setup(char * para_file_path)
   cout << "window = " << window << " threshold = " << threshold << "\n";
 }
 
+//        if (!((param == "") || (param == "#")))
+//          cout << value << " = " << param << endl;
 
 void
 Mussa::get_Seqs()
 {
   list<string>::iterator seq_files_i, annot_files_i;
-  list<int>::iterator fasta_indices_i;
+  list<int>::iterator fasta_indices_i, seq_starts_i, seq_ends_i;
   Sequence aSeq;
 
   seq_files_i = seq_files.begin();
   fasta_indices_i = fasta_indices.begin();
   annot_files_i = annot_files.begin();
+  seq_starts_i = sub_seq_starts.begin();
+  seq_ends_i = sub_seq_ends.begin();
 
-  while ( (seq_files_i != seq_files.end()) &&
+
+  while (seq_files_i != seq_files.end())
+          /* it should be guarenteed that each of the following exist
+ &&
           (fasta_indices_i != fasta_indices.end()) &&
-          (annot_files_i != annot_files.end()) )
+          (annot_files_i != annot_files.end())  && 
+          (seq_starts_i != sub_seq_starts.end())  &&
+          (seq_ends_i != sub_seq_ends.end())         )
+          */
   {
-    aSeq.load_fasta(*seq_files_i, *fasta_indices_i);
+    aSeq.load_fasta(*seq_files_i, *fasta_indices_i,*seq_starts_i,*seq_ends_i);
+    if (*annot_files_i != "")
+      aSeq.load_annot(*annot_files_i);
     the_Seqs.push_back(aSeq);
     cout << aSeq.hdr() << endl;
+    //cout << aSeq.seq() << endl;
     aSeq.clear();
     ++seq_files_i;
     ++fasta_indices_i;
     ++annot_files_i;
+    ++seq_starts_i;
+    ++seq_ends_i;
   }
 }
 
@@ -158,28 +292,103 @@ Mussa::seqcomp()
       all_comps[i][i2].setup("m", window, threshold, seq_lens[i],seq_lens[i2]);
       all_comps[i][i2].seqcomp(the_Seqs[i].seq(), the_Seqs[i2].seq(), false);
       all_comps[i][i2].seqcomp(the_Seqs[i].seq(),the_Seqs[i2].rev_comp(),true);
-      save_file_string = "ana";
-      if (i == 0)
-        save_file_string += "0";
-      else if (i == 1)
-        save_file_string += "1";
-      if (i2 == 1)
-        save_file_string += "1";
-      else if (i2 == 2)
-        save_file_string += "2";
 
       all_comps[i][i2].file_save(save_file_string);
     }
 }
 
 
+
 void
 Mussa::nway()
 {
-  the_paths.setup(seq_num);
+  the_paths.setup(seq_num, window, threshold);
   the_paths.find_paths_r(all_comps);
+  the_paths.simple_refine();
 }
 
+void
+Mussa::save()
+{
+  string save_path_base, save_path;
+  fstream save_file;
+  int i;
+
+  // gotta do bit with adding win & thres if to be appended - need itos
+
+  // not sure why, but gotta close file each time since can't pass file streams
+
+  save_path_base = ana_name;
+
+  if (win_append)
+    save_path_base += "_w" + int_to_str(window);
+
+  if (thres_append)
+    save_path_base += "_t" + int_to_str(threshold);
+
+  // save sequence and annots to a special mussa file
+  save_path = save_path_base + ".museq";
+  save_file.open(save_path.c_str(), ios::out);
+  save_file << "<Mussa_Sequence>" << endl;
+  //save_file.close();
+
+  for(i = 0; i < seq_num; i++)
+    the_Seqs[i].save(save_file);
+
+  //save_file.open(save_path.c_str(), ios::app);
+  save_file << "</Mussa_Sequence>" << endl;
+  save_file.close();
+
+  // save nway paths to its mussa save file
+  save_path = save_path_base + ".muway";
+  the_paths.save(save_path);
+}
+
+void
+Mussa::load()
+{
+  int i;
+  string load_file_path;
+  Sequence tmp_seq;
+
+
+  load_file_path = ana_name + ".muway";
+  seq_num = the_paths.load(load_file_path);
+
+  load_file_path = ana_name + ".museq";
+  for (i = 1; i <= seq_num; i++)
+  {
+    tmp_seq.clear();
+    tmp_seq.load_museq(load_file_path, i);
+    the_Seqs.push_back(tmp_seq);
+  }
+}
+
+
+
+// In Memorial to Everything that's gone wrong in the last week
+// and Everything that will go wrong in the next 2 weeks 03/02/2004 - Tristan
+void
+Mussa::FuckingPieceOfShit(int x_max, int y_max)
+{
+  Fl_Window *conn_window = new Fl_Window(x_max, y_max, "Mussa Connections");
+  ConnView *conn_box = new ConnView(0, 0, x_max, y_max);
+  conn_box->setup(ana_name, seq_num, window, &the_Seqs, &the_paths);
+  conn_box->scale_paths();
+  conn_window->end();
+  conn_window->show();
+
+  Fl::run();
+}
+
+/*
+      cout << "fee\n";
+      cout << "fie\n";
+      cout << "foe\n";
+      cout << "fum\n";
+*/
+
+
 
 void
 Mussa::save_old()
@@ -211,7 +420,7 @@ Mussa::load_old(char * load_file_path, int s_num)
   Sequence a_seq;
 
   seq_num = s_num;
-  the_paths.setup(seq_num);
+  the_paths.setup(seq_num, 0, 0);
   save_file.open(load_file_path, ios::in);
 
   // currently loads old mussa format
@@ -252,30 +461,3 @@ Mussa::load_old(char * load_file_path, int s_num)
 
   //the_paths.save("tmp.save");
 }
-
-
-// In Memorial to Everything that's gone wrong in the last week
-// and Everything that will go wrong in the next 2 weeks 03/02/2004 - Tristan
-void
-Mussa::FuckingPieceOfShit(int x_max, int y_max)
-{
-
-  Fl_Window *conn_window = new Fl_Window(x_max, y_max, "Mussa Connections");
-  ConnView *conn_box = new ConnView(0, 0, x_max, y_max);
-  conn_box->setup(ana_name, seq_num, window, &the_Seqs, &the_paths);
-  conn_box->scale_paths();
-  conn_box->spawnSeq();
-  conn_window->end();
-  conn_window->show();
-
-  Fl::run();
-}
-
-/*
-      cout << "fee\n";
-      cout << "fie\n";
-      cout << "foe\n";
-      cout << "fum\n";
-*/
-
-
index b80bc3e947b9f577165e892fbe49847dcf0c6e10..06877c735a4990d14e4a0e1503c340cd9448d0ef 100644 (file)
@@ -8,10 +8,11 @@
 class Mussa
 {
   private:
-    string ana_name;
+    string para_file_path, ana_name;
     int seq_num, window, threshold;
     list<string> seq_files, annot_files;
-    list<int> fasta_indices;
+    list<int> fasta_indices, sub_seq_starts, sub_seq_ends;
+    bool win_override, thres_override;
     bool win_append, thres_append;
 
     vector<Sequence> the_Seqs;
@@ -20,12 +21,18 @@ class Mussa
 
   public:
     Mussa();
-    void setup(char * para_file_path);
+    char parse_args(int argc, char **argv);
+    void setup();
     void get_Seqs();
     void seqcomp();
     void nway();
+    void save();
+    void load();
+    void FuckingPieceOfShit(int x_max, int y_max);
+
+    // deprecated - support bridge for python version of mussa
     void save_old();
     void load_old(char * load_file_path, int s_num);
-    void FuckingPieceOfShit(int x_max, int y_max);
+
 };
 
index 75a757a59c43baefa422c3f14b5244ed9af40399..aeff03ec514d9d8adad132f465c9d001be607034 100644 (file)
@@ -25,6 +25,11 @@ ConnView::setup(string name, int sq_num, int win_len,
   x_scale_factor = (float) seq_lens[0] / x_max;
   cout << "scale factor is " << x_scale_factor << endl;
   y_seq_incre = (y_max-20) / (seq_num - 1);
+
+  dragging = false;
+  selected = false;
+
+  highlight.clear();
 }
 
 
@@ -38,10 +43,10 @@ ConnView::scale_paths()
 
   scaled_pathz.clear();
 
-  for(pathz_i = P->pathz.begin(); pathz_i != P->pathz.end(); ++pathz_i)
+  for(pathz_i = P->refined_pathz.begin(); pathz_i != P->refined_pathz.end(); ++pathz_i)
   {
     a_path = *pathz_i;
-    for(i2 = 0; i2 < seq_num; i2++)
+    for(i2 = 0; i2 <= seq_num; i2++)
       a_path[i2] = (int) (a_path[i2] / x_scale_factor);
     scaled_pathz.push_back(a_path);
   }
@@ -54,30 +59,108 @@ void
 ConnView::draw()
 {
   list<vector<int> >::iterator i;
-  int i2, y_loc, x_loc;
+  int i2, i3, y_loc, x_loc, x_start, x_end;
   vector<int> a_path;
+  list<annot>::iterator annot_i;
+  int window_size;
+  bool rc_color;
+  int path_start, path_end;
+  list<bool>::iterator highlight_i;
+
+
+  // clear drawing area and set background to white
+  fl_color(FL_WHITE);
+  fl_rectf(x(), y(), w(), h());
+
+  // determine which paths to highlight
+  highlight.clear();
+  for(i = scaled_pathz.begin(); i != scaled_pathz.end(); ++i)
+  {
+    a_path = *i;
+    // determine if path falls within the selected region and mark it for
+    // highlighted color
+    path_start = abs(a_path[1]);
+    path_end = path_start + a_path[0];
+    if ( ( (path_start >= drag_start) && (path_end <= drag_end) ) ||
+         ( (path_start < drag_start) && (path_end > drag_end) ) ||
+         ( (path_start < drag_start) && (path_end > drag_start) ) ||
+         ( (path_start < drag_end) && (path_end > drag_end) )          )
+      highlight.push_back(true);
+    else
+      highlight.push_back(false);
+  }
 
-  fl_color(FL_RED);
   fl_line_style(FL_SOLID, 1, NULL);
+  // draw non-highlight paths (ie not in the selection box)
+  highlight_i = highlight.begin();
+  for(i = scaled_pathz.begin(); i != scaled_pathz.end(); ++i)
+  {
+    a_path = *i;
+    y_loc = 10;
+
+    // get window size to determine line width
+    window_size = a_path[0];
+    // make sure width is at least 1  - might be zero to my slack rounding
+    if (window_size == 0)
+      window_size = 1;
+
+    if (!(*highlight_i))
+      for(i2 = seq_num; i2 > 1; i2--)
+      {
+        // RC case handling
+        // ugh, an xor...only want blue if one of the nodes is rc
+        if ( ((a_path[i2] < 0) || (a_path[i2-1] < 0)) &&
+             !((a_path[i2] < 0) && (a_path[i2-1] < 0)) )
+          fl_color(FL_CYAN);
+        else
+          fl_color(FL_MAGENTA);
+          
+        fl_polygon((int)abs(a_path[i2]), y_loc + 3,
+                   (int)abs(a_path[i2])+window_size, y_loc + 3,
+                   (int)abs(a_path[i2-1])+window_size, y_loc + y_seq_incre - 3,
+                   (int)abs(a_path[i2-1]), y_loc + y_seq_incre - 3);
 
+        y_loc += y_seq_incre;
+      }
+    ++highlight_i;
+  }
+
+  // draw highlighted paths (ie in or partially in selection)
+  // drawing these separately and after other paths so they are on top
+  highlight_i = highlight.begin();
   for(i = scaled_pathz.begin(); i != scaled_pathz.end(); ++i)
   {
     a_path = *i;
     y_loc = 10;
-    for(i2 = seq_num - 1; i2 > 0; i2--)
-    {
-      // RC case handling
-      // ugh, and xor...only want blue if one of the nodes is rc
-      if ( ((a_path[i2] < 0) || (a_path[i2-1] < 0)) &&
-           !((a_path[i2] < 0) && (a_path[i2-1] < 0)) )
-        fl_color(FL_BLUE);
-
-      fl_line((int)abs(a_path[i2]),y_loc,(int)abs(a_path[i2-1]),y_loc + y_seq_incre);
-      y_loc += y_seq_incre;
-      fl_color(FL_RED);
-    }
+
+    // get window size to determine line width
+    window_size = a_path[0];
+    // make sure width is at least 1  - might be zero to my slack rounding
+    if (window_size == 0)
+      window_size = 1;
+
+    if (*highlight_i)
+      for(i2 = seq_num; i2 > 1; i2--)
+      {
+        // RC case handling
+        // ugh, an xor...only want blue if one of the nodes is rc
+        if ( ((a_path[i2] < 0) || (a_path[i2-1] < 0)) &&
+             !((a_path[i2] < 0) && (a_path[i2-1] < 0)) )
+          fl_color(FL_BLUE);
+        else
+          fl_color(FL_RED);
+          
+        fl_polygon((int)abs(a_path[i2]), y_loc + 3,
+                   (int)abs(a_path[i2])+window_size, y_loc + 3,
+                   (int)abs(a_path[i2-1])+window_size, y_loc + y_seq_incre - 3,
+                   (int)abs(a_path[i2-1]), y_loc + y_seq_incre - 3);
+
+        y_loc += y_seq_incre;
+      }
+    ++highlight_i;
   }
 
+  // draw sequence representation lines
   fl_color(FL_BLACK);
   fl_line_style(FL_SOLID, 3, NULL);
   y_loc = 10;
@@ -87,21 +170,116 @@ ConnView::draw()
     fl_line(0,y_loc,x_loc,y_loc);
     y_loc += y_seq_incre;
   }
+
+  // draw annotations
+  fl_color(FL_GREEN);
+  fl_line_style(FL_SOLID, 9, NULL);
+  y_loc = 10;
+  for(i2 = seq_num - 1; i2 >= 0; i2--)
+  {
+    for(annot_i = (*S)[i2].annots.begin(); annot_i != (*S)[i2].annots.end(); ++annot_i)
+    {
+      x_start = (int)(annot_i->start / x_scale_factor);
+      x_end = (int)(annot_i->end / x_scale_factor);
+      fl_line(x_start,y_loc,x_end,y_loc);
+    }
+    y_loc += y_seq_incre;
+  }
+
+  // draw selection box
+  if (selected)
+  {
+    fl_color(FL_BLACK);
+    fl_line_style(FL_SOLID, 2, NULL);
+    fl_rect(drag_start, y_loc - y_seq_incre-8, drag_end-drag_start, 16);
+  }
+}
+
+
+
+int
+ConnView::handle(int e)
+{
+  int return_value;
+
+  switch(e)
+  {
+  case FL_PUSH:
+    if (Fl::event_button3())
+    {
+      spawnSeq();
+      return_value = 1;
+    }
+    break;
+  case FL_DRAG:
+    if (!dragging)
+    {
+      drag_start = Fl::event_x() -x();
+      dragging = true;
+      selected = false;
+      return_value = 1;
+    }
+    fl_color(FL_CYAN);
+    fl_line_style(FL_SOLID, 1, NULL);
+    fl_overlay_rect(drag_start, y_seq_incre*(seq_num-1), Fl::event_x()-drag_start, 16);
+    
+    break;
+  case FL_RELEASE:
+    if (dragging)
+    {
+      drag_end = Fl::event_x() - x();
+      dragging = false;
+      selected = true;
+      redraw();
+      return_value = 1;
+    }
+    break;
+  default:
+    return_value = Fl_Widget::handle(e);
+  }
+
+  return return_value;
 }
 
+
 void
 ConnView::spawnSeq()
 {
-  int seq_box_x;
+  list<vector<int> > selected_paths;
+  list<vector<int> >::iterator pathz_i;
+  int i2, i3, y_loc, x_loc, x_start, x_end;
+  vector<int> a_path;
+  list<bool>::iterator highlight_i;
+
+  // make new list of connections that are highlighted
+  selected_paths.clear();
+  highlight_i = highlight.begin();
+  for(pathz_i = P->refined_pathz.begin(); pathz_i != P->refined_pathz.end(); ++pathz_i)
+  {
+    if (*highlight_i)
+    {
+      a_path = *pathz_i;
+      selected_paths.push_back(a_path);
+      //cout << "bing ";
+    }
+    /*
+    else
+      cout << "bang ";
+    */
+    ++highlight_i;
+  }
+
+  //cout << endl;
 
   Fl_Window *seq_window = new Fl_Window(x_max, y_max, "Mussa Sequence");
-  Fl_Scroll *scroll_seq = new Fl_Scroll(0, 0, x_max, y_max);
 
-  seq_box_x = 32000; 
-    //seq_lens[0]*14;
-  SeqView *seq_box = new SeqView(0, 0, seq_box_x, y_max-20);
-  seq_box->setup(ana_name, seq_num, window, S, P, seq_lens);
+
+  SeqView *seq_box = new SeqView(0, 0, x_max, y_max);
+  seq_box->setup(ana_name, seq_num, S, selected_paths, seq_lens);
+  seq_box->debug_draw();
 
   seq_window->end();
   seq_window->show();
 }
+
+
index 0994dcf69b556fcb84ac9bb6aacd7fd41efc40d1..d2813a8d875144293ab7a62454e9a31e5bb34291 100644 (file)
@@ -16,7 +16,10 @@ class ConnView : public Fl_Box
     int x_max, y_max;
     float x_scale_factor;
     int y_seq_incre;
-  //list< will be list of these later for zooming history
+    int drag_start, drag_end;
+    bool dragging, selected;
+    list<bool> highlight;
+
     list<vector<int> > scaled_pathz;
     vector<int> seq_lens;
 
@@ -33,5 +36,6 @@ class ConnView : public Fl_Box
                vector<Sequence> *, Nway_Paths *);
     void scale_paths();
     void draw();
+    int handle(int e);
     void spawnSeq();
 };
index bf1afb3a2ef5fdec116493ef96cfdd4ee8c0235f..4cf34afc56b638e9fba46310075e205f75cc09f7 100644 (file)
@@ -2,13 +2,12 @@
 
 
 void
-SeqView::setup(string name, int sq_num, int win_len, 
+SeqView::setup(string name, int sq_num, 
                vector<Sequence> *some_seqs,
-               Nway_Paths *some_paths, vector<int> some_lens)
+               list<vector<int> > some_paths, vector<int> some_lens)
 {
   ana_name = name;
   seq_num = sq_num;
-  base_window_len = win_len;
   S = some_seqs;
   P = some_paths;
   seq_lens = some_lens;
@@ -17,6 +16,8 @@ SeqView::setup(string name, int sq_num, int win_len,
 
   fl_font(FL_COURIER, 14);
   cout << fl_width('A') << endl;
+
+  align_offsets(0);
 }
 
 
@@ -34,9 +35,11 @@ SeqView::draw()
   int window_length, win_i;
   int rc_1 = 0; 
   int rc_2 = 0;
+  string sub_seq;
+  int offset1, offset2;
+  float center1, center2;
 
 
-  window_length = base_window_len;
   fl_font(FL_COURIER, 14);
   fl_color(FL_BLACK);
   //blatantly stolen from FR2
@@ -45,52 +48,149 @@ SeqView::draw()
   y_loc = 10;
   for(seq_i = seq_num-1; seq_i >= 0; --seq_i)
   {
-    //a_seq = a_seq;
-    fl_draw((*S)[seq_i].c_seq(), 0+x(), y_loc+y()); 
+    sub_seq = (*S)[seq_i].seq();
+    sub_seq = sub_seq.substr(seq_align_offsets[seq_i]);
+    fl_draw(sub_seq.c_str(), 0+x(), y_loc+y()); 
     y_loc += y_seq_incre;
   }
 
   fl_color(FL_RED);
   fl_line_style(FL_SOLID, 1, NULL);
-  //fl_line(x(),10+y(),20+x(),y_max/2 + y());
-
-      //fl_color(FL_RED);
-
 
-  for(pathz_i = P->pathz.begin(); pathz_i != P->pathz.end(); ++pathz_i)
+  for(pathz_i = P.begin(); pathz_i != P.end(); ++pathz_i)
   {
     a_path = *pathz_i;
     y_loc = 0;
-    for(i2 = seq_num - 1; i2 > 0; i2--)
+    window_length = a_path[0];
+    for(i2 = seq_num; i2 > 1; i2--)
     {
       // RC case handling
 
-      // ugh, and xor...only want black if one of the nodes is rc
+      // ugh, and xor...only want rc coloring if just one of the nodes is rc
       if ( ((a_path[i2] < 0) || (a_path[i2-1] < 0)) &&
            !((a_path[i2] < 0) && (a_path[i2-1] < 0)) )
         fl_color(FL_BLUE);
 
+      offset1 = seq_align_offsets[i2-1];
+      offset2 = seq_align_offsets[i2-2];
+      center1 = 0.5;
+      center2 = 0.5;
+
       // no matter the case, any RC node needs an offset
       if (a_path[i2] < 0)
+      {
         rc_1 = window_length;        
+        offset1 = -offset1;
+        center1 = -center1;
+      }
       if (a_path[i2-1] < 0)
+      {
         rc_2 = window_length;        
-      // normal case
+        offset2 = -offset2;
+        center2 = -center2;
+      }
+
+      // draw the line for each bp in this window that actually matches
       for(win_i = 0; win_i < window_length; win_i++)
-        fl_line(abs((int)((a_path[i2]-rc_1+win_i+x()+0.5)*ch_width)),
-                y_loc + 10+y(),
-                abs((int)((a_path[i2-1]-rc_2+win_i+x()+0.5)*ch_width)),
-                y_loc+y_seq_incre+y());
+      {
+        x_start = abs((int) (a_path[i2]-rc_1+win_i-offset1) );
+        x_end = abs((int) (a_path[i2-1]-rc_2+win_i-offset2) );
+        fl_line( (x_start+center1)*ch_width, y_loc + 10,
+                 (x_end+center2)*ch_width, y_loc+y_seq_incre );
+      }
       y_loc += y_seq_incre;
-      //if ((a_path[i2] < 0) || (a_path[i2-1] < 0))
-      //{
-        fl_color(FL_RED);
-        rc_1 = 0;
-        rc_2 = 0;
-        //}
+      fl_color(FL_RED);
+      rc_1 = 0;
+      rc_2 = 0;
     }
   }
 }
+
+void
+SeqView::debug_draw()
+{
+  int seq_i;
+  Sequence a_seq;
+  int i, y_loc;
+  vector<int> a_path;
+  list<vector<int> >::iterator pathz_i;
+  int i2;
+  int x_start, y_start, x_end, y_end;
+  double ch_width;
+  int window_length, win_i;
+  int rc_1 = 0; 
+  int rc_2 = 0;
+  string sub_seq;
+
+
+  cout << "debugging...\n";
+  y_loc = 10;
+  for(seq_i = seq_num-1; seq_i >= 0; --seq_i)
+  {
+    sub_seq = (*S)[seq_i].seq();
+    cout << sub_seq << endl;
+    sub_seq = sub_seq.substr(seq_align_offsets[seq_i]);
+    cout << sub_seq << endl;
+    //fl_draw(sub_seq.c_str(), 0+x(), y_loc+y());     
+    y_loc += y_seq_incre;
+  }
+}
+
+void
+SeqView::align_offsets(int align_num)
+{
+  list<vector<int> >::iterator pathz_i;
+  int i;
+
+
+  if (P.begin() == P.end())
+    cout << "crud....\n";
+  pathz_i = P.begin();
+  while(pathz_i != P.end())
+  {
+    //cout << (*pathz_i)[0] << endl;
+    ++pathz_i;
+  }
+
+  // find the path specified
+  i = 0;
+  pathz_i = P.begin();
+  cout << "fee\n";
+  while( (i < align_num) && (pathz_i != P.end()) )
+  {
+  cout << "fie\n";
+    ++i;
+    ++pathz_i;
+  }
+
+  cout << "foe\n";
+  // now set the alignment offsets - basically where the path starts
+  seq_align_offsets.clear();
+  for(i = 0; i < seq_num; i++)
+  {
+    cout << (*pathz_i)[i+1] << endl;
+    seq_align_offsets.push_back( (*pathz_i)[i+1] );
+  }
+  cout << "fum\n";
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 /*
       x_start = (int) ();
       y_start = ;
index aed014024ad5a942f56ca250f94e18b9c490b8f8..58e615107b8f99c99683c2a7143e8c36f1085670 100644 (file)
@@ -16,11 +16,13 @@ class SeqView : public Fl_Box
 
     //this data is passed as pointers to the instantiated classes
     vector<Sequence> *S;
-    Nway_Paths *P;
+    // list of paths in selection box
+    list<vector<int> > P;
     vector<int> seq_lens;
 
     int x_max, y_max;
     int y_seq_incre;
+    vector<int> seq_align_offsets;
 
   public:
     SeqView(int x_top,int y_top,int x_bot,int y_bot) :
@@ -30,6 +32,9 @@ class SeqView : public Fl_Box
         y_max = y_bot;
     }
              
-    void setup(string, int, int, vector<Sequence> *, Nway_Paths *, vector<int>);
+    void setup(string, int, vector<Sequence> *some_seqs, 
+               list<vector<int> > some_paths, vector<int>);
     void draw();
+    void align_offsets(int align_num);
+    void debug_draw();
 };
index ae9b69a975a70927ba8ed2aab369e13c502c21ec..fd760b8364ff66c878b2bc91f77a74f41b8f183d 100644 (file)
@@ -16,9 +16,11 @@ Nway_Paths::Nway_Paths()
 }
 
 void
-Nway_Paths::setup(int sp_num)
+Nway_Paths::setup(int sp_num, int w, int t)
 {
   species_num = sp_num;
+  threshold = t;
+  win_size = w;
   pathz.clear();
 }
 
@@ -53,10 +55,13 @@ Nway_Paths::path_search(vector<vector<FLPs> > all_comparisons, vector<int> path,
 
     if (trans_check_good)
     {
+      // this makes sure path nodes are recorded with RC status relative to
+      // the base species
       if ( path[depth-1] >= 0)
         path.push_back(*new_nodes_i);
       else
         path.push_back(*new_nodes_i * -1);
+
       if (depth < species_num - 1)
         path_search(all_comparisons, path, depth + 1);
       else
@@ -106,6 +111,56 @@ Nway_Paths::find_paths_r(vector<vector<FLPs> > all_comparisons)
   }
 }
 
+// dumbly goes thru and combines path windows exactly adjacent (ie + 1 index)
+// doesn't deal with interleaved adjacency
+void
+Nway_Paths::simple_refine()
+{
+  // ext_path remembers the first window set in an extending path
+  vector<int> ext_path, cur_path, next_path, new_path;
+  list<vector<int> >::iterator pathz_i;
+  int i2, win_ext_len = 0;
+  bool extending;
+
+
+  refined_pathz.clear();
+
+  pathz_i = pathz.begin();
+  ext_path = *pathz_i; 
+  while(pathz_i != pathz.end())
+  {
+    // keep track of current path and advance to next path
+    cur_path = *pathz_i;
+    ++pathz_i;
+    next_path = *pathz_i;
+    // if node for each seq is equal to the next node+1 then for all
+    // sequences then we are extending
+    extending = true;
+    for(i2 = 0; i2 < species_num; i2++)
+    {
+      //cout << cur_path[i2] << " vs " << next_path[i2] << endl; 
+      if (cur_path[i2] + 1 != next_path[i2])
+        extending = false;
+    }
+    if (extending)
+    {
+      //cout << "Raaaawwwrrr!  I am extending\n";
+      win_ext_len++;
+    }
+    else
+    {
+      // add the extend window length as first element and add as refined
+      new_path = ext_path;
+      new_path.insert(new_path.begin(),win_size + win_ext_len);
+      refined_pathz.push_back(new_path);
+      // reset stuff
+      win_ext_len = 0;
+      ext_path = next_path;
+    }
+  }
+}
+
 
 void
 Nway_Paths::add_path(vector<int> loaded_path)
@@ -124,27 +179,122 @@ Nway_Paths::save(string save_file_path)
 
   save_file.open(save_file_path.c_str(), ios::out);
 
-  //save_file << "<Seqcomp type=" << ana_type << " win=" << window_size;
-  //save_file << " thres=" << hard_threshold << ">\n";
-
-  path_i = pathz.begin();
-  paths_end = pathz.end();
-  if (path_i == paths_end)
-    cout << "Arrrrrrgggghhhhhh\n";
-  while(path_i != paths_end)
+  save_file << "<Mussa type=flp seq_num=" << species_num;
+  save_file << " win=" << win_size;
+  // add a function para new_thres defaults to -1 to later deal with
+  // reanalysis with higher thres - if statement whether to record base thres
+  // or new thres (ie if -1, then base)
+  save_file << " thres=" << threshold << " >\n";
+
+  path_i = refined_pathz.begin();
+  paths_end = refined_pathz.end();
+  //path_i = pathz.begin();
+  //paths_end = pathz.end();
+  while (path_i != paths_end)
   {
     a_path = *path_i;
     //cout << a_path.size() << endl;
-    for(i = 0; i < species_num; ++i)
-      save_file << i << "," << a_path[i] << " ";
+    //first entry is the window length of the windows in the path
+    save_file << a_path[0] << ":";
+    for(i = 1; i <= species_num; ++i)
+    {
+      save_file << a_path[i];
+      if (i != species_num)
+        save_file << ",";
+    }
     save_file << endl;
     ++path_i;
   }
 
-//save_file << "</Mussa>\n";
+  save_file << "</Mussa>\n";
   save_file.close();
 }
 
+/*
+  if (path_i == paths_end)
+    cout << "Arrrrrrgggghhhhhh\n";
+*/
+
+int
+Nway_Paths::load(string load_file_path)
+{
+  fstream load_file;
+  string file_data_line, header_data, data, path_node, path_length;
+  int i, space_split_i, equal_split_i, comma_split_i, colon_split_i;
+  vector<int> loaded_path;
+
+
+  load_file.open(load_file_path.c_str(), ios::in);
+
+  // get header data
+  // grab mussa tag - discard for now...maybe check in future...
+  getline(load_file,file_data_line);
+  space_split_i = file_data_line.find(" ");
+  file_data_line = file_data_line.substr(space_split_i+1);
+  // grab type tag - need in future to distinguish between flp and vlp paths
+  space_split_i = file_data_line.find(" ");
+  file_data_line = file_data_line.substr(space_split_i+1);
+  // get species/seq number
+  space_split_i = file_data_line.find(" ");
+  header_data = file_data_line.substr(0,space_split_i); 
+  equal_split_i = header_data.find("=");
+  data = file_data_line.substr(equal_split_i+1); 
+  species_num = atoi (data.c_str());
+  file_data_line = file_data_line.substr(space_split_i+1);
+  // get window size
+  space_split_i = file_data_line.find(" ");
+  header_data = file_data_line.substr(0,space_split_i); 
+  equal_split_i = header_data.find("=");
+  data = file_data_line.substr(equal_split_i+1); 
+  win_size = atoi (data.c_str());
+  file_data_line = file_data_line.substr(space_split_i+1);
+  // get window size
+  space_split_i = file_data_line.find(" ");
+  header_data = file_data_line.substr(0,space_split_i); 
+  equal_split_i = header_data.find("=");
+  data = file_data_line.substr(equal_split_i+1); 
+  threshold = atoi (data.c_str());
+  file_data_line = file_data_line.substr(space_split_i+1);
+
+  cout << "seq_num=" << species_num << " win=" << win_size;
+  cout << " thres=" << threshold << endl;
+
+  // clear out the current data
+  refined_pathz.clear();
+
+  int temp;
+
+  getline(load_file,file_data_line);
+  while ( (!load_file.eof()) && (file_data_line != "</Mussa>") )
+  {
+    if (file_data_line != "")
+    {
+      loaded_path.clear();
+      colon_split_i = file_data_line.find(":");
+      path_length = file_data_line.substr(0,colon_split_i); 
+      loaded_path.push_back(atoi (path_length.c_str()));
+      file_data_line = file_data_line.substr(colon_split_i+1);
+      for(i = 0; i < species_num; i++)
+      {
+        comma_split_i = file_data_line.find(",");
+        path_node = file_data_line.substr(0, comma_split_i); 
+        temp = atoi (path_node.c_str());
+        loaded_path.push_back(temp);
+        file_data_line = file_data_line.substr(comma_split_i+1);
+      }
+      refined_pathz.push_back(loaded_path);
+    }
+    getline(load_file,file_data_line);
+  }
+
+
+  load_file.close();
+
+  return species_num;
+}
+
+
+
 void
 Nway_Paths::save_old(string save_file_path)
 {
index 43529247eb2489c3b9e031b6af1f1951e9cbb7cd..3e5bbe2140d809e84633e04bfcb91ccd9f2bff1b 100644 (file)
@@ -14,20 +14,22 @@ class Nway_Paths
     int threshold;
     int win_size;
     list<vector<int> > pathz;
+    list<vector<int> > refined_pathz;
 
 
   public:
     Nway_Paths();
-    void setup(int sp_num);
+    void setup(int sp_num, int w, int t);
     void find_paths_r(vector<vector<FLPs> > all_comparisons);
     void path_search(vector<vector<FLPs> > all_comparisons, vector<int> path, int depth);
+    void simple_refine();
     void save(string save_file_path);
-    void save_old(string save_file_path);
-    void load(string load_file_path);
+    int load(string load_file_path);
     void add_path(vector<int> loaded_path);
+
     void find_paths(vector<vector<FLPs> > all_comparisons);
     void refine();
-    void print();
 
-    list<vector<int> > scaled_paths(float scale_factor);
+    void save_old(string save_file_path);
+    void print();
 };
diff --git a/mussa_test.cc b/mussa_test.cc
new file mode 100644 (file)
index 0000000..13b4021
--- /dev/null
@@ -0,0 +1,94 @@
+#include "mussa_class.hh"
+#include "time.h"
+
+int main(int argc, char **argv) 
+{
+  Mussa overlord;
+  char * para_file, * ana_file;
+  string  an_arg;
+  time_t t1, t2, begin, end;
+  double setuptime, seqloadtime, seqcomptime, nwaytime, savetime, totaltime;
+  string run_mode;
+  int seq_num;
+
+
+  begin = time(NULL);
+
+  // need more sophisticated arg reading structure
+  // support -w and -t override options...other stuff??
+  an_arg = * ++argv;
+
+  if (an_arg == "-v")
+  {
+    ana_file = * ++argv;
+    run_mode = "v";
+  }
+  else
+  {
+    para_file = (char*) an_arg.c_str();
+    run_mode = "f";
+  }
+
+  if (run_mode == "v")
+  {
+    cout << "load file = " << ana_file << endl;
+    overlord.load(ana_file);
+    //overlord.FuckingPieceOfShit(1000,500);
+  }
+  else if (run_mode == "f")
+  {
+    t1 = time(NULL);
+    overlord.setup(para_file);
+    t2 = time(NULL);
+    setuptime = difftime(t2, t1);
+
+
+    cout << "fee\n";
+    t1 = time(NULL);
+    overlord.get_Seqs();
+    t2 = time(NULL);
+    seqloadtime = difftime(t2, t1);
+
+
+    cout << "fie\n";
+    t1 = time(NULL);
+    overlord.seqcomp();
+    t2 = time(NULL);
+    seqcomptime = difftime(t2, t1);
+
+
+    cout << "foe\n";
+    t1 = time(NULL);
+    overlord.nway();
+    t2 = time(NULL);
+    nwaytime = difftime(t2, t1);
+
+
+    cout << "fum\n";
+    t1 = time(NULL);
+    overlord.save_old();
+    t2 = time(NULL);
+    savetime = difftime(t2, t1);
+
+    end = time(NULL);
+    totaltime = difftime(end, begin);
+
+    cout << "setup\tseqload\tseqcomp\tnway\tsave\ttotal\n";
+    cout << setuptime << "\t"; 
+    cout << seqloadtime << "\t";
+    cout << seqcomptime << "\t";
+    cout << nwaytime << "\t";
+    cout << savetime << "\t";
+    cout << totaltime << "\n";
+  }
+}
+
+
+
+
+/*
+      cout << "fee\n";
+      cout << "fie\n";
+      cout << "foe\n";
+      cout << "fum\n";
+*/
index d0a19ecc6cba5dad40be999bb19d78878412ca44..5eaba25843d36cd16fa883b83dc1113d45ec084d 100644 (file)
@@ -1,6 +1,6 @@
 #include "flp.hh"
 #include <time.h>
-#include <iomanip.h>
+#include <iomanip>
 
 int main(int argc, char **argv) 
 {
index 199a4abc5fca91dc08f6c84e2f2660d6bfb2b704..3a25656c8ba099ccf93d3abcfdd9732fa1df4a4a 100644 (file)
@@ -11,12 +11,18 @@ Sequence::Sequence()
 
 
 void
-Sequence::load_fasta(string file_path, int seq_num)
+Sequence::load_fasta(string file_path, int seq_num, 
+                     int start_index, int end_index)
 {
   fstream data_file;
   string file_data_line;
   int header_counter = 0;
   bool read_seq = true;
+  string rev_comp;
+  char conversionTable[257];
+  int seq_i, table_i, len;
+  string sequence_raw;
+  char * seq_tmp;             // holds sequence during basic filtering
 
 
   data_file.open(file_path.c_str(), ios::in);
@@ -32,32 +38,122 @@ Sequence::load_fasta(string file_path, int seq_num)
 
   header = file_data_line.substr(1);
 
+  sequence_raw = "";
+
   while ( !data_file.eof() && read_seq )
   {
     getline(data_file,file_data_line);
     if (file_data_line.substr(0,1) == ">")
       read_seq = false;
-    else sequence += file_data_line;
+    else sequence_raw += file_data_line;
   }
 
   data_file.close();
 
-  // need more sequence filtering for non AGCTN and convert to N
-  transform(sequence.begin(), sequence.end(), sequence.begin(), toupper);
+  // sequence filtering for upcasing agctn and convert non AGCTN to N
+
+  len = sequence_raw.length();
+  seq_tmp = (char*)sequence_raw.c_str();
+
+  sequence = "";
+
+  // Make a conversion table
+
+  // everything we don't specify below will become 'N'
+  for(table_i=0; table_i < 256; table_i++)
+  {
+    conversionTable[table_i] = 'N';
+  }
+  // add end of string character for printing out table for testing purposes
+  conversionTable[256] = '\0';
+
+  // we want these to map to themselves - ie not to change
+  conversionTable['A'] = 'A';
+  conversionTable['T'] = 'T';
+  conversionTable['G'] = 'G';
+  conversionTable['C'] = 'C';
+  // this is to upcase
+  conversionTable['a'] = 'A';
+  conversionTable['t'] = 'T';
+  conversionTable['g'] = 'G';
+  conversionTable['c'] = 'C';
+
+  // finally, the actual conversion loop
+  for(seq_i = 0; seq_i < len; seq_i++)
+  {
+    table_i = (int) seq_tmp[seq_i];
+    sequence += conversionTable[table_i];
+  }
+
+
+  // Lastly, if subselection of the sequence was specified we keep cut out
+  // and only keep that part
+
+  // end_index = 0 means no end was specified, so cut to the end 
+  if (end_index == 0)
+    end_index = len;
+
+  // start_index = 0 if no start was specified
+  sequence = sequence.substr(start_index, end_index - start_index);
 }
 
 
+  // this doesn't work properly under gcc 3.x ... it can't recognize toupper
+  //transform(sequence.begin(), sequence.end(), sequence.begin(), toupper);
+
+
 void
 Sequence::load_annot(string file_path)
 {
   fstream data_file;
   string file_data_line;
   annot an_annot;
+  int space_split_i;
+  string annot_value;
 
+  annots.clear();
   data_file.open(file_path.c_str(), ios::in);
 
+  getline(data_file,file_data_line);
+  species = file_data_line;
+
+  while ( !data_file.eof() )
+  {
+    getline(data_file,file_data_line);
+    if (file_data_line != "")
+    {
+      // need to get 4 values...almost same code 4 times...
+      // get annot start index
+      space_split_i = file_data_line.find(" ");
+      annot_value = file_data_line.substr(0,space_split_i);
+      an_annot.start = atoi (annot_value.c_str());
+      file_data_line = file_data_line.substr(space_split_i+1);
+      // get annot end index
+      space_split_i = file_data_line.find(" ");
+      annot_value = file_data_line.substr(0,space_split_i);
+      an_annot.end = atoi (annot_value.c_str());
+      file_data_line = file_data_line.substr(space_split_i+1);
+      // get annot start index
+      space_split_i = file_data_line.find(" ");
+      annot_value = file_data_line.substr(0,space_split_i);
+      an_annot.name = annot_value;
+      file_data_line = file_data_line.substr(space_split_i+1);
+      // get annot start index
+      space_split_i = file_data_line.find(" ");
+      annot_value = file_data_line.substr(0,space_split_i);
+      an_annot.type = annot_value;
+      annots.push_back(an_annot);
+    }
+  }
+
   data_file.close();
 
+  list<annot>::iterator list_i;
+  for(list_i = annots.begin(); list_i != annots.end(); ++list_i)
+  {
+    cout << (*list_i).start << "," << (*list_i).end << "\t";
+    cout << (*list_i).name << "\t" << (*list_i).type << endl;
+  }
 }
 
 
@@ -153,3 +249,164 @@ Sequence::clear()
   species_num = -4;
   annots.clear();
 }
+
+void
+Sequence::save(fstream &save_file)
+               //string save_file_path)
+{
+  //fstream save_file;
+  list<annot>::iterator annots_i;
+  int i;
+
+  // not sure why, or if i'm doing something wrong, but can't seem to pass
+  // file pointers down to this method from the mussa control class
+  // so each call to save a sequence appends to the file started by mussa_class
+  //save_file.open(save_file_path.c_str(), ios::app);
+
+  save_file << "<Sequence>" << endl;
+  save_file << sequence << endl;
+  save_file << "</Sequence>" << endl;
+
+  save_file << "<Annotations>" << endl;
+  for (annots_i = annots.begin(); annots_i != annots.end(); ++annots_i)
+  {
+    save_file << annots_i->start << " " << annots_i->end << " " ;
+    save_file << annots_i->name << " " << annots_i->type << endl;
+  }
+  save_file << "</Annotations>" << endl;
+  //save_file.close();
+}
+
+void
+Sequence::load_museq(string load_file_path, int seq_num)
+{
+  fstream load_file;
+  string file_data_line;
+  int seq_counter;
+  annot an_annot;
+  int space_split_i;
+  string annot_value;
+
+  annots.clear();
+  load_file.open(load_file_path.c_str(), ios::in);
+
+  seq_counter = 0;
+  // search for the seq_num-th sequence 
+  while ( (!load_file.eof()) && (seq_counter < seq_num) )
+  {
+    getline(load_file,file_data_line);
+    if (file_data_line == "<Sequence>")
+      seq_counter++;
+  }
+  getline(load_file, file_data_line);
+  //cout << "*fee\n";
+  sequence = file_data_line;
+  //cout << "*fie\n";
+  getline(load_file, file_data_line);
+  getline(load_file, file_data_line);
+  if (file_data_line == "<Annotations>")
+  {
+    while ( (!load_file.eof())  && (file_data_line != "</Annotations>") )
+    {
+      //cout << "*foe\n";
+      getline(load_file,file_data_line);
+      if ((file_data_line != "") && (file_data_line != "</Annotations>"))  
+      {
+        //cout << "*fum\n";
+        // need to get 4 values...almost same code 4 times...
+        // get annot start index
+        space_split_i = file_data_line.find(" ");
+        annot_value = file_data_line.substr(0,space_split_i);
+        an_annot.start = atoi (annot_value.c_str());
+        file_data_line = file_data_line.substr(space_split_i+1);
+        // get annot end index
+        space_split_i = file_data_line.find(" ");
+        annot_value = file_data_line.substr(0,space_split_i);
+        an_annot.end = atoi (annot_value.c_str());
+        file_data_line = file_data_line.substr(space_split_i+1);
+        // get annot start index
+        space_split_i = file_data_line.find(" ");
+        annot_value = file_data_line.substr(0,space_split_i);
+        an_annot.name = annot_value;
+        file_data_line = file_data_line.substr(space_split_i+1);
+        // get annot start index
+        space_split_i = file_data_line.find(" ");
+        annot_value = file_data_line.substr(0,space_split_i);
+        an_annot.type = annot_value;
+        annots.push_back(an_annot);
+      }
+    }
+  }
+  load_file.close();
+}
+
+
+list<int>
+Sequence::find_motif(string a_motif)
+{
+  char * seq_c;
+  int seq_i, motif_i, motif_len;
+  list<int> motif_match_starts;
+
+  motif_match_starts.clear();
+  // faster to loop thru the sequence as a old c string (ie char array)
+  seq_c = (char*)sequence.c_str();
+  motif_len = a_motif.length();
+
+  
+  for (seq_i = 0; seq_i < length; seq_i++)
+  {
+
+    // this is pretty much a straight translation of Nora's python code
+    // to match iupac letter codes
+    if (a_motif[motif_i] == seq_c[seq_i])
+      motif_i++;
+    else if (seq_c[seq_i] =='N')
+      motif_i++;
+    else if ((a_motif[motif_i] =='M') && 
+             ((seq_c[seq_i]=='A') || (seq_c[seq_i]=='C')))
+      motif_i++;
+    else if ((a_motif[motif_i] =='R') && 
+             ((seq_c[seq_i]=='A') || (seq_c[seq_i]=='G')))
+      motif_i++;
+    else if ((a_motif[motif_i] =='W') && 
+             ((seq_c[seq_i]=='A') || (seq_c[seq_i]=='T')))
+      motif_i++;
+    else if ((a_motif[motif_i] =='S') && 
+             ((seq_c[seq_i]=='C') || (seq_c[seq_i]=='G')))
+      motif_i++;
+    else if ((a_motif[motif_i] =='Y') && 
+             ((seq_c[seq_i]=='C') || (seq_c[seq_i]=='T')))
+      motif_i++;
+    else if ((a_motif[motif_i] =='K') && 
+             ((seq_c[seq_i]=='G') || (seq_c[seq_i]=='T')))
+      motif_i++;
+    else if ((a_motif[motif_i] =='V') && 
+             ((seq_c[seq_i]=='A') || (seq_c[seq_i]=='C') ||
+              (seq_c[seq_i]=='G')))
+      motif_i++;
+    else if ((a_motif[seq_i] =='H') && 
+             ((seq_c[seq_i]=='A') || (seq_c[seq_i]=='C') ||
+              (seq_c[seq_i]=='T')))
+      motif_i++;
+    else if ((a_motif[motif_i] =='D') &&
+             ((seq_c[seq_i]=='A') || (seq_c[seq_i]=='G') ||
+              (seq_c[seq_i]=='T')))
+      motif_i++;
+    else if ((a_motif[motif_i] =='B') &&
+             ((seq_c[seq_i]=='C') || (seq_c[seq_i]=='G') ||
+              (seq_c[seq_i]=='T')))
+      motif_i++;
+    else
+      motif_i = 0;
+
+    // end Nora stuff, now we see if a match is found this pass
+    if (motif_i == motif_len)
+    {
+      motif_match_starts.push_back(seq_i - motif_len + 1);
+      motif_i = 0;
+    }
+  }
+
+  return motif_match_starts;
+}
index afbb9da21e65c1454b9d299b19380d119b6e27f5..088eac842f329afe3c2a69d28814fac4c577b4b8 100644 (file)
@@ -7,12 +7,20 @@
 #include <list>
 #include <vector>
 #include <string>
-#include <fstream.h>
+#include <fstream>
 #include <stdlib.h>
 #include <algorithm>
 
 // Sequence data class
 
+using namespace std;
+
+struct annot
+{
+  int start, end;
+  string name, type;
+};
+
 class Sequence
 {
   friend class ConnView;
@@ -24,11 +32,6 @@ class Sequence
     string species;
     int species_num; 
 
-    struct annot
-    {
-      int start, end;
-      string name, type;
-    };
 
     list<annot> annots;
 
@@ -37,7 +40,8 @@ class Sequence
 
   public:
     Sequence();
-    void load_fasta(string file_path, int seq_num);
+    void load_fasta(string file_path, int seq_num, 
+                    int start_index, int end_index);
     void load_annot(string file_path);
     string seq();
     const char * c_seq();
@@ -45,6 +49,9 @@ class Sequence
     int len();
     string hdr();
     void set_seq(string a_seq);
+    list<int> find_motif(string a_motif);
   //  string species();
     void clear();
+    void save(fstream &save_file);
+    void load_museq(string load_file_path, int seq_num); 
 };