#CFLAGS=-ftemplate-depth-20
CFLAGS=-O -Winline -ftemplate-depth-20
+# -ggdb
LDFLAGS= -lfltk
+# this is what's needed on titus's redhat machine
+# tho he may have things setup oddly to accomodate his development setup
+#LDFLAGS= -lfltk -L/usr/X11R6/lib -lX11
+
+
#all: seqcomp
sequence.o : sequence.cc sequence.hh
g++ $(CFLAGS) -c mussa_class.cc
mussa : sequence.o flp.o flp_seqcomp.o mussa_nway.o mussa_gui_seq.o mussa_gui_conn.o mussa_class.o mussa.cc
- g++ $(CFLAGS) $(LDFLAGS) -o mussa mussa.cc mussa_class.o mussa_gui_conn.o mussa_gui_seq.o mussa_nway.o flp_seqcomp.o flp.o sequence.o
+ g++ $(CFLAGS) -o mussa mussa.cc mussa_class.o mussa_gui_conn.o mussa_gui_seq.o mussa_nway.o flp_seqcomp.o flp.o sequence.o $(LDFLAGS)
+
+
+
+annot_test : sequence.o annot_test.cc
+ g++ $(CFLAGS) -o annot_test annot_test.cc sequence.o
#match_list_type.o : modules/match_list_type.c modules/match_list_type.h \
--- /dev/null
+#include "sequence.hh"
+
+int main(int argc, char **argv)
+{
+ Sequence seqA;
+ char * seqA_file, * seqA_annot;
+ cout << "fee fie foe fum" << endl;
+
+
+ seqA_file = * ++argv;
+ seqA_annot = * ++argv;
+
+ seqA.load_fasta(seqA_file, 1);
+ seqA.load_annot(seqA_annot);
+ //seqA_len = seqA.len();
+}
+
+/*
+ cout << "fee\n";
+ cout << "fie\n";
+ cout << "foe ";
+ cout << "fum\n";
+*/
--- /dev/null
+# name of anaylsis directory and stem for associated files
+ANA_NAME mck3test
+
+# if APPEND vars true, a _wXX and/or _tYY added to analysis name
+# where XX = WINDOW and YY = THRESHOLD
+# Highly recommeded with use of command line override of WINDOW or THRESHOLD
+APPEND_WIN true
+APPEND_THRES true
+
+# how many sequences are being analyzed
+SEQUENCE_NUM 3
+
+# first sequence info
+SEQUENCE seq/mouse_mck_pro.fa
+# ANNOTATION test.annot
+SEQ_START 600
+
+# the second sequence info
+SEQUENCE seq/human_mck_pro.fa
+# ANNOTATION test.annot
+SEQ_START 600
+# SEQ_END 2400
+
+# third sequence info
+SEQUENCE seq/rabbit_mck_pro.fa
+# ANNOTATION test.annot
+
+# analyses parameters: command line args -w -t will override these
+WINDOW 30
+THRESHOLD 20
--- /dev/null
+<Mussa_Sequence>
+<Sequence>
+CCATCCTGGTCTATAGAGAGAGTTCCAGAACAGCCAGGGCTACAGATAAACCCATCTGGAAAAACAAAGTTGAATGACCCAAGAGGGGTTCTCAGAGGGTGGCGTGTGCTCCCTGGCAAGCCTATGACATGGCCGGGGCCTGCCTCTCTCTGCCTCTGACCCTCAGTGGCTCCCATGAACTCCTTGCCCAATGGCATCTTTTTCCTGCGCTCCTTGGGTTATTCCAGTCTCCCCTCAGCATTCCTTCCTCAGGGCCTCGCTCTTCTCTCTGCTCCCTCCTTGCACAGCTGGCTCTGTCCACCTCAGATGTCACAGTGCTCTCTCAGAGGAGGAAGGCACCATGTACCCTCTGTTTCCCAGGTAAGGGTTCAATTTTTAAAAATGGTTTTTTGTTTGTTTGTTTGTTTGTTTGTTTGTTTGTTTTTCAAGACAGGGCTCCTCTGTGTAGTCCTAACTGTCTTGAAACTCCCTCTGTAGACCAGGTCGACCTCGAACTCTTGAAACCTGCCACGGACCACCCAGTCAGGTATGGAGGTCCCTGGAATGAGCGTCCTCGAAGCTAGGTGGGTAAGGGTTCGGCGGTGACAAACAGAAACAAACACAGAGGCAGTTTGAATCTGAGTGTATTTTGCAGCTCTCAAGCAGGGGATTTTATACATAAAAAAAAAAAAAAAAAAAAAACCAAACATTACATCTCTTAGAAACTATATCCAATGAAACAATCACAGATACCAACCAAAACCATTGGGCAGAGTAAAGCACAAAAATCATCCAAGCATTACAACTCTGAAACCATGTATTCAGTGAATCACAAACAGAACAGGTAACATCATTATTAATATAAATCACCAAAATATAACAATTCTAAAAGGATGTATCCAGTGGGGGCTGTCGTCCAAGGCTAGTGGCAGATTTCCAGGAGCAGGTTAGTAAATCTTAACCACTGAACTAACTCTCCAGCCCCATGGTCAATTATTATTTAGCATCTAGTGCCTAATTTTTTTTTATAAATCTTCACTATGTAATTTAAAACTATTTTAATTCTTCCTAATTAAGGCTTTCTTTACCATATACCAAAATTCACCTCCAATGACACACGCGTAGCCATATGAAATTTTATTGTTGGGAAAATTTGTACCTATCATAATAGTTTTGTAAATGATTTAAAAAGCAAAGTGTTAGCCGGGCGTGGTGGCACACGCCTTTAATCCCTGCACTCGGGAGGCAGGGGCAGGAGGATTTCTGAGTTTGAGGCCAGCCTGGTCTACAGAGTGAGTTCCAGGACAGCCAGGGCTACACAGAGAAACCCTGTCTCGAACCCCCCACCCCCCAAAAAAAGCAAAGTGTTGGTTTCCTTGGGGATAAAGTCATGTTAGTGGCCCATCTCTAGGCCCATCTCACCCATTATTCTCGCTTAAGATCTTGGCCTAGGCTACCAGGAACATGTAAATAAGAAAAGGAATAAGAGAAAACAAAACAGAGAGATTGCCATGAGAACTACGGCTCAATATTTTTTCTCTCCGGCGAAGAGTTCCACAACCATCTCCAGGAGGCCTCCACGTTTTGAGGTCAATGGCCTCAGTCTGTGGAACTTGTCACACAGATCTTACTGGAGGTGGTGTGGCAGAAACCCATTCCTTTTAGTGTCTTGGGCTAAAAGTAAAAGGCCCAGAGGAGGCCTTTGCTCATCTGACCATGCTGACAAGGAACACGGGTGCCAGGACAGAGGCTGGACCCCAGGAACACCTTAAACACTTCTTCCCTTCTCCGCCCCCTAGAGCAGGCTCCCCTCACCAGCCTGGGCAGAAATGGGGGAAGATGGAGTGAAGCCATACTGGCTACTCCAGAATCAACAGAGGGAGCCGGGGGCAATACTGGAGAAGCTGGTCTCCCCCCAGGGGCAATCCTGGCACCTCCCAGGCAGAAGAGGAAACTTCCACAGTGCATCTCACTTCCATGAATCCCCTCCTCGGACTCTGAGGTCCTTGGTCACAGCTGAGGTGCAAAAGGCTCCTGTCATATTGTGTCCTGCTCTGGTCTGCCTTCCACAGCTTGGGGGCCACCTAGCCCACCTCTCCCTAGGGATGAGAGCAGCCACTACGGGTCTAGGCTGCCCATGTAAGGAGGCAAGGCCTGGGGACACCCGAGATGCCTGGTTATAATTAACCCAGACATGTGGCTGCCCCCCCCCCCCCAACACCTGCTGCCTGAGCCTCACCCCCACCCCGGTGCCTGGGTCTTAGGCTCTGTACACCATGGAGGAGAAGCTCGCTCTAAAAATAACCCTGTCCCTGGTGGATCCAGGGTGAGGGGCAGGCTGAGGGCGGCCACTTCCCTCAGCCGCAGGTTTGTTTTCCCAAGAATGGTTTTTCTGCTTCTGTAGCTTTTCCTGTCAATTCTGCCATGGTGGAGCAGCCTGCACTGGGCTTCTGGGAGAAACCAAACCGGGTTCTAACCTTTCAGCTACAGTTATTGCCTTTCCTGTAGATGGGCGACTACAGCCCCACCCCCACCCCCGTCTCCTGTATCCTTCCTGGGCCTGGGGATCCTAGGCTTTCACTGGAAATTTCCCCCCAGGTGCTGTAGGCTAGAGTCACGGCTCCCAAGAACAGTGCTTGCCTGGCATGCATGGTTCTGAACCTCCAACTGCAAAAAATGACACATACCTTGACCCTTGGAAGGCTGAGGCAGGGGGATTGCCATGAGTGCAAAGCCAGACTGGGTGGCATAGTTAGACCCTGTCTCAAAAAACCAAAAACAATTAAATAACTAAAGTCAGGCAAGTAATCCTACTCGGGAGACTGAGGCAGAGGGATTGTTACATGTCTGAGGCCAGCCTGGACTACATAGGGTTTCAGGCTAGCCCTGTCTACAGAGTAAGGCCCTATTTCAAAAACACAAACAAAATGGTTCTCCCAGCTGCTAATGCTCACCAGGCAATGAAGCCTGGTGAGCATTAGCAATGAAGGCAATGAAGGAGGGTGCTGGCTACAATCAAGGCTGTGGGGGACTGAGGGCAGGCTGTAACAGGCTTGGGGGCCAGGGCTTATACGTGCCTGGGACTCCCAAAGTATTACTGTTCCATGTTCCCGGCGAAGGGCCAGCTGTCCCCCGCCAGCTAGACTCAGCACTTAGTTTAGGAACCAGTGAGCAAGTCAGCCCTTGGGGCAGCCCATACAAGGCCATGGGGCTGGGCAAGCTGCACGCCTGGGTCCGGGGTGGGCACGGTGCCCGGGCAACGAGCTGAAAGCTCATCTGCTCTCAGGGGCCCCTCCCTGGGGACAGCCCCTCCTGGCTAGTCACACCCTGTAGGCTCCTCTATATAACCCAGGGGCACAGGGGCTGCCCCCGGGTCAC
+</Sequence>
+<Annotations>
+300 600 Glorp Glorptype
+</Annotations>
+<Sequence>
+GGATCCTTCCTCCTTGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACTGCACCTGGCCTATTACCCTTCTCAGGCTCTGGAGTCCATCCTTCTGCTCTGTCTCCCTCAGTTCAATTGTTTTTTGTTTTTTGTTTTTTTTTTAGACACAGTCTCGCTCTGTCACCAAGGCTGGAGTGCAGCAGTGCGATCACAGCTCACCGCAGCCTCACCTCCCAGGCTCAAGTGATCCTCCCATCTCGGCCTCTGAGTAGCTGAGACTATAGGTGTGTCCACATGTCCGGCTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCGCGTTGGCCAGGGTGGTCTTGAACTCCTGAGCTCAAGCAATCCTCCTGCCTCAGCCTCCTTGTTTTGATTTTTAGATCCCACAAATAACTTGTGATGTTTGTCTTTCTATACCTGGTTCATTTAACATTTTCTTTTTCTTTTCTTTTCTTTTTTTTTTTTTTTGTGAGACTGAGTCTTGCTCTGTCACTCAGGCTGGAGGGCAATGGTGCATCTCAGCTCACTGCAACCTCCACCTCCTAGGTTCAAGCAATTCTTATGCCTCAGCCTCCTGGCTAGCTGGGATTACAGGCGTGTGTCACCATGCCAGGCTAATTTTTGTACTTTTAGTAGAGATGGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGGCCTCAAGTGATCCACCCGCCTCCGCCTCTGCCTCCCAAAGTGCTGGGATTACGGGCCTGAGCCACTGTGCCCGGCCCATCTAACATTTTCACTGTCAATCACAATGGGATTAAAACTCCTCCCACAGCCCCTAGGGACCATGGGTCTGCTCCTGTCTCCCCTCCAACCTCATCTTCTTCCTCCCACTCTCTCCTTGGCCCCATCTGCTCCAGTCCCCTGGCCTCCTTCCTGTCTGTCCTCAGATGTGCCCAGCCATTCTCACCTCAGCGCCTTTGCACCTGCTGTTCCCCCCAGAGCCGCACATGGCTGGCTCCCTGTTCTCCTTCAGGTCTCTGCTCAGATGTCATCTTCCCAAAGAGGCCTGCCTCGACCTCCCCTGCTGCTGTGCCGTCCCCTCATCTGTGACCCTCTTGCACTATCACCTCCAGGACGGCGGGGGTTTTGTGTTTTGTTGTAGCCTCAGGAAGTGCCTGATAGATCCCTGTTTCGAGACCAGTTCCATTTGGTTTTCTGGGCCTCAGTTTCCGTAACCGTGAAGGAGACCCTCGGCAATCTGAGCTTGCTGGGAAAGGGCTGGGCCCCATGTAAATATTTCTAAAGCACCCCTCTCCCCTCCCCCCTCAGATCAGGAGTCTGAGGGAGAGGCACAGAGGCTCCCTTTCTCTAAGCCAGTCCTCACCTGCCTAAGAAGATGTGAAGGAGACCCAGGAGACCCTGGGATAGGGAGGAACTCAGAGGGAAGGGACATTCTTTTCTTCGTCGCAATCCTGGGAGCTCCCTGGAGGAGGAGACCCGATCAGCCTGCAATCCTGGCGCGTCCCAGGAGGAGAAAGCGGCTTCCTCTATACTGTACTCTCCTCCACAGAACCCCCCTCTCAGCCCTGGAAGTCCTTGCTCACAGCCGAGGCGCCGAGAGCGCTTGCTCTGCCCAGATCTCGGCGAGTCTGCGCCCGCGCTCTGAACGGCGTCGCTGCCCAGCCCCCTTCCCCGGGAGGTGGGAGCGGCCACCCAGGGCCCCGTGGCTGCCCTTGTAAGGAGGCGAGGCCGAGGACACCCGAGACGCCCGGTTATAATTAACCAGGACACGTGGCGAACCCCCCTCCAACACCTGCCCCCGAACCCCCCCATACCCAGCGCCTCGGGTCTCGGCCTTTGCGGCAGAGGAGACAGCAAAGCGCCCTCTAAAAATAACTCCTTTCCCGGCGACCGAGACCCTCCCTGTCCCCGCACAGCGAAATCTCCCAGTGGCACCGAGGGGGCGAGGGTTAAGTGGGGGGGAGGGTGACCACCGCCTCCCACCCTTGCCCTGAGTTTGAATCTCTCCAACTCAGCCAGCCTCAGTTTCCCCTCCACTCAGTCCCTAGGAGGAAGGGGCGCCCAAGCGGGTTTCTGGGGTTAGACTGCCCTCCATTGCAATTGGTCCTTCTCCCGGCCTCTGCTTCCTCCAGCTCACAGGGTATCTGCTCCTCCTGGAGCCACACCTTGGTTCCCCGAGGTGCCGCTGGGACTCGGGTAGGGGTGAGGGCCCAGGGGCGACAGGGGGAGCCGAGGGCCACAGGAAGGGCTGGTGGCTGAAGGAGACTCAGGGGCCAGGGGACGGTGGCTTCTACGTGCTTGGGACGTTCCCAGCCACCGTCCCATGTTCCCGGCGGGGGCCAGCTGTCCCCACCGCCAGCCCAACTCAGCACTTGGTTAGGGTATCAGCTTGGTGGGGGCGTGAGCCCAGCCCTGGGGCGCTCAGCCCATACAAGGCCATGGGGCTGGGCGCAAAGCATGCCTGGGTTCAGGGTGGGTATGGTGCCGGAGCAGGGAGGTGAGAGGCTCAGCTGCCCTCCAGAACTCCTCCCTGGGGACAACCCCTCCCAGCCAATAGCACAGCCTAGGTCCCCCTATATAAGGCCACGGCTGCTGGCCCTTCCTTTGGGTCAGTGTCACCTCCAGGATACAGACAGCCCCCCTTCAGCCCAGCCCAGCCAGGTACTGCACGGGGCGGGAATCTGGGTGGGGGCCAGAGTAGGGGATTTCTGTGGGTGCTAGAGGCTTGGCTTGGGAAAGGGTCTGTGTGTCACCCCTTGCTCCACCAACATCCTCCTATACAAAGGCAGGTCGGTGCGTGGGAAGGTTGACCCTTGTGTGTCTGGGAGGCCCCTCCATCTGTGAGGCTGCCTGAACCCCCACTGGGACCTGTGATTTCTGCGGCACAG
+</Sequence>
+<Annotations>
+300 600 Glorp Glorptype
+</Annotations>
+<Sequence>
+CTCCCTGAGGGGAGTGCCCCGCTTAGCCCTCAGCCCTGGCACCCACCAGGCAGAGGAGGCAGCTCCCAGAGTCCCCGGCTGAGGGGGCCTGCCCATGTCCTCGCGAGTCGGGCACCCAGTCCCCTTCCCTGGGGGCCTCGGGTGCCTTGTAAGGAGGCAAGGCCCAGGACACCCGAGCTGCCGGCTATAATTAGCCTGGACACGTGGGCCCCCCGCCAGCACCTGCCCTGAGCCCCCCTACCCGCAGCCTTGGGTGCTGGGACTGCCGACAGCAGAGACCGCTCTAAAAATAGCTCCCTTCCTGGACGGCCCCTCCCAGCAGGCCCCCCACCCCCCGCTGAGGCTCACTGCTGGTGTCCCTGCCTGGTGCTCCCTAGGGTGTGCACCCACGGCAGGTGCAGAGGCAGGGGGCACCGCGGGGGGCTGGTAACAGCAGCAGGCCTAGGGGCCAGGGGACGGCGAGCTGTACGTGCCAGGACACCCCCAGCCACTGTCCCGTGCTCCCGGCGGGGGCCAGCTGTCCCCCGCCAGCCCGACTCAGCACTTGCTCTGCACATCGGCCTGGGGCGGCCCATACAAGGGCATGGGGCTGGGCACAAGGCACGCCCGCGTCCAGGTGGGCACTGTGCCCGGGCATGGTGGGGGGAGAGGCTGGGCTGCCTTCCAGGCCCCCTCCCTGGGGCCAGCCCCGCCCAGCCAGTGGCACAGCAGGGTCCCCTACATAAAGCCGGGGCCGCGGGCCCTGGATCTGCCTCAGTGTCGCCTGCAGGACACACGTAGCCCCGCTCAGCACAGCCCAGCCAG
+</Sequence>
+<Annotations>
+300 600 Glorp Glorptype
+</Annotations>
+</Mussa_Sequence>
--- /dev/null
+<Mussa type=flp seq_num=3 win=30 thres=20 >
+31:1895,1497,24
+30:1899,1459,28
+30:1901,1503,30
+30:1901,1461,30
+34:1904,1506,33
+47:2100,1711,130
+30:2123,1733,152
+34:2125,1735,154
+30:2131,1741,160
+30:2133,1743,162
+44:2148,1758,176
+30:2186,1793,208
+40:2261,1868,271
+31:3015,2264,424
+34:3071,2326,485
+31:3085,2339,498
+30:3086,-2346,499
+30:3087,2341,500
+30:3088,-2344,-503
+30:3088,-2344,501
+30:3089,-2343,-502
+30:3089,-2343,502
+30:3090,-2342,-501
+33:3090,-2342,503
+30:3094,-2338,-497
+30:3094,-2338,507
+30:3095,-2337,-496
+30:3095,-2337,508
+30:3096,-2336,-495
+30:3096,-2336,509
+30:3097,-2335,-494
+44:3101,2357,514
+30:3157,2426,556
+30:3158,2542,673
+30:3158,2427,557
+30:3159,2543,674
+64:3159,2428,558
+41:3210,2479,608
+62:3246,2516,647
+</Mussa>
--- /dev/null
+>gi|180579|gb|M21487.1|HUMCKMM1 Human muscle creatine kinase gene (CKMM), 5' flank
+GGATCCTTCCTCCTTGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACTGCACCTGGCCTATTACC
+CTTCTCAGGCTCTGGAGTCCATCCTTCTGCTCTGTCTCCCTCAGTTCAATTGTTTTTTGTTTTTTGTTTT
+TTTTTTAGACACAGTCTCGCTCTGTCACCAAGGCTGGAGTGCAGCAGTGCGATCACAGCTCACCGCAGCC
+TCACCTCCCAGGCTCAAGTGATCCTCCCATCTCGGCCTCTGAGTAGCTGAGACTATAGGTGTGTCCACAT
+GTCCGGCTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCGCGTTGGCCAGGGTGGTCTTGAACTCC
+TGAGCTCAAGCAATCCTCCTGCCTCAGCCTCCTTGTTTTGATTTTTAGATCCCACAAATAACTTGTGATG
+TTTGTCTTTCTATACCTGGTTCATTTAACATTTTCTTTTTCTTTTCTTTTCTTTTTTTTTTTTTTTGTGA
+GACTGAGTCTTGCTCTGTCACTCAGGCTGGAGGGCAATGGTGCATCTCAGCTCACTGCAACCTCCACCTC
+CTAGGTTCAAGCAATTCTTATGCCTCAGCCTCCTGGCTAGCTGGGATTACAGGCGTGTGTCACCATGCCA
+GGCTAATTTTTGTACTTTTAGTAGAGATGGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGGC
+CTCAAGTGATCCACCCGCCTCCGCCTCTGCCTCCCAAAGTGCTGGGATTACGGGCCTGAGCCACTGTGCC
+CGGCCCATCTAACATTTTCACTGTCAATCACAATGGGATTAAAACTCCTCCCACAGCCCCTAGGGACCAT
+GGGTCTGCTCCTGTCTCCCCTCCAACCTCATCTTCTTCCTCCCACTCTCTCCTTGGCCCCATCTGCTCCA
+GTCCCCTGGCCTCCTTCCTGTCTGTCCTCAGATGTGCCCAGCCATTCTCACCTCAGCGCCTTTGCACCTG
+CTGTTCCCCCCAGAGCCGCACATGGCTGGCTCCCTGTTCTCCTTCAGGTCTCTGCTCAGATGTCATCTTC
+CCAAAGAGGCCTGCCTCGACCTCCCCTGCTGCTGTGCCGTCCCCTCATCTGTGACCCTCTTGCACTATCA
+CCTCCAGGACGGCGGGGGTTTTGTGTTTTGTTGTAGCCTCAGGAAGTGCCTGATAGATCCCTGTTTCGAG
+ACCAGTTCCATTTGGTTTTCTGGGCCTCAGTTTCCGTAACCGTGAAGGAGACCCTCGGCAATCTGAGCTT
+GCTGGGAAAGGGCTGGGCCCCATGTAAATATTTCTAAAGCACCCCTCTCCCCTCCCCCCTCAGATCAGGA
+GTCTGAGGGAGAGGCACAGAGGCTCCCTTTCTCTAAGCCAGTCCTCACCTGCCTAAGAAGATGTGAAGGA
+GACCCAGGAGACCCTGGGATAGGGAGGAACTCAGAGGGAAGGGACATTCTTTTCTTCGTCGCAATCCTGG
+GAGCTCCCTGGAGGAGGAGACCCGATCAGCCTGCAATCCTGGCGCGTCCCAGGAGGAGAAAGCGGCTTCC
+TCTATACTGTACTCTCCTCCACAGAACCCCCCTCTCAGCCCTGGAAGTCCTTGCTCACAGCCGAGGCGCC
+GAGAGCGCTTGCTCTGCCCAGATCTCGGCGAGTCTGCGCCCGCGCTCTGAACGGCGTCGCTGCCCAGCCC
+CCTTCCCCGGGAGGTGGGAGCGGCCACCCAGGGCCCCGTGGCTGCCCTTGTAAGGAGGCGAGGCCGAGGA
+CACCCGAGACGCCCGGTTATAATTAACCAGGACACGTGGCGAACCCCCCTCCAACACCTGCCCCCGAACC
+CCCCCATACCCAGCGCCTCGGGTCTCGGCCTTTGCGGCAGAGGAGACAGCAAAGCGCCCTCTAAAAATAA
+CTCCTTTCCCGGCGACCGAGACCCTCCCTGTCCCCGCACAGCGAAATCTCCCAGTGGCACCGAGGGGGCG
+AGGGTTAAGTGGGGGGGAGGGTGACCACCGCCTCCCACCCTTGCCCTGAGTTTGAATCTCTCCAACTCAG
+CCAGCCTCAGTTTCCCCTCCACTCAGTCCCTAGGAGGAAGGGGCGCCCAAGCGGGTTTCTGGGGTTAGAC
+TGCCCTCCATTGCAATTGGTCCTTCTCCCGGCCTCTGCTTCCTCCAGCTCACAGGGTATCTGCTCCTCCT
+GGAGCCACACCTTGGTTCCCCGAGGTGCCGCTGGGACTCGGGTAGGGGTGAGGGCCCAGGGGCGACAGGG
+GGAGCCGAGGGCCACAGGAAGGGCTGGTGGCTGAAGGAGACTCAGGGGCCAGGGGACGGTGGCTTCTACG
+TGCTTGGGACGTTCCCAGCCACCGTCCCATGTTCCCGGCGGGGGCCAGCTGTCCCCACCGCCAGCCCAAC
+TCAGCACTTGGTTAGGGTATCAGCTTGGTGGGGGCGTGAGCCCAGCCCTGGGGCGCTCAGCCCATACAAG
+GCCATGGGGCTGGGCGCAAAGCATGCCTGGGTTCAGGGTGGGTATGGTGCCGGAGCAGGGAGGTGAGAGG
+CTCAGCTGCCCTCCAGAACTCCTCCCTGGGGACAACCCCTCCCAGCCAATAGCACAGCCTAGGTCCCCCT
+ATATAAGGCCACGGCTGCTGGCCCTTCCTTTGGGTCAGTGTCACCTCCAGGATACAGACAGCCCCCCTTC
+AGCCCAGCCCAGCCAGGTACTGCACGGGGCGGGAATCTGGGTGGGGGCCAGAGTAGGGGATTTCTGTGGG
+TGCTAGAGGCTTGGCTTGGGAAAGGGTCTGTGTGTCACCCCTTGCTCCACCAACATCCTCCTATACAAAG
+GCAGGTCGGTGCGTGGGAAGGTTGACCCTTGTGTGTCTGGGAGGCCCCTCCATCTGTGAGGCTGCCTGAA
+CCCCCACTGGGACCTGTGATTTCTGCGGCACAG
\ No newline at end of file
--- /dev/null
+>gi|10129974|gb|AF188002.1|AF188002 Mus musculus muscle creatine kinase (Mck) gene, promoter region
+CCATCCTGGTCTATAGAGAGAGTTCCAGAACAGCCAGGGCTACAGATAAACCCATCTGGAAAAACAAAGT
+TGAATGACCCAAGAGGGGTTCTCAGAGGGTGGCGTGTGCTCCCTGGCAAGCCTATGACATGGCCGGGGCC
+TGCCTCTCTCTGCCTCTGACCCTCAGTGGCTCCCATGAACTCCTTGCCCAATGGCATCTTTTTCCTGCGC
+TCCTTGGGTTATTCCAGTCTCCCCTCAGCATTCCTTCCTCAGGGCCTCGCTCTTCTCTCTGCTCCCTCCT
+TGCACAGCTGGCTCTGTCCACCTCAGATGTCACAGTGCTCTCTCAGAGGAGGAAGGCACCATGTACCCTC
+TGTTTCCCAGGTAAGGGTTCAATTTTTAAAAATGGTTTTTTGTTTGTTTGTTTGTTTGTTTGTTTGTTTG
+TTTTTCAAGACAGGGCTCCTCTGTGTAGTCCTAACTGTCTTGAAACTCCCTCTGTAGACCAGGTCGACCT
+CGAACTCTTGAAACCTGCCACGGACCACCCAGTCAGGTATGGAGGTCCCTGGAATGAGCGTCCTCGAAGC
+TAGGTGGGTAAGGGTTCGGCGGTGACAAACAGAAACAAACACAGAGGCAGTTTGAATCTGAGTGTATTTT
+GCAGCTCTCAAGCAGGGGATTTTATACATAAAAAAAAAAAAAAAAAAAAAACCAAACATTACATCTCTTA
+GAAACTATATCCAATGAAACAATCACAGATACCAACCAAAACCATTGGGCAGAGTAAAGCACAAAAATCA
+TCCAAGCATTACAACTCTGAAACCATGTATTCAGTGAATCACAAACAGAACAGGTAACATCATTATTAAT
+ATAAATCACCAAAATATAACAATTCTAAAAGGATGTATCCAGTGGGGGCTGTCGTCCAAGGCTAGTGGCA
+GATTTCCAGGAGCAGGTTAGTAAATCTTAACCACTGAACTAACTCTCCAGCCCCATGGTCAATTATTATT
+TAGCATCTAGTGCCTAATTTTTTTTTATAAATCTTCACTATGTAATTTAAAACTATTTTAATTCTTCCTA
+ATTAAGGCTTTCTTTACCATATACCAAAATTCACCTCCAATGACACACGCGTAGCCATATGAAATTTTAT
+TGTTGGGAAAATTTGTACCTATCATAATAGTTTTGTAAATGATTTAAAAAGCAAAGTGTTAGCCGGGCGT
+GGTGGCACACGCCTTTAATCCCTGCACTCGGGAGGCAGGGGCAGGAGGATTTCTGAGTTTGAGGCCAGCC
+TGGTCTACAGAGTGAGTTCCAGGACAGCCAGGGCTACACAGAGAAACCCTGTCTCGAACCCCCCACCCCC
+CAAAAAAAGCAAAGTGTTGGTTTCCTTGGGGATAAAGTCATGTTAGTGGCCCATCTCTAGGCCCATCTCA
+CCCATTATTCTCGCTTAAGATCTTGGCCTAGGCTACCAGGAACATGTAAATAAGAAAAGGAATAAGAGAA
+AACAAAACAGAGAGATTGCCATGAGAACTACGGCTCAATATTTTTTCTCTCCGGCGAAGAGTTCCACAAC
+CATCTCCAGGAGGCCTCCACGTTTTGAGGTCAATGGCCTCAGTCTGTGGAACTTGTCACACAGATCTTAC
+TGGAGGTGGTGTGGCAGAAACCCATTCCTTTTAGTGTCTTGGGCTAAAAGTAAAAGGCCCAGAGGAGGCC
+TTTGCTCATCTGACCATGCTGACAAGGAACACGGGTGCCAGGACAGAGGCTGGACCCCAGGAACACCTTA
+AACACTTCTTCCCTTCTCCGCCCCCTAGAGCAGGCTCCCCTCACCAGCCTGGGCAGAAATGGGGGAAGAT
+GGAGTGAAGCCATACTGGCTACTCCAGAATCAACAGAGGGAGCCGGGGGCAATACTGGAGAAGCTGGTCT
+CCCCCCAGGGGCAATCCTGGCACCTCCCAGGCAGAAGAGGAAACTTCCACAGTGCATCTCACTTCCATGA
+ATCCCCTCCTCGGACTCTGAGGTCCTTGGTCACAGCTGAGGTGCAAAAGGCTCCTGTCATATTGTGTCCT
+GCTCTGGTCTGCCTTCCACAGCTTGGGGGCCACCTAGCCCACCTCTCCCTAGGGATGAGAGCAGCCACTA
+CGGGTCTAGGCTGCCCATGTAAGGAGGCAAGGCCTGGGGACACCCGAGATGCCTGGTTATAATTAACCCA
+GACATGTGGCTGCCCCCCCCCCCCCAACACCTGCTGCCTGAGCCTCACCCCCACCCCGGTGCCTGGGTCT
+TAGGCTCTGTACACCATGGAGGAGAAGCTCGCTCTAAAAATAACCCTGTCCCTGGTGGATCCAGGGTGAG
+GGGCAGGCTGAGGGCGGCCACTTCCCTCAGCCGCAGGTTTGTTTTCCCAAGAATGGTTTTTCTGCTTCTG
+TAGCTTTTCCTGTCAATTCTGCCATGGTGGAGCAGCCTGCACTGGGCTTCTGGGAGAAACCAAACCGGGT
+TCTAACCTTTCAGCTACAGTTATTGCCTTTCCTGTAGATGGGCGACTACAGCCCCACCCCCACCCCCGTC
+TCCTGTATCCTTCCTGGGCCTGGGGATCCTAGGCTTTCACTGGAAATTTCCCCCCAGGTGCTGTAGGCTA
+GAGTCACGGCTCCCAAGAACAGTGCTTGCCTGGCATGCATGGTTCTGAACCTCCAACTGCAAAAAATGAC
+ACATACCTTGACCCTTGGAAGGCTGAGGCAGGGGGATTGCCATGAGTGCAAAGCCAGACTGGGTGGCATA
+GTTAGACCCTGTCTCAAAAAACCAAAAACAATTAAATAACTAAAGTCAGGCAAGTAATCCTACTCGGGAG
+ACTGAGGCAGAGGGATTGTTACATGTCTGAGGCCAGCCTGGACTACATAGGGTTTCAGGCTAGCCCTGTC
+TACAGAGTAAGGCCCTATTTCAAAAACACAAACAAAATGGTTCTCCCAGCTGCTAATGCTCACCAGGCAA
+TGAAGCCTGGTGAGCATTAGCAATGAAGGCAATGAAGGAGGGTGCTGGCTACAATCAAGGCTGTGGGGGA
+CTGAGGGCAGGCTGTAACAGGCTTGGGGGCCAGGGCTTATACGTGCCTGGGACTCCCAAAGTATTACTGT
+TCCATGTTCCCGGCGAAGGGCCAGCTGTCCCCCGCCAGCTAGACTCAGCACTTAGTTTAGGAACCAGTGA
+GCAAGTCAGCCCTTGGGGCAGCCCATACAAGGCCATGGGGCTGGGCAAGCTGCACGCCTGGGTCCGGGGT
+GGGCACGGTGCCCGGGCAACGAGCTGAAAGCTCATCTGCTCTCAGGGGCCCCTCCCTGGGGACAGCCCCT
+CCTGGCTAGTCACACCCTGTAGGCTCCTCTATATAACCCAGGGGCACAGGGGCTGCCCCCGGGTCAC
\ No newline at end of file
--- /dev/null
+>gi|1621|emb|X55146.1|OCMCK1 Rabbit gene for muscle creatine kinase, exon 1
+CTCCCTGAGGGGAGTGCCCCGCTTAGCCCTCAGCCCTGGCACCCACCAGGCAGAGGAGGCAGCTCCCAGA
+GTCCCCGGCTGAGGGGGCCTGCCCATGTCCTCGCGAGTCGGGCACCCAGTCCCCTTCCCTGGGGGCCTCG
+GGTGCCTTGTAAGGAGGCAAGGCCCAGGACACCCGAGCTGCCGGCTATAATTAGCCTGGACACGTGGGCC
+CCCCGCCAGCACCTGCCCTGAGCCCCCCTACCCGCAGCCTTGGGTGCTGGGACTGCCGACAGCAGAGACC
+GCTCTAAAAATAGCTCCCTTCCTGGACGGCCCCTCCCAGCAGGCCCCCCACCCCCCGCTGAGGCTCACTG
+CTGGTGTCCCTGCCTGGTGCTCCCTAGGGTGTGCACCCACGGCAGGTGCAGAGGCAGGGGGCACCGCGGG
+GGGCTGGTAACAGCAGCAGGCCTAGGGGCCAGGGGACGGCGAGCTGTACGTGCCAGGACACCCCCAGCCA
+CTGTCCCGTGCTCCCGGCGGGGGCCAGCTGTCCCCCGCCAGCCCGACTCAGCACTTGCTCTGCACATCGG
+CCTGGGGCGGCCCATACAAGGGCATGGGGCTGGGCACAAGGCACGCCCGCGTCCAGGTGGGCACTGTGCC
+CGGGCATGGTGGGGGGAGAGGCTGGGCTGCCTTCCAGGCCCCCTCCCTGGGGCCAGCCCCGCCCAGCCAG
+TGGCACAGCAGGGTCCCCTACATAAAGCCGGGGCCGCGGGCCCTGGATCTGCCTCAGTGTCGCCTGCAGG
+ACACACGTAGCCCCGCTCAGCACAGCCCAGCCAG
\ No newline at end of file
#include "mussa_class.hh"
#include "time.h"
+
int main(int argc, char **argv)
{
Mussa overlord;
+ char run_mode;
char * para_file, * ana_file;
- string an_arg;
time_t t1, t2, begin, end;
double setuptime, seqloadtime, seqcomptime, nwaytime, savetime, totaltime;
- string run_mode;
int seq_num;
begin = time(NULL);
- // need more sophisticated arg reading structure
- // support -w and -t override options...other stuff??
- an_arg = * ++argv;
-
- if (an_arg == "-v")
- {
- seq_num = atoi(* ++argv);
- ana_file = * ++argv;
- run_mode = "v";
- }
- else
- {
- para_file = (char*) an_arg.c_str();
- run_mode = "f";
- }
+ run_mode = overlord.parse_args(argc, argv);
- if (run_mode == "v")
- {
- cout << "load file = " << ana_file << " seq num = " << seq_num << endl;
- overlord.load_old(ana_file, seq_num);
- }
- else if (run_mode == "f")
+ if ((run_mode == 'f') || (run_mode == 'n'))
{
t1 = time(NULL);
- overlord.setup(para_file);
+ overlord.setup();
t2 = time(NULL);
setuptime = difftime(t2, t1);
cout << "fum\n";
t1 = time(NULL);
- overlord.save_old();
+ overlord.save();
t2 = time(NULL);
savetime = difftime(t2, t1);
cout << totaltime << "\n";
}
+ if (run_mode == 'v')
+ overlord.load();
- if (run_mode == "v")
+ if ((run_mode == 'f') || (run_mode == 'v'))
overlord.FuckingPieceOfShit(1000,500);
}
#include "mussa_class.hh"
+// doesn't do neg ints...
+string
+int_to_str(int an_int)
+{
+ string converted_int;
+ int remainder;
+
+ converted_int = "";
+
+ if (an_int == 0)
+ converted_int = "0";
+
+ while (an_int != 0)
+ {
+ remainder = an_int % 10;
+
+ if (remainder == 0)
+ converted_int = "0" + converted_int;
+ else if (remainder == 1)
+ converted_int = "1" + converted_int;
+ else if (remainder == 2)
+ converted_int = "2" + converted_int;
+ else if (remainder == 3)
+ converted_int = "3" + converted_int;
+ else if (remainder == 4)
+ converted_int = "4" + converted_int;
+ else if (remainder == 5)
+ converted_int = "5" + converted_int;
+ else if (remainder == 6)
+ converted_int = "6" + converted_int;
+ else if (remainder == 7)
+ converted_int = "7" + converted_int;
+ else if (remainder == 8)
+ converted_int = "8" + converted_int;
+ else if (remainder == 9)
+ converted_int = "9" + converted_int;
+
+ an_int = an_int / 10;
+ }
+
+ return converted_int;
+}
+
Mussa::Mussa()
{
}
+char
+Mussa::parse_args(int argc, char **argv)
+{
+ int i;
+ string an_arg;
+ char run_mode;
+
+ win_override = false;
+ thres_override = false;
+ // minimal arg reading structure, not very robust to errors
+
+
+ i = 1;
+ while (i < argc)
+ {
+ an_arg = * ++argv;
+ i++;
+
+ if (an_arg == "-v")
+ {
+ ana_name = * ++argv;
+ i++;
+ run_mode = 'v';
+ }
+ else if (an_arg == "-n")
+ {
+ para_file_path = * ++argv;
+ i++;
+ run_mode = 'n';
+ }
+ else if (an_arg == "-w")
+ {
+ window = atoi(* ++argv);
+ i++;
+ win_override = true;
+ }
+ else if (an_arg == "-t")
+ {
+ threshold = atoi(* ++argv);
+ i++;
+ thres_override = true;
+ cout << thres_override << endl;
+ }
+ else
+ {
+ para_file_path = an_arg;
+ run_mode = 'f';
+ }
+ }
+ return run_mode;
+}
+
+
void
-Mussa::setup(char * para_file_path)
+Mussa::setup()
{
ifstream para_file;
string file_data_line;
string param, value, annot_file;
int split_index, fasta_index;
+ int sub_seq_start, sub_seq_end;
bool seq_params, did_seq;
int bogo;
win_append = false;
thres_append = false;
+ seq_files.clear();
+ fasta_indices.clear();
+ annot_files.clear();
+ sub_seq_starts.clear();
+ sub_seq_ends.clear();
- para_file.open(para_file_path, ios::in);
+ para_file.open(para_file_path.c_str(), ios::in);
getline(para_file,file_data_line);
split_index = file_data_line.find(" ");
else if (param == "SEQUENCE_NUM")
seq_num = atoi(value.c_str());
else if (param == "WINDOW")
- window = atoi(value.c_str());
+ {
+ if (!win_override)
+ window = atoi(value.c_str());
+ }
else if (param == "THRESHOLD")
- threshold = atoi(value.c_str());
+ {
+ if (!thres_override)
+ threshold = atoi(value.c_str());
+ }
else if (param == "SEQUENCE")
{
seq_files.push_back(value);
fasta_index = 1;
annot_file = "";
+ sub_seq_start = 0;
+ sub_seq_end = 0;
seq_params = true;
while ((!para_file.eof()) && seq_params)
fasta_index = atoi(value.c_str());
else if (param == "ANNOTATION")
annot_file = value;
+ else if (param == "SEQ_START")
+ sub_seq_start = atoi(value.c_str());
+ else if (param == "SEQ_END")
+ {
+ cout << "hey! " << atoi(value.c_str()) << endl;
+ sub_seq_end = atoi(value.c_str());
+ }
//ignore empty lines or that start with '#'
else if ((param == "") || (param == "#")) {}
else seq_params = false;
fasta_indices.push_back(fasta_index);
annot_files.push_back(annot_file);
+ sub_seq_starts.push_back(sub_seq_start);
+ sub_seq_ends.push_back(sub_seq_end);
did_seq = true;
}
cout << "window = " << window << " threshold = " << threshold << "\n";
}
+// if (!((param == "") || (param == "#")))
+// cout << value << " = " << param << endl;
void
Mussa::get_Seqs()
{
list<string>::iterator seq_files_i, annot_files_i;
- list<int>::iterator fasta_indices_i;
+ list<int>::iterator fasta_indices_i, seq_starts_i, seq_ends_i;
Sequence aSeq;
seq_files_i = seq_files.begin();
fasta_indices_i = fasta_indices.begin();
annot_files_i = annot_files.begin();
+ seq_starts_i = sub_seq_starts.begin();
+ seq_ends_i = sub_seq_ends.begin();
- while ( (seq_files_i != seq_files.end()) &&
+
+ while (seq_files_i != seq_files.end())
+ /* it should be guarenteed that each of the following exist
+ &&
(fasta_indices_i != fasta_indices.end()) &&
- (annot_files_i != annot_files.end()) )
+ (annot_files_i != annot_files.end()) &&
+ (seq_starts_i != sub_seq_starts.end()) &&
+ (seq_ends_i != sub_seq_ends.end()) )
+ */
{
- aSeq.load_fasta(*seq_files_i, *fasta_indices_i);
+ aSeq.load_fasta(*seq_files_i, *fasta_indices_i,*seq_starts_i,*seq_ends_i);
+ if (*annot_files_i != "")
+ aSeq.load_annot(*annot_files_i);
the_Seqs.push_back(aSeq);
cout << aSeq.hdr() << endl;
+ //cout << aSeq.seq() << endl;
aSeq.clear();
++seq_files_i;
++fasta_indices_i;
++annot_files_i;
+ ++seq_starts_i;
+ ++seq_ends_i;
}
}
all_comps[i][i2].setup("m", window, threshold, seq_lens[i],seq_lens[i2]);
all_comps[i][i2].seqcomp(the_Seqs[i].seq(), the_Seqs[i2].seq(), false);
all_comps[i][i2].seqcomp(the_Seqs[i].seq(),the_Seqs[i2].rev_comp(),true);
- save_file_string = "ana";
- if (i == 0)
- save_file_string += "0";
- else if (i == 1)
- save_file_string += "1";
- if (i2 == 1)
- save_file_string += "1";
- else if (i2 == 2)
- save_file_string += "2";
all_comps[i][i2].file_save(save_file_string);
}
}
+
void
Mussa::nway()
{
- the_paths.setup(seq_num);
+ the_paths.setup(seq_num, window, threshold);
the_paths.find_paths_r(all_comps);
+ the_paths.simple_refine();
}
+void
+Mussa::save()
+{
+ string save_path_base, save_path;
+ fstream save_file;
+ int i;
+
+ // gotta do bit with adding win & thres if to be appended - need itos
+
+ // not sure why, but gotta close file each time since can't pass file streams
+
+ save_path_base = ana_name;
+
+ if (win_append)
+ save_path_base += "_w" + int_to_str(window);
+
+ if (thres_append)
+ save_path_base += "_t" + int_to_str(threshold);
+
+ // save sequence and annots to a special mussa file
+ save_path = save_path_base + ".museq";
+ save_file.open(save_path.c_str(), ios::out);
+ save_file << "<Mussa_Sequence>" << endl;
+ //save_file.close();
+
+ for(i = 0; i < seq_num; i++)
+ the_Seqs[i].save(save_file);
+
+ //save_file.open(save_path.c_str(), ios::app);
+ save_file << "</Mussa_Sequence>" << endl;
+ save_file.close();
+
+ // save nway paths to its mussa save file
+ save_path = save_path_base + ".muway";
+ the_paths.save(save_path);
+}
+
+void
+Mussa::load()
+{
+ int i;
+ string load_file_path;
+ Sequence tmp_seq;
+
+
+ load_file_path = ana_name + ".muway";
+ seq_num = the_paths.load(load_file_path);
+
+ load_file_path = ana_name + ".museq";
+ for (i = 1; i <= seq_num; i++)
+ {
+ tmp_seq.clear();
+ tmp_seq.load_museq(load_file_path, i);
+ the_Seqs.push_back(tmp_seq);
+ }
+}
+
+
+
+// In Memorial to Everything that's gone wrong in the last week
+// and Everything that will go wrong in the next 2 weeks 03/02/2004 - Tristan
+void
+Mussa::FuckingPieceOfShit(int x_max, int y_max)
+{
+ Fl_Window *conn_window = new Fl_Window(x_max, y_max, "Mussa Connections");
+ ConnView *conn_box = new ConnView(0, 0, x_max, y_max);
+ conn_box->setup(ana_name, seq_num, window, &the_Seqs, &the_paths);
+ conn_box->scale_paths();
+ conn_window->end();
+ conn_window->show();
+
+ Fl::run();
+}
+
+/*
+ cout << "fee\n";
+ cout << "fie\n";
+ cout << "foe\n";
+ cout << "fum\n";
+*/
+
+
void
Mussa::save_old()
Sequence a_seq;
seq_num = s_num;
- the_paths.setup(seq_num);
+ the_paths.setup(seq_num, 0, 0);
save_file.open(load_file_path, ios::in);
// currently loads old mussa format
//the_paths.save("tmp.save");
}
-
-
-// In Memorial to Everything that's gone wrong in the last week
-// and Everything that will go wrong in the next 2 weeks 03/02/2004 - Tristan
-void
-Mussa::FuckingPieceOfShit(int x_max, int y_max)
-{
-
- Fl_Window *conn_window = new Fl_Window(x_max, y_max, "Mussa Connections");
- ConnView *conn_box = new ConnView(0, 0, x_max, y_max);
- conn_box->setup(ana_name, seq_num, window, &the_Seqs, &the_paths);
- conn_box->scale_paths();
- conn_box->spawnSeq();
- conn_window->end();
- conn_window->show();
-
- Fl::run();
-}
-
-/*
- cout << "fee\n";
- cout << "fie\n";
- cout << "foe\n";
- cout << "fum\n";
-*/
-
-
class Mussa
{
private:
- string ana_name;
+ string para_file_path, ana_name;
int seq_num, window, threshold;
list<string> seq_files, annot_files;
- list<int> fasta_indices;
+ list<int> fasta_indices, sub_seq_starts, sub_seq_ends;
+ bool win_override, thres_override;
bool win_append, thres_append;
vector<Sequence> the_Seqs;
public:
Mussa();
- void setup(char * para_file_path);
+ char parse_args(int argc, char **argv);
+ void setup();
void get_Seqs();
void seqcomp();
void nway();
+ void save();
+ void load();
+ void FuckingPieceOfShit(int x_max, int y_max);
+
+ // deprecated - support bridge for python version of mussa
void save_old();
void load_old(char * load_file_path, int s_num);
- void FuckingPieceOfShit(int x_max, int y_max);
+
};
x_scale_factor = (float) seq_lens[0] / x_max;
cout << "scale factor is " << x_scale_factor << endl;
y_seq_incre = (y_max-20) / (seq_num - 1);
+
+ dragging = false;
+ selected = false;
+
+ highlight.clear();
}
scaled_pathz.clear();
- for(pathz_i = P->pathz.begin(); pathz_i != P->pathz.end(); ++pathz_i)
+ for(pathz_i = P->refined_pathz.begin(); pathz_i != P->refined_pathz.end(); ++pathz_i)
{
a_path = *pathz_i;
- for(i2 = 0; i2 < seq_num; i2++)
+ for(i2 = 0; i2 <= seq_num; i2++)
a_path[i2] = (int) (a_path[i2] / x_scale_factor);
scaled_pathz.push_back(a_path);
}
ConnView::draw()
{
list<vector<int> >::iterator i;
- int i2, y_loc, x_loc;
+ int i2, i3, y_loc, x_loc, x_start, x_end;
vector<int> a_path;
+ list<annot>::iterator annot_i;
+ int window_size;
+ bool rc_color;
+ int path_start, path_end;
+ list<bool>::iterator highlight_i;
+
+
+ // clear drawing area and set background to white
+ fl_color(FL_WHITE);
+ fl_rectf(x(), y(), w(), h());
+
+ // determine which paths to highlight
+ highlight.clear();
+ for(i = scaled_pathz.begin(); i != scaled_pathz.end(); ++i)
+ {
+ a_path = *i;
+ // determine if path falls within the selected region and mark it for
+ // highlighted color
+ path_start = abs(a_path[1]);
+ path_end = path_start + a_path[0];
+ if ( ( (path_start >= drag_start) && (path_end <= drag_end) ) ||
+ ( (path_start < drag_start) && (path_end > drag_end) ) ||
+ ( (path_start < drag_start) && (path_end > drag_start) ) ||
+ ( (path_start < drag_end) && (path_end > drag_end) ) )
+ highlight.push_back(true);
+ else
+ highlight.push_back(false);
+ }
- fl_color(FL_RED);
fl_line_style(FL_SOLID, 1, NULL);
+ // draw non-highlight paths (ie not in the selection box)
+ highlight_i = highlight.begin();
+ for(i = scaled_pathz.begin(); i != scaled_pathz.end(); ++i)
+ {
+ a_path = *i;
+ y_loc = 10;
+
+ // get window size to determine line width
+ window_size = a_path[0];
+ // make sure width is at least 1 - might be zero to my slack rounding
+ if (window_size == 0)
+ window_size = 1;
+
+ if (!(*highlight_i))
+ for(i2 = seq_num; i2 > 1; i2--)
+ {
+ // RC case handling
+ // ugh, an xor...only want blue if one of the nodes is rc
+ if ( ((a_path[i2] < 0) || (a_path[i2-1] < 0)) &&
+ !((a_path[i2] < 0) && (a_path[i2-1] < 0)) )
+ fl_color(FL_CYAN);
+ else
+ fl_color(FL_MAGENTA);
+
+ fl_polygon((int)abs(a_path[i2]), y_loc + 3,
+ (int)abs(a_path[i2])+window_size, y_loc + 3,
+ (int)abs(a_path[i2-1])+window_size, y_loc + y_seq_incre - 3,
+ (int)abs(a_path[i2-1]), y_loc + y_seq_incre - 3);
+ y_loc += y_seq_incre;
+ }
+ ++highlight_i;
+ }
+
+ // draw highlighted paths (ie in or partially in selection)
+ // drawing these separately and after other paths so they are on top
+ highlight_i = highlight.begin();
for(i = scaled_pathz.begin(); i != scaled_pathz.end(); ++i)
{
a_path = *i;
y_loc = 10;
- for(i2 = seq_num - 1; i2 > 0; i2--)
- {
- // RC case handling
- // ugh, and xor...only want blue if one of the nodes is rc
- if ( ((a_path[i2] < 0) || (a_path[i2-1] < 0)) &&
- !((a_path[i2] < 0) && (a_path[i2-1] < 0)) )
- fl_color(FL_BLUE);
-
- fl_line((int)abs(a_path[i2]),y_loc,(int)abs(a_path[i2-1]),y_loc + y_seq_incre);
- y_loc += y_seq_incre;
- fl_color(FL_RED);
- }
+
+ // get window size to determine line width
+ window_size = a_path[0];
+ // make sure width is at least 1 - might be zero to my slack rounding
+ if (window_size == 0)
+ window_size = 1;
+
+ if (*highlight_i)
+ for(i2 = seq_num; i2 > 1; i2--)
+ {
+ // RC case handling
+ // ugh, an xor...only want blue if one of the nodes is rc
+ if ( ((a_path[i2] < 0) || (a_path[i2-1] < 0)) &&
+ !((a_path[i2] < 0) && (a_path[i2-1] < 0)) )
+ fl_color(FL_BLUE);
+ else
+ fl_color(FL_RED);
+
+ fl_polygon((int)abs(a_path[i2]), y_loc + 3,
+ (int)abs(a_path[i2])+window_size, y_loc + 3,
+ (int)abs(a_path[i2-1])+window_size, y_loc + y_seq_incre - 3,
+ (int)abs(a_path[i2-1]), y_loc + y_seq_incre - 3);
+
+ y_loc += y_seq_incre;
+ }
+ ++highlight_i;
}
+ // draw sequence representation lines
fl_color(FL_BLACK);
fl_line_style(FL_SOLID, 3, NULL);
y_loc = 10;
fl_line(0,y_loc,x_loc,y_loc);
y_loc += y_seq_incre;
}
+
+ // draw annotations
+ fl_color(FL_GREEN);
+ fl_line_style(FL_SOLID, 9, NULL);
+ y_loc = 10;
+ for(i2 = seq_num - 1; i2 >= 0; i2--)
+ {
+ for(annot_i = (*S)[i2].annots.begin(); annot_i != (*S)[i2].annots.end(); ++annot_i)
+ {
+ x_start = (int)(annot_i->start / x_scale_factor);
+ x_end = (int)(annot_i->end / x_scale_factor);
+ fl_line(x_start,y_loc,x_end,y_loc);
+ }
+ y_loc += y_seq_incre;
+ }
+
+ // draw selection box
+ if (selected)
+ {
+ fl_color(FL_BLACK);
+ fl_line_style(FL_SOLID, 2, NULL);
+ fl_rect(drag_start, y_loc - y_seq_incre-8, drag_end-drag_start, 16);
+ }
+}
+
+
+
+int
+ConnView::handle(int e)
+{
+ int return_value;
+
+ switch(e)
+ {
+ case FL_PUSH:
+ if (Fl::event_button3())
+ {
+ spawnSeq();
+ return_value = 1;
+ }
+ break;
+ case FL_DRAG:
+ if (!dragging)
+ {
+ drag_start = Fl::event_x() -x();
+ dragging = true;
+ selected = false;
+ return_value = 1;
+ }
+ fl_color(FL_CYAN);
+ fl_line_style(FL_SOLID, 1, NULL);
+ fl_overlay_rect(drag_start, y_seq_incre*(seq_num-1), Fl::event_x()-drag_start, 16);
+
+ break;
+ case FL_RELEASE:
+ if (dragging)
+ {
+ drag_end = Fl::event_x() - x();
+ dragging = false;
+ selected = true;
+ redraw();
+ return_value = 1;
+ }
+ break;
+ default:
+ return_value = Fl_Widget::handle(e);
+ }
+
+ return return_value;
}
+
void
ConnView::spawnSeq()
{
- int seq_box_x;
+ list<vector<int> > selected_paths;
+ list<vector<int> >::iterator pathz_i;
+ int i2, i3, y_loc, x_loc, x_start, x_end;
+ vector<int> a_path;
+ list<bool>::iterator highlight_i;
+
+ // make new list of connections that are highlighted
+ selected_paths.clear();
+ highlight_i = highlight.begin();
+ for(pathz_i = P->refined_pathz.begin(); pathz_i != P->refined_pathz.end(); ++pathz_i)
+ {
+ if (*highlight_i)
+ {
+ a_path = *pathz_i;
+ selected_paths.push_back(a_path);
+ //cout << "bing ";
+ }
+ /*
+ else
+ cout << "bang ";
+ */
+ ++highlight_i;
+ }
+
+ //cout << endl;
Fl_Window *seq_window = new Fl_Window(x_max, y_max, "Mussa Sequence");
- Fl_Scroll *scroll_seq = new Fl_Scroll(0, 0, x_max, y_max);
- seq_box_x = 32000;
- //seq_lens[0]*14;
- SeqView *seq_box = new SeqView(0, 0, seq_box_x, y_max-20);
- seq_box->setup(ana_name, seq_num, window, S, P, seq_lens);
+
+ SeqView *seq_box = new SeqView(0, 0, x_max, y_max);
+ seq_box->setup(ana_name, seq_num, S, selected_paths, seq_lens);
+ seq_box->debug_draw();
seq_window->end();
seq_window->show();
}
+
+
int x_max, y_max;
float x_scale_factor;
int y_seq_incre;
- //list< will be list of these later for zooming history
+ int drag_start, drag_end;
+ bool dragging, selected;
+ list<bool> highlight;
+
list<vector<int> > scaled_pathz;
vector<int> seq_lens;
vector<Sequence> *, Nway_Paths *);
void scale_paths();
void draw();
+ int handle(int e);
void spawnSeq();
};
void
-SeqView::setup(string name, int sq_num, int win_len,
+SeqView::setup(string name, int sq_num,
vector<Sequence> *some_seqs,
- Nway_Paths *some_paths, vector<int> some_lens)
+ list<vector<int> > some_paths, vector<int> some_lens)
{
ana_name = name;
seq_num = sq_num;
- base_window_len = win_len;
S = some_seqs;
P = some_paths;
seq_lens = some_lens;
fl_font(FL_COURIER, 14);
cout << fl_width('A') << endl;
+
+ align_offsets(0);
}
int window_length, win_i;
int rc_1 = 0;
int rc_2 = 0;
+ string sub_seq;
+ int offset1, offset2;
+ float center1, center2;
- window_length = base_window_len;
fl_font(FL_COURIER, 14);
fl_color(FL_BLACK);
//blatantly stolen from FR2
y_loc = 10;
for(seq_i = seq_num-1; seq_i >= 0; --seq_i)
{
- //a_seq = a_seq;
- fl_draw((*S)[seq_i].c_seq(), 0+x(), y_loc+y());
+ sub_seq = (*S)[seq_i].seq();
+ sub_seq = sub_seq.substr(seq_align_offsets[seq_i]);
+ fl_draw(sub_seq.c_str(), 0+x(), y_loc+y());
y_loc += y_seq_incre;
}
fl_color(FL_RED);
fl_line_style(FL_SOLID, 1, NULL);
- //fl_line(x(),10+y(),20+x(),y_max/2 + y());
-
- //fl_color(FL_RED);
-
- for(pathz_i = P->pathz.begin(); pathz_i != P->pathz.end(); ++pathz_i)
+ for(pathz_i = P.begin(); pathz_i != P.end(); ++pathz_i)
{
a_path = *pathz_i;
y_loc = 0;
- for(i2 = seq_num - 1; i2 > 0; i2--)
+ window_length = a_path[0];
+ for(i2 = seq_num; i2 > 1; i2--)
{
// RC case handling
- // ugh, and xor...only want black if one of the nodes is rc
+ // ugh, and xor...only want rc coloring if just one of the nodes is rc
if ( ((a_path[i2] < 0) || (a_path[i2-1] < 0)) &&
!((a_path[i2] < 0) && (a_path[i2-1] < 0)) )
fl_color(FL_BLUE);
+ offset1 = seq_align_offsets[i2-1];
+ offset2 = seq_align_offsets[i2-2];
+ center1 = 0.5;
+ center2 = 0.5;
+
// no matter the case, any RC node needs an offset
if (a_path[i2] < 0)
+ {
rc_1 = window_length;
+ offset1 = -offset1;
+ center1 = -center1;
+ }
if (a_path[i2-1] < 0)
+ {
rc_2 = window_length;
- // normal case
+ offset2 = -offset2;
+ center2 = -center2;
+ }
+
+ // draw the line for each bp in this window that actually matches
for(win_i = 0; win_i < window_length; win_i++)
- fl_line(abs((int)((a_path[i2]-rc_1+win_i+x()+0.5)*ch_width)),
- y_loc + 10+y(),
- abs((int)((a_path[i2-1]-rc_2+win_i+x()+0.5)*ch_width)),
- y_loc+y_seq_incre+y());
+ {
+ x_start = abs((int) (a_path[i2]-rc_1+win_i-offset1) );
+ x_end = abs((int) (a_path[i2-1]-rc_2+win_i-offset2) );
+ fl_line( (x_start+center1)*ch_width, y_loc + 10,
+ (x_end+center2)*ch_width, y_loc+y_seq_incre );
+ }
y_loc += y_seq_incre;
- //if ((a_path[i2] < 0) || (a_path[i2-1] < 0))
- //{
- fl_color(FL_RED);
- rc_1 = 0;
- rc_2 = 0;
- //}
+ fl_color(FL_RED);
+ rc_1 = 0;
+ rc_2 = 0;
}
}
}
+
+void
+SeqView::debug_draw()
+{
+ int seq_i;
+ Sequence a_seq;
+ int i, y_loc;
+ vector<int> a_path;
+ list<vector<int> >::iterator pathz_i;
+ int i2;
+ int x_start, y_start, x_end, y_end;
+ double ch_width;
+ int window_length, win_i;
+ int rc_1 = 0;
+ int rc_2 = 0;
+ string sub_seq;
+
+
+ cout << "debugging...\n";
+ y_loc = 10;
+ for(seq_i = seq_num-1; seq_i >= 0; --seq_i)
+ {
+ sub_seq = (*S)[seq_i].seq();
+ cout << sub_seq << endl;
+ sub_seq = sub_seq.substr(seq_align_offsets[seq_i]);
+ cout << sub_seq << endl;
+ //fl_draw(sub_seq.c_str(), 0+x(), y_loc+y());
+ y_loc += y_seq_incre;
+ }
+}
+
+void
+SeqView::align_offsets(int align_num)
+{
+ list<vector<int> >::iterator pathz_i;
+ int i;
+
+
+ if (P.begin() == P.end())
+ cout << "crud....\n";
+ pathz_i = P.begin();
+ while(pathz_i != P.end())
+ {
+ //cout << (*pathz_i)[0] << endl;
+ ++pathz_i;
+ }
+
+ // find the path specified
+ i = 0;
+ pathz_i = P.begin();
+ cout << "fee\n";
+ while( (i < align_num) && (pathz_i != P.end()) )
+ {
+ cout << "fie\n";
+ ++i;
+ ++pathz_i;
+ }
+
+ cout << "foe\n";
+ // now set the alignment offsets - basically where the path starts
+ seq_align_offsets.clear();
+ for(i = 0; i < seq_num; i++)
+ {
+ cout << (*pathz_i)[i+1] << endl;
+ seq_align_offsets.push_back( (*pathz_i)[i+1] );
+ }
+ cout << "fum\n";
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
/*
x_start = (int) ();
y_start = ;
//this data is passed as pointers to the instantiated classes
vector<Sequence> *S;
- Nway_Paths *P;
+ // list of paths in selection box
+ list<vector<int> > P;
vector<int> seq_lens;
int x_max, y_max;
int y_seq_incre;
+ vector<int> seq_align_offsets;
public:
SeqView(int x_top,int y_top,int x_bot,int y_bot) :
y_max = y_bot;
}
- void setup(string, int, int, vector<Sequence> *, Nway_Paths *, vector<int>);
+ void setup(string, int, vector<Sequence> *some_seqs,
+ list<vector<int> > some_paths, vector<int>);
void draw();
+ void align_offsets(int align_num);
+ void debug_draw();
};
}
void
-Nway_Paths::setup(int sp_num)
+Nway_Paths::setup(int sp_num, int w, int t)
{
species_num = sp_num;
+ threshold = t;
+ win_size = w;
pathz.clear();
}
if (trans_check_good)
{
+ // this makes sure path nodes are recorded with RC status relative to
+ // the base species
if ( path[depth-1] >= 0)
path.push_back(*new_nodes_i);
else
path.push_back(*new_nodes_i * -1);
+
if (depth < species_num - 1)
path_search(all_comparisons, path, depth + 1);
else
}
}
+// dumbly goes thru and combines path windows exactly adjacent (ie + 1 index)
+// doesn't deal with interleaved adjacency
+void
+Nway_Paths::simple_refine()
+{
+ // ext_path remembers the first window set in an extending path
+ vector<int> ext_path, cur_path, next_path, new_path;
+ list<vector<int> >::iterator pathz_i;
+ int i2, win_ext_len = 0;
+ bool extending;
+
+
+ refined_pathz.clear();
+
+ pathz_i = pathz.begin();
+ ext_path = *pathz_i;
+ while(pathz_i != pathz.end())
+ {
+ // keep track of current path and advance to next path
+ cur_path = *pathz_i;
+ ++pathz_i;
+ next_path = *pathz_i;
+
+ // if node for each seq is equal to the next node+1 then for all
+ // sequences then we are extending
+ extending = true;
+ for(i2 = 0; i2 < species_num; i2++)
+ {
+ //cout << cur_path[i2] << " vs " << next_path[i2] << endl;
+ if (cur_path[i2] + 1 != next_path[i2])
+ extending = false;
+ }
+ if (extending)
+ {
+ //cout << "Raaaawwwrrr! I am extending\n";
+ win_ext_len++;
+ }
+ else
+ {
+ // add the extend window length as first element and add as refined
+ new_path = ext_path;
+ new_path.insert(new_path.begin(),win_size + win_ext_len);
+ refined_pathz.push_back(new_path);
+ // reset stuff
+ win_ext_len = 0;
+ ext_path = next_path;
+ }
+ }
+}
+
void
Nway_Paths::add_path(vector<int> loaded_path)
save_file.open(save_file_path.c_str(), ios::out);
- //save_file << "<Seqcomp type=" << ana_type << " win=" << window_size;
- //save_file << " thres=" << hard_threshold << ">\n";
-
- path_i = pathz.begin();
- paths_end = pathz.end();
- if (path_i == paths_end)
- cout << "Arrrrrrgggghhhhhh\n";
- while(path_i != paths_end)
+ save_file << "<Mussa type=flp seq_num=" << species_num;
+ save_file << " win=" << win_size;
+ // add a function para new_thres defaults to -1 to later deal with
+ // reanalysis with higher thres - if statement whether to record base thres
+ // or new thres (ie if -1, then base)
+ save_file << " thres=" << threshold << " >\n";
+
+ path_i = refined_pathz.begin();
+ paths_end = refined_pathz.end();
+ //path_i = pathz.begin();
+ //paths_end = pathz.end();
+ while (path_i != paths_end)
{
a_path = *path_i;
//cout << a_path.size() << endl;
- for(i = 0; i < species_num; ++i)
- save_file << i << "," << a_path[i] << " ";
+ //first entry is the window length of the windows in the path
+ save_file << a_path[0] << ":";
+ for(i = 1; i <= species_num; ++i)
+ {
+ save_file << a_path[i];
+ if (i != species_num)
+ save_file << ",";
+ }
save_file << endl;
++path_i;
}
-//save_file << "</Mussa>\n";
+ save_file << "</Mussa>\n";
save_file.close();
}
+/*
+ if (path_i == paths_end)
+ cout << "Arrrrrrgggghhhhhh\n";
+*/
+
+int
+Nway_Paths::load(string load_file_path)
+{
+ fstream load_file;
+ string file_data_line, header_data, data, path_node, path_length;
+ int i, space_split_i, equal_split_i, comma_split_i, colon_split_i;
+ vector<int> loaded_path;
+
+
+ load_file.open(load_file_path.c_str(), ios::in);
+
+ // get header data
+ // grab mussa tag - discard for now...maybe check in future...
+ getline(load_file,file_data_line);
+ space_split_i = file_data_line.find(" ");
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // grab type tag - need in future to distinguish between flp and vlp paths
+ space_split_i = file_data_line.find(" ");
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // get species/seq number
+ space_split_i = file_data_line.find(" ");
+ header_data = file_data_line.substr(0,space_split_i);
+ equal_split_i = header_data.find("=");
+ data = file_data_line.substr(equal_split_i+1);
+ species_num = atoi (data.c_str());
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // get window size
+ space_split_i = file_data_line.find(" ");
+ header_data = file_data_line.substr(0,space_split_i);
+ equal_split_i = header_data.find("=");
+ data = file_data_line.substr(equal_split_i+1);
+ win_size = atoi (data.c_str());
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // get window size
+ space_split_i = file_data_line.find(" ");
+ header_data = file_data_line.substr(0,space_split_i);
+ equal_split_i = header_data.find("=");
+ data = file_data_line.substr(equal_split_i+1);
+ threshold = atoi (data.c_str());
+ file_data_line = file_data_line.substr(space_split_i+1);
+
+ cout << "seq_num=" << species_num << " win=" << win_size;
+ cout << " thres=" << threshold << endl;
+
+ // clear out the current data
+ refined_pathz.clear();
+
+ int temp;
+
+ getline(load_file,file_data_line);
+ while ( (!load_file.eof()) && (file_data_line != "</Mussa>") )
+ {
+ if (file_data_line != "")
+ {
+ loaded_path.clear();
+ colon_split_i = file_data_line.find(":");
+ path_length = file_data_line.substr(0,colon_split_i);
+ loaded_path.push_back(atoi (path_length.c_str()));
+ file_data_line = file_data_line.substr(colon_split_i+1);
+ for(i = 0; i < species_num; i++)
+ {
+ comma_split_i = file_data_line.find(",");
+ path_node = file_data_line.substr(0, comma_split_i);
+ temp = atoi (path_node.c_str());
+ loaded_path.push_back(temp);
+ file_data_line = file_data_line.substr(comma_split_i+1);
+ }
+ refined_pathz.push_back(loaded_path);
+ }
+ getline(load_file,file_data_line);
+ }
+
+
+ load_file.close();
+
+ return species_num;
+}
+
+
+
void
Nway_Paths::save_old(string save_file_path)
{
int threshold;
int win_size;
list<vector<int> > pathz;
+ list<vector<int> > refined_pathz;
public:
Nway_Paths();
- void setup(int sp_num);
+ void setup(int sp_num, int w, int t);
void find_paths_r(vector<vector<FLPs> > all_comparisons);
void path_search(vector<vector<FLPs> > all_comparisons, vector<int> path, int depth);
+ void simple_refine();
void save(string save_file_path);
- void save_old(string save_file_path);
- void load(string load_file_path);
+ int load(string load_file_path);
void add_path(vector<int> loaded_path);
+
void find_paths(vector<vector<FLPs> > all_comparisons);
void refine();
- void print();
- list<vector<int> > scaled_paths(float scale_factor);
+ void save_old(string save_file_path);
+ void print();
};
--- /dev/null
+#include "mussa_class.hh"
+#include "time.h"
+
+int main(int argc, char **argv)
+{
+ Mussa overlord;
+ char * para_file, * ana_file;
+ string an_arg;
+ time_t t1, t2, begin, end;
+ double setuptime, seqloadtime, seqcomptime, nwaytime, savetime, totaltime;
+ string run_mode;
+ int seq_num;
+
+
+ begin = time(NULL);
+
+ // need more sophisticated arg reading structure
+ // support -w and -t override options...other stuff??
+ an_arg = * ++argv;
+
+ if (an_arg == "-v")
+ {
+ ana_file = * ++argv;
+ run_mode = "v";
+ }
+ else
+ {
+ para_file = (char*) an_arg.c_str();
+ run_mode = "f";
+ }
+
+ if (run_mode == "v")
+ {
+ cout << "load file = " << ana_file << endl;
+ overlord.load(ana_file);
+ //overlord.FuckingPieceOfShit(1000,500);
+ }
+ else if (run_mode == "f")
+ {
+ t1 = time(NULL);
+ overlord.setup(para_file);
+ t2 = time(NULL);
+ setuptime = difftime(t2, t1);
+
+
+ cout << "fee\n";
+ t1 = time(NULL);
+ overlord.get_Seqs();
+ t2 = time(NULL);
+ seqloadtime = difftime(t2, t1);
+
+
+ cout << "fie\n";
+ t1 = time(NULL);
+ overlord.seqcomp();
+ t2 = time(NULL);
+ seqcomptime = difftime(t2, t1);
+
+
+ cout << "foe\n";
+ t1 = time(NULL);
+ overlord.nway();
+ t2 = time(NULL);
+ nwaytime = difftime(t2, t1);
+
+
+ cout << "fum\n";
+ t1 = time(NULL);
+ overlord.save_old();
+ t2 = time(NULL);
+ savetime = difftime(t2, t1);
+
+ end = time(NULL);
+ totaltime = difftime(end, begin);
+
+ cout << "setup\tseqload\tseqcomp\tnway\tsave\ttotal\n";
+ cout << setuptime << "\t";
+ cout << seqloadtime << "\t";
+ cout << seqcomptime << "\t";
+ cout << nwaytime << "\t";
+ cout << savetime << "\t";
+ cout << totaltime << "\n";
+ }
+}
+
+
+
+
+/*
+ cout << "fee\n";
+ cout << "fie\n";
+ cout << "foe\n";
+ cout << "fum\n";
+*/
#include "flp.hh"
#include <time.h>
-#include <iomanip.h>
+#include <iomanip>
int main(int argc, char **argv)
{
void
-Sequence::load_fasta(string file_path, int seq_num)
+Sequence::load_fasta(string file_path, int seq_num,
+ int start_index, int end_index)
{
fstream data_file;
string file_data_line;
int header_counter = 0;
bool read_seq = true;
+ string rev_comp;
+ char conversionTable[257];
+ int seq_i, table_i, len;
+ string sequence_raw;
+ char * seq_tmp; // holds sequence during basic filtering
data_file.open(file_path.c_str(), ios::in);
header = file_data_line.substr(1);
+ sequence_raw = "";
+
while ( !data_file.eof() && read_seq )
{
getline(data_file,file_data_line);
if (file_data_line.substr(0,1) == ">")
read_seq = false;
- else sequence += file_data_line;
+ else sequence_raw += file_data_line;
}
data_file.close();
- // need more sequence filtering for non AGCTN and convert to N
- transform(sequence.begin(), sequence.end(), sequence.begin(), toupper);
+ // sequence filtering for upcasing agctn and convert non AGCTN to N
+
+ len = sequence_raw.length();
+ seq_tmp = (char*)sequence_raw.c_str();
+
+ sequence = "";
+
+ // Make a conversion table
+
+ // everything we don't specify below will become 'N'
+ for(table_i=0; table_i < 256; table_i++)
+ {
+ conversionTable[table_i] = 'N';
+ }
+ // add end of string character for printing out table for testing purposes
+ conversionTable[256] = '\0';
+
+ // we want these to map to themselves - ie not to change
+ conversionTable['A'] = 'A';
+ conversionTable['T'] = 'T';
+ conversionTable['G'] = 'G';
+ conversionTable['C'] = 'C';
+ // this is to upcase
+ conversionTable['a'] = 'A';
+ conversionTable['t'] = 'T';
+ conversionTable['g'] = 'G';
+ conversionTable['c'] = 'C';
+
+ // finally, the actual conversion loop
+ for(seq_i = 0; seq_i < len; seq_i++)
+ {
+ table_i = (int) seq_tmp[seq_i];
+ sequence += conversionTable[table_i];
+ }
+
+
+ // Lastly, if subselection of the sequence was specified we keep cut out
+ // and only keep that part
+
+ // end_index = 0 means no end was specified, so cut to the end
+ if (end_index == 0)
+ end_index = len;
+
+ // start_index = 0 if no start was specified
+ sequence = sequence.substr(start_index, end_index - start_index);
}
+ // this doesn't work properly under gcc 3.x ... it can't recognize toupper
+ //transform(sequence.begin(), sequence.end(), sequence.begin(), toupper);
+
+
void
Sequence::load_annot(string file_path)
{
fstream data_file;
string file_data_line;
annot an_annot;
+ int space_split_i;
+ string annot_value;
+ annots.clear();
data_file.open(file_path.c_str(), ios::in);
+ getline(data_file,file_data_line);
+ species = file_data_line;
+
+ while ( !data_file.eof() )
+ {
+ getline(data_file,file_data_line);
+ if (file_data_line != "")
+ {
+ // need to get 4 values...almost same code 4 times...
+ // get annot start index
+ space_split_i = file_data_line.find(" ");
+ annot_value = file_data_line.substr(0,space_split_i);
+ an_annot.start = atoi (annot_value.c_str());
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // get annot end index
+ space_split_i = file_data_line.find(" ");
+ annot_value = file_data_line.substr(0,space_split_i);
+ an_annot.end = atoi (annot_value.c_str());
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // get annot start index
+ space_split_i = file_data_line.find(" ");
+ annot_value = file_data_line.substr(0,space_split_i);
+ an_annot.name = annot_value;
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // get annot start index
+ space_split_i = file_data_line.find(" ");
+ annot_value = file_data_line.substr(0,space_split_i);
+ an_annot.type = annot_value;
+ annots.push_back(an_annot);
+ }
+ }
+
data_file.close();
+ list<annot>::iterator list_i;
+ for(list_i = annots.begin(); list_i != annots.end(); ++list_i)
+ {
+ cout << (*list_i).start << "," << (*list_i).end << "\t";
+ cout << (*list_i).name << "\t" << (*list_i).type << endl;
+ }
}
species_num = -4;
annots.clear();
}
+
+void
+Sequence::save(fstream &save_file)
+ //string save_file_path)
+{
+ //fstream save_file;
+ list<annot>::iterator annots_i;
+ int i;
+
+ // not sure why, or if i'm doing something wrong, but can't seem to pass
+ // file pointers down to this method from the mussa control class
+ // so each call to save a sequence appends to the file started by mussa_class
+ //save_file.open(save_file_path.c_str(), ios::app);
+
+ save_file << "<Sequence>" << endl;
+ save_file << sequence << endl;
+ save_file << "</Sequence>" << endl;
+
+ save_file << "<Annotations>" << endl;
+ for (annots_i = annots.begin(); annots_i != annots.end(); ++annots_i)
+ {
+ save_file << annots_i->start << " " << annots_i->end << " " ;
+ save_file << annots_i->name << " " << annots_i->type << endl;
+ }
+ save_file << "</Annotations>" << endl;
+ //save_file.close();
+}
+
+void
+Sequence::load_museq(string load_file_path, int seq_num)
+{
+ fstream load_file;
+ string file_data_line;
+ int seq_counter;
+ annot an_annot;
+ int space_split_i;
+ string annot_value;
+
+ annots.clear();
+ load_file.open(load_file_path.c_str(), ios::in);
+
+ seq_counter = 0;
+ // search for the seq_num-th sequence
+ while ( (!load_file.eof()) && (seq_counter < seq_num) )
+ {
+ getline(load_file,file_data_line);
+ if (file_data_line == "<Sequence>")
+ seq_counter++;
+ }
+ getline(load_file, file_data_line);
+ //cout << "*fee\n";
+ sequence = file_data_line;
+ //cout << "*fie\n";
+ getline(load_file, file_data_line);
+ getline(load_file, file_data_line);
+ if (file_data_line == "<Annotations>")
+ {
+ while ( (!load_file.eof()) && (file_data_line != "</Annotations>") )
+ {
+ //cout << "*foe\n";
+ getline(load_file,file_data_line);
+ if ((file_data_line != "") && (file_data_line != "</Annotations>"))
+ {
+ //cout << "*fum\n";
+ // need to get 4 values...almost same code 4 times...
+ // get annot start index
+ space_split_i = file_data_line.find(" ");
+ annot_value = file_data_line.substr(0,space_split_i);
+ an_annot.start = atoi (annot_value.c_str());
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // get annot end index
+ space_split_i = file_data_line.find(" ");
+ annot_value = file_data_line.substr(0,space_split_i);
+ an_annot.end = atoi (annot_value.c_str());
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // get annot start index
+ space_split_i = file_data_line.find(" ");
+ annot_value = file_data_line.substr(0,space_split_i);
+ an_annot.name = annot_value;
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // get annot start index
+ space_split_i = file_data_line.find(" ");
+ annot_value = file_data_line.substr(0,space_split_i);
+ an_annot.type = annot_value;
+ annots.push_back(an_annot);
+ }
+ }
+ }
+ load_file.close();
+}
+
+
+list<int>
+Sequence::find_motif(string a_motif)
+{
+ char * seq_c;
+ int seq_i, motif_i, motif_len;
+ list<int> motif_match_starts;
+
+ motif_match_starts.clear();
+ // faster to loop thru the sequence as a old c string (ie char array)
+ seq_c = (char*)sequence.c_str();
+ motif_len = a_motif.length();
+
+
+ for (seq_i = 0; seq_i < length; seq_i++)
+ {
+
+ // this is pretty much a straight translation of Nora's python code
+ // to match iupac letter codes
+ if (a_motif[motif_i] == seq_c[seq_i])
+ motif_i++;
+ else if (seq_c[seq_i] =='N')
+ motif_i++;
+ else if ((a_motif[motif_i] =='M') &&
+ ((seq_c[seq_i]=='A') || (seq_c[seq_i]=='C')))
+ motif_i++;
+ else if ((a_motif[motif_i] =='R') &&
+ ((seq_c[seq_i]=='A') || (seq_c[seq_i]=='G')))
+ motif_i++;
+ else if ((a_motif[motif_i] =='W') &&
+ ((seq_c[seq_i]=='A') || (seq_c[seq_i]=='T')))
+ motif_i++;
+ else if ((a_motif[motif_i] =='S') &&
+ ((seq_c[seq_i]=='C') || (seq_c[seq_i]=='G')))
+ motif_i++;
+ else if ((a_motif[motif_i] =='Y') &&
+ ((seq_c[seq_i]=='C') || (seq_c[seq_i]=='T')))
+ motif_i++;
+ else if ((a_motif[motif_i] =='K') &&
+ ((seq_c[seq_i]=='G') || (seq_c[seq_i]=='T')))
+ motif_i++;
+ else if ((a_motif[motif_i] =='V') &&
+ ((seq_c[seq_i]=='A') || (seq_c[seq_i]=='C') ||
+ (seq_c[seq_i]=='G')))
+ motif_i++;
+ else if ((a_motif[seq_i] =='H') &&
+ ((seq_c[seq_i]=='A') || (seq_c[seq_i]=='C') ||
+ (seq_c[seq_i]=='T')))
+ motif_i++;
+ else if ((a_motif[motif_i] =='D') &&
+ ((seq_c[seq_i]=='A') || (seq_c[seq_i]=='G') ||
+ (seq_c[seq_i]=='T')))
+ motif_i++;
+ else if ((a_motif[motif_i] =='B') &&
+ ((seq_c[seq_i]=='C') || (seq_c[seq_i]=='G') ||
+ (seq_c[seq_i]=='T')))
+ motif_i++;
+ else
+ motif_i = 0;
+
+ // end Nora stuff, now we see if a match is found this pass
+ if (motif_i == motif_len)
+ {
+ motif_match_starts.push_back(seq_i - motif_len + 1);
+ motif_i = 0;
+ }
+ }
+
+ return motif_match_starts;
+}
#include <list>
#include <vector>
#include <string>
-#include <fstream.h>
+#include <fstream>
#include <stdlib.h>
#include <algorithm>
// Sequence data class
+using namespace std;
+
+struct annot
+{
+ int start, end;
+ string name, type;
+};
+
class Sequence
{
friend class ConnView;
string species;
int species_num;
- struct annot
- {
- int start, end;
- string name, type;
- };
list<annot> annots;
public:
Sequence();
- void load_fasta(string file_path, int seq_num);
+ void load_fasta(string file_path, int seq_num,
+ int start_index, int end_index);
void load_annot(string file_path);
string seq();
const char * c_seq();
int len();
string hdr();
void set_seq(string a_seq);
+ list<int> find_motif(string a_motif);
// string species();
void clear();
+ void save(fstream &save_file);
+ void load_museq(string load_file_path, int seq_num);
};