1 #ifndef _MUSSA_CLASS_H_
2 #define _MUSSA_CLASS_H_
3 // This file is part of the Mussa source distribution.
4 // http://mussa.caltech.edu/
5 // Contact author: Tristan De Buysscher, tristan@caltech.edu
7 // This program and all associated source code files are Copyright (C) 2005
8 // the California Institute of Technology, Pasadena, CA, 91125 USA. It is
9 // under the GNU Public License; please see the included LICENSE.txt
10 // file for more information, or contact Tristan directly.
13 // ----------------------------------------
14 // ---------- mussa_class.hh -----------
15 // ----------------------------------------
23 #include "alg/annotation_colors.hpp"
24 #include "alg/nway_paths.hpp"
25 #include "alg/sequence.hpp"
//! presumably returns the text form of an_int; the definition is not in this
//! header, so confirm the exact formatting there
27 std::string int_to_str(int an_int);
//! ConnWindow is a friend, so it may access the non-public members of the
//! enclosing class
31 friend class ConnWindow;
33 enum analysis_modes { TransitiveNway, RadialNway, EntropyNway,
//! presumably writes this analysis out under save_path
//! NOTE(review): the on-disk layout is not described in this header - confirm
40 void save_muway(std::string save_path);
41 //! load a saved analysis directory
//! \param ana_path path of the saved analysis directory to load
42 void load(std::string ana_path);
44 //! clear parameters and initialize data lists
47 // set parameters from a file - 'mupa' ~ mussa parameters
// para_file_path: path of the mupa parameter file to read
48 void load_mupa_file(std::string para_file_path);
50 // set parameters individually (eg from user input into gui classes)
//! set the name for this analysis
52 void set_name(std::string a_name);
53 //! return name for this analysis
//! NOTE(review): not declared const, unlike get_window() and get_threshold()
54 std::string get_name();
56 //! return number of sequences in this analysis
57 /*! this returns either the_seqs.size() or seq_files.size()
58 * depending on which has data loaded in
59 * (silly delayed loading of sequence data)
62 //! set number of bases for this window size
//! \param a_window window size, in bases
63 void set_window(int a_window);
64 //! get number of bases for the sliding window
//! \return sliding window size, in bases
65 int get_window() const;
66 //! set number of bases that must match for a window to be saved
//! \param a_threshold number of bases that must match
67 void set_threshold(int a_threshold);
68 //! get number of bases that must match for a window to be saved
//! \return number of bases that must match
69 int get_threshold() const;
//! set the soft threshold
//! NOTE(review): presumably the threshold applied by the nway filtering step
//! rather than the seqcomp threshold - confirm against the definition
70 void set_soft_thres(int sft_thres);
//! choose which nway comparison algorithm to use
72 void set_analysis_mode(enum analysis_modes new_ana_mode);
//! return the analysis mode (presumably the one currently selected)
73 enum analysis_modes get_analysis_mode() const;
74 //! return a string name for an analysis mode
//! NOTE(review): takes no argument, so presumably names the currently
//! selected mode - confirm
75 std::string get_analysis_mode_name() const;
77 //! return the refined paths found by the nway analysis.
//! The reference is const, so callers cannot modify the paths through it.
78 const NwayPaths& paths() const;
80 //! run seqcomp and the nway filtering algorithm.
81 /*!analyze will run seqcomp and then the nway algorithm
82 * on whatever sequences have been loaded into this mussa instance.
83 * w & t are for command line override functionality, set to 0 to ignore
84 * \throws mussa_analysis_error
//! ana_mode picks the nway algorithm and defaults to TransitiveNway.
//! ent_thres defaults to 0.0; presumably it only matters for EntropyNway -
//! TODO confirm against the definition.
86 void analyze(int w=0, int t=0,
87 enum analysis_modes ana_mode=TransitiveNway,
88 double ent_thres=0.0);
89 /*! Run the nway filtering algorithm,
90 * this might be used when changing the soft threshold?
94 //! appends a string sequence to the list of the_seqs
//! \param a_seq the sequence, given as a string
95 void add_a_seq(std::string a_seq);
96 //! Load a sequence from a fasta file and any annotations
97 /*! \param[in] seq_file the full path to the fasta file
98 * \param[in] annot_file the full path to an annotation file,
99 * if it is an empty string, we won't bother loading anything
100 * \param[in] fasta_index specify which sequence in a multisequence fasta
102 * \param[in] sub_seq_start starting slice index to select a subsequence
103 * use 0 to start from the beginning.
104 * \param[in] sub_seq_end ending slice index to select a subsequence
105 * use 0 to go to the end.
107 void load_sequence(std::string seq_file, std::string annot_file,
108 int fasta_index, int sub_seq_start=0, int sub_seq_end=0);
109 //! allow examining the sequences we have loaded
//! The reference is const, so callers cannot modify the sequences through it.
110 const std::vector<Sequence>& sequences() const;
112 // deprecated - support bridge for python version of mussa
113 // these save & load from the old file format
// load_file_path: presumably the path of the old-format file to read
// s_num: presumably the number of sequences in that file - TODO confirm
// NOTE(review): load_file_path is a non-const char *; whether the buffer is
// modified is not visible from this header
115 void load_old(char * load_file_path, int s_num);
117 // manage motif lists
118 //! load motifs from an ifstream
119 /*! The file should look something like
120 * <sequence> <red> <green> <blue>
121 * where sequence is a string of IUPAC symbols
122 * and red,green,blue are a white space separated list of floats
123 * in the range [0.0, 1.0]
125 void load_motifs(std::istream &);
126 //! load a list of motifs from a file named filename
//! \param filename name of the motif file to read
127 void load_motifs(std::string filename);
129 //! return color mapper
//! The reference is non-const, so callers can modify the mapper through it.
130 AnnotationColors& colorMapper();
134 // parameters needed for a mussa analysis
135 //! name of this analysis. (will also be used when saving an analysis)
136 std::string analysis_name;
137 //! how many base pairs to include in a sliding window
139 //! how many base pairs need to match in order to record a window as conserved
142 //! which nway comparison algorithm to use.
143 enum analysis_modes ana_mode;
145 //! should we append _w<window_size> to the saved analysis
147 //! should we append _t<threshold> to the saved analysis
//! the list of sequences (add_a_seq appends to it; presumably this is what
//! sequences() exposes - confirm)
151 std::vector<Sequence> the_seqs;
//! comparison results, held as a vector of vectors of FLPs
//! NOTE(review): presumably one FLPs per pair of sequences produced by the
//! seqcomp step - confirm the indexing against the definition
153 std::vector<std::vector<FLPs> > all_comps;
154 //! N-way data, ie the mussa results
//! set of motif sequences, stored as strings (presumably filled by
//! load_motifs - confirm)
158 std::set<std::string> motif_sequences;
//! annotation color mapper (presumably the object colorMapper() returns -
//! confirm)
160 AnnotationColors color_mapper;
163 //! loads sequence and annotations from fasta and annotation file