1 #ifndef _MUSSA_CLASS_H_
2 #define _MUSSA_CLASS_H_
3 // This file is part of the Mussa source distribution.
4 // http://mussa.caltech.edu/
5 // Contact author: Tristan De Buysscher, tristan@caltech.edu
7 // This program and all associated source code files are Copyright (C) 2005
8 // the California Institute of Technology, Pasadena, CA, 91125 USA. It is
9 // under the GNU Public License; please see the included LICENSE.txt
10 // file for more information, or contact Tristan directly.
13 // ----------------------------------------
14 // ---------- mussa_class.hh -----------
15 // ----------------------------------------
16 #include <boost/filesystem/path.hpp>
24 #include "alg/annotation_colors.hpp"
25 #include "alg/mussa_callback.hpp"
26 #include "alg/nway_paths.hpp"
27 #include "alg/sequence.hpp"
//! convert an integer to a std::string
//! NOTE(review): only the declaration is visible here; presumably the result
//! is the decimal text of an_int -- confirm against the definition.
29 std::string int_to_str(int an_int);
34 enum analysis_modes { TransitiveNway, RadialNway, EntropyNway,
41 //! save the nway comparison
42 void save_muway(boost::filesystem::path save_path);
43 //! load a saved analysis directory
44 void load(boost::filesystem::path ana_path);
46 //! clear parameters and initialize data lists
49 //! set parameters from a file - 'mupa' ~ mussa parameters
50 void load_mupa_file(std::string para_file_path)
{
  // std::string convenience overload: wrap the text in a filesystem path
  // and hand it to the boost::filesystem::path overload.
  const boost::filesystem::path mupa_path(para_file_path);
  load_mupa_file(mupa_path);
}
51 void load_mupa_file(boost::filesystem::path para_file_path);
53 // set parameters individually (eg from user input into gui classes)
55 void set_name(std::string a_name);
56 //! return name for this analysis
57 std::string get_name();
59 //! return number of sequences in this analysis
60 /*! this returns either the_seqs.size() or seq_files.size()
61 * depending on which has data loaded in
62 * (silly delayed loading of sequence data)
//! set the analysis callback
//! (the analysis_cb member is documented as being called periodically
//! while an analysis runs)
66 void set_analysis_callback(analysis_callback cb);
//! return the analysis callback (presumably the one set above -- confirm)
//! NOTE(review): "calback" is misspelled, but the name is part of the public
//! interface, so it is left unchanged here for source compatibility.
67 analysis_callback get_analysis_calback() const;
69 //! set number of bases for this window size
70 void set_window(int a_window);
71 //! get number of bases for the sliding window
72 int get_window() const;
73 //! set number of bases that must match for a window to be saved
74 //! if threshold > soft_threshold this also sets soft_threshold
75 void set_threshold(int a_threshold);
76 //! get number of bases that must match for a window to be saved
77 int get_threshold() const;
78 //! sets the threshold used for computing the nway paths
79 //! must be in range [threshold..window size]
80 void set_soft_threshold(int sft_thres);
//! get the threshold used for computing the nway paths
81 int get_soft_threshold() const;
//! choose which nway comparison algorithm to use
83 void set_analysis_mode(enum analysis_modes new_ana_mode);
//! return which nway comparison algorithm is selected
//! (presumably the value stored by set_analysis_mode -- confirm)
84 enum analysis_modes get_analysis_mode() const;
85 //! return a string name for an analysis mode
86 std::string get_analysis_mode_name() const;
88 //! return the refined paths found by the nway analysis.
89 const NwayPaths& paths() const;
91 //! given selected_paths, and view_paths, compute per base pair matches
92 //template <class IteratorT>
93 void createLocalAlignment(std::list<ConservedPath>::iterator begin,
94 std::list<ConservedPath>::iterator end,
95 std::list<ConservedPath::path_type>& result,
96 std::list<std::vector<bool> >& reversed);
98 //! run seqcomp and the nway filtering algorithm.
99 /*!analyze will run seqcomp and then the nway algorithm
100 * on whatever sequences have been loaded into this mussa instance.
101 * \throws mussa_analysis_error
104 /*! Run the nway filtering algorithm,
105 * this might be used when changing the soft threshold?
109 //! appends a string sequence to the list of the_seqs
110 void add_a_seq(std::string a_seq);
111 //! Load a sequence from a fasta file and any annotations
112 /*! \param[in] seq_file the full path to the fasta file
113 * \param[in] annot_file the full path to an annotation file,
114 * if it is an empty string, we won't bother loading anything
115 * \param[in] fasta_index specify which sequence in a multisequence fasta
117 * \param[in] sub_seq_start starting slice index to select a subsequence
118 * use 0 to start from the beginning.
119 * \param[in] sub_seq_end ending slice index to select a subsequence
120 * use 0 to go to the end.
122 void load_sequence(boost::filesystem::path seq_file,
123 boost::filesystem::path annot_file,
124 int fasta_index, int sub_seq_start=0, int sub_seq_end=0);
125 //! allow examining the sequences we have loaded
126 const std::vector<Sequence>& sequences() const;
128 // deprecated - support bridge for python version of mussa
129 // these save & load from the old file format
131 void load_old(char * load_file_path, int s_num);
133 // manage motif lists
134 //! add vector of motifs and colors to our motif collection
135 /*! this depends on sets and color maps being unique
136 * (aka if you add the same item more than once it doesn't
137 * increase the size of the data structure)
139 void add_motifs(const std::vector<std::string>& motifs,
140 const std::vector<Color>& colors);
141 //! load motifs from an ifstream
142 /*! The file should look something like
143 * <sequence> <red> <green> <blue>
144 * where sequence is a string of IUPAC symbols
145 * and red,green,blue are a white space separated list of floats
146 * in the range [0.0, 1.0]
148 void load_motifs(std::istream &);
149 //! load a list of motifs from a file named filename
150 void load_motifs(boost::filesystem::path filename);
151 //! return our motifs;
152 const std::set<std::string>& motifs() const;
154 //! return color mapper
155 AnnotationColors& colorMapper();
158 //! push motifs to our attached sequences
159 void update_sequences_motifs();
162 // parameters needed for a mussa analysis
163 //! name of this analysis. (will also be used when saving an analysis)
164 std::string analysis_name;
165 //! how many base pairs to include in a sliding window
167 //! how many base pairs need to match in order to record a window as conserved
170 //! which nway comparison algorithm to use.
171 enum analysis_modes ana_mode;
173 //! should we append _w<window_size> to the saved analysis
175 //! should we append _t<threshold> to the saved analysis
177 //! callback, periodically called as we run an analysis
178 analysis_callback analysis_cb;
181 std::vector<Sequence> the_seqs;
183 std::vector<std::vector<FLPs> > all_comps;
184 //! N-way data, ie the mussa results
188 std::set<std::string> motif_sequences;
190 AnnotationColors color_mapper;
193 //! runs all the seqcomps needed to support the nway comparison