1 #ifndef _MUSSA_CLASS_H_
2 #define _MUSSA_CLASS_H_
3 // This file is part of the Mussa source distribution.
4 // http://mussa.caltech.edu/
5 // Contact author: Tristan De Buysscher, tristan@caltech.edu
7 // This program and all associated source code files are Copyright (C) 2005
8 // the California Institute of Technology, Pasadena, CA, 91125 USA. It is
9 // under the GNU Public License; please see the included LICENSE.txt
10 // file for more information, or contact Tristan directly.
13 // ----------------------------------------
14 // ---------- mussa_class.hh -----------
15 // ----------------------------------------
19 #include <boost/filesystem/path.hpp>
20 #include <boost/shared_ptr.hpp>
28 #include "alg/annotation_colors.hpp"
29 #include "alg/mussa_callback.hpp"
30 #include "alg/nway_paths.hpp"
31 #include "alg/sequence.hpp"
33 std::string int_to_str(int an_int); // NOTE(review): declaration only; presumably returns an_int formatted as text - confirm against the definition
36 //! provide a simple name to point to our Mussa shared_ptr
37 typedef boost::shared_ptr<Mussa> MussaRef;
39 class Mussa : public QObject
44 //! call whatever signaling system we want
45 void progress(const QString& description, int cur, int max);
46 //! triggered when our state changes between unsaved(true) and saved(false)
47 void isModified(bool);
50 typedef std::set<Sequence> motif_set;
51 enum analysis_modes { TransitiveNway, RadialNway, EntropyNway,
57 //! dynamically construct a new Mussa object and return a reference to it
58 static MussaRef init();
61 void save(boost::filesystem::path save_path="");
62 //! save the nway comparison
63 void save_muway(boost::filesystem::path save_path);
64 //! load a saved analysis directory
65 void load(boost::filesystem::path ana_path);
66 //! return the path to where the analysis is stored
67 boost::filesystem::path get_analysis_path() const;
69 void set_analysis_path(boost::filesystem::path);
71 //! clear parameters and initialize data lists
74 //! set parameters from a file - 'mupa' ~ mussa parameters
75 void load_mupa_file(std::string para_file_path);
76 void load_mupa_file(boost::filesystem::path para_file_path);
77 //! load mussa parameters from a stream, specifying output location
78 void load_mupa_stream(
79 std::istream & para_file,
80 boost::filesystem::path& file_path_base
84 // set parameters individually (eg from user input into gui classes)
86 void set_name(std::string a_name);
87 //! return name for this analysis
88 std::string get_name() const;
89 //! return a reasonable window title for this analysis
90 /*! this returns the "variable" portion for a title
92 std::string get_title() const;
94 //! return number of sequences in this analysis
95 /*! this returns either the_seqs.size() or seq_files.size()
96 * depending on which has data loaded in
97 * (silly delayed loading of sequence data)
101 //! set number of bases for this window size
102 void set_window(int a_window);
103 //! get number of bases for the sliding window
104 int get_window() const;
105 //! set number of bases that must match for a window to be saved
106 //! if threshold > soft_threshold this also sets soft_threshold
107 void set_threshold(int a_threshold);
108 //! get number of bases that must match for a window to be saved
109 int get_threshold() const;
110 //! sets the threshold used for computing the nway paths
111 //! must be in range [threshold..window size]
112 void set_soft_threshold(int sft_thres);
113 int get_soft_threshold() const;
115 //! should we append the window size to the analysis name
116 void set_append_window(bool v);
117 //! should we append the window size to the analysis name
118 bool get_append_window();
120 //! should we append the threshold to the analysis name
121 void set_append_threshold(bool v);
122 //! should we append the threshold to the analysis name
123 bool get_append_threshold();
125 void set_analysis_mode(enum analysis_modes new_ana_mode);
126 enum analysis_modes get_analysis_mode() const;
127 //! return a string name for an analysis mode
128 std::string get_analysis_mode_name() const;
129 //! set our current dirty state
130 void set_dirty(bool);
131 //! return if we have unsaved changes
132 bool is_dirty() const;
133 //! is there anything loaded into this analysis?
136 //! return the refined paths found by the nway analysis.
137 const NwayPaths& paths() const;
139 //! given selected_paths, and view_paths, compute per base pair matches
140 //template <class IteratorT>
141 void createLocalAlignment(std::list<ConservedPath>::iterator begin,
142 std::list<ConservedPath>::iterator end,
143 std::list<ConservedPath::path_type>& result,
144 std::list<std::vector<bool> >& reversed);
146 //! run seqcomp and the nway filtering algorithm.
147 /*!analyze will run seqcomp and then the nway algorithm
148 * on whatever sequences have been loaded into this mussa instance.
149 * \throws mussa_analysis_error
152 /*! Run the nway filtering algorithm,
153 * this might be used when changing the soft threshold?
157 //! appends a string sequence to the list of the_seqs
158 // void append_sequence(std::string a_seq);
159 //! appends a sequence to the list of the_seqs (makes copy)
160 void append_sequence(const Sequence& a_seq);
161 //! append a sequence to the list of seqs (shared)
162 void append_sequence(boost::shared_ptr<Sequence> a_seq);
164 //! Load a sequence from a fasta file and any annotations
165 /*! \param[in] seq_file the full path to the fasta file
166 * \param[in] annot_file the full path to an annotation file,
167 * if it is an empty string, we won't bother loading anything
168 * \param[in] fasta_index specify which sequence in a multisequence fasta
170 * \param[in] sub_seq_start starting slice index to select a subsequence
171 * use 0 to start from the beginning.
172 * \param[in] sub_seq_end ending slice index to select a subsequence
173 * use 0 to go to the end.
174 * \param[in] name sequence name, only used if not null
176 void load_sequence(boost::filesystem::path seq_file,
177 boost::filesystem::path annot_file,
178 int fasta_index, int sub_seq_start=0, int sub_seq_end=0,
179 std::string *name=0);
180 //! allow examining the sequences we have loaded
181 typedef std::vector<boost::shared_ptr<Sequence> > vector_sequence_type;
182 const vector_sequence_type& sequences() const;
184 // deprecated - support bridge for python version of mussa
185 // these save & load from the old file format
187 void load_old(char * load_file_path, int s_num);
189 // manage motif lists
190 //! add a motif; it won't be applied until update_sequences_motifs is called
191 void add_motif(const Sequence& motifs, const Color& colors);
192 //! add vector of motifs and colors to our motif collection
193 /*! this will automatically call update_sequences_motifs
194 * this depends on sets and color maps being unique
195 * (aka if you add the same item more than once it doesn't
196 * increase the size of the data structure)
198 void set_motifs(const std::vector<Sequence>& motifs,
199 const std::vector<Color>& colors);
200 /*! The file should look something like
201 * <sequence> <red> <green> <blue>
202 * where sequence is a string of IUPAC symbols
203 * and red,green,blue are a white space separated list of floats
204 * in the range [0.0, 1.0]
206 //! load a list of motifs from a file named filename
207 void load_motifs(boost::filesystem::path filename);
208 //! load motifs from an ifstream
209 /*! \sa Mussa::load_motifs(boost::filesystem::path)
211 void load_motifs(std::istream &);
212 //! save motif list to the specified filename
213 void save_motifs(boost::filesystem::path filename);
214 //! save motif list to an ostream
215 void save_motifs(std::ostream &);
216 //! return our motifs
217 const motif_set& motifs() const;
219 //! return color mapper
220 boost::shared_ptr<AnnotationColors> colorMapper();
223 //! push motifs to our attached sequences
224 void update_sequences_motifs();
227 // parameters needed for a mussa analysis
228 //! name of this analysis. (will also be used when saving an analysis)
229 std::string analysis_name;
230 //! how many base pairs to include in a sliding window
232 //! how many base pairs need to match in order to record a window as conserved
234 //! stores current filter used by GUI to change the connections shown
236 //! which nway comparison algorithm to use.
237 enum analysis_modes ana_mode;
239 //! should we append _w<window_size> to the saved analysis
241 //! should we append _t<threshold> to the saved analysis
245 vector_sequence_type the_seqs;
247 std::vector<std::vector<FLPs> > all_comps;
248 //! N-way data, ie the mussa results
252 motif_set motif_sequences;
254 boost::shared_ptr<AnnotationColors> color_mapper;
255 //! path to our analysis
256 boost::filesystem::path analysis_path;
257 //! flag indicating if we have unsaved changes
261 //! runs all the seqcomps needed to support the nway comparison