X-Git-Url: http://woldlab.caltech.edu/gitweb/?a=blobdiff_plain;f=alg%2Fmussa.hpp;h=152d8217349568755913e67c1a77fed6aaa6eaa6;hb=6d25d4d945af696134bdf788b111f38b197b1a15;hp=20b1069fd2a9c0134d51f78841e7ff8e41b2d273;hpb=cc0b69411a2d71e2b6e94d742f373a8ccd67b8ce;p=mussa.git diff --git a/alg/mussa.hpp b/alg/mussa.hpp index 20b1069..152d821 100644 --- a/alg/mussa.hpp +++ b/alg/mussa.hpp @@ -13,41 +13,83 @@ // ---------------------------------------- // ---------- mussa_class.hh ----------- // ---------------------------------------- +#include +#include + +#include +#include #include #include #include +#include +#include +#include "alg/annotation_colors.hpp" +#include "alg/mussa_callback.hpp" #include "alg/nway_paths.hpp" #include "alg/sequence.hpp" std::string int_to_str(int an_int); -class Mussa +class Mussa; +//! provide a simple name to point to our Mussa shared_ptr +typedef boost::shared_ptr MussaRef; + +class Mussa : public QObject { - friend class ConnWindow; - public: + Q_OBJECT + +signals: + //! call whatever signaling system we want + void progress(const QString& description, int cur, int max); + //! triggered when our state changes between unsaved(true) and saved(false) + void isModified(bool); + +public: + typedef std::set motif_set; enum analysis_modes { TransitiveNway, RadialNway, EntropyNway, RecursiveNway }; - Mussa(); + Mussa(); + Mussa(const Mussa &); - void save(); - void save_muway(std::string save_path); + //! dynamically construct a new Mussa object and return a reference to it + static MussaRef init(); + + //! save all of mussa + void save(boost::filesystem::path save_path=""); + //! save the nway comparison + void save_muway(boost::filesystem::path save_path); //! load a saved analysis directory - void load(std::string ana_path); + void load(boost::filesystem::path ana_path); + // ! return path to the where the analysis is stored + boost::filesystem::path get_analysis_path() const; + //! set analysis path + void set_analysis_path(boost::filesystem::path); //! clear parameters and initialize data lists void clear(); - // set parameters from a file - 'mupa' ~ mussa parameters + //! set parameters from a file - 'mupa' ~ mussa parameters void load_mupa_file(std::string para_file_path); + void load_mupa_file(boost::filesystem::path para_file_path); + //! load mussa parameters from a stream, specifing output location + void load_mupa_stream( + std::istream & para_file, + boost::filesystem::path& file_path_base + ); + // set parameters individually (eg from user input into gui classes) //! set analysis name void set_name(std::string a_name); //! return name for this analysis - std::string get_name(); + std::string get_name() const; + //! return a reasonable window title for this analysis + /*! this returns the "variable" portion for a title + */ + std::string get_title() const; //! return number of sequences in this analyzis /*! this returns either the_seqs.size() or seq_files.size() @@ -55,40 +97,60 @@ class Mussa * (silly delayed loading of sequence data) */ int size() const; + //! set number of bases for this window size void set_window(int a_window); //! get number of bases for the sliding window int get_window() const; - //! set number of bases that must match for a window to be saved + //! set number of bases that must match for a window to be saved + //! if threshold > soft_threshold this also sets soft_threshold void set_threshold(int a_threshold); //! get number of bases that must match for a window to be saved int get_threshold() const; - void set_soft_thres(int sft_thres); - + //! sets the threshold used for computing the nway paths + //! must be in range [threshold..window size] + void set_soft_threshold(int sft_thres); + int get_soft_threshold() const; + void set_analysis_mode(enum analysis_modes new_ana_mode); enum analysis_modes get_analysis_mode() const; //! return a string name for an analysis mode std::string get_analysis_mode_name() const; + //! set our current dirty state + void set_dirty(bool); + //! return if we have unsaved changes + bool is_dirty() const; + //! is there anything loaded into this analysis? + bool empty() const; //! return the refined paths found by the nway analysis. const NwayPaths& paths() const; + //! given selected_paths, and view_paths, compute per base pair matches + //template + void createLocalAlignment(std::list::iterator begin, + std::list::iterator end, + std::list& result, + std::list >& reversed); + //! run seqcomp and the nway filtering algorithm. /*!analyze will run seqcomp and then the nway algorithm * on whatever sequences have been loaded into this mussa instance. - * w & t are for command line override functionality, set to 0 to ignore * \throws mussa_analysis_error */ - void analyze(int w=0, int t=0, - enum analysis_modes ana_mode=TransitiveNway, - double ent_thres=0.0); + void analyze(); /*! Run the nway filtering algorithm, * this might be used when changing the soft threshhold? */ void nway(); //! appends a string sequence to the list of the_seqs - void add_a_seq(std::string a_seq); + // void append_sequence(std::string a_seq); + //! appends a sequence to the list of the_seqs (makes copy) + void append_sequence(const Sequence& a_seq); + //! append a sequence to the list of seqs (shared) + void append_sequence(boost::shared_ptr a_seq); + //! Load a sequence from a fasta file and any annotations /*! \param[in] seq_file the full path to the fasta file * \param[in] annot_file the full path to an annotation file, @@ -99,36 +161,94 @@ class Mussa * use 0 start from the beginning. * \param[in] sub_seq_end ending slice index to select a subsequence * use 0 to go to the end. + * \param[in] name sequence name, only used if not null */ - void load_sequence(std::string seq_file, std::string annot_file, - int fasta_index, int sub_seq_start=0, int sub_seq_end=0); + void load_sequence(boost::filesystem::path seq_file, + boost::filesystem::path annot_file, + int fasta_index, int sub_seq_start=0, int sub_seq_end=0, + std::string *name=0); //! allow examining the sequences we have loaded - const std::vector& sequences() const; + typedef std::vector > vector_sequence_type; + const vector_sequence_type& sequences() const; // deprecated - support bridge for python version of mussa // these save & load from the old file format void save_old(); void load_old(char * load_file_path, int s_num); + // manage motif lists + //! add a motif it wont be applied until update_sequences_motif is called + void add_motif(const Sequence& motifs, const Color& colors); + //! add vector of motifs and colors to our motif collection + /*! this will automatically call update_sequences_motif + * this depends on sets and color maps being unique + * (aka if you add the same item more than once it doesn't + * increase the size of the data structure) + */ + void set_motifs(const std::vector& motifs, + const std::vector& colors); + /*! The file should look something like + * + * where sequence is a string of IUPAC symbols + * and red,green,blue are a white space separated list of floats + * in the range [0.0, 1.0] + */ + //! load a list of motifs from a file named filename + void load_motifs(boost::filesystem::path filename); + //! load motifs from an ifstream + /*! \sa Mussa::load_motifs(boost::filesystem::path) + */ + void load_motifs(std::istream &); + //! save motif list to the specified filename + void save_motifs(boost::filesystem::path filename); + //! save motif list to an ostream + void save_motifs(std::ostream &); + //! return our motifs; + const motif_set& motifs() const; + + //! return color mapper + boost::shared_ptr colorMapper(); + private: + //! push motifs to our attached sequences + void update_sequences_motifs(); + // Private variables // parameters needed for a mussa analysis + //! name of this analysis. (will also be used when saving an analysis) std::string analysis_name; - int window, threshold, soft_thres; + //! how many base pairs to include in a sliding window + int window; + //! how many base pairs need to match order to record a window as conserved + int threshold; + //! stores current filter used by GUI to change the connections shown + int soft_thres; + //! which nway comparison algorithm to use. enum analysis_modes ana_mode; double ent_thres; - bool win_override, thres_override; - bool win_append, thres_append; + //! should we append _w to the saved analysis + bool win_append; + //! should we append _t to the saved analysis + bool thres_append; //! sequence data - std::vector the_seqs; + vector_sequence_type the_seqs; //! the seqcomp data std::vector > all_comps; //! N-way data, ie the mussa results NwayPaths the_paths; + //! motif list + motif_set motif_sequences; + //! color manager + boost::shared_ptr color_mapper; + //! path to our analysis + boost::filesystem::path analysis_path; + //! flag indicating if we have unsaved changes + bool dirty; + // Private methods - //! loads sequence and annotations from fasta and annotation file + //! runs all the seqcomps needed to support the nway comparison void seqcomp(); };