From 6d25d4d945af696134bdf788b111f38b197b1a15 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Thu, 19 Apr 2007 23:02:29 +0000 Subject: [PATCH] make mupa file loading eol-style insensitive ticket:260 I refactored the load_mupa_file function into one that takes a file, opens it, and then passes it to a different function that takes a stream. AKA split a function in two, so I could more easily unit test the mupa loading code. Once it was unit testable, I moved multiplatform_getline out of sequence, into its own new cpp file and changed the mupa loading code to use it instead. --- alg/CMakeLists.txt | 1 + alg/io.cpp | 17 ++++ alg/io.hpp | 10 +++ alg/mussa.cpp | 188 +++++++++++++++++++++------------------- alg/mussa.hpp | 8 +- alg/sequence.cpp | 17 +--- alg/test/test_mussa.cpp | 19 ++++ 7 files changed, 154 insertions(+), 106 deletions(-) create mode 100644 alg/io.cpp create mode 100644 alg/io.hpp diff --git a/alg/CMakeLists.txt b/alg/CMakeLists.txt index 0234d94..9963d64 100644 --- a/alg/CMakeLists.txt +++ b/alg/CMakeLists.txt @@ -19,6 +19,7 @@ SET(SOURCES alphabet.cpp flp_seqcomp.cpp glseqbrowser.cpp glsequence.cpp + io.cpp mussa.cpp motif_parser.cpp nway_entropy.cpp diff --git a/alg/io.cpp b/alg/io.cpp new file mode 100644 index 0000000..4d7624b --- /dev/null +++ b/alg/io.cpp @@ -0,0 +1,17 @@ +#include "io.hpp" + +void multiplatform_getline(std::istream& in, std::string& line) +{ + line.clear(); + char c; + in.get(c); + while(in.good() and !(c == '\012' or c == '\015') ) { + line.push_back(c); + in.get(c); + } + // if we have cr-lf eat it + c = in.peek(); + if (c=='\012' or c == '\015') { + in.get(); + } +} \ No newline at end of file diff --git a/alg/io.hpp b/alg/io.hpp new file mode 100644 index 0000000..29e1105 --- /dev/null +++ b/alg/io.hpp @@ -0,0 +1,10 @@ +#ifndef IO_HPP_ +#define IO_HPP_ + +#include <istream> +#include <string> + +//! 
useful function for ignoring the various end of line conventions +void multiplatform_getline(std::istream& in, std::string& line); + +#endif /*IO_HPP_*/ diff --git a/alg/mussa.cpp b/alg/mussa.cpp index 82b1d4a..9a5935e 100644 --- a/alg/mussa.cpp +++ b/alg/mussa.cpp @@ -21,9 +21,11 @@ namespace fs = boost::filesystem; #include #include "mussa_exceptions.hpp" -#include "alg/flp.hpp" -#include "alg/mussa.hpp" -#include "alg/motif_parser.hpp" + +#include "flp.hpp" +#include "io.hpp" +#include "mussa.hpp" +#include "motif_parser.hpp" using namespace std; @@ -350,10 +352,35 @@ void Mussa::load_sequence(fs::path seq_file, fs::path annot_file, set_dirty(true); } +void Mussa::load_mupa_file(std::string para_file_path) { + load_mupa_file(boost::filesystem::path(para_file_path)); +} + void Mussa::load_mupa_file(fs::path para_file_path) { - fs::ifstream para_file; + if (not fs::exists(para_file_path)) + { + throw mussa_load_error("Config File: " + para_file_path.string() + " not found"); + } else if (fs::is_directory(para_file_path)) { + throw mussa_load_error("Config File: " + para_file_path.string() + " is a directory."); + } else if (fs::is_empty(para_file_path)) { + throw mussa_load_error("Config File: " + para_file_path.string() + " is empty"); + } else { + // what directory is the mupa file in? 
+ fs::path file_path_base( para_file_path.branch_path()) ; + + fs::ifstream para_file; + para_file.open(para_file_path, ios::in); + + load_mupa_stream(para_file, file_path_base); + para_file.close(); + } +} + +void +Mussa::load_mupa_stream(std::istream& para_file, fs::path& file_path_base) +{ string file_data_line; string param, value; fs::path annot_file; @@ -367,101 +394,84 @@ Mussa::load_mupa_file(fs::path para_file_path) // initialize values clear(); - // if file was opened, read the parameter values - if (not fs::exists(para_file_path)) + // setup loop by getting file's first line + getline(para_file, file_data_line); + split_index = file_data_line.find(" "); + param = file_data_line.substr(0,split_index); + value = file_data_line.substr(split_index+1); + + while (para_file) { - throw mussa_load_error("Config File: " + para_file_path.string() + " not found"); - } else if (fs::is_directory(para_file_path)) { - throw mussa_load_error("Config File: " + para_file_path.string() + " is a directory."); - } else if (fs::is_empty(para_file_path)) { - throw mussa_load_error("Config File: " + para_file_path.string() + " is empty"); - } else { - para_file.open(para_file_path, ios::in); - - // what directory is the mupa file in? 
- fs::path file_path_base = para_file_path.branch_path(); - - // setup loop by getting file's first line - getline(para_file,file_data_line); - split_index = file_data_line.find(" "); - param = file_data_line.substr(0,split_index); - value = file_data_line.substr(split_index+1); - - while (para_file) + did_seq = false; + if (param == "ANA_NAME") + analysis_name = value; + else if (param == "APPEND_WIN") + win_append = true; + else if (param == "APPEND_THRES") + thres_append = true; + else if (param == "SEQUENCE_NUM") + ; // ignore sequence_num now + else if (param == "WINDOW") + window = atoi(value.c_str()); + else if (param == "THRESHOLD") + threshold = atoi(value.c_str()); + else if (param == "SEQUENCE") { - did_seq = false; - if (param == "ANA_NAME") - analysis_name = value; - else if (param == "APPEND_WIN") - win_append = true; - else if (param == "APPEND_THRES") - thres_append = true; - else if (param == "SEQUENCE_NUM") - ; // ignore sequence_num now - else if (param == "WINDOW") - window = atoi(value.c_str()); - else if (param == "THRESHOLD") - threshold = atoi(value.c_str()); - else if (param == "SEQUENCE") - { - fs::path seq_file = file_path_base / value; - //cout << "seq_file_name " << seq_files.back() << endl; - fasta_index = 1; - annot_file = ""; - sub_seq_start = 0; - sub_seq_end = 0; - seq_params = true; - - while (para_file && seq_params) - { - getline(para_file,file_data_line); - split_index = file_data_line.find(" "); - param = file_data_line.substr(0,split_index); - value = file_data_line.substr(split_index+1); - - if (param == "FASTA_INDEX") - fasta_index = atoi(value.c_str()); - else if (param == "ANNOTATION") - annot_file = file_path_base / value; - else if (param == "SEQ_START") - sub_seq_start = atoi(value.c_str()); - else if (param == "SEQ_END") - { - sub_seq_end = atoi(value.c_str()); - } - //ignore empty lines or that start with '#' - else if ((param == "") || (param == "#")) {} - else seq_params = false; - } - load_sequence(seq_file, 
annot_file, fasta_index, sub_seq_start, - sub_seq_end); - did_seq = true; - } - //ignore empty lines or that start with '#' - else if ((param == "") || (param == "#")) {} - else - { - clog << "Illegal/misplaced mussa parameter in file\n"; - clog << param << "\n"; - } - - if (!did_seq) + fs::path seq_file = file_path_base / value; + //cout << "seq_file_name " << seq_files.back() << endl; + fasta_index = 1; + annot_file = ""; + sub_seq_start = 0; + sub_seq_end = 0; + seq_params = true; + + while (para_file && seq_params) { - getline(para_file,file_data_line); + multiplatform_getline(para_file,file_data_line); split_index = file_data_line.find(" "); param = file_data_line.substr(0,split_index); value = file_data_line.substr(split_index+1); - did_seq = false; + + if (param == "FASTA_INDEX") + fasta_index = atoi(value.c_str()); + else if (param == "ANNOTATION") + annot_file = file_path_base / value; + else if (param == "SEQ_START") + sub_seq_start = atoi(value.c_str()); + else if (param == "SEQ_END") + { + sub_seq_end = atoi(value.c_str()); + } + //ignore empty lines or that start with '#' + else if ((param == "") || (param == "#")) { + // pass + } else { + seq_params = false; + } } + load_sequence(seq_file, annot_file, fasta_index, sub_seq_start, + sub_seq_end); + did_seq = true; + } + //ignore empty lines or that start with '#' + else if ((param == "") || (param == "#")) {} + else + { + clog << "Illegal/misplaced mussa parameter in file\n"; + clog << param << "\n"; } - para_file.close(); - - soft_thres = threshold; - //cout << "nway mupa: analysis_name = " << analysis_name - // << " window = " << window - // << " threshold = " << threshold << endl; + if (!did_seq) + { + multiplatform_getline(para_file,file_data_line); + split_index = file_data_line.find(" "); + param = file_data_line.substr(0,split_index); + value = file_data_line.substr(split_index+1); + did_seq = false; + } } + + soft_thres = threshold; // no file was loaded, signal error set_dirty(true); } diff 
--git a/alg/mussa.hpp b/alg/mussa.hpp index 83e08cb..152d821 100644 --- a/alg/mussa.hpp +++ b/alg/mussa.hpp @@ -72,8 +72,14 @@ public: void clear(); //! set parameters from a file - 'mupa' ~ mussa parameters - void load_mupa_file(std::string para_file_path) { load_mupa_file(boost::filesystem::path(para_file_path));} + void load_mupa_file(std::string para_file_path); void load_mupa_file(boost::filesystem::path para_file_path); + //! load mussa parameters from a stream, specifying the base directory used to resolve SEQUENCE and ANNOTATION paths + void load_mupa_stream( + std::istream & para_file, + boost::filesystem::path& file_path_base + ); + // set parameters individually (eg from user input into gui classes) //! set analysis name diff --git a/alg/sequence.cpp b/alg/sequence.cpp index 2e845c6..1f21bd5 100644 --- a/alg/sequence.cpp +++ b/alg/sequence.cpp @@ -32,6 +32,7 @@ namespace fs = boost::filesystem; namespace spirit = boost::spirit; #include "alg/sequence.hpp" +#include "io.hpp" #include "mussa_exceptions.hpp" #include @@ -162,22 +163,6 @@ Sequence &Sequence::operator=(const Sequence& s) return *this; } -static void multiplatform_getline(std::istream& in, std::string& line) -{ - line.clear(); - char c; - in.get(c); - while(in.good() and !(c == '\012' or c == '\015') ) { - line.push_back(c); - in.get(c); - } - // if we have cr-lf eat it - c = in.peek(); - if (c=='\012' or c == '\015') { - in.get(); - } -} - void Sequence::load_fasta(fs::path file_path, int seq_num, int start_index, int end_index) { load_fasta(file_path, reduced_nucleic_alphabet, seq_num, start_index, end_index); diff --git a/alg/test/test_mussa.cpp b/alg/test/test_mussa.cpp index 55188ea..e1cef22 100644 --- a/alg/test/test_mussa.cpp +++ b/alg/test/test_mussa.cpp @@ -110,6 +110,25 @@ BOOST_AUTO_TEST_CASE ( empty_mussa_set_threshold ) m.nway(); } +BOOST_AUTO_TEST_CASE( mussa_load_mupa_crlf ) +{ + fs::path example_path(EXAMPLE_DIR, fs::native); + fs::path seq_path(example_path / "seq" / "mouse_mck_pro.fa"); + fs::path annot_path(example_path / 
"mm_mck3test.annot"); + + std::string mupa( + "# hello\015\012" + "ANA_NAME load_mupa_crlf\015\012"); + mupa += "SEQUENCE " + seq_path.native_file_string() + "\015\012"; + mupa += "ANNOTATION " + annot_path.native_file_string() + "\015\012"; + + istringstream mupa_stream(mupa); + Mussa m; + fs::path base; + m.load_mupa_stream( mupa_stream, base ); + // Should run with no exceptions +} + BOOST_AUTO_TEST_CASE( mussa_load_mupa ) { fs::path mupa_path(EXAMPLE_DIR, fs::native); -- 2.30.2