return out;
}
+static void multiplatform_getline(std::istream& in, std::string& line)
+{
+ line.clear();
+ char c;
+ in.get(c);
+ while(in.good() and !(c == '\012' or c == '\015') ) {
+ line.push_back(c);
+ in.get(c);
+ }
+ // if we have cr-lf eat it
+ c = in.peek();
+ if (c=='\012' or c == '\015') {
+ in.get();
+ }
+
+}
+
//! load a fasta file into a sequence
/*!
* \param file_path the location of the fasta file in the filesystem
* \param end_index ending position in the fasta sequence, 0 for end
* \return error message, empty string if no error. (gag!)
*/
+void Sequence::load_fasta(fs::path file_path, int seq_num,
+ int start_index, int end_index)
+{
+ fs::fstream data_file;
+ data_file.open(file_path, std::ios::in);
+
+ if (!data_file.good())
+ {
+ throw mussa_load_error("Sequence File: "+file_path.string()+" not found");
+ } else {
+ try {
+ load_fasta(data_file, seq_num, start_index, end_index);
+ } catch(sequence_empty_error e) {
+ // there doesn't appear to be any sequence
+ // catch and rethrow to include the filename
+ std::stringstream msg;
+ msg << "The selected sequence in "
+ << file_path.native_file_string()
+ << " appears to be empty";
+ throw sequence_empty_error(msg.str());
+ } catch(sequence_empty_file_error e) {
+ std::stringstream errormsg;
+ errormsg << file_path.native_file_string()
+ << " did not have any fasta sequences" << std::endl;
+ throw sequence_empty_file_error(errormsg.str());
+ }
+ }
+}
+
void
-Sequence::load_fasta(fs::path file_path, int seq_num,
+Sequence::load_fasta(std::iostream& data_file, int seq_num,
int start_index, int end_index)
{
- fs::fstream data_file;
std::string file_data_line;
int header_counter = 0;
bool read_seq = true;
std::string sequence_raw;
std::string seq_tmp; // holds sequence during basic filtering
- data_file.open(file_path, std::ios::in);
-
if (seq_num == 0) {
throw mussa_load_error("fasta sequence number is 1 based (can't be 0)");
}
- if (!data_file)
- {
- throw mussa_load_error("Sequence File: " + file_path.string() + " not found");
- }
- // if file opened okay, read it
- else
+
+ // search for the header of the fasta sequence we want
+ while ( (!data_file.eof()) && (header_counter < seq_num) )
{
- // search for the header of the fasta sequence we want
- while ( (!data_file.eof()) && (header_counter < seq_num) )
- {
- getline(data_file,file_data_line);
- if (file_data_line.substr(0,1) == ">")
- header_counter++;
- }
+ multiplatform_getline(data_file, file_data_line);
+ if (file_data_line.substr(0,1) == ">")
+ header_counter++;
+ }
- if (header_counter > 0) {
- header = file_data_line.substr(1);
+ if (header_counter > 0) {
+ header = file_data_line.substr(1);
- sequence_raw = "";
+ sequence_raw = "";
- while ( !data_file.eof() && read_seq ) {
- getline(data_file,file_data_line);
- if (file_data_line.substr(0,1) == ">")
- read_seq = false;
- else sequence_raw += file_data_line;
- }
+ while ( !data_file.eof() && read_seq ) {
+ multiplatform_getline(data_file,file_data_line);
+ if (file_data_line.substr(0,1) == ">")
+ read_seq = false;
+ else sequence_raw += file_data_line;
+ }
- // Lastly, if subselection of the sequence was specified we keep cut out
- // and only keep that part
- // end_index = 0 means no end was specified, so cut to the end
- if (end_index == 0)
- end_index = sequence_raw.size();
-
- // sequence filtering for upcasing agctn and convert non AGCTN to N
- if (end_index-start_index <= 0) {
- // there doesn't appear to be any sequence
- std::stringstream msg;
- msg << "The selected sequence in "
- << file_path.native_file_string()
- << " appears to be empty";
- throw mussa_load_error(msg.str());
- }
- set_filtered_sequence(sequence_raw, start_index, end_index-start_index);
- } else {
- std::stringstream errormsg;
- errormsg << file_path.native_file_string()
- << " did not have any fasta sequences" << std::endl;
- throw mussa_load_error(errormsg.str());
+ // Lastly, if subselection of the sequence was specified we keep cut out
+ // and only keep that part
+ // end_index = 0 means no end was specified, so cut to the end
+ if (end_index == 0)
+ end_index = sequence_raw.size();
+
+ // sequence filtering for upcasing agctn and convert non AGCTN to N
+ if (end_index-start_index <= 0) {
+ std::string msg("The selected sequence appears to be empty");
+ throw sequence_empty_error(msg);
}
- data_file.close();
+ set_filtered_sequence(sequence_raw, start_index, end_index-start_index);
+ } else {
+ std::string errormsg("There were no fasta sequences");
+ throw sequence_empty_file_error(errormsg);
}
}
std::string::size_type count=0);
//! load sequence AGCT from fasta file
- /*! \throws mussa_load_error
- */
+ //! \throw mussa_load_error
+ //! \throw sequence_empty_error
+ //! \throw sequence_empty_file_error
void load_fasta(const boost::filesystem::path file_path, int seq_num=1,
int start_index=0, int end_index=0);
+ //! load sequence from stream
+ //! \throw mussa_load_error
+ //! \throw sequence_empty_error
+ //! \throw sequence_empty_file_error
+ void load_fasta(std::iostream& file, int seq_num=1,
+ int start_index=0, int end_index=0);
//! load sequence annotations
//! \throws mussa_load_error
void load_annot(const boost::filesystem::path file_path, int start_index, int end_index);
//! load sequence annotations
//! \throws mussa_load_error
- //void load_annot(std::istream& data_stream, int start_index, int end_index);
+ void load_annot(std::fstream& data_stream, int start_index, int end_index);
void parse_annot(std::string data, int start_index, int end_index);
const std::list<annot>& annotations() const;
const std::list<motif>& motifs() const;