#include <boost/spirit/core.hpp>
#include <boost/spirit/actor/push_back_actor.hpp>
#include <boost/spirit/iterator/file_iterator.hpp>
+#include <boost/spirit/utility/chset.hpp>
namespace spirit = boost::spirit;
#include "alg/sequence.hpp"
#include <iostream>
#include <sstream>
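+// Standard nucleotide alphabets (DNA, RNA and IUPAC), in both cases.
+// \012 = newline (LF)
+// \015 = carriage return (CR)
+// Both line terminators are included in every alphabet so that sequence
+// parsing is independent of the file's end-of-line convention.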
+static const char* dna_alphabet = "AaCcGgTtNn\012\015"; // A, C, G, T, N in both cases, plus LF and CR
+static const char* rna_alphabet = "AaCcGgNnUu\012\015"; // A, C, G, U, N in both cases, plus LF and CR
+static const char* iupac_alphabet = "AaCcGgTtUuRrYyMmKkSsWwBbDdHhVvNn\012\015"; // DNA and RNA letters plus R, Y, M, K, S, W, B, D, H, V; passed to spirit::chset<> when matching sequence text
+
// Default constructor: start and end are initialized to 0.
// NOTE(review): `motif` is not declared anywhere in this view, so the
// `sequence(motif)` initializer may belong to a different constructor whose
// header fell outside the visible lines -- confirm against the full file.
annot::annot()
: start(0),
end(0),
sequence(motif)
{
}
-
+
// Default constructor: sequence, header and species are initialized to empty
// strings, and motif_list is cleared.
Sequence::Sequence()
- : sequence(""),
+ : sequence(""),
header(""),
species("")
{
motif_list.clear();
}
// Builds a Sequence from seq: header and species are initialized to empty
// strings and seq is passed to set_filtered_sequence().
-Sequence::Sequence(std::string seq)
+Sequence::Sequence(std::string seq)
+ : header(""),
+ species("")
{
set_filtered_sequence(seq);
}
end_index = sequence_raw.size();
// sequence filtering for upcasing agctn and convert non AGCTN to N
+ if (end_index-start_index <= 0) {
+ // there doesn't appear to be any sequence
+ std::stringstream msg;
+ msg << "The selected sequence in "
+ << file_path.native_file_string()
+ << " appears to be empty";
+ throw mussa_load_error(msg.str());
+ }
set_filtered_sequence(sequence_raw, start_index, end_index-start_index);
} else {
std::stringstream errormsg;
seq_i != seq.end();
++seq_i)
{
- if (*seq_i != '\n') new_seq += *seq_i;
+ if (*seq_i != '\015' && *seq_i != '\012') new_seq += *seq_i;
}
//std::cout << "adding seq: " << name << " " << new_seq << std::endl;
)[push_back_annot(annots, start, end, type, name)]
|
(spirit::ch_p('>') >>
- (*(~spirit::chlit<char>('\n')))[spirit::assign_a(name)] >>
- +spirit::space_p >>
- (+(spirit::ch_p('A')|
- spirit::ch_p('G')|
- spirit::ch_p('C')|
- spirit::ch_p('T')|
- spirit::ch_p('N')|
- spirit::ch_p('\n')))[spirit::assign_a(seq)]
+ (*(spirit::print_p))[spirit::assign_a(name)] >>
+ spirit::eol_p >>
+ (+(spirit::chset<>(iupac_alphabet)))[spirit::assign_a(seq)]
)[push_back_seq(query_seqs, name, seq)]
) >>
*spirit::space_p