// ---------- sequence.cc -----------
// ----------------------------------------
#include <boost/filesystem/fstream.hpp>
+#include <boost/filesystem/operations.hpp>
namespace fs = boost::filesystem;
#include <boost/spirit/core.hpp>
errormsg << file_path.native_file_string()
<< " did not have any fasta sequences" << std::endl;
throw sequence_empty_file_error(errormsg.str());
+ } catch(sequence_invalid_load_error e) {
+ std::ostringstream msg;
+ msg << file_path.native_file_string();
+ msg << " " << e.what();
+ throw sequence_invalid_load_error(msg.str());
}
}
}
{
std::string file_data_line;
int header_counter = 0;
+ size_t line_counter = 0;
bool read_seq = true;
std::string rev_comp;
std::string sequence_raw;
while ( (!data_file.eof()) && (header_counter < seq_num) )
{
multiplatform_getline(data_file, file_data_line);
+ ++line_counter;
if (file_data_line.substr(0,1) == ">")
header_counter++;
}
while ( !data_file.eof() && read_seq ) {
multiplatform_getline(data_file,file_data_line);
+ ++line_counter;
if (file_data_line.substr(0,1) == ">")
read_seq = false;
else {
if(alpha.exists(*line_i)) {
sequence_raw += *line_i;
} else {
- throw sequence_invalid_load_error("Unrecognized characters in fasta sequence");
+ std::ostringstream msg;
+ msg << "Unrecognized characters in fasta sequence at line ";
+ msg << line_counter;
+ throw sequence_invalid_load_error(msg.str());
}
}
}
+// Load annotations from the file at file_path and hand them to parse_annot.
+//
+// file_path   - annotation file to read; must exist and must not be a directory.
+// start_index - forwarded unchanged to parse_annot.
+// end_index   - forwarded unchanged to parse_annot.
+//
+// Throws mussa_load_error when the path is missing, is a directory, or the
+// stream cannot be opened. Throws annotation_load_error (with the file path
+// prepended to the original message) when parse_annot rejects the contents.
void
Sequence::load_annot(fs::path file_path, int start_index, int end_index)
{
+ // Check the path before opening it so the caller gets a specific message
+ // rather than a generic stream failure.
+ if (not fs::exists(file_path)) {
+ throw mussa_load_error("Annotation File " + file_path.string() + " was not found");
+ }
+ if (fs::is_directory(file_path)) {
+ throw mussa_load_error(file_path.string() +
+ " is a directory, please provide a file for annotations."
+ );
+ }
fs::fstream data_stream(file_path, std::ios::in);
if (!data_stream)
{
- throw mussa_load_error("Sequence File: " + file_path.string() + " not found");
+ throw mussa_load_error("Error loading annotation file " + file_path.string());
}
// so i should probably be passing the parse function some iterators
data.push_back(c);
}
data_stream.close();
-
- parse_annot(data, start_index, end_index);
+
+ // parse_annot does not know which file it is reading, so add the path to
+ // its error message here. Catch by const reference to avoid copying (and
+ // potentially slicing) the exception object.
+ try {
+ parse_annot(data, start_index, end_index);
+ } catch(const annotation_load_error& e) {
+ std::ostringstream msg;
+ msg << file_path.native_file_string()
+ << " "
+ << e.what();
+ throw annotation_load_error(msg.str());
+ }
}
/* If this works, yikes, this is some brain hurting code.
std::string seq;
std::list<annot> parsed_annots;
std::list<Sequence> query_seqs;
- int parsed=1;
+ int parsed=0;
bool ok = spirit::parse(data.begin(), data.end(),
(
return (seq_count == 0) ? true : false;
}
+// Find the first base, at or after position index, that is not one of the
+// characters listed in query.
+//
+// query - the set of characters to skip over.
+// index - offset from begin() at which the search starts.
+//
+// Returns the offset (relative to begin()) of the first base not found in
+// query, or Sequence::npos when every remaining base is in query or when
+// index is at or past the end of the sequence.
+Sequence::size_type Sequence::find_first_not_of(
+  const std::string& query,
+  Sequence::size_type index)
+{
+  typedef std::set<std::string::value_type> sequence_set;
+  // Collect the query characters once so each base costs one set lookup.
+  const sequence_set match_set(query.begin(), query.end());
+
+  const const_iterator first = begin();
+  const const_iterator last = end();
+  // Nothing to examine when the starting offset is beyond the last base;
+  // checking first also keeps first + index inside the valid range.
+  if (index >= static_cast<Sequence::size_type>(last - first)) {
+    return Sequence::npos;
+  }
+
+  for(const_iterator base = first + index;
+      base != last;
+      ++base)
+  {
+    if(match_set.find(*base) == match_set.end()) {
+      return base - first;
+    }
+  }
+  return Sequence::npos;
+}
+
Sequence::size_type Sequence::start() const
{
if (parent)