From 510c9340528467a30a64d03d3936a4d8fdd18af4 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Sat, 23 Jun 2007 00:46:45 +0000 Subject: [PATCH] Catch annotation sequences that don't end with newline ticket:265 for some unknown reason if the last sequence in an annotation file wasn't terminated by a new line, spirit ended up duplicating the last character. Needless to say this meant that it wouldn't always align properly. I installed a hack into push_back_seq::operator() that checks for this condition and removes the duplicated character if the bug was triggered. Needless to say if spirit (or my grammar) gets fixed and this bug goes away the hack will need to be removed. I think the sequence equality tests in test_sequence::sequence_no_trailing_newline should trip if the last character doesn't get duplicated. Also I didn't test what happens if a location based sequence doesn't end with a newline. --- alg/sequence.cpp | 29 ++++++++++++++++------- alg/test/test_sequence.cpp | 48 ++++++++++++++++++++++++++++---------- 2 files changed, 56 insertions(+), 21 deletions(-) diff --git a/alg/sequence.cpp b/alg/sequence.cpp index 521496d..b39c4d4 100644 --- a/alg/sequence.cpp +++ b/alg/sequence.cpp @@ -423,16 +423,32 @@ struct push_back_seq { void operator()(std::string::const_iterator, std::string::const_iterator) const { + std::string::iterator seq_i = seq.begin(); + std::string::iterator seq_end = seq.end(); + + // this if block is a hack, for some reason spirit was + // duplicating the last character if the file didn't end + // with a new line. + // this checks for the trailing newline, and if it is missing + // removes the last character ( which should be the duplicated character. 
+ // check test_sequence.cpp:sequence_no_trailing_newline for test case + // also see ticket:265 for more information + if (seq.size() > 0) { + std::string::value_type c = seq[seq.size()-1]; + if (not (c == '\015' or c == '\012')) { + // doesn't end with a new line character + seq_end--; + } + } + // end hack + // filter out newlines from our sequence std::string new_seq; - for(std::string::const_iterator seq_i = seq.begin(); - seq_i != seq.end(); - ++seq_i) + for(; seq_i != seq_end; ++seq_i) { if (*seq_i != '\015' && *seq_i != '\012') new_seq += *seq_i; } //std::cout << "adding seq: " << name << " " << new_seq << std::endl; - Sequence s(new_seq); s.set_fasta_header(name); seq_list.push_back(s); @@ -677,11 +693,6 @@ Sequence::save(fs::fstream &save_file) SeqSpanRefList::iterator annots_i; AnnotationsRef metadata; - // not sure why, or if i'm doing something wrong, but can't seem to pass - // file pointers down to this method from the mussa control class - // so each call to save a sequence appends to the file started by mussa_class - //save_file.open(save_file_path.c_str(), std::ios::app); - save_file << "" << std::endl; save_file << *this << std::endl; save_file << "" << std::endl; diff --git a/alg/test/test_sequence.cpp b/alg/test/test_sequence.cpp index 9784621..2ea2fc0 100644 --- a/alg/test/test_sequence.cpp +++ b/alg/test/test_sequence.cpp @@ -833,22 +833,46 @@ BOOST_AUTO_TEST_CASE( annotate_from_sequence ) } } -BOOST_AUTO_TEST_CASE( sequence_annoted_with_reversed_sequence ) +BOOST_AUTO_TEST_CASE( sequence_no_trailing_newline ) { // sorry about the long string... 
- string s = "CTGGGTCGGGGGCGCTGGGGGCTGCTGGTAnnnnnnnnnnnnnnnnnnnnnnnnnTCTGCGCCGCCCGAGCCGCTGTGCTGCGCGTACTCCTCGAAGGGAGGGAACTTGGGCTCGATGTAGTTGGAGTTTATCAAAAACGAGCTCATGGTCATTAATTTGTGAAGTGCAAAAATACTAATTTTTCTCGCGTTGTCGTTTTTTCTGGGCTTGCCGAGGnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnGCAGACGCCGCCACCAAAGTTCGAGCCGCTCCTCCCCAGCCCAGCGCGCGCCCCGCCCCGTGCCCCACGTGCAGCGCCCCCACCAATGGGCGCACCGCGCGCGCGGACCCGGATCAGGAAACGCGCGGGTGCGTGATGGATGCTGCTGTCCGGCCCCTGGGCTGGGGGAGGGAGCAGGAGCTTTGGACCCCAGCCCCCCAGCTTTGGTTCCCGCTGGGAATTCAGGCCCTGTCAGGCTGTAGGTCCTCTCGGGAGCCCTCTGCCTGCCCTACTGCTGGCCTAGGCCTCGGGCTGTCTGGCGGCCGCGACTCAGCGCTGACCTCGGGCGCAACCCAGTCAGGCTTCGTGTCCTTCAGGGGTTCTAGGCTAACAGGCGAAAGGAAGGGCGTTGGGACCGAGGGGCATCCTGGTTTTTATGTACGCCACTGAGAGGCCACCAGACACATTTTCTCAACCGCAGATCCCCCTTCCCCACACCCTGCTCCTTGCGTGTCAGCCTGAGAGCCCTTGCTTTGAGAAGCTTGGCAGAAGCTGCAAAGGGTGGGCGGGCAGCTAAGAGAAATCGACCCAAGGATGTAAATCGAGGCCATTCCATTATAACTGGATGGACACTTTTCATTTTTTCCTTCTTTCAGAGACAATCTGTTTCGTGTTTTCCTAAGAAAAATTGGAACCTTCGTAATAGCATCTAATTTGACGGGGGTTGTCGATGTGAGAGCTAAATATGCCCGCATTTACTAGGTGCGATTGTGAGAGAGAAGGTGGCCCAAGGATGGGAATGGATAGAAGCAACACCTCCACAGAACCGAGCTTTGAAAACAATAACTTCCTATTTCAGAACTATCCCCAAACAAAAACAAGCTAAGGGTAGAATAAACACCTTGCCGGGTCTGATCGCTGATGGGTCTTTTCCAGCTAAGAATTTCATGTTTTCTCTTTTAGATCCTGCTTTCTCAGGCAGTATCTGAGGCTAGAGTTATATTTGCAGGACAGTCTATAATTTCTGAATTGCTGAAAATTAGCGTATTAACGATATCAGAAGCTCCGGAAAGGAGGGAGAGGAGACTGTTGCCTGCTATTTGGTAATTGAAATTTGATGGGTACACTAATTACGCCATTATTAACAAATAAATTACTTATTAATTCCACCTAATGTTGATCTTTGAAGTAAATACTGATGCCTTATTTGTGCTGTGTGCTTTCTCCCTTTCTTTTCTGAGTAGTAGACATATCTAGATCCTCTACTTTTCAGCCTAAATTAAAGCAGTGTAAACTAGCATAGTCACCATTCTAAAAATATTTTCATATTGGCATGCAAAAGCAAGGATTTTTCAGCTGGTGCACCTTAGTTGATTTTTCAAAGAGCAGTATAAACAGCCTTCTCACAACTGAGTCTGGAACGCAGACAAGGAAAATTATTTCCTAAGCCTGGAGACACTTGAAAAGGAATGTCAATTCTATCTTCATTCATACTGGTTACTCATATGAGTTACTAAATGCTGGAATATATCCATTTGATGGATAGTCACTTAATGCTTAGCCACATAAAGCCTATTATATGGGACTAATCTTTAAACTAATTTAGGAAAAGAGGTTAAAAAGGGGATCATATTAGCTTTCTAACTGGAATCACCCTGAAGAGGTACAAAGAGATTTTCCACGTTAGGTGTATATGAGTGT
GAAGAGTGCTGTCCATTCACATGAGGCACCCTGAAAATTTGTTTTTAAAGAAATTTGAGCCACAGACAGAAATCAACACTGAGTGTAATCTTTAGCCATCCTCTCTAGACTGGAGGAAAAATTTAGAATGTGATACATCTACCTGAACCAATATCTCTCCCTAGCAAGAAAAAATAATATACACATAGGnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnGACATACGTATTACAAATCTGAACCCTATAAGTTTCAGGGGGACAAAAAGCATGACAAGGAAATCTTCCCTCCTTCTCATGTCATCAGCCTTGAGTACTAGGGTCTTAACCATATCTGTTTAATATTTACAGACACTAAAACACAAAATTCTGTTGTTTAGCCTCAGAACCTTGTACCAAGTTTCTATTTTTAAGTATTAACGAGACATAAACACTGTTTTGTATACGGTTAACCCAAACGAGTTAGCTGTGCCTGTGTTTTGTGTGATTCTATTACTTTAGGAAGATGGCCTTACACAGAATCCCCCAAGGCCTGTAACTTGTCTTTGTGGTTCGTATCATAAACACAAACGGAGCCAGGACCACCAAGTGTTATCTCAACACCGACATTTTGACATTTTACTGCAAGATTTATGGCTGTAATAAACAATCTCAGTACCTTTTCTGAACCTTCCTCAATCTCCCTTTGCAAACCATAGCATCATTCCATTGAATCAAATAATCTTTTGAAAAACATTTAAAAAAAATACCTCTTGCCTTTACACAATATCCAAGACACCAAAGTAAAGCCAGGAAGAAACTAACTCAATTAATAAACAAACTGAAGTTTACCAGCAGCATCTCGCCTGAGAAAAGATGGGATGCCCTGAAATGTAGCAGAGAGGGAGCATGCTAATCCTCACACACCAACTGGCTCCAGTCCCAAGCGGGGTGAAAGCGTTATCCTTTCCTTAGGAAACTGGTGAGCACGTTTGCTCATTTCCACGTGCAGGGATAACATATATTCCCAACAAAAGCTTTCTTAAAATCCCATTAGGTGAAATAACTTTTCATCATGTCCTCGAATCCCAGATGGAGAAGAGTGAAGGGAGTCGGAGGGAGAGGGAGGGTGCAAGGGAGGCAATGTTTTGCAGCTTGGTTTGAATCTGATTTGAATCATTTTGAATATATTTGTAACAGCATTCCCTCTTGAATGCAACCCTGTCCCAAGTTTCAAAGTGACCGAACAGTGACACCGTGTGCATTTTGTTTCTTATTAATCTTACACATTGACAGTCTTTGTTAAATCACAAGGCGCGCCCTTCACTAGCCGACATTTTCATATTTGTTAGACGCACTGACCTGAAGTTCACCTCGGCCTTGGACTTTGCGCTTCTAAAAGGTCTATACAGTGTCTTTTAGAGAGCAGGGTGCTTTGCCCAGGTCACTCCTTCTCAGGAAAAACCAAGGGGAAAAGCCAAAGGAAATGTAAACGTTATGGAATGTATTGACTGTATTTGTCCTTTGTTCTTTAGAGCGAGAGTCCCCCAGACTGTTCTCTATCTGATGCATGTCTCTAGAGCTGAACAGTGGAATGGCAGAATTTCAAAACGCCTGATGGTGGCATTTGAAGGCTTCCCCACCACCTACACTAGACACAAGATTTGAGAGGAACAACACTTTACCAGCCATTTGACCAATTAATTCTTTGGGGATAATTTTCTTGTAGTAGTTTAAAATAATGCACACAACGCAGGGATGAGGACTGATATTCATATTGGGATTACACATGAATTTTAACTGGGATTGTTTGAGAGGCCTGAGGTTCAAAATCCTCCAGATAAAGCAAGCACACTAAAAGCAATAAATTCTGCAAGTACTCTTTTCTTTTACTTTGAAGACTAGCTAAGAGGTATCTATGGTTTTTGAAGCTGACATGTCTATAAGGTGTGTCACATGTTTTTAACCAAAAAGCACAATAAAAAGGTTTTTCCCAAAGAGACACGTAATTGTCTTGTTGA
CTCATCGAGGGGTTTCAGTTTTCCTCATTTCACTAGCCCAAATGTGGTGAAATGTTCACTGCTGCAACAGCAATCACCACAGTTGTTTCCTTTCTTCTGTTTCATCTGGCAAACCCCCATTTGGCTTCAAGCTCTTGGCCAGAGTGAAAACTTTACACATTGCACAGAAGCACCCTGATTACTTCCATGAAGGCAGTGTTTGGAAAATATTTACTTTACCACTGAACATACCTGGCACCATTAAATCCAATCAACCAAAATATTGGGATGATCTTAAACATTCCTGCAAGAGTCCACATTCTGAGTAGATGAATTATTTCCAAAGTTAAAAAAGAAAAACCTAGGGAAAATATTTCACTTTTCTCTTCTCTGTTTTCCTATACTGATCCCTTGAAGGTCAATTCATAGAAAAGGGAAATATGTCCTCTGGAAAATAGATTCTTACAGCACCAACACTTAAAGCCATTCTAGATGCATGAAAAATAAAATATTGTTTAGCTCTTCAGTTGCAACTCACACATGAGGCATGGTTCTAGTCGGCTTCCTTAATACACTATTCTCTTTCTTTTCTGCTCTCCCACCCTTCTTCTTAGGTTGCTTTATCTTCTCCTTGGCTTTTTTTTTTAATTCCACGTGTATCATTAAAAAGTACATTCTGAAGAATAGAAAATATTCTATTCTGTCCTGGTGGTCTTACAGAGTAGCCTGTTATTTGTGGATTTCACCTTTCTGCATTCCCTACAGTCTAGTTATTCACTTATAGCTTGTAGCATTTCTCTTACATTCAATTGTGGTTTAATAATAAACATTAAAAAAATTTCCAAACAGGAACATTTTCATGGCACCAGTAAGCATTTTGTCACTGGCAGTGGTGGTGGAAGGGGTGAAGGGAGAATTCTGTGTCTTTCAGGAGGGTTTCCACTTCCTTCCTTCCCCTTCTCAGATCTCAGAACGCTTTGCATTCAGCGGACTGTAGTTTCAGAAAAAGCATATTCTGTGTTTGAAAACTGCAAAGATTATATTTTGCAAGAAGTGTCTGTGTTTGCATTTATTCTTACACACTTTAGGGGTCATCATGTGTACTAAAAAGACAAAAAACCGGCCAATCAGAATCCCTCTTTTCAAATAAAGGAGGTTTCCTGCACCATTCTGTTGCCTTTGAAGGCATAATGAAATATTGGAAACTTGTGACATTAGTTTTTAAAGCTCCACAGATGAGTTTTTAGCATTTTTATTTTGTGACAAACCCACAGACTCCTGGTTCTCCAACACCTAAGGTGTTGATGTTTCAGTAATCTATGCCTATTTACCTGCTGCTATTCCCTCAGAATGGGAGCGATAATTCAAGATGAGATACAGCATGTATTACTCTTGAAAAGAGGAATTTTCTATCCTTTCCTCCGTAATTGAGGTCATTCAACCACTAGGGTTCACCTGGAGTCCATACCGTGATACACGCGTCACTCTGAGCCATTTTATCTTTTGTGCTGATAGTCAAGATCACAGCTCTAACATTGACATCAAACTCTGTCTGGGCAGATGACTAAGAGCACTGCACAATGTAAACTTTTGACCCTCAACTTTTTGACCTGCAGTTGTAACGCACTAACCGCAAAGATACACAAAGCCGAGCCTCTTCTTTCAGGGGGAAGGGGCCCCCCAGCATCTCAGGATGCCCTGCTTCTGCCACTGCCATTTGAAATTAGAGGGTGAAATGGATATTTTTGTGTGTTTGTGACTGTACTTTTTGTTAAATCAGCCTATGACCTCTTCGTTAGCACCTAGGAACTAGATTAACTTGAAATCACTCGTGATTCTATTTTACAAGGAAAATTTGGAGCAGAATGGGAGAACCTTGCAAAAAGTGAAAGAAAAGAGAAGATGGGGGAAAGCAGGCAATGGGAGGTGGAGACACTTTTTCCCTTTATTTAAAACTAAAGACGCAGCCCTAATTGTTGGGAGAGCTGGCCCAAGCGGGTGAATTGACTGTGAACTTGTAC
TAAAGCGTGCTCTGCTGGCGATTCCTAGGGTGTGCAGATTTATCTTCTCTGCATTTACTTAACCCGGCAGTGAACTGCGCGGGCGTCATTTGTTAGGCGATGACAGACTTCACCTCCAGCAAGGGCTGCTTCACAAAATCGCAATAATTATCTAATAACCTTCATAACAAATATTATTATTGAAAAGACTGGTTTGTGGGGAGGGGACCTGGTGGGAGAACAAATTTATTTGTGAACAACAACAAACAAAACAAACCTGGGCAGACCTTCAAGTTCTGGGGCTTAGAATGGCTGGGGCTGTGGATCCCCTCCCCTACTTGGGTGGGAGCTTAGGCTGACCCCCTCAGCCCTGCCTGGGAGCCCCGTTTATAGTTTTGCCATTGACTAGAAGGAAACTCCTCCTCAGAAACCAAAGGGAGGGAGCCCACAATGCTCTGCACTCTCCATGGTGGGCAAGCCATGGACAGACCCCCAGCCAAGGCAGGGGGGAGGCTGAGAAGGGCATCTTTTAAGCTAAAAGGATTGTTTTCCTCTTTAATTGCCTATCTTTTAAGATGTGATTTGCTTTCCACTCACTAATTATTTCGATATAATACTCTCAGAATCTCAACAAATGAACAGGACTCTGTTTTTTGGTGGGAAATTCTGTCTTGCTCTCTCAGAGCCGCCAACAATGAAGCAGGGGAAAGAGCAGGAGAAAGGGAATCTTGGCATAATGTTGTGAAATTAGACCATGGAAACCCTAACAAACCACTAAGTAAGTGTGACCAGAAGCTTCCTGTTGTATTTATAGTTCAGAAATATTGTCTCTTCAGCTTGTGGGAACAAACGAGCCCCCGCACATTGCCGCTGAGGAGGAGCACAGACACGCACTTCTGCCACCGGCTGAGGCTGGATGTCTTCATAAAGCCCTCAGTGACAGACATATTTTTTCTTAGTAAGTTCCTCTGCAAGAACAACCCAAAAGAATCCACAAAAGAAATAACTTATCTACAGAATGAGCAGAAAACCAGCCATCCTCTTTATTATGCTTCCTATGAAAATAGGAAGAAAGAAAAAAATCTTCCAGTAACACATAGGTCTGACTGCATGATGTATTTTTAAAGTCATTTTAATTCCATGTGGCCATGTGGGTTTGCCTGCTCTCTTAAATTCTACTTAAGTTTTGTGAAGATTAAAACAGACAGAAATAAGCAAGCTGACAATATTTACAGCCTGTAATTTTTCTCATTCCTTGGAAAGATTCTCTATGTTCTGTGGTACTGGATATGACTTCAACAGGCTTTCTGCTCATTCCCACACCCCAGGGTGGAATATGGCCATGAAGTAGTGTGGATATTTTCTGTGTAAGTAACTCAAATTAAACTGGCAGAATCCCCGTCACTCTTTTTTTTTTCTAATTTCAATCACCAAGAAATCACTCAAGCAAGATCACCAAATCAGTAACTAAAATGGAACCATAACGCAATATTTTCCAATAAGGAGCCCAAAATTCAGAGCAGCAAAACAAGGAATCCAGTATTCTCACAGACACATAACATTATAAAAGAGAACCCATACCCATGTAGAGTTTATATCCTTGTTCCCACTAAGATGTGGACACATCTTCTTGAATGCTGAAATACCAATGTTTACTTTAATAGGTTACACACAATGACTTCAGGATTCTTCACCTTGCCACTATTCATGAGAAGTAGCACTTGTGGGAGGGTTTTGATTTTTCAAAAAAACTTTCTAGGTTTTGCTTTCTGGACCTCTGACTTTAGGGACATCTGTTGGACTTATGTTGAGTGTAGGTGGCCTCTGCACAATAAGTTTATTGAAATTCCAAATCTATACTTTCAATTTTTTCACTTTAAGCACTTAATAGGTATCTTTACCAATTAATACTTGCTGAAAACTGCCCAGCTCCTAAGGAGAAAAGCAGATCCTATTTTTTGTTTCATTTCTGAATGCAGTAGGAGAATTTGGCTTAATTCCTAAAATAGGATTGGAGGAAA
TCTACTGGGTCCCTTGTGGGTACCCATCCAGAAAAAGATCCCAGGACAGGCCACAGTCCCCAGTCACTGGGCTTGGGTTTTGCCATTGAAGAATATGGGGGGTTGGGGCCAGAAGGGGTGACTGGGGCCAATATGGAATTGTGCCCAGGATAAACTTATTTCACCTTACTTCACCCATTGGTGCAATTTTGGAGACTGTTCTGGAAATCATAGATTATGTAAATTTCCTGGGATCAAACAGAAAGAGCAACTAACAAAAGAAAGGCGGAAATCTCCTACTGACAAAGGACCAATTTCTTCCCTAAACTACCGTTTATGATGTGTCAGGAAAAACAACCTAATGGCTCTGGGGACTTTTAAGTTGGGCACTGAAGACACCTCAATTTCCCCCAAAACTTTAGAGCACAGTTTGGAACAGAGAATTCGCCTGTATGTTGAGGGGGAGTGAATTTCTCCAATCTTAATGTTATCCAGGGGGCCGCCTAAGTTGCCTTCTGAGGGTCCTGTGCGTAGATGTTTTTAATTCTACnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnGCGCTTTAACCCCTTTCAATTAGCCTGGGGATTCAAAGACTAAAGTTAAATCCGGCCATAAAGTTTATTGCTTCAGACTCACAAGCGGCTGAGAACAGTCCCGCCGAAATAAAAAGAACATGCAGGCAAACAGGGTTCAGGGCCTGGTCCCGGGTGCGGGGGAGGGGGTCCTGAACACCCCCCCACACCAGGGTGGGGATCCTTGGTCCTCAGGGTCCAGTGGGCGCTAGCAGCCCAGGATCCACCTTGCAACCCGGGGGCCCAGCCTGGAGGTGCAGCCCCAGCCTCGCCGGCCTCTGCCACCCTCCCGCTCTCGCGAGCTAGCCTGAAACCCGGCCCCGAAGGCCGCCGCCTCAATTCAGCCCTGCCAAATGACCCCGGCCCGCGAAGACATATTGCCACAGCCCCGTAAGGAATCCCGCCAGAGTCCGCCTCGGCCCTGCCCCGGCCTTTCTTTCAAACTCCTGAGCGCAnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnACTGTCTCCCAGCCCCGACCCCAGGGCTCCGCGACCCCCAGGAGCTGGCCCCGGCCGGCCCAGCAATTGCGCGGGGGACTGGGGGTGCGGCCCTGCCAGGTCCCCACACACAGGCCCATTCGCACACAAAAATCATCTTTTTGCACGCCGGCGGGAGCAGCGGAAGTCATTAACATCCGCGGTTGTGCTGCAATTAAAGTTAGGCCTGGGGATGCGGCGCGGCCACAGGCGCTGCTCACTCTGCTGCCTCCGCAGAGTTGGCTCCTGGCGCTGCTCTTTTGGGCAGAGGGAAAGTTTGCTCTGCCTTTTCGAATTCAGAGGCAGCCTGAGTTATTGAACCnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnTTTCCAAAGTACAAATAAACTTGAAAGCGCTCAGGAGGCGAGCTTACCTTAACTCGGAGGGAGCCATTTTTCAGAGAGTTTTGAGAACTTGTGGTTTGGACACTTCTGGACCTAAAATTGACAGTTTGAATGGCCAGGCGGCACACGTAGCCTGCAAAAGAGTCAAATGGAGTCCAGCGTTAGTGAGATTATATGTTATGTGGTATATAATGTTGGATGTCAACTCCCCAAAACCATAAAACTTACTTTAATGGCCCCACGTGACGTTTTATAGCCAGTGAGCCGATCTGTCTGTGCTATGGATGATTTTACGATCTAATTCATAGACAAAACCCTATTCATTTGGCACCCAAATGTCATATAGCCGGAACTGGGGCTTATAAAGTTTACTGTTTTATAACTTTTAAAAGGAAAGACGGCATCAGTGTAAGCAGTCGGTAAATGTGCAAATCTCTAGTTGCGCTTTAGCTGCTCTGAGGAGTTTCCCAATCGAGCTAGGATGGGGTAAGTACCTTCAATTTGTAGCAAATT
AATTGTAGCAAAAGAAGCCAACTGGGTCCCGGGTGAAGAGTGGGGAAGGGGTGCTGGGATGGGTTAAGGGCAGAGGGTTTGGGGTCCACAGACAGACATAGCAGCGTCTTCAGCAAGTGGAGGCCTAGGACAGCCTTAGGAAAGAGGCAGGATCTGTGTGGCCTGAGGGCGGCTAACAAAGCCCTGGGTTTTTTCTCCTTTTTTCTTGCTCTTTCTCTCTTTTTTGTACCCAGCAAGTTAACTTGGTTTCCTCAGAGATGGACAGGGTGTTCTGGGGCTTTGGAACAGCCTACAGCTTTTTCCACCTTCTGCCCTGAACTTTGCAATGGGTCAGAGGTAGGGAAGCGATGGGACAGTGTTGGTATGAGGTCTCCCTGCACAGGTCATCTGCTCAGGTAGCCTCAGACCCAACAGCTTCCAAGACTGCACAGACAGACAGAAAAGCAGACAGAGCCGCTCACTATTTGGCACAAACCAGACCAAGAGAACTTACAATAGAAAGTTTATTTTTTGTTCCAGTCAGTATTTTTTCCTTnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnGCTGATCACAGTTTGCTTAAAACAGCCAGACTTGGACAATATTTGTAACTTTGTTCACAAAAACATACATCACTGAAGCTGCGCTTATAAGAGCCACTTCCAGAGTTCGTGCAAAGGGTCCTATAAAGGCACGCAGGGACACACCGCTTGGAGTCACAGTTTTCATCACAGAGTCACTAGTCACTACACGTCGAACAAGTTGTGTCTCATCAAGTCACCTCTACAACAGCATTAATTACACAAGGAATATAGGTAGTTTGAATAAAAATATCTTTAACAGCTTGGAGCTATTGAGACAGGAACACTTCCACGCACATGCACAGTTAAACAACTTGAGTGCAACACACAACATTGGCACTAAACGAGATTGAAGGGGGACTTTTTGTGTGTTTTTTTTTCTCTTTTCTTTTTTTGTTATAGTTACTTCAAGTAACACAGCTTGCTTCATATAAATAAGTTAAAACATCTATTTTTTTTCAAGACAAAGCCATTCAGGACAAAGAGATGAACAGAAAGCAGATCTACTTATACAGGCGCTATAATGGCAATAAACAGGCTCATGATTAAAAGATGAATTAGGGCAACGAGAACAGGGCTTCTTCACAGAAGGAACACAAGGGAGTTTCAGAAAGTCACCTTAGTACTGACACTACGCGGGATCCGCTAATACTGCTCAGTACTTTAAACGCTCAGATACTCAGGGACGGAAGGCCCCTCCTGCCGCGGCCATGCTCATGCTTTTCAGCTTATTATCTTTTTTCCACTTCATTCTCCGGTTTTGGAACCAGATTTTAATTTGTCTCTCGGAGAGGCAAAGAGCATGTGCTATTTCAATCCTCCTTCTGCGGGTCAGGTAACGGTTGAAGTGGAACTCCTTCTCCAGCTCCAGGGTCTGGTAGCGCGTGTAGGCCGTCCGGGCCCTTTTGCCTTCCGGGCCGCCTATGTTGTCTGCAATAGAAAAGTCAGCGGTTTAGCCACCAACTCCTGTCTTCCAAAGTCCGCCAGGGGGACAAGCTTGGGTCATGAGCAGGGAACCCAGGCGAAAAGCTCAACAAGTTCTGCCTACCAGCCCGCACACCCCTCCCGAATTTCCTTCTCTCTTCCTTTCTAGAAAGAAAACAATACGATTTGGACCCTGGGAACAATCTGCCCATCTGAGGCTGGGGCCGTGTCCCGGCGGACTCCGGCTTTCCCTGGCCCCTCTCCTGCCCCCTCCGCCCTGCCCCGGGCGCCCCGATCGGGAGGCACAGCCCTCCCAGGCTGCCCACCGCACAGAAACCCAGGAAGCAAGGCCCTTTCCTGAGCGCCCAAGTGGCCTTCGGGTCACCCTCCCTCAAAGTTCCAGCCCCGAGAGCCGCCTCCCGTTTCCAGCCTGCAGGGTTGGGGAGCCTGTTTTCTTTTTCTTCCCTTTCCTTCTCTCTCCCTCCTG
CCCCCAAAATTCAGAATCCTGCAGGCTCTCGCCTCGATTCTTTCCCCCAAGCCCCTTTTCGGGGGCTGTAATTAGTAACGCTGTTTCCCCAGCGTAGCCCTCCTCATAAATTATCCGCCGTGACAAGCCCGATTCACGGCTGCTACAGCCATCCTCTACCTCTCTGCGCCTTGCTCGGCTGGCCTGACCCGGGAGCGCGTCCCAAGGCGTGGGGTTCCAGAGGGGTTTTTTGCTTCCTCCCCCTTCCAACGTCTAAACTGTCCCAGAGAACGCCCATTTCCCCCACTATTTGTGAGCGCAGGGTGCTCGCAAAGAAGAGGAGGAAGGAGGAAGGCAGGGGAGGGAGAACGGCAAGGAGAGCTCCGCAGGGCTGGGAGAAATGAGACCAAGAGAGACTGGGAGAGGGCGGCAGAGAAGAGAGGGGGGACCGAGAGCCGCGTCCCCGCGGTCGCGTGGATTTAGAAAAAGGCTGGCTTTACCATGACTTATGTGCAGCTTGCGCATCCAGGGGTAGATCTGGGGTTGGGCGGGCGGCGCCGGGCTCGGCTCGCTCTGCGCACTCGCCTGCTCGCTGCTGGCAGGGGCGTCCTCCTCGGCTCCGGACGCCGTGCCAACCCCCTCTCTGCTGCTGATGTGGGTGCTGCCGGCGTCGGCCGAGGCGCCGCTGGAGTTGCTTAGGGAGTTTTTCCCGCCGTGGTGGCTGTCGCTGCCGGGCGAGGGGGCCACGGCGGAGCAGGGCAGCGGATCGGGCTGAGGAGAGTGCGTGGACGTGGCCGGCTGGCTGTACCTGGGCTCGGCGGGCGCCGCGCTGGCGCTGGCAGCGTAGCTGCGGGCGCGCTCTCCGGAGCCAAAGTGGCCGGAGCCCGAGCGGCCGACGCTGAGATCCATGCCATTGTAGCCGTAGCCGTACCTGCCGGAGTGCATGCTCGCCGAGTCCCTGAATTGCTCGCTCACGGAACTATGATCTCCATAATTATGCAACTGGTAGTCCGGGCCATTTGGATAGCGACCGCAAAATGAGTTTACAAAATAAGAGCTCATTTGTTTTTTGATATGTGTGCTTGATTTGTGGCTCGCGGTCGTTTGTGCGTCTATAGCACCCTTGCACAATTTATGATGAATTATGGAAATGACTGGGACATGTACTTGGTTCCCTCCTACGTAGGCACCCAAATATGGGGTACGACTTCGAATCACGTGCTTTTGTTGTCCAGTCGTAAATCCTGCCTGATGACCTCTAGAGGTAAACTCGTGCACTAATAGGGGAGTTGGGTGGAGGCGAGGGGGGTnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnAGTTGCCGCCGTTCAGCCGGACTCGAGCGCCACCCGCTGGAGGCAGGGCTCATCGCCCAGCTTCCGACCGGGGGCTGCAAGGGCCGGGGTCGAATTGAGGTTACAGCCCATTATGGCAAAATTATTGCATTTCCCTCGCAGTTCCATTAGGATGTACCAATTGTTAGGCCGTCAGCTGCCGATCGCGCGCCCGGCGAGGATGCAGAGGATTGGGGGGAGGTGGTGACTTGCATTTTATTTACAACAACTTTATTTCCCCCGTTTTGCAGCCCCTCTTATTTTTGTGTCGAGGTTGGGGTCGGTACTGACCGTCCTGCCAGCAGCTCTGAATTTTGAAAATACAGATATCACCTTCGGGGAAGGGGGAAAGCCATTTAGCCAATTGGAGAAATAAATCCTGCCCGCAGCAGCAGCAGCTACAATTACGGCTCTGTTTTTGCGAGCGCATGAGGGACAGTGTCCCTGCCGCTCTTAAATGACAGGCGTCTATTAAAGATAGCTTTTGTGTAGTGTTTCTCCAAGGCGAGGTCAAATTCCATACACTTTTATAACCGTAGTCGATTTTTCTTTCGTGTGAATATGGTTTTCGTGTCATTAGTTTGCGATTTGATTTGCTTACGTATCCAGCCTGGAAAATCTTCATCACAGGGTCCGGTTCCTCGAGCCAGCCGGGCCCCAA
GTCGGAGGGTTCTCCTTGAACCCAGCGAGTGGGCCCAGGCTCCCTGCAGCCACAGAGGCTGCCTGGGGTCTGGGGATCCGTGGGGCGGGTTACTGGGGTCTTGCTTAGACCTCCAGGAGTAAAATGAGGGCGATAATGGAAGCATTCCTTGGCAGTGCCTAGTATCTCTGTAGTTATTTTCCACGGCTCCGAAAGACTCAAGTAAATCACAAATATAGCTGAGAGGCAAGTGGAGTCTCCCCGCTGGAGGCCCGGCGTTGCAGGCGCCCCTGGCACGTCTGGAAGCCAGGACTCTGGCGGCTCCCATGGCCCTGGGCCCCTCGTTGGGTCCTGAACGCTGCTGTGGCGGCGACGCGGGCGCTATCGGAGGCTGGGAGCGGGAATCCGGAGCCGGGAGCCTACCCCGGGCTGTAATGTTCCACCCGCGCCCAGGTTAACTCGCCTCGGCTGAGGCTGCTTCTCTTCCACTGACGGTTGCACACGCGGGACCGAGAGACTGGGCTCTGTTGGGGCCCCCTTTGTTCCTCGAGCTTCCTTCCTGTTCTGGGAGGCGGCTTGGGAGGCCGCGACAAGGCCGGGCTCCAGCTCTTAGACCCCCTCTTTCCACTGGCCAGAGATGATTTGATGATGCCCTTCGGGACTTACTGGCGAGGGACTTAGGCAGAGACGCCCAGACACGAAACGGGGCTCGGCCCAGGGCTCTTTCCTCCCCAGCAGCCCCGCGTCCCGAGGTCGGGGAGCTCAGAGACACTAGCACAGGAGCCCCAGACGCATTCAGGGCGCACCCCAGAACTCCGGAGCCGGTTTGGGCATCCTTGTGGAGCGGGACTGGGTGTGTGCAGTGCGCCCCGCTCCACCGCTGGTATTGGCTGTGTGTGAGnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnAAGAAATAAATGCACAGACGCTTGCAAAGCTCCGGGCTCCCCTGAAGCTGCGGAAGCCCCCAGATGGGAGCAGGCGGGGAGAAAAGTTGGGGAACAGGCGAGGGCAAGGGGGCAAAGCCGAAGGAGGTTGCAGCGCTGGCCTGGTCCCTGCCCAGGCATCTACTCGCCCGCCTTTGCCTCTGAGTCCTCCCCGCTGGGCTGCGTGGAATTGATGAGCTTGTTTTCCTTTTTCCACTTCATGCGGCGGTTCTGGAACCAGATCTTGATCTGGCGCTCGGTGAGGCAGAGCGCGTTGGCGATCTCGATGCGGCGGCGCCGTGTCAGGTAGCGGTTGAAGTGGAACTCCTTCTCCAGCTCCAGTGTCTG"; - Sequence seq(s, reduced_dna_alphabet); - string a = "HumanHXA5\n" -">hg18_knownGene_NM_019102_0\n" 
-"GGGTGCTATAGACGCACAAACGACCGCGAGCCACAAATCAAGCACACATATCAAAAAACAAATGAGCTCTTATTTTGTAAACTCATTTTGCGGTCGCTATCCAAATGGCCCGGACTACCAGTTGCATAATTATGGAGATCATAGTTCCGTGAGCGAGCAATTCAGGGACTCGGCGAGCATGCACTCCGGCAGGTACGGCTACGGCTACAATGGCATGGATCTCAGCGTCGGCCGCTCGGGCTCCGGCCACTTTGGCTCCGGAGAGCGCGCCCGCAGCTACGCTGCCAGCGCCAGCGCGGCGCCCGCCGAGCCCAGGTACAGCCAGCCGGCCACGTCCACGCACTCTCCTCAGCCCGATCCGCTGCCCTGCTCCGCCGTGGCCCCCTCGCCCGGCAGCGACAGCCACCACGGCGGGAAAAACTCCCTAAGCAACTCCAGCGGCGCCTCGGCCGACGCCGGCAGCACCCACATCAGCAGCAGAGAGGGGGTTGGCACGGCGTCCGGAGCCGAGGAGGACGCCCCTGCCAGCAGCGAGCAGGCGAGTGCGCAGAGCGAGCCGAGCCCGGCGCCGCCCGCCCAACCCCAGATCTACCCCTGGATGCGCAAGCTGCACATAAGTCATG\n" -"\n" -">hg18_knownGene_NM_019102_1\n" -"ACAACATAGGCGGCCCGGAAGGCAAAAGGGCCCGGACGGCCTACACGCGCTACCAGACCCTGGAGCTGGAGAAGGAGTTCCACTTCAACCGTTACCTGACCCGCAGAAGGAGGATTGAAATAGCACATGCTCTTTGCCTCTCCGAGAGACAAATTAAAATCTGGTTCCAAAACCGGAGAATGAAGTGGAAAAAAGATAATAAGCTGAAAAGCATGAGCATGGCCGCGGCAGGAGGGGCCTTCCGTCCCTGAGTATCTGAGCGTTTAAAGTACTGAGCAGTATTAGCGGATCCCGCGTAGTGTCAGTACTAAGGTGACTTTCTGAAACTCCCTTGTGTTCCTTCTGTGAAGAAGCCCTGTTCTCGTTGCCCTAATTCATCTTTTAATCATGAGCCTGTTTATTGCCATTATAGCGCCTGTATAAGTAGATCTGCTTTCTGTTCATCTCTTTGTCCTGAATGGCTTTGTCTTGAAAAAAAATAGATGTTTTAACTTATTTATATGAAGCAAGCTGTGTTACTTGAAGTAACTATAACAAAAAAAGAAAAGAGAAAAAAAAACACACAAAAAGTCCCCCTTCAATCTCGTTTAGTGCCAATGTTGTGTGTTGCACTCAAGTTGTTTAACTGTGCATGTGCGTGGAAGTGTTCCTGTCTCAATAGCTCCAAGCTGTTAAAGATATTTTTATTCAAACTACCTATATTCCTTGT\n"; - stringstream annot(a); + string s = 
"AATTACACAAGGAATATAGGTAGTTTGAATAAAAATATCTTTAACAGCTTGGAGCTATTGAGACAGGAACACTTCCACGCACATGCACAGTTAAACAACTTGAGTGCAACACACAACATTGGCACTAAACGAGATTGAAGGGGGACTTTTTGTGTGTTTTTTTTTCTCTTTTCTTTTTTTGTTATAGTTACTTCAAGTAACACAGCTTGCTTCATATAAATAAGTTAAAACATCTATTTTTTTTCAAGACAAAGCCATTCAGGACAAAGAGATGAACAGAAAGCAGATCTACTTATACAGGCGCTATAATGGCAATAAACAGGCTCATGATTAAAAGATGAATTAGGGCAACGAGAACAGGGCTTCTTCACAGAAGGAACACAAGGGAGTTTCAGAAAGTCACCTTAGTACTGACACTACGCGGGATCCGCTAATACTGCTCAGTACTTTAAACGCTCAGATACTCAGGGACGGAAGGCCCCTCCTGCCGCGGCCATGCTCATGCTTTTCAGCTTATTATCTTTTTTCCACTTCATTCTCCGGTTTTGGAACCAGATTTTAATTTGTCTCTCGGAGAGGCAAAGAGCATGTGCTATTTCAATCCTCCTTCTGCGGGTCAGGTAACGGTTGAAGTGGAACTCCTTCTCCAGCTCCAGGGTCTGGTAGCGCGTGTAGGCCGTCCGGGCCCTTTTGCCTTCCGGGCCGCCTATGTTGTCTGCAATAGAAAAGTCAGCGGTTTAGCCACCAACTCCTGTCTTCCAAAGTCCGCCAGGGGGACAAGCTTGGGTCATGAGCAGGGAACCCAGGCGAAAAGCTCAACAAGTTCTGCCTACCAGCCCGCACACCCCTCCCGAATTTCCTTCTCTCTTCCTTTCTAGAAAGAAAACAATACGATTTGGACCCTGGGAACAATCTGCCCATCTGAGGCTGGGGCCGTGTCCCGGCGGACTCCGGCTTTCCCTGGCCCCTCTCCTGCCCCCTCCGCCCTGCCCCGGGCGCCCCGATCGGGAGGCACAGCCCTCCCAGGCTGCCCACCGCACAGAAACCCAGGAAGCAAGGCCCTTTCCTGAGCGCCCAAGTGGCCTTCGGGTCACCCTCCCTCAAAGTTCCAGCCCCGAGAGCCGCCTCCCGTTTCCAGCCTGCAGGGTTGGGGAGCCTGTTTTCTTTTTCTTCCCTTTCCTTCTCTCTCCCTCCTGCCCCCAAAATTCAGAATCCTGCAGGCTCTCGCCTCGATTCTTTCCCCCAAGCCCCTTTTCGGGGGCTGTAATTAGTAACGCTGTTTCCCCAGCGTAGCCCTCCTCATAAATTATCCGCCGTGACAAGCCCGATTCACGGCTGCTACAGCCATCCTCTACCTCTCTGCGCCTTGCTCGGCTGGCCTGACCCGGGAGCGCGTCCCAAGGCGTGGGGTTCCAGAGGGGTTTTTTGCTTCCTCCCCCTTCCAACGTCTAAACTGTCCCAGAGAACGCCCATTTCCCCCACTATTTGTGAGCGCAGGGTGCTCGCAAAGAAGAGGAGGAAGGAGGAAGGCAGGGGAGGGAGAACGGCAAGGAGAGCTCCGCAGGGCTGGGAGAAATGAGACCAAGAGAGACTGGGAGAGGGCGGCAGAGAAGAGAGGGGGGACCGAGAGCCGCGTCCCCGCGGTCGCGTGGATTTAGAAAAAGGCTGGCTTTACCATGACTTATGTGCAGCTTGCGCATCCAGGGGTAGATCTGGGGTTGGGCGGGCGGCGCCGGGCTCGGCTCGCTCTGCGCACTCGCCTGCTCGCTGCTGGCAGGGGCGTCCTCCTCGGCTCCGGACGCCGTGCCAACCCCCTCTCTGCTGCTGATGTGGGTGCTGCCGGCGTCGGCCGAGGCGCCGCTGGAGTTGCTTAGGGAGTTTTTCCCGCCGTGGTGGCTGTCGCTGCCGGGCGAGGGGGCCACGGCGGAGCAGGGCAGCGGATCGGGCTGAGGAGAGTGCGTGGACGTGGCCGGCTGGCTGTACCTGGGCTCGGCGGGCG
CCGCGCTGGCGCTGGCAGCGTAGCTGCGGGCGCGCTCTCCGGAGCCAAAGTGGCCGGAGCCCGAGCGGCCGACGCTGAGATCCATGCCATTGTAGCCGTAGCCGTACCTGCCGGAGTGCATGCTCGCCGAGTCCCTGAATTGCTCGCTCACGGAACTATGATCTCCATAATTATGCAACTGGTAGTCCGGGCCATTTGGATAGCGACCGCAAAATGAGTTTACAAAATAAGAGCTCATTTGTTTTTTGATATGTGTGCTTGATTTGTGGCTCGCGGTCGTTTGTGCGTCTATAGCACCCTT"; + std::string species = "HumanHXA5\n"; + std::string header0 = ">hg18_knownGene_NM_019102_0\n"; + std::string str0 = "GGGTGCTATAGACGCACAAACGACCGCGAGCCACAAATCAAGCACACATATCAAAAAACAAATGAGCTCTTATTTTGTAAACTCATTTTGCGGTCGCTATCCAAATGGCCCGGACTACCAGTTGCATAATTATGGAGATCATAGTTCCGTGAGCGAGCAATTCAGGGACTCGGCGAGCATGCACTCCGGCAGGTACGGCTACGGCTACAATGGCATGGATCTCAGCGTCGGCCGCTCGGGCTCCGGCCACTTTGGCTCCGGAGAGCGCGCCCGCAGCTACGCTGCCAGCGCCAGCGCGGCGCCCGCCGAGCCCAGGTACAGCCAGCCGGCCACGTCCACGCACTCTCCTCAGCCCGATCCGCTGCCCTGCTCCGCCGTGGCCCCCTCGCCCGGCAGCGACAGCCACCACGGCGGGAAAAACTCCCTAAGCAACTCCAGCGGCGCCTCGGCCGACGCCGGCAGCACCCACATCAGCAGCAGAGAGGGGGTTGGCACGGCGTCCGGAGCCGAGGAGGACGCCCCTGCCAGCAGCGAGCAGGCGAGTGCGCAGAGCGAGCCGAGCCCGGCGCCGCCCGCCCAACCCCAGATCTACCCCTGGATGCGCAAGCTGCACATAAGTCATG"; + std::string header1 = ">hg18_knownGene_NM_019102_1\n"; + std::string str1 = "ACAACATAGGCGGCCCGGAAGGCAAAAGGGCCCGGACGGCCTACACGCGCTACCAGACCCTGGAGCTGGAGAAGGAGTTCCACTTCAACCGTTACCTGACCCGCAGAAGGAGGATTGAAATAGCACATGCTCTTTGCCTCTCCGAGAGACAAATTAAAATCTGGTTCCAAAACCGGAGAATGAAGTGGAAAAAAGATAATAAGCTGAAAAGCATGAGCATGGCCGCGGCAGGAGGGGCCTTCCGTCCCTGAGTATCTGAGCGTTTAAAGTACTGAGCAGTATTAGCGGATCCCGCGTAGTGTCAGTACTAAGGTGACTTTCTGAAACTCCCTTGTGTTCCTTCTGTGAAGAAGCCCTGTTCTCGTTGCCCTAATTCATCTTTTAATCATGAGCCTGTTTATTGCCATTATAGCGCCTGTATAAGTAGATCTGCTTTCTGTTCATCTCTTTGTCCTGAATGGCTTTGTCTTGAAAAAAAATAGATGTTTTAACTTATTTATATGAAGCAAGCTGTGTTACTTGAAGTAACTATAACAAAAAAAGAAAAGAGAAAAAAAAACACACAAAAAGTCCCCCTTCAATCTCGTTTAGTGCCAATGTTGTGTGTTGCACTCAAGTTGTTTAACTGTGCATGTGCGTGGAAGTGTTCCTGTCTCAATAGCTCCAAGCTGTTAAAGATATTTTTATTCAAACTACCTATATTCCTTGT"; + stringstream annot; + annot << species + << header0 + << str0 << std::endl + << std::endl + << header1 + << str1; + // need to convert strings to sequences 
for reverse complementing + Sequence seq0(str0, reduced_dna_alphabet); + Sequence seq1(str1, reduced_dna_alphabet); + + Sequence annotated_seq(s, reduced_dna_alphabet); + annotated_seq.load_annot(annot, 0, 0); + + SeqSpanRefList annots_list = annotated_seq.annotations(); + // both sequences were found + BOOST_REQUIRE_EQUAL( annots_list.size(), 2 ); - seq.load_annot(annot, 0, 0); + std::vector annots(annots_list.begin(), annots_list.end()); + // are they the same sequence? + BOOST_CHECK_EQUAL( annots[0]->size(), seq0.size()); + BOOST_CHECK_EQUAL( annots[0]->sequence(), seq0.rev_comp() ); + // this should hopefully catch the case when my hack in + // sequence.cpp::push_back_seq::operator() is no longer needed. + // spirit (or my grammar was duplicating the last char, + // the hack removes the duplicate. but if what ever's causing + // the dup gets fixed actual meaningful data will be being removed. + // see mussa ticket:265 for more information + BOOST_CHECK_EQUAL( annots[1]->size(), seq1.size()); + BOOST_CHECK_EQUAL( annots[1]->sequence(), seq1.rev_comp() ); - BOOST_CHECK_EQUAL( seq.annotations().size(), 2 ); } BOOST_AUTO_TEST_CASE( subseq_annotation_test ) -- 2.30.2