From: Diane Trout Date: Thu, 19 Oct 2006 00:58:47 +0000 (+0000) Subject: Sequence::rev_comp works on subsequences X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=mussa.git;a=commitdiff_plain;h=f2459784ce26346876ba2ec76475a579051bbd5c Sequence::rev_comp works on subsequences ticket:197 Sequence::rev_comp when called on a Sequence which contained a subsequence was actually reversing the entire sequence and not just the selected region. To fix this I added rbegin and rend which return reverse iterators that cover the subseq range and updated rev_comp to use them. There's also some extra sequence unittests to make sure that rbegin/rend work correctly. (and as a bonus loading several sequences out of one fasta file works). --- diff --git a/alg/sequence.cpp b/alg/sequence.cpp index 3ac4dd0..9abbbcd 100644 --- a/alg/sequence.cpp +++ b/alg/sequence.cpp @@ -197,14 +197,14 @@ void Sequence::load_fasta(fs::path file_path, alphabet_ref a, } } -void Sequence::load_fasta(std::iostream& file, +void Sequence::load_fasta(std::istream& file, int seq_num, int start_index, int end_index) { load_fasta(file, alphabet, seq_num, start_index, end_index); } void -Sequence::load_fasta(std::iostream& data_file, alphabet_ref a, +Sequence::load_fasta(std::istream& data_file, alphabet_ref a, int seq_num, int start_index, int end_index) { @@ -580,9 +580,9 @@ Sequence Sequence::rev_comp() const std::string rc_map = create_reverse_map(); // reverse and convert - seq_string::const_reverse_iterator seq_i; - seq_string::const_reverse_iterator seq_end = seq->rend(); - for(seq_i = seq->rbegin(); + Sequence::const_reverse_iterator seq_i; + Sequence::const_reverse_iterator seq_end = rend(); + for(seq_i = rbegin(); seq_i != seq_end; ++seq_i) { @@ -711,6 +711,23 @@ Sequence::const_iterator Sequence::end() const } } +Sequence::const_reverse_iterator Sequence::rbegin() const +{ + if (seq and seq_count != 0) + return seq->rbegin()+(seq->size()-(seq_start+seq_count)); + else + return Sequence::const_reverse_iterator(); +} + +Sequence::const_reverse_iterator Sequence::rend() const +{ + if (seq and seq_count != 0) { + return rbegin() + seq_count; + } else { + return Sequence::const_reverse_iterator(); + } +} + bool Sequence::empty() const { return (seq_count == 0) ? true : false; diff --git a/alg/sequence.hpp b/alg/sequence.hpp index 20bf018..6365ea7 100644 --- a/alg/sequence.hpp +++ b/alg/sequence.hpp @@ -160,6 +160,11 @@ public: size_type size() const; //! alias for size (used by string) size_type length() const; + //! reverse iterator + const_reverse_iterator rbegin() const; + //! reverse end iterator + const_reverse_iterator rend() const; + //! is our sequence empty? //! start position relative to "base" sequence size_type start() const; //! one past the last position relative to "base" sequence @@ -202,13 +207,13 @@ public: alphabet_ref a, int seq_num=1, int start_index=0, int end_index=0); - void load_fasta(std::iostream& file, + void load_fasta(std::istream& file, int seq_num=1, int start_index=0, int end_index=0); //! load sequence from stream //! \throw mussa_load_error //! \throw sequence_empty_error //! \throw sequence_empty_file_error - void load_fasta(std::iostream& file, + void load_fasta(std::istream& file, alphabet_ref a, int seq_num=1, int start_index=0, int end_index=0); diff --git a/alg/test/test_sequence.cpp b/alg/test/test_sequence.cpp index 48492f7..6317144 100644 --- a/alg/test/test_sequence.cpp +++ b/alg/test/test_sequence.cpp @@ -258,6 +258,32 @@ BOOST_AUTO_TEST_CASE( sequence_load_fasta_default ) sequence_invalid_load_error); } +BOOST_AUTO_TEST_CASE( sequence_load_multiple_sequences_one_fasta ) +{ + std::string fasta_file( + ">gi|10129974|gb|AF188002.1|AF188002\n" + "AAAAGGCTCCTGTCATATTGTGTCCTGCTCTGGTCTGC\n" + ">gi|180579|gb|M21487.1|HUMCKMM1\n" + "CGCCGAGAGCGCTTGCTCTGCCCAGATCTCGGCGAGTC\n" + ">gi|1621|emb|X55146.1|OCMCK1\n" + "CTCCCTGAGGGGAGTGCCCCGCTTAGCCC\n" + ); + istringstream seq1_file(fasta_file); + Sequence seq1; + seq1.load_fasta(seq1_file, 1, 0, 0); + BOOST_CHECK_EQUAL(seq1.get_sequence(), "AAAAGGCTCCTGTCATATTGTGTCCTGCTCTGGTCTGC"); + + istringstream seq2_file(fasta_file); + Sequence seq2; + seq2.load_fasta(seq2_file, 2, 0, 0); + BOOST_CHECK_EQUAL(seq2.get_sequence(), "CGCCGAGAGCGCTTGCTCTGCCCAGATCTCGGCGAGTC"); + + istringstream seq3_file(fasta_file); + Sequence seq3; + seq3.load_fasta(seq3_file, 3, 0, 0); + BOOST_CHECK_EQUAL(seq3.get_sequence(), "CTCCCTGAGGGGAGTGCCCCGCTTAGCCC"); +} + BOOST_AUTO_TEST_CASE( sequence_reverse_complement ) { std::string iupac_symbols("AaCcGgTtRrYySsWwKkMmBbDdHhVvNn-~.?"); @@ -287,6 +313,56 @@ BOOST_AUTO_TEST_CASE( sequence_reverse_complement_rna ) BOOST_CHECK_EQUAL(rna_seq, rna_seq.rev_comp().rev_comp()); } +BOOST_AUTO_TEST_CASE( sequence_reverse_complement_subseq ) +{ + std::string dna_str("AAAAAAAAAAGGGGGGGGGGG"); + Sequence seq(dna_str, Sequence::reduced_dna_alphabet); + Sequence subseq = seq.subseq(8,4); + BOOST_CHECK_EQUAL( subseq, "AAGG"); + Sequence rev_subseq = subseq.rev_comp(); + BOOST_CHECK_EQUAL( rev_subseq.size(), subseq.size() ); + BOOST_CHECK_EQUAL( rev_subseq.get_sequence(), "CCTT"); +} + +BOOST_AUTO_TEST_CASE( sequence_reverse_iterator ) +{ + std::string dna_str("AAAAAAAAAAGGGGGGGGGGG"); + std::string dna_str_reversed(dna_str.rbegin(), dna_str.rend()); + Sequence seq(dna_str); + std::string seq_reversed(seq.rbegin(), seq.rend()); + BOOST_CHECK_EQUAL(seq_reversed, dna_str_reversed); + + std::string substr = dna_str.substr(8,4); + Sequence subseq = seq.subseq(8,4); + BOOST_CHECK_EQUAL(substr, subseq); + + std::string substr_reversed(substr.rbegin(), substr.rend()); + std::string subseq_reversed(subseq.rbegin(), subseq.rend()); + BOOST_CHECK_EQUAL(substr_reversed, subseq_reversed); +} + +BOOST_AUTO_TEST_CASE( sequence_empty_reverse_iterator) +{ + // so what happens with reverse interators when we have no sequence? + Sequence seq1; + Sequence seq2; + Sequence seq3("AGCT"); + + // all the empty sequences should have equal iterators + BOOST_CHECK(seq1.rbegin() == seq1.rend()); + BOOST_CHECK(seq1.rbegin() == seq2.rend()); + + // none of the seq1 iterators should equal any of the seq3 iterators + BOOST_CHECK(seq1.rbegin() != seq3.rbegin()); + BOOST_CHECK(seq1.rbegin() != seq3.rend()); + BOOST_CHECK(seq1.rend() != seq3.rbegin()); + BOOST_CHECK(seq1.rend() != seq3.rend()); + + // seq3 iterators should work + BOOST_CHECK(seq3.rbegin() != seq3.rend()); + +} + BOOST_AUTO_TEST_CASE( annotation_load ) { string annot_data = "human\n"