1 #define BOOST_AUTO_TEST_MAIN
2 #include <boost/test/auto_unit_test.hpp>
4 #include "seq_span.hpp"
5 #include "mussa_exceptions.hpp"
// Test: a SeqSpan built from a plain std::string is expected to report the
// string's length, hand back the same text from sequence(), and report
// SeqSpan::PlusStrand from strand().
9 BOOST_AUTO_TEST_CASE( seqspan_from_string )
11 std::string str1("AAGGCCTT");
12 SeqSpanRef span1(new SeqSpan(str1));
13 BOOST_CHECK_EQUAL(span1->length(), str1.length());
14 BOOST_CHECK_EQUAL(span1->sequence(), str1);
// no strand argument was passed to the constructor; PlusStrand is expected
15 BOOST_CHECK_EQUAL(span1->strand(), SeqSpan::PlusStrand);
// Test: constructing a SeqSpan from a string plus reduced_rna_alphabet is
// expected to keep the length and text, and get_alphabet() is expected to
// compare equal to Alphabet::reduced_rna_alphabet().
18 BOOST_AUTO_TEST_CASE( seqspan_from_string_with_alphabet )
// the input uses U rather than T
20 std::string str1("AAGGCCUU");
21 SeqSpanRef span1(new SeqSpan(str1, reduced_rna_alphabet));
22 BOOST_CHECK_EQUAL(span1->length(), str1.length());
23 BOOST_CHECK_EQUAL(span1->sequence(), str1);
24 BOOST_CHECK_EQUAL(span1->get_alphabet(), Alphabet::reduced_rna_alphabet());
// Test: same as the alphabet-only construction, but SeqSpan::PlusStrand is
// passed explicitly as the third constructor argument and is expected to be
// reported back by strand().
27 BOOST_AUTO_TEST_CASE( seqspan_from_string_with_alphabet_and_plusstrand )
29 std::string str1("AAGGCCUU");
30 SeqSpanRef span1(new SeqSpan(str1, reduced_rna_alphabet, SeqSpan::PlusStrand));
31 BOOST_CHECK_EQUAL(span1->length(), str1.length());
32 BOOST_CHECK_EQUAL(span1->sequence(), str1);
33 BOOST_CHECK_EQUAL(span1->get_alphabet(), Alphabet::reduced_rna_alphabet());
34 BOOST_CHECK_EQUAL(span1->strand(), SeqSpan::PlusStrand);
// Test: a SeqSpan constructed with SeqSpan::SingleStrand is expected to keep
// its text and alphabet, to report SingleStrand from strand(), and to throw
// sequence_invalid_strand when an OppositeStrand subsequence is requested.
37 BOOST_AUTO_TEST_CASE( seqspan_from_string_with_alphabet_and_singlestrand )
39 std::string str1("AAAAGCT");
40 SeqSpanRef span1(new SeqSpan(str1, reduced_dna_alphabet, SeqSpan::SingleStrand));
41 BOOST_CHECK_EQUAL(span1->length(), str1.length());
42 BOOST_CHECK_EQUAL(span1->sequence(), str1);
43 BOOST_CHECK_EQUAL(span1->get_alphabet(), Alphabet::reduced_dna_alphabet());
44 // the span is expected to report SingleStrand, the strand it was constructed with
45 BOOST_CHECK_EQUAL(span1->strand(), SeqSpan::SingleStrand);
// same check as the sequence() comparison above, against the literal text
46 BOOST_CHECK_EQUAL(span1->sequence(), "AAAAGCT");
// asking a single-stranded span for its opposite strand is expected to fail
47 BOOST_CHECK_THROW(span1->subseq(0,2,SeqSpan::OppositeStrand), sequence_invalid_strand);
// Test: constructing a SeqSpan directly from a string with each of the strand
// values below is expected to throw sequence_invalid_strand.
// NOTE(review): SeqSpan::BothStrand is checked twice (second and last check);
// the last line may have been meant to cover a different value -- confirm.
50 BOOST_AUTO_TEST_CASE( seqspan_from_string_with_invalidstrand )
52 std::string s("AAAAGCT");
53 BOOST_CHECK_THROW(SeqSpan(s, reduced_dna_alphabet, SeqSpan::UnknownStrand), sequence_invalid_strand);
54 BOOST_CHECK_THROW(SeqSpan(s, reduced_dna_alphabet, SeqSpan::BothStrand), sequence_invalid_strand);
55 BOOST_CHECK_THROW(SeqSpan(s, reduced_dna_alphabet, SeqSpan::SameStrand), sequence_invalid_strand);
56 BOOST_CHECK_THROW(SeqSpan(s, reduced_dna_alphabet, SeqSpan::OppositeStrand), sequence_invalid_strand);
57 BOOST_CHECK_THROW(SeqSpan(s, reduced_dna_alphabet, SeqSpan::BothStrand), sequence_invalid_strand);
// Test: SeqSpans constructed from an existing SeqSpanRef (with no range
// arguments) are expected to report the same length and text as the span they
// were built from.
60 BOOST_AUTO_TEST_CASE( seqspan_from_seqspan )
62 std::string str1("AAGGCCTT");
63 SeqSpanRef span1(new SeqSpan(str1));
// span2 and span3 are constructed identically, both from span1
64 SeqSpanRef span2(new SeqSpan(span1));
65 SeqSpanRef span3(new SeqSpan(span1));
// the source span itself still matches the original string
67 BOOST_CHECK_EQUAL(span1->length(), str1.length());
68 BOOST_CHECK_EQUAL(span1->sequence(), str1);
69 BOOST_CHECK_EQUAL(span1->length(), span2->length());
70 BOOST_CHECK_EQUAL(span2->sequence(), str1);
72 BOOST_CHECK_EQUAL(span1->length(), span3->length());
73 BOOST_CHECK_EQUAL(span3->sequence(), str1);
// Test: equality between SeqSpans. A span built from another span is expected
// to compare equal to it; spans built from separate strings are expected to
// compare unequal, and SeqSpan::isFamily() is expected to be false for them.
76 BOOST_AUTO_TEST_CASE( seqspan_equality )
78 std::string str1("AAGGCCTT");
// str2 and str3 are distinct string objects holding identical text
79 std::string str2("AACCGGTT");
80 std::string str3("AACCGGTT");
81 SeqSpanRef span1(new SeqSpan(str1));
82 SeqSpanRef span1copy(new SeqSpan(span1));
83 SeqSpanRef span2(new SeqSpan(str2));
84 SeqSpanRef span3(new SeqSpan(str3));
86 BOOST_CHECK_EQUAL(*span1, *span1copy);
// span1 and span3 hold different text
87 BOOST_CHECK(*span1 != *span3);
88 // spans built from separate source strings are expected to compare unequal here, even though str2 and str3 hold the same text
89 BOOST_CHECK(*span2 != *span3);
90 BOOST_CHECK_EQUAL(SeqSpan::isFamily(*span2, *span3), false);
// Test: find_first_not_of() on a sequence object is expected to return the
// same position as std::string::find_first_not_of() on the equivalent string,
// both when a non-matching character exists ("A" against "AAAAT") and when the
// search set shares no characters with the text ("qwer" against "AATTGGCC").
// NOTE(review): seq1 and seq2 are not declared in the lines visible here; their
// definitions (presumably built from str1 and str2) appear to be missing from
// this view -- confirm against the full file.
93 BOOST_AUTO_TEST_CASE( seqspan_find_first_not_of )
95 std::string str1("AAAAT");
97 BOOST_CHECK_EQUAL(seq1.find_first_not_of("A"), str1.find_first_not_of("A"));
99 std::string str2("AATTGGCC");
101 BOOST_CHECK_EQUAL(seq2.find_first_not_of("qwer"), str2.find_first_not_of("qwer"));
// Test: element access through at() and operator[] on a full span, on a
// sub-span constructed as SeqSpan(seq1, 2, 2), and on an OppositeStrand
// subsequence.
104 BOOST_AUTO_TEST_CASE( seqspan_at )
106 std::string str1("AAGGCCTT");
107 SeqSpanRef seq1(new SeqSpan(str1));
108 SeqSpanRef seq2(new SeqSpan(seq1, 2, 2));
// index 0 of seq2 is expected to line up with index 2 of seq1 / str1
110 BOOST_CHECK_EQUAL( seq1->at(0), str1.at(0) );
111 BOOST_CHECK_EQUAL( seq1->at(2), seq2->at(0) );
112 BOOST_CHECK_EQUAL( str1[2], seq2->at(0) );
// operator[] is expected to agree with at()
113 BOOST_CHECK_EQUAL( (*seq1)[0], seq1->at(0) );
114 BOOST_CHECK_EQUAL( (*seq1)[2], (*seq2)[0] );
// subseq(0, 4) of str1 covers "AAGG"; on the opposite strand the expected
// characters are C, C, T, T (consistent with the reverse complement of "AAGG")
116 SeqSpanRef seq3 = seq1->subseq(0, 4, SeqSpan::OppositeStrand);
117 BOOST_CHECK_EQUAL( seq3->at(0), 'C');
118 BOOST_CHECK_EQUAL( seq3->at(1), 'C');
119 BOOST_CHECK_EQUAL( seq3->at(2), 'T');
120 BOOST_CHECK_EQUAL( seq3->at(3), 'T');
// Test: data() on a full span is compared with std::string::data() of the
// source string, and data() on a sub-span built as SeqSpan(seq1, 3, 2) is
// compared element by element with str1.substr(3,2).
124 BOOST_AUTO_TEST_CASE( seqspan_data )
126 std::string str1("AAGGCCTT");
127 SeqSpanRef seq1(new SeqSpan(str1));
128 SeqSpanRef seq2(new SeqSpan(seq1, 3, 2));
// REQUIRE (rather than CHECK) is used for the length comparisons
130 BOOST_REQUIRE_EQUAL( str1.length(), seq1->length());
131 BOOST_CHECK_EQUAL( str1.data(), seq1->data() );
132 std::string str1sub = str1.substr(3,2);
133 BOOST_REQUIRE_EQUAL( seq2->size(), str1sub.size() );
134 BOOST_REQUIRE_EQUAL( seq2->length(), str1sub.length() );
// NOTE(review): i is an int compared against seq2->size(); if size() returns
// an unsigned type (it is compared with std::string::size() above) this is a
// signed/unsigned comparison -- confirm and consider an unsigned index.
135 for (int i = 0; i != seq2->size(); ++i) {
136 BOOST_CHECK_EQUAL( seq2->data()[i], str1sub.data()[i] );
// Test: iteration with begin()/end(). The begin() of a sub-span built as
// SeqSpan(seq1, 2, 2) is expected to equal seq1->begin() + 2, and iterating
// seq1 is expected to yield the same characters as iterating str1.
140 BOOST_AUTO_TEST_CASE( seqspan_begin_end )
142 std::string str1("AAGGCCTT");
143 SeqSpanRef seq1(new SeqSpan(str1));
144 SeqSpanRef seq2(new SeqSpan(seq1, 2, 2));
146 BOOST_CHECK(seq1->begin() + 2 == seq2->begin());
148 std::string::const_iterator str1_i = str1.begin();
149 SeqSpan::const_iterator seq1_i = seq1->begin();
// the walk stops as soon as either iterator reaches its end, so a length
// mismatch between the two ranges would not be reported by this loop
150 for(; not ((str1_i == str1.end()) or (seq1_i == seq1->end())); ++str1_i, ++seq1_i) {
151 BOOST_CHECK_EQUAL( *str1_i, *seq1_i );
// Test: forward iteration over an OppositeStrand span covering the whole
// parent (start 0, count SeqSpan::npos) is expected to yield the characters of
// str1rc, the hand-written reverse complement of str1.
155 BOOST_AUTO_TEST_CASE( seqspan_subseq_reverse_begin_end )
157 std::string str1("AAAACCTT");
158 std::string str1rc("AAGGTTTT");
159 SeqSpanRef seq1(new SeqSpan(str1));
160 SeqSpanRef seq2(new SeqSpan(seq1, 0, SeqSpan::npos, SeqSpan::OppositeStrand ));
163 std::string::const_iterator str1rc_i = str1rc.begin();
164 SeqSpan::const_iterator seq2_i = seq2->begin();
// the walk stops at whichever range ends first, so a length mismatch between
// str1rc and seq2 would not be reported by this loop
165 for(; not ((str1rc_i == str1rc.end()) or (seq2_i == seq2->end())); ++str1rc_i, ++seq2_i) {
166 BOOST_CHECK_EQUAL( *str1rc_i, *seq2_i );
// Test: reverse iteration with rbegin()/rend() over a SeqSpan is expected to
// yield the same characters as reverse iteration over the source string.
170 BOOST_AUTO_TEST_CASE( seqspan_rbegin_rend )
172 std::string str1("AAGGCCTT");
173 SeqSpanRef seq1(new SeqSpan(str1));
175 std::string::const_reverse_iterator str1_i = str1.rbegin();
176 SeqSpan::const_reverse_iterator seq1_i = seq1->rbegin();
// the loop is bounded only by seq1->rend(); str1_i is advanced in step but is
// never compared against str1.rend()
177 for(; seq1_i != seq1->rend(); ++str1_i, ++seq1_i) {
178 BOOST_CHECK_EQUAL( *str1_i, *seq1_i );
// Test: a SeqSpan built from an empty string is expected to report 0 for
// start(), stop(), parentStart(), parentStop() and size().
182 BOOST_AUTO_TEST_CASE( seqspan_empty_start_stop )
184 SeqSpanRef s1(new SeqSpan(""));
185 BOOST_CHECK_EQUAL( s1->start(), 0 );
186 BOOST_CHECK_EQUAL( s1->stop(), 0 );
188 BOOST_CHECK_EQUAL( s1->parentStart(), 0 );
189 BOOST_CHECK_EQUAL( s1->parentStop(), 0 );
191 BOOST_CHECK_EQUAL( s1->size(), 0 );
// Test: start()/stop() across two levels of subseq(). The expected values for
// the nested span accumulate the offsets of both subseq() calls (2 then 1),
// and each span's sequence() is compared with the matching std::string substr.
194 BOOST_AUTO_TEST_CASE( seqspan_global_start_stop )
196 std::string seq_string("AAGGCCTT");
197 SeqSpanRef s1(new SeqSpan(seq_string));
// the top-level span is expected to cover [0, seq_string.size())
198 BOOST_CHECK_EQUAL( s1->start(), 0 );
199 BOOST_CHECK_EQUAL( s1->stop(), seq_string.size() );
// first level: offset 2, count 3
201 std::string s2seq_string = seq_string.substr(2,3);
202 SeqSpanRef s2 = s1->subseq(2,3);
203 BOOST_CHECK_EQUAL( s2->start(), 2);
204 BOOST_CHECK_EQUAL( s2->stop(), 2+3);
205 BOOST_CHECK_EQUAL( s2->size(), 3);
206 BOOST_CHECK_EQUAL( s2->sequence(), s2seq_string);
// second level: offset 1, count 1, taken from s2
208 std::string s3seq_string = s2seq_string.substr(1,1);
209 SeqSpanRef s3 = s2->subseq(1,1);
210 BOOST_CHECK_EQUAL( s3->start(), 2+1 );
211 BOOST_CHECK_EQUAL( s3->stop(), 2+1+1);
212 BOOST_CHECK_EQUAL( s3->size(), 1);
213 BOOST_CHECK_EQUAL( s3->sequence(), s3seq_string);
// Test: requesting subseq(4,8) from an 8-character span asks for more
// characters than remain after offset 4; the resulting span is expected to
// start at 4, have size 4 and stop at 8.
// NOTE(review): s2seq_string is not used by any line visible here -- it looks
// like a leftover and could be removed; confirm against the full file.
216 BOOST_AUTO_TEST_CASE( seqspan_global_to_large )
218 std::string seq_string("AAGGCCTT");
219 SeqSpanRef s1(new SeqSpan(seq_string));
220 BOOST_CHECK_EQUAL( s1->start(), 0 );
221 BOOST_CHECK_EQUAL( s1->stop(), seq_string.size() );
223 std::string s2seq_string = seq_string.substr(2,3);
224 SeqSpanRef s2 = s1->subseq(4,8);
225 BOOST_CHECK_EQUAL( s2->start(), 4);
226 BOOST_CHECK_EQUAL( s2->size(), 4);
227 BOOST_CHECK_EQUAL( s2->stop(), 8);
// Test: parentStart()/parentStop() across two levels of subseq(). Unlike the
// start()/stop() expectations elsewhere in this file, the expected values for
// the nested span use only the offset passed to the most recent subseq() call.
230 BOOST_AUTO_TEST_CASE( seqspan_parent_start_stop )
232 std::string seq_string("AAGGCCTT");
233 SeqSpanRef s1(new SeqSpan(seq_string));
234 BOOST_CHECK_EQUAL( s1->parentStart(), 0 );
235 BOOST_CHECK_EQUAL( s1->parentStop(), seq_string.size() );
// first level: offset 2, count 3
237 std::string s2seq_string = seq_string.substr(2,3);
238 SeqSpanRef s2 = s1->subseq(2,3);
239 BOOST_CHECK_EQUAL( s2->parentStart(), 2);
240 BOOST_CHECK_EQUAL( s2->parentStop(), 2+3);
241 BOOST_CHECK_EQUAL( s2->size(), 3);
242 BOOST_CHECK_EQUAL( s2->sequence(), s2seq_string);
// second level: offset 1, count 1, taken from s2; expected values are 1 and
// 1+1 rather than 2+1 and 2+1+1
244 std::string s3seq_string = s2seq_string.substr(1,1);
245 SeqSpanRef s3 = s2->subseq(1,1);
246 BOOST_CHECK_EQUAL( s3->parentStart(), 1 );
247 BOOST_CHECK_EQUAL( s3->parentStop(), 1+1);
248 BOOST_CHECK_EQUAL( s3->size(), 1);
249 BOOST_CHECK_EQUAL( s3->sequence(), s3seq_string);
// Test: effect of moving a span's start on the span itself and on a child
// span taken from it. After s2 is extended to the left, the child s3 is
// expected to keep its start()/stop() values while its
// parentStart()/parentStop() values shift by one.
// NOTE(review): no statement that modifies s2 is visible between the
// "Extend s2 to the left" comment and the checks that follow; the mutating
// call appears to be missing from this view -- confirm against the full file.
252 BOOST_AUTO_TEST_CASE( seqspan_global_mutable_start_stop )
254 std::string seq_string("AAGGCCTT");
255 SeqSpanRef s1(new SeqSpan(seq_string));
257 std::string s2seq_string = seq_string.substr(2,3);
258 SeqSpanRef s2 = s1->subseq(2,3);
259 BOOST_CHECK_EQUAL( s2->start(), 2);
260 BOOST_CHECK_EQUAL( s2->stop(), 2+3);
261 BOOST_CHECK_EQUAL( s2->size(), 3);
262 BOOST_CHECK_EQUAL( s2->sequence(), s2seq_string);
264 std::string s3seq_string = s2seq_string.substr(1,1);
265 SeqSpanRef s3 = s2->subseq(1,1);
266 // Check root location
267 BOOST_CHECK_EQUAL( s3->start(), 2+1 );
268 BOOST_CHECK_EQUAL( s3->stop(), 2+1+1);
269 BOOST_CHECK_EQUAL( s3->size(), 1);
270 // Check parent location
271 BOOST_CHECK_EQUAL( s3->parentStart(), 1 );
272 BOOST_CHECK_EQUAL( s3->parentStop(), 1+1);
273 BOOST_CHECK_EQUAL( s3->sequence(), s3seq_string);
275 // Extend s2 to the left
// expected afterwards: s2 starts at 1, still stops at 5, and has grown to size 4
277 BOOST_CHECK_EQUAL( s2->start(), 1);
278 BOOST_CHECK_EQUAL( s2->stop(), 1+1+3);
279 BOOST_CHECK_EQUAL( s2->size(), 4);
281 // Child sequence should have the same global location
282 BOOST_CHECK_EQUAL( s3->start(), 2+1 );
283 BOOST_CHECK_EQUAL( s3->stop(), 2+1+1);
284 BOOST_CHECK_EQUAL( s3->size(), 1);
285 // Child sequence should now have different parent location
286 BOOST_CHECK_EQUAL( s3->parentStart(), 2 );
287 BOOST_CHECK_EQUAL( s3->parentStop(), 2+1);
// repeats the size check made a few lines above
288 BOOST_CHECK_EQUAL( s3->size(), 1);
// Test: start/stop behaviour of a MinusStrand subsequence and of a SameStrand
// child taken from it.
// NOTE(review): the final check expects s2->sequence() to have grown from
// "CTT" to "CTTT", but no statement that modifies s2 is visible before it; the
// mutating call appears to be missing from this view -- confirm.
291 BOOST_AUTO_TEST_CASE( seqspan_global_mutable_start_stop_minus_strand )
293 std::string seq_string("AAAAGCTA");
294 SeqSpanRef s1(new SeqSpan(seq_string));
// positions 2..4 of seq_string are "AAG"; on the minus strand "CTT" is expected
296 SeqSpanRef s2 = s1->subseq(2,3, SeqSpan::MinusStrand);
297 BOOST_CHECK_EQUAL( s2->start(), 2);
298 BOOST_CHECK_EQUAL( s2->stop(), 2+3);
299 BOOST_CHECK_EQUAL( s2->size(), 3);
300 BOOST_CHECK_EQUAL( s2->sequence(), "CTT");
// offset 1, count 2 within s2's own text "CTT" gives the expected "TT"
302 SeqSpanRef s3 = s2->subseq(1,2, SeqSpan::SameStrand);
303 BOOST_CHECK_EQUAL(s3->sequence(), "TT");
305 // Could also argue that it should be CT
306 // if you assume that the locations are all relative to the global sequence
307 // and are then reverse complemented
310 BOOST_CHECK_EQUAL( s2->sequence(), "CTTT");
// Test: effect of setParentStart() on a span and on a child span taken from
// it. After s2->setParentStart(1), s2 is expected to start at 1 with an
// unchanged stop, and the child s3 is expected to keep its start()/stop()
// values while its parentStart()/parentStop() values shift by two.
313 BOOST_AUTO_TEST_CASE( seqspan_parent_mutable_start_stop )
315 std::string seq_string("AAGGCCTT");
316 SeqSpanRef s1(new SeqSpan(seq_string));
318 std::string s2seq_string = seq_string.substr(3,3);
319 SeqSpanRef s2 = s1->subseq(3,3);
320 BOOST_CHECK_EQUAL( s2->start(), 3);
321 BOOST_CHECK_EQUAL( s2->stop(), 3+3);
322 BOOST_CHECK_EQUAL( s2->size(), 3);
323 BOOST_CHECK_EQUAL( s2->sequence(), s2seq_string);
325 std::string s3seq_string = s2seq_string.substr(1,1);
326 SeqSpanRef s3 = s2->subseq(1,1);
327 // Check root location
328 BOOST_CHECK_EQUAL( s3->start(), 3+1 );
329 BOOST_CHECK_EQUAL( s3->stop(), 3+1+1);
330 BOOST_CHECK_EQUAL( s3->size(), 1);
331 // Check parent location
332 BOOST_CHECK_EQUAL( s3->parentStart(), 1 );
333 BOOST_CHECK_EQUAL( s3->parentStop(), 1+1);
334 BOOST_CHECK_EQUAL( s3->sequence(), s3seq_string);
336 // s2 should now be equivalent to s1->subseq(1,5)
337 s2->setParentStart(1);
// the start moved from 3 to 1 while the stop stays at 6, so the size grows to 5
338 BOOST_CHECK_EQUAL( s2->start(), 1);
339 BOOST_CHECK_EQUAL( s2->stop(), 3+3);
340 BOOST_CHECK_EQUAL( s2->size(), 5);
342 // Child sequence should have the same global location
343 BOOST_CHECK_EQUAL( s3->start(), 3+1 );
344 BOOST_CHECK_EQUAL( s3->stop(), 3+1+1);
345 BOOST_CHECK_EQUAL( s3->size(), 1);
346 // Child sequence should now have different parent location
347 BOOST_CHECK_EQUAL( s3->parentStart(), 1+2);
348 BOOST_CHECK_EQUAL( s3->parentStop(), 1+2+1);
351 // what happens if we set a stop past our actual end
// Test: builds a span s2 = s1->subseq(3,3) and a child s3 = s2->subseq(1,1),
// then checks sizes of 2 (s3), 5 (s2) and 4 (s3) in that order.
// NOTE(review): no statement that changes s2 or s3 is visible between their
// creation and the three size checks, yet the expected sizes differ from the
// sizes requested at creation (1 and 3); the calls that move the stop appear
// to be missing from this view -- confirm against the full file.
// NOTE(review): s3seq_string is not used by any line visible here.
352 BOOST_AUTO_TEST_CASE( seqspan_stop_past_end )
354 std::string seq_string("AAGGCCTT");
355 SeqSpanRef s1(new SeqSpan(seq_string));
356 std::string s2seq_string = seq_string.substr(3,3);
357 SeqSpanRef s2 = s1->subseq(3,3);
358 std::string s3seq_string = s2seq_string.substr(1,1);
359 SeqSpanRef s3 = s2->subseq(1,1);
361 // should be limited by our parent sequence
363 BOOST_CHECK_EQUAL( s3->size(), 2);
366 BOOST_CHECK_EQUAL( s2->size(), 5);
369 BOOST_CHECK_EQUAL( s3->size(), 4);
// Test: subseq() with the relative strand values SameStrand and
// OppositeStrand. The expected strand() of each result is an absolute value
// (PlusStrand or MinusStrand) derived from the strand of the span that
// subseq() was called on.
372 BOOST_AUTO_TEST_CASE( seqspan_strand_sameother )
374 SeqSpanRef seq1(new SeqSpan("AAAAAGGGGG"));
375 BOOST_CHECK_EQUAL(seq1->strand(), SeqSpan::PlusStrand);
// same strand as a plus-strand parent: text unchanged, still plus
377 SeqSpanRef seq2 = seq1->subseq(0,4,SeqSpan::SameStrand);
378 BOOST_CHECK_EQUAL(seq2->sequence(), "AAAA");
379 BOOST_CHECK_EQUAL(seq2->strand(), SeqSpan::PlusStrand);
// opposite strand of a plus-strand parent: "TTTT" expected, reported as minus
380 SeqSpanRef seq3 = seq1->subseq(0,4,SeqSpan::OppositeStrand);
381 BOOST_CHECK_EQUAL(seq3->sequence(), "TTTT");
382 BOOST_CHECK_EQUAL(seq3->strand(), SeqSpan::MinusStrand);
384 // opposite of a plus strand should be minus
385 SeqSpanRef seq4 = seq2->subseq(0,4,SeqSpan::OppositeStrand);
386 BOOST_CHECK_EQUAL(seq4->sequence(), "TTTT");
387 BOOST_CHECK_EQUAL(seq4->strand(), SeqSpan::MinusStrand);
388 // opposite of a minus strand should be plus
389 SeqSpanRef seq5 = seq3->subseq(0,4,SeqSpan::OppositeStrand);
390 BOOST_CHECK_EQUAL(seq5->sequence(), "AAAA");
391 BOOST_CHECK_EQUAL(seq5->strand(), SeqSpan::PlusStrand);
// Test: subseq() with the absolute strand values PlusStrand and MinusStrand
// on a plus-strand span; the first four characters are expected to read
// "AAAA" on the plus strand and "TTTT" on the minus strand.
394 BOOST_AUTO_TEST_CASE( seqspan_strand_plusminus )
396 SeqSpanRef seq1(new SeqSpan("AAAAAGGGGG"));
397 BOOST_CHECK_EQUAL(seq1->strand(), SeqSpan::PlusStrand);
399 SeqSpanRef seq2 = seq1->subseq(0,4,SeqSpan::PlusStrand);
400 BOOST_CHECK_EQUAL(seq2->sequence(), "AAAA");
401 SeqSpanRef seq3 = seq1->subseq(0,4,SeqSpan::MinusStrand);
402 BOOST_CHECK_EQUAL(seq3->sequence(), "TTTT");