1 #define BOOST_TEST_DYN_LINK
2 #define BOOST_TEST_MODULE test_seq_span
3 #include <boost/test/unit_test.hpp>
5 #include "seq_span.hpp"
6 #include "mussa_exceptions.hpp"
10 BOOST_AUTO_TEST_CASE( seqspan_from_string )
12 std::string str1("AAGGCCTT");
13 SeqSpanRef span1(new SeqSpan(str1));
14 BOOST_CHECK_EQUAL(span1->length(), str1.length());
15 BOOST_CHECK_EQUAL(span1->sequence(), str1);
16 BOOST_CHECK_EQUAL(span1->strand(), SeqSpan::PlusStrand);
19 BOOST_AUTO_TEST_CASE( seqspan_from_string_with_alphabet )
21 std::string str1("AAGGCCUU");
22 SeqSpanRef span1(new SeqSpan(str1, reduced_rna_alphabet));
23 BOOST_CHECK_EQUAL(span1->length(), str1.length());
24 BOOST_CHECK_EQUAL(span1->sequence(), str1);
25 BOOST_CHECK_EQUAL(span1->get_alphabet(), Alphabet::reduced_rna_alphabet());
28 BOOST_AUTO_TEST_CASE( seqspan_from_string_with_alphabet_and_plusstrand )
30 std::string str1("AAGGCCUU");
31 SeqSpanRef span1(new SeqSpan(str1, reduced_rna_alphabet, SeqSpan::PlusStrand));
32 BOOST_CHECK_EQUAL(span1->length(), str1.length());
33 BOOST_CHECK_EQUAL(span1->sequence(), str1);
34 BOOST_CHECK_EQUAL(span1->get_alphabet(), Alphabet::reduced_rna_alphabet());
35 BOOST_CHECK_EQUAL(span1->strand(), SeqSpan::PlusStrand);
38 BOOST_AUTO_TEST_CASE( seqspan_from_string_with_alphabet_and_singlestrand )
40 std::string str1("AAAAGCT");
41 SeqSpanRef span1(new SeqSpan(str1, reduced_dna_alphabet, SeqSpan::SingleStrand));
42 BOOST_CHECK_EQUAL(span1->length(), str1.length());
43 BOOST_CHECK_EQUAL(span1->sequence(), str1);
44 BOOST_CHECK_EQUAL(span1->get_alphabet(), Alphabet::reduced_dna_alphabet());
45 // we always store strands as Plus
46 BOOST_CHECK_EQUAL(span1->strand(), SeqSpan::SingleStrand);
47 BOOST_CHECK_EQUAL(span1->sequence(), "AAAAGCT");
48 BOOST_CHECK_THROW(span1->subseq(0,2,SeqSpan::OppositeStrand), sequence_invalid_strand);
51 BOOST_AUTO_TEST_CASE( seqspan_from_string_with_invalidstrand )
53 std::string s("AAAAGCT");
54 BOOST_CHECK_THROW(SeqSpan(s, reduced_dna_alphabet, SeqSpan::UnknownStrand), sequence_invalid_strand);
55 BOOST_CHECK_THROW(SeqSpan(s, reduced_dna_alphabet, SeqSpan::BothStrand), sequence_invalid_strand);
56 BOOST_CHECK_THROW(SeqSpan(s, reduced_dna_alphabet, SeqSpan::SameStrand), sequence_invalid_strand);
57 BOOST_CHECK_THROW(SeqSpan(s, reduced_dna_alphabet, SeqSpan::OppositeStrand), sequence_invalid_strand);
58 BOOST_CHECK_THROW(SeqSpan(s, reduced_dna_alphabet, SeqSpan::BothStrand), sequence_invalid_strand);
61 BOOST_AUTO_TEST_CASE( seqspan_from_seqspan )
63 std::string str1("AAGGCCTT");
64 SeqSpanRef span1(new SeqSpan(str1));
65 SeqSpanRef span2(new SeqSpan(span1));
66 SeqSpanRef span3(new SeqSpan(span1));
68 BOOST_CHECK_EQUAL(span1->length(), str1.length());
69 BOOST_CHECK_EQUAL(span1->sequence(), str1);
70 BOOST_CHECK_EQUAL(span1->length(), span2->length());
71 BOOST_CHECK_EQUAL(span2->sequence(), str1);
73 BOOST_CHECK_EQUAL(span1->length(), span3->length());
74 BOOST_CHECK_EQUAL(span3->sequence(), str1);
// Contrasts a second reference to a span (span2ref, built from the SeqSpanRef
// itself) with a new SeqSpan constructed from it (span2copy): the assertions
// expect a change of start to be visible through the reference but not
// through the copy.
77 BOOST_AUTO_TEST_CASE( seqspan_copy )
79 SeqSpanRef span1(new SeqSpan("AAAAGGGG"));
80 SeqSpanRef span2 = span1->subseq(0,4);
81 SeqSpanRef span2ref(span2);
82 SeqSpanRef span2copy(new SeqSpan(span2));
// all three initially start at 0
84 BOOST_CHECK_EQUAL(span2->start(), 0);
85 BOOST_CHECK_EQUAL(span2ref->start(), 0);
86 BOOST_CHECK_EQUAL(span2copy->start(), 0);
// NOTE(review): the checks below expect span2's start to have moved to 2, but
// no statement in this listing performs that change (the braces of the test
// body are absent as well). Presumably a call that sets span2's start to 2
// belongs here -- confirm against the original file.
89 BOOST_CHECK_EQUAL(span2->start(), 2);
90 BOOST_CHECK_EQUAL(span2ref->start(), 2);
91 BOOST_CHECK_EQUAL(span2copy->start(), 0);
94 BOOST_AUTO_TEST_CASE( seqspan_equality )
96 std::string str1("AAGGCCTT");
97 std::string str2("AACCGGTT");
98 std::string str3("AACCGGTT");
99 SeqSpanRef span1(new SeqSpan(str1));
100 SeqSpanRef span1copy(new SeqSpan(span1));
101 SeqSpanRef span2(new SeqSpan(str2));
102 SeqSpanRef span3(new SeqSpan(str3));
104 BOOST_CHECK_EQUAL(*span1, *span1copy);
105 BOOST_CHECK(*span1 != *span3);
106 // if its different source strings, compare the underlying string
107 BOOST_CHECK(*span2 != *span3);
108 BOOST_CHECK_EQUAL(SeqSpan::isFamily(*span2, *span3), false);
111 BOOST_AUTO_TEST_CASE( seqspan_parents )
113 std::string str1("AAGGCCTT");
114 std::string str2("AACCGGTT");
115 SeqSpanRef s1(new SeqSpan(str1));
116 SeqSpanRef s1_1 = s1->subseq(2,4);
117 SeqSpanRef s2(new SeqSpan(str2));
118 SeqSpanRef s2_1 = s2->subseq(0,2);
120 BOOST_CHECK_EQUAL(s1, s1_1->parent());
121 BOOST_CHECK_EQUAL(s2, s2_1->parent());
122 BOOST_CHECK(s1 != s2_1->parent());
123 BOOST_CHECK(s2 != s1_1->parent());
125 SeqSpanRef s2_copy = s2;
127 BOOST_CHECK_EQUAL(s2_copy, s2_1->parent());
// Compares find_first_not_of() on a sequence object with the result
// std::string gives for the same text and the same character set.
// NOTE(review): seq1 and seq2 are used below but their declarations are not
// present in this listing (nor are the braces of the test body). They are
// accessed with '.', so presumably each is a SeqSpan object built from str1
// and str2 respectively -- confirm against the original file.
130 BOOST_AUTO_TEST_CASE( seqspan_find_first_not_of )
132 std::string str1("AAAAT");
134 BOOST_CHECK_EQUAL(seq1.find_first_not_of("A"), str1.find_first_not_of("A"));
136 std::string str2("AATTGGCC");
// none of the characters in "qwer" occur in str2
138 BOOST_CHECK_EQUAL(seq2.find_first_not_of("qwer"), str2.find_first_not_of("qwer"));
141 BOOST_AUTO_TEST_CASE( seqspan_at )
143 std::string str1("AAGGCCTT");
144 SeqSpanRef seq1(new SeqSpan(str1));
145 SeqSpanRef seq2(new SeqSpan(seq1, 2, 2));
147 BOOST_CHECK_EQUAL( seq1->at(0), str1.at(0) );
148 BOOST_CHECK_EQUAL( seq1->at(2), seq2->at(0) );
149 BOOST_CHECK_EQUAL( str1[2], seq2->at(0) );
150 BOOST_CHECK_EQUAL( (*seq1)[0], seq1->at(0) );
151 BOOST_CHECK_EQUAL( (*seq1)[2], (*seq2)[0] );
153 SeqSpanRef seq3 = seq1->subseq(0, 4, SeqSpan::OppositeStrand);
154 BOOST_CHECK_EQUAL( seq3->at(0), 'C');
155 BOOST_CHECK_EQUAL( seq3->at(1), 'C');
156 BOOST_CHECK_EQUAL( seq3->at(2), 'T');
157 BOOST_CHECK_EQUAL( seq3->at(3), 'T');
161 BOOST_AUTO_TEST_CASE( seqspan_data )
163 std::string str1("AAGGCCTT");
164 SeqSpanRef seq1(new SeqSpan(str1));
165 SeqSpanRef seq2(new SeqSpan(seq1, 3, 2));
167 BOOST_REQUIRE_EQUAL( str1.length(), seq1->length());
168 BOOST_CHECK_EQUAL( str1.data(), seq1->data() );
169 std::string str1sub = str1.substr(3,2);
170 BOOST_REQUIRE_EQUAL( seq2->size(), str1sub.size() );
171 BOOST_REQUIRE_EQUAL( seq2->length(), str1sub.length() );
172 for (int i = 0; i != seq2->size(); ++i) {
173 BOOST_CHECK_EQUAL( seq2->data()[i], str1sub.data()[i] );
177 BOOST_AUTO_TEST_CASE( seqspan_begin_end )
179 std::string str1("AAGGCCTT");
180 SeqSpanRef seq1(new SeqSpan(str1));
181 SeqSpanRef seq2(new SeqSpan(seq1, 2, 2));
183 BOOST_CHECK(seq1->begin() + 2 == seq2->begin());
185 std::string::const_iterator str1_i = str1.begin();
186 SeqSpan::const_iterator seq1_i = seq1->begin();
187 for(; not ((str1_i == str1.end()) or (seq1_i == seq1->end())); ++str1_i, ++seq1_i) {
188 BOOST_CHECK_EQUAL( *str1_i, *seq1_i );
192 BOOST_AUTO_TEST_CASE( seqspan_subseq_reverse_begin_end )
194 std::string str1("AAAACCTT");
195 std::string str1rc("AAGGTTTT");
196 SeqSpanRef seq1(new SeqSpan(str1));
197 SeqSpanRef seq2(new SeqSpan(seq1, 0, SeqSpan::npos, SeqSpan::OppositeStrand ));
200 std::string::const_iterator str1rc_i = str1rc.begin();
201 SeqSpan::const_iterator seq2_i = seq2->begin();
202 for(; not ((str1rc_i == str1rc.end()) or (seq2_i == seq2->end())); ++str1rc_i, ++seq2_i) {
203 BOOST_CHECK_EQUAL( *str1rc_i, *seq2_i );
207 BOOST_AUTO_TEST_CASE( seqspan_rbegin_rend )
209 std::string str1("AAGGCCTT");
210 SeqSpanRef seq1(new SeqSpan(str1));
212 std::string::const_reverse_iterator str1_i = str1.rbegin();
213 SeqSpan::const_reverse_iterator seq1_i = seq1->rbegin();
214 for(; seq1_i != seq1->rend(); ++str1_i, ++seq1_i) {
215 BOOST_CHECK_EQUAL( *str1_i, *seq1_i );
219 BOOST_AUTO_TEST_CASE( seqspan_empty_start_stop )
221 SeqSpanRef s1(new SeqSpan(""));
222 BOOST_CHECK_EQUAL( s1->start(), 0 );
223 BOOST_CHECK_EQUAL( s1->stop(), 0 );
225 BOOST_CHECK_EQUAL( s1->parentStart(), 0 );
226 BOOST_CHECK_EQUAL( s1->parentStop(), 0 );
228 BOOST_CHECK_EQUAL( s1->size(), 0 );
231 BOOST_AUTO_TEST_CASE( seqspan_global_start_stop )
233 std::string seq_string("AAGGCCTT");
234 SeqSpanRef s1(new SeqSpan(seq_string));
235 BOOST_CHECK_EQUAL( s1->start(), 0 );
236 BOOST_CHECK_EQUAL( s1->stop(), seq_string.size() );
238 std::string s2seq_string = seq_string.substr(2,3);
239 SeqSpanRef s2 = s1->subseq(2,3);
240 BOOST_CHECK_EQUAL( s2->start(), 2);
241 BOOST_CHECK_EQUAL( s2->stop(), 2+3);
242 BOOST_CHECK_EQUAL( s2->size(), 3);
243 BOOST_CHECK_EQUAL( s2->sequence(), s2seq_string);
245 std::string s3seq_string = s2seq_string.substr(1,1);
246 SeqSpanRef s3 = s2->subseq(1,1);
247 BOOST_CHECK_EQUAL( s3->start(), 2+1 );
248 BOOST_CHECK_EQUAL( s3->stop(), 2+1+1);
249 BOOST_CHECK_EQUAL( s3->size(), 1);
250 BOOST_CHECK_EQUAL( s3->sequence(), s3seq_string);
253 BOOST_AUTO_TEST_CASE( seqspan_global_to_large )
255 std::string seq_string("AAGGCCTT");
256 SeqSpanRef s1(new SeqSpan(seq_string));
257 BOOST_CHECK_EQUAL( s1->start(), 0 );
258 BOOST_CHECK_EQUAL( s1->stop(), seq_string.size() );
260 std::string s2seq_string = seq_string.substr(2,3);
261 SeqSpanRef s2 = s1->subseq(4,8);
262 BOOST_CHECK_EQUAL( s2->start(), 4);
263 BOOST_CHECK_EQUAL( s2->size(), 4);
264 BOOST_CHECK_EQUAL( s2->stop(), 8);
267 BOOST_AUTO_TEST_CASE( seqspan_parent_start_stop )
269 std::string seq_string("AAGGCCTT");
270 SeqSpanRef s1(new SeqSpan(seq_string));
271 BOOST_CHECK_EQUAL( s1->parentStart(), 0 );
272 BOOST_CHECK_EQUAL( s1->parentStop(), seq_string.size() );
274 std::string s2seq_string = seq_string.substr(2,3);
275 SeqSpanRef s2 = s1->subseq(2,3);
276 BOOST_CHECK_EQUAL( s2->parentStart(), 2);
277 BOOST_CHECK_EQUAL( s2->parentStop(), 2+3);
278 BOOST_CHECK_EQUAL( s2->size(), 3);
279 BOOST_CHECK_EQUAL( s2->sequence(), s2seq_string);
281 std::string s3seq_string = s2seq_string.substr(1,1);
282 SeqSpanRef s3 = s2->subseq(1,1);
283 BOOST_CHECK_EQUAL( s3->parentStart(), 1 );
284 BOOST_CHECK_EQUAL( s3->parentStop(), 1+1);
285 BOOST_CHECK_EQUAL( s3->size(), 1);
286 BOOST_CHECK_EQUAL( s3->sequence(), s3seq_string);
// Builds a three-level chain of spans (s1 -> s2 -> s3) and checks how the
// global (start/stop) and parent-relative (parentStart/parentStop) positions
// of the child s3 are expected to look before and after s2 is extended.
289 BOOST_AUTO_TEST_CASE( seqspan_global_mutable_start_stop )
291 std::string seq_string("AAGGCCTT");
292 SeqSpanRef s1(new SeqSpan(seq_string));
// s2: 3 bases starting at offset 2 of s1
294 std::string s2seq_string = seq_string.substr(2,3);
295 SeqSpanRef s2 = s1->subseq(2,3);
296 BOOST_CHECK_EQUAL( s2->start(), 2);
297 BOOST_CHECK_EQUAL( s2->stop(), 2+3);
298 BOOST_CHECK_EQUAL( s2->size(), 3);
299 BOOST_CHECK_EQUAL( s2->sequence(), s2seq_string);
// s3: 1 base at offset 1 within s2
301 std::string s3seq_string = s2seq_string.substr(1,1);
302 SeqSpanRef s3 = s2->subseq(1,1);
303 // Check root location
304 BOOST_CHECK_EQUAL( s3->start(), 2+1 );
305 BOOST_CHECK_EQUAL( s3->stop(), 2+1+1);
306 BOOST_CHECK_EQUAL( s3->size(), 1);
307 // Check parent location
308 BOOST_CHECK_EQUAL( s3->parentStart(), 1 );
309 BOOST_CHECK_EQUAL( s3->parentStop(), 1+1);
310 BOOST_CHECK_EQUAL( s3->sequence(), s3seq_string);
312 // Extend s2 to the left
// NOTE(review): the statement that actually extends s2 is not present in this
// listing (nor are the braces of the test body). The following checks expect
// s2 to start at 1, so presumably a call that sets s2's start to 1 belongs
// here -- confirm against the original file.
314 BOOST_CHECK_EQUAL( s2->start(), 1);
315 BOOST_CHECK_EQUAL( s2->stop(), 1+1+3);
316 BOOST_CHECK_EQUAL( s2->size(), 4);
318 // Child sequence should have the same global location
319 BOOST_CHECK_EQUAL( s3->start(), 2+1 );
320 BOOST_CHECK_EQUAL( s3->stop(), 2+1+1);
321 BOOST_CHECK_EQUAL( s3->size(), 1);
322 // Child sequence should now have different parent location
323 BOOST_CHECK_EQUAL( s3->parentStart(), 2 );
324 BOOST_CHECK_EQUAL( s3->parentStop(), 2+1);
// NOTE(review): this repeats the s3->size() check made a few lines above.
325 BOOST_CHECK_EQUAL( s3->size(), 1);
// Checks the sequence text expected from a minus-strand sub-span, from a
// same-strand child of it, and from the minus-strand span once it has grown
// by one base.
328 BOOST_AUTO_TEST_CASE( seqspan_global_mutable_start_stop_minus_strand )
330 std::string seq_string("AAAAGCTA");
331 SeqSpanRef s1(new SeqSpan(seq_string));
// offsets 2..4 of seq_string are "AAG"; the expected minus-strand text is "CTT"
333 SeqSpanRef s2 = s1->subseq(2,3, SeqSpan::MinusStrand);
334 BOOST_CHECK_EQUAL( s2->start(), 2);
335 BOOST_CHECK_EQUAL( s2->stop(), 2+3);
336 BOOST_CHECK_EQUAL( s2->size(), 3);
337 BOOST_CHECK_EQUAL( s2->sequence(), "CTT");
// a SameStrand child indexes into s2's own text ("CTT"), giving "TT"
339 SeqSpanRef s3 = s2->subseq(1,2, SeqSpan::SameStrand);
340 BOOST_CHECK_EQUAL(s3->sequence(), "TT");
342 // Could also argue that it should be CT
343 // if you assume that the locations are all relative to the global sequence
344 // and are then reverse complemented
// NOTE(review): the final check expects s2 to cover four bases ("CTTT"), but
// no statement in this listing changes s2 (the braces of the test body are
// absent as well). Presumably a call that moves s2's start to 1 belongs here
// -- confirm against the original file.
347 BOOST_CHECK_EQUAL( s2->sequence(), "CTTT");
350 BOOST_AUTO_TEST_CASE( seqspan_parent_mutable_start_stop )
352 std::string seq_string("AAGGCCTT");
353 SeqSpanRef s1(new SeqSpan(seq_string));
355 std::string s2seq_string = seq_string.substr(3,3);
356 SeqSpanRef s2 = s1->subseq(3,3);
357 BOOST_CHECK_EQUAL( s2->start(), 3);
358 BOOST_CHECK_EQUAL( s2->stop(), 3+3);
359 BOOST_CHECK_EQUAL( s2->size(), 3);
360 BOOST_CHECK_EQUAL( s2->sequence(), s2seq_string);
362 std::string s3seq_string = s2seq_string.substr(1,1);
363 SeqSpanRef s3 = s2->subseq(1,1);
364 // Check root location
365 BOOST_CHECK_EQUAL( s3->start(), 3+1 );
366 BOOST_CHECK_EQUAL( s3->stop(), 3+1+1);
367 BOOST_CHECK_EQUAL( s3->size(), 1);
368 // Check parent location
369 BOOST_CHECK_EQUAL( s3->parentStart(), 1 );
370 BOOST_CHECK_EQUAL( s3->parentStop(), 1+1);
371 BOOST_CHECK_EQUAL( s3->sequence(), s3seq_string);
373 // s2 should now be equivalent to s1->subseq(1,5)
374 s2->setParentStart(1);
375 BOOST_CHECK_EQUAL( s2->start(), 1);
376 BOOST_CHECK_EQUAL( s2->stop(), 3+3);
377 BOOST_CHECK_EQUAL( s2->size(), 5);
379 // Child sequence should have the same global location
380 BOOST_CHECK_EQUAL( s3->start(), 3+1 );
381 BOOST_CHECK_EQUAL( s3->stop(), 3+1+1);
382 BOOST_CHECK_EQUAL( s3->size(), 1);
383 // Child sequence should now have different parent location
384 BOOST_CHECK_EQUAL( s3->parentStart(), 1+2);
385 BOOST_CHECK_EQUAL( s3->parentStop(), 1+2+1);
388 // what happens if we set a stop past our actual end
// Builds the chain s1 -> s2 -> s3 and then checks the sizes expected after a
// span's stop is pushed beyond what its parent currently covers.
389 BOOST_AUTO_TEST_CASE( seqspan_stop_past_end )
391 std::string seq_string("AAGGCCTT");
392 SeqSpanRef s1(new SeqSpan(seq_string));
// s2: 3 bases starting at offset 3 of s1
393 std::string s2seq_string = seq_string.substr(3,3);
394 SeqSpanRef s2 = s1->subseq(3,3);
// s3: 1 base at offset 1 within s2
395 std::string s3seq_string = s2seq_string.substr(1,1);
396 SeqSpanRef s3 = s2->subseq(1,1);
// NOTE(review): each size check below expects a span to have been resized
// immediately beforehand, but none of those mutating statements is present in
// this listing (nor are the braces of the test body). Presumably the stop of
// s3, then of s2, then of s3 again is set past the current end -- confirm
// against the original file. s2seq_string and s3seq_string are not used by
// any visible line.
398 // should be limited by our parent sequence
400 BOOST_CHECK_EQUAL( s3->size(), 2);
403 BOOST_CHECK_EQUAL( s2->size(), 5);
406 BOOST_CHECK_EQUAL( s3->size(), 4);
409 BOOST_AUTO_TEST_CASE( seqspan_strand_sameother )
411 SeqSpanRef seq1(new SeqSpan("AAAAAGGGGG"));
412 BOOST_CHECK_EQUAL(seq1->strand(), SeqSpan::PlusStrand);
414 SeqSpanRef seq2 = seq1->subseq(0,4,SeqSpan::SameStrand);
415 BOOST_CHECK_EQUAL(seq2->sequence(), "AAAA");
416 BOOST_CHECK_EQUAL(seq2->strand(), SeqSpan::PlusStrand);
417 SeqSpanRef seq3 = seq1->subseq(0,4,SeqSpan::OppositeStrand);
418 BOOST_CHECK_EQUAL(seq3->sequence(), "TTTT");
419 BOOST_CHECK_EQUAL(seq3->strand(), SeqSpan::MinusStrand);
421 // opposite of a plus strand should be minus
422 SeqSpanRef seq4 = seq2->subseq(0,4,SeqSpan::OppositeStrand);
423 BOOST_CHECK_EQUAL(seq4->sequence(), "TTTT");
424 BOOST_CHECK_EQUAL(seq4->strand(), SeqSpan::MinusStrand);
425 // opposite of a minus strand should be plus
426 SeqSpanRef seq5 = seq3->subseq(0,4,SeqSpan::OppositeStrand);
427 BOOST_CHECK_EQUAL(seq5->sequence(), "AAAA");
428 BOOST_CHECK_EQUAL(seq5->strand(), SeqSpan::PlusStrand);
// Requests subsequences with the absolute strand values PlusStrand and
// MinusStrand from a plus-strand span and checks the text expected for each.
431 BOOST_AUTO_TEST_CASE( seqspan_strand_plusminus )
433 SeqSpanRef seq1(new SeqSpan("AAAAAGGGGG"));
434 BOOST_CHECK_EQUAL(seq1->strand(), SeqSpan::PlusStrand);
// the first four bases read on the plus strand
436 SeqSpanRef seq2 = seq1->subseq(0,4,SeqSpan::PlusStrand);
437 BOOST_CHECK_EQUAL(seq2->sequence(), "AAAA");
// the same four bases read on the minus strand are expected to come back complemented
438 SeqSpanRef seq3 = seq1->subseq(0,4,SeqSpan::MinusStrand);
439 BOOST_CHECK_EQUAL(seq3->sequence(), "TTTT");