1 #define BOOST_AUTO_TEST_MAIN
2 #include <boost/test/auto_unit_test.hpp>
4 #include "seq_span.hpp"
5 #include "mussa_exceptions.hpp"
9 BOOST_AUTO_TEST_CASE( seqspan_from_string )
11 std::string str1("AAGGCCTT");
12 SeqSpanRef span1(new SeqSpan(str1));
13 BOOST_CHECK_EQUAL(span1->length(), str1.length());
14 BOOST_CHECK_EQUAL(span1->sequence(), str1);
15 BOOST_CHECK_EQUAL(span1->strand(), SeqSpan::PlusStrand);
18 BOOST_AUTO_TEST_CASE( seqspan_from_string_with_alphabet )
20 std::string str1("AAGGCCUU");
21 SeqSpanRef span1(new SeqSpan(str1, reduced_rna_alphabet));
22 BOOST_CHECK_EQUAL(span1->length(), str1.length());
23 BOOST_CHECK_EQUAL(span1->sequence(), str1);
24 BOOST_CHECK_EQUAL(span1->get_alphabet(), Alphabet::reduced_rna_alphabet());
27 BOOST_AUTO_TEST_CASE( seqspan_from_string_with_alphabet_and_plusstrand )
29 std::string str1("AAGGCCUU");
30 SeqSpanRef span1(new SeqSpan(str1, reduced_rna_alphabet, SeqSpan::PlusStrand));
31 BOOST_CHECK_EQUAL(span1->length(), str1.length());
32 BOOST_CHECK_EQUAL(span1->sequence(), str1);
33 BOOST_CHECK_EQUAL(span1->get_alphabet(), Alphabet::reduced_rna_alphabet());
34 BOOST_CHECK_EQUAL(span1->strand(), SeqSpan::PlusStrand);
37 BOOST_AUTO_TEST_CASE( seqspan_from_string_with_alphabet_and_singlestrand )
39 std::string str1("AAAAGCT");
40 SeqSpanRef span1(new SeqSpan(str1, reduced_dna_alphabet, SeqSpan::SingleStrand));
41 BOOST_CHECK_EQUAL(span1->length(), str1.length());
42 BOOST_CHECK_EQUAL(span1->sequence(), str1);
43 BOOST_CHECK_EQUAL(span1->get_alphabet(), Alphabet::reduced_dna_alphabet());
44 // we always store strands as Plus
45 BOOST_CHECK_EQUAL(span1->strand(), SeqSpan::SingleStrand);
46 BOOST_CHECK_EQUAL(span1->sequence(), "AAAAGCT");
47 BOOST_CHECK_THROW(span1->subseq(0,2,SeqSpan::OppositeStrand), sequence_invalid_strand);
50 BOOST_AUTO_TEST_CASE( seqspan_from_string_with_invalidstrand )
52 std::string s("AAAAGCT");
53 BOOST_CHECK_THROW(SeqSpan(s, reduced_dna_alphabet, SeqSpan::UnknownStrand), sequence_invalid_strand);
54 BOOST_CHECK_THROW(SeqSpan(s, reduced_dna_alphabet, SeqSpan::BothStrand), sequence_invalid_strand);
55 BOOST_CHECK_THROW(SeqSpan(s, reduced_dna_alphabet, SeqSpan::SameStrand), sequence_invalid_strand);
56 BOOST_CHECK_THROW(SeqSpan(s, reduced_dna_alphabet, SeqSpan::OppositeStrand), sequence_invalid_strand);
57 BOOST_CHECK_THROW(SeqSpan(s, reduced_dna_alphabet, SeqSpan::BothStrand), sequence_invalid_strand);
60 BOOST_AUTO_TEST_CASE( seqspan_from_seqspan )
62 std::string str1("AAGGCCTT");
63 SeqSpanRef span1(new SeqSpan(str1));
64 SeqSpanRef span2(new SeqSpan(span1));
65 SeqSpanRef span3(new SeqSpan(span1));
67 BOOST_CHECK_EQUAL(span1->length(), str1.length());
68 BOOST_CHECK_EQUAL(span1->sequence(), str1);
69 BOOST_CHECK_EQUAL(span1->length(), span2->length());
70 BOOST_CHECK_EQUAL(span2->sequence(), str1);
72 BOOST_CHECK_EQUAL(span1->length(), span3->length());
73 BOOST_CHECK_EQUAL(span3->sequence(), str1);
76 BOOST_AUTO_TEST_CASE( seqspan_copy )
78 SeqSpanRef span1(new SeqSpan("AAAAGGGG"));
79 SeqSpanRef span2 = span1->subseq(0,4);
80 SeqSpanRef span2ref(span2);
81 SeqSpanRef span2copy(new SeqSpan(span2));
83 BOOST_CHECK_EQUAL(span2->start(), 0);
84 BOOST_CHECK_EQUAL(span2ref->start(), 0);
85 BOOST_CHECK_EQUAL(span2copy->start(), 0);
88 BOOST_CHECK_EQUAL(span2->start(), 2);
89 BOOST_CHECK_EQUAL(span2ref->start(), 2);
90 BOOST_CHECK_EQUAL(span2copy->start(), 0);
93 BOOST_AUTO_TEST_CASE( seqspan_equality )
95 std::string str1("AAGGCCTT");
96 std::string str2("AACCGGTT");
97 std::string str3("AACCGGTT");
98 SeqSpanRef span1(new SeqSpan(str1));
99 SeqSpanRef span1copy(new SeqSpan(span1));
100 SeqSpanRef span2(new SeqSpan(str2));
101 SeqSpanRef span3(new SeqSpan(str3));
103 BOOST_CHECK_EQUAL(*span1, *span1copy);
104 BOOST_CHECK(*span1 != *span3);
105 // if its different source strings, compare the underlying string
106 BOOST_CHECK(*span2 != *span3);
107 BOOST_CHECK_EQUAL(SeqSpan::isFamily(*span2, *span3), false);
110 BOOST_AUTO_TEST_CASE( seqspan_find_first_not_of )
112 std::string str1("AAAAT");
114 BOOST_CHECK_EQUAL(seq1.find_first_not_of("A"), str1.find_first_not_of("A"));
116 std::string str2("AATTGGCC");
118 BOOST_CHECK_EQUAL(seq2.find_first_not_of("qwer"), str2.find_first_not_of("qwer"));
121 BOOST_AUTO_TEST_CASE( seqspan_at )
123 std::string str1("AAGGCCTT");
124 SeqSpanRef seq1(new SeqSpan(str1));
125 SeqSpanRef seq2(new SeqSpan(seq1, 2, 2));
127 BOOST_CHECK_EQUAL( seq1->at(0), str1.at(0) );
128 BOOST_CHECK_EQUAL( seq1->at(2), seq2->at(0) );
129 BOOST_CHECK_EQUAL( str1[2], seq2->at(0) );
130 BOOST_CHECK_EQUAL( (*seq1)[0], seq1->at(0) );
131 BOOST_CHECK_EQUAL( (*seq1)[2], (*seq2)[0] );
133 SeqSpanRef seq3 = seq1->subseq(0, 4, SeqSpan::OppositeStrand);
134 BOOST_CHECK_EQUAL( seq3->at(0), 'C');
135 BOOST_CHECK_EQUAL( seq3->at(1), 'C');
136 BOOST_CHECK_EQUAL( seq3->at(2), 'T');
137 BOOST_CHECK_EQUAL( seq3->at(3), 'T');
141 BOOST_AUTO_TEST_CASE( seqspan_data )
143 std::string str1("AAGGCCTT");
144 SeqSpanRef seq1(new SeqSpan(str1));
145 SeqSpanRef seq2(new SeqSpan(seq1, 3, 2));
147 BOOST_REQUIRE_EQUAL( str1.length(), seq1->length());
148 BOOST_CHECK_EQUAL( str1.data(), seq1->data() );
149 std::string str1sub = str1.substr(3,2);
150 BOOST_REQUIRE_EQUAL( seq2->size(), str1sub.size() );
151 BOOST_REQUIRE_EQUAL( seq2->length(), str1sub.length() );
152 for (int i = 0; i != seq2->size(); ++i) {
153 BOOST_CHECK_EQUAL( seq2->data()[i], str1sub.data()[i] );
157 BOOST_AUTO_TEST_CASE( seqspan_begin_end )
159 std::string str1("AAGGCCTT");
160 SeqSpanRef seq1(new SeqSpan(str1));
161 SeqSpanRef seq2(new SeqSpan(seq1, 2, 2));
163 BOOST_CHECK(seq1->begin() + 2 == seq2->begin());
165 std::string::const_iterator str1_i = str1.begin();
166 SeqSpan::const_iterator seq1_i = seq1->begin();
167 for(; not ((str1_i == str1.end()) or (seq1_i == seq1->end())); ++str1_i, ++seq1_i) {
168 BOOST_CHECK_EQUAL( *str1_i, *seq1_i );
172 BOOST_AUTO_TEST_CASE( seqspan_subseq_reverse_begin_end )
174 std::string str1("AAAACCTT");
175 std::string str1rc("AAGGTTTT");
176 SeqSpanRef seq1(new SeqSpan(str1));
177 SeqSpanRef seq2(new SeqSpan(seq1, 0, SeqSpan::npos, SeqSpan::OppositeStrand ));
180 std::string::const_iterator str1rc_i = str1rc.begin();
181 SeqSpan::const_iterator seq2_i = seq2->begin();
182 for(; not ((str1rc_i == str1rc.end()) or (seq2_i == seq2->end())); ++str1rc_i, ++seq2_i) {
183 BOOST_CHECK_EQUAL( *str1rc_i, *seq2_i );
187 BOOST_AUTO_TEST_CASE( seqspan_rbegin_rend )
189 std::string str1("AAGGCCTT");
190 SeqSpanRef seq1(new SeqSpan(str1));
192 std::string::const_reverse_iterator str1_i = str1.rbegin();
193 SeqSpan::const_reverse_iterator seq1_i = seq1->rbegin();
194 for(; seq1_i != seq1->rend(); ++str1_i, ++seq1_i) {
195 BOOST_CHECK_EQUAL( *str1_i, *seq1_i );
199 BOOST_AUTO_TEST_CASE( seqspan_empty_start_stop )
201 SeqSpanRef s1(new SeqSpan(""));
202 BOOST_CHECK_EQUAL( s1->start(), 0 );
203 BOOST_CHECK_EQUAL( s1->stop(), 0 );
205 BOOST_CHECK_EQUAL( s1->parentStart(), 0 );
206 BOOST_CHECK_EQUAL( s1->parentStop(), 0 );
208 BOOST_CHECK_EQUAL( s1->size(), 0 );
211 BOOST_AUTO_TEST_CASE( seqspan_global_start_stop )
213 std::string seq_string("AAGGCCTT");
214 SeqSpanRef s1(new SeqSpan(seq_string));
215 BOOST_CHECK_EQUAL( s1->start(), 0 );
216 BOOST_CHECK_EQUAL( s1->stop(), seq_string.size() );
218 std::string s2seq_string = seq_string.substr(2,3);
219 SeqSpanRef s2 = s1->subseq(2,3);
220 BOOST_CHECK_EQUAL( s2->start(), 2);
221 BOOST_CHECK_EQUAL( s2->stop(), 2+3);
222 BOOST_CHECK_EQUAL( s2->size(), 3);
223 BOOST_CHECK_EQUAL( s2->sequence(), s2seq_string);
225 std::string s3seq_string = s2seq_string.substr(1,1);
226 SeqSpanRef s3 = s2->subseq(1,1);
227 BOOST_CHECK_EQUAL( s3->start(), 2+1 );
228 BOOST_CHECK_EQUAL( s3->stop(), 2+1+1);
229 BOOST_CHECK_EQUAL( s3->size(), 1);
230 BOOST_CHECK_EQUAL( s3->sequence(), s3seq_string);
233 BOOST_AUTO_TEST_CASE( seqspan_global_to_large )
235 std::string seq_string("AAGGCCTT");
236 SeqSpanRef s1(new SeqSpan(seq_string));
237 BOOST_CHECK_EQUAL( s1->start(), 0 );
238 BOOST_CHECK_EQUAL( s1->stop(), seq_string.size() );
240 std::string s2seq_string = seq_string.substr(2,3);
241 SeqSpanRef s2 = s1->subseq(4,8);
242 BOOST_CHECK_EQUAL( s2->start(), 4);
243 BOOST_CHECK_EQUAL( s2->size(), 4);
244 BOOST_CHECK_EQUAL( s2->stop(), 8);
247 BOOST_AUTO_TEST_CASE( seqspan_parent_start_stop )
249 std::string seq_string("AAGGCCTT");
250 SeqSpanRef s1(new SeqSpan(seq_string));
251 BOOST_CHECK_EQUAL( s1->parentStart(), 0 );
252 BOOST_CHECK_EQUAL( s1->parentStop(), seq_string.size() );
254 std::string s2seq_string = seq_string.substr(2,3);
255 SeqSpanRef s2 = s1->subseq(2,3);
256 BOOST_CHECK_EQUAL( s2->parentStart(), 2);
257 BOOST_CHECK_EQUAL( s2->parentStop(), 2+3);
258 BOOST_CHECK_EQUAL( s2->size(), 3);
259 BOOST_CHECK_EQUAL( s2->sequence(), s2seq_string);
261 std::string s3seq_string = s2seq_string.substr(1,1);
262 SeqSpanRef s3 = s2->subseq(1,1);
263 BOOST_CHECK_EQUAL( s3->parentStart(), 1 );
264 BOOST_CHECK_EQUAL( s3->parentStop(), 1+1);
265 BOOST_CHECK_EQUAL( s3->size(), 1);
266 BOOST_CHECK_EQUAL( s3->sequence(), s3seq_string);
269 BOOST_AUTO_TEST_CASE( seqspan_global_mutable_start_stop )
271 std::string seq_string("AAGGCCTT");
272 SeqSpanRef s1(new SeqSpan(seq_string));
274 std::string s2seq_string = seq_string.substr(2,3);
275 SeqSpanRef s2 = s1->subseq(2,3);
276 BOOST_CHECK_EQUAL( s2->start(), 2);
277 BOOST_CHECK_EQUAL( s2->stop(), 2+3);
278 BOOST_CHECK_EQUAL( s2->size(), 3);
279 BOOST_CHECK_EQUAL( s2->sequence(), s2seq_string);
281 std::string s3seq_string = s2seq_string.substr(1,1);
282 SeqSpanRef s3 = s2->subseq(1,1);
283 // Check root location
284 BOOST_CHECK_EQUAL( s3->start(), 2+1 );
285 BOOST_CHECK_EQUAL( s3->stop(), 2+1+1);
286 BOOST_CHECK_EQUAL( s3->size(), 1);
287 // Check parent location
288 BOOST_CHECK_EQUAL( s3->parentStart(), 1 );
289 BOOST_CHECK_EQUAL( s3->parentStop(), 1+1);
290 BOOST_CHECK_EQUAL( s3->sequence(), s3seq_string);
292 // Extend s2 to the left
294 BOOST_CHECK_EQUAL( s2->start(), 1);
295 BOOST_CHECK_EQUAL( s2->stop(), 1+1+3);
296 BOOST_CHECK_EQUAL( s2->size(), 4);
298 // Child sequence should have the same global location
299 BOOST_CHECK_EQUAL( s3->start(), 2+1 );
300 BOOST_CHECK_EQUAL( s3->stop(), 2+1+1);
301 BOOST_CHECK_EQUAL( s3->size(), 1);
302 // Child sequence should now have different parent location
303 BOOST_CHECK_EQUAL( s3->parentStart(), 2 );
304 BOOST_CHECK_EQUAL( s3->parentStop(), 2+1);
305 BOOST_CHECK_EQUAL( s3->size(), 1);
308 BOOST_AUTO_TEST_CASE( seqspan_global_mutable_start_stop_minus_strand )
310 std::string seq_string("AAAAGCTA");
311 SeqSpanRef s1(new SeqSpan(seq_string));
313 SeqSpanRef s2 = s1->subseq(2,3, SeqSpan::MinusStrand);
314 BOOST_CHECK_EQUAL( s2->start(), 2);
315 BOOST_CHECK_EQUAL( s2->stop(), 2+3);
316 BOOST_CHECK_EQUAL( s2->size(), 3);
317 BOOST_CHECK_EQUAL( s2->sequence(), "CTT");
319 SeqSpanRef s3 = s2->subseq(1,2, SeqSpan::SameStrand);
320 BOOST_CHECK_EQUAL(s3->sequence(), "TT");
322 // Could also argue that it should be CT
323 // if you assume that the locations are all relative to the global sequence
324 // and are then reverse complemented
327 BOOST_CHECK_EQUAL( s2->sequence(), "CTTT");
330 BOOST_AUTO_TEST_CASE( seqspan_parent_mutable_start_stop )
332 std::string seq_string("AAGGCCTT");
333 SeqSpanRef s1(new SeqSpan(seq_string));
335 std::string s2seq_string = seq_string.substr(3,3);
336 SeqSpanRef s2 = s1->subseq(3,3);
337 BOOST_CHECK_EQUAL( s2->start(), 3);
338 BOOST_CHECK_EQUAL( s2->stop(), 3+3);
339 BOOST_CHECK_EQUAL( s2->size(), 3);
340 BOOST_CHECK_EQUAL( s2->sequence(), s2seq_string);
342 std::string s3seq_string = s2seq_string.substr(1,1);
343 SeqSpanRef s3 = s2->subseq(1,1);
344 // Check root location
345 BOOST_CHECK_EQUAL( s3->start(), 3+1 );
346 BOOST_CHECK_EQUAL( s3->stop(), 3+1+1);
347 BOOST_CHECK_EQUAL( s3->size(), 1);
348 // Check parent location
349 BOOST_CHECK_EQUAL( s3->parentStart(), 1 );
350 BOOST_CHECK_EQUAL( s3->parentStop(), 1+1);
351 BOOST_CHECK_EQUAL( s3->sequence(), s3seq_string);
353 // s2 should now be equivalent to s1->subseq(1,5)
354 s2->setParentStart(1);
355 BOOST_CHECK_EQUAL( s2->start(), 1);
356 BOOST_CHECK_EQUAL( s2->stop(), 3+3);
357 BOOST_CHECK_EQUAL( s2->size(), 5);
359 // Child sequence should have the same global location
360 BOOST_CHECK_EQUAL( s3->start(), 3+1 );
361 BOOST_CHECK_EQUAL( s3->stop(), 3+1+1);
362 BOOST_CHECK_EQUAL( s3->size(), 1);
363 // Child sequence should now have different parent location
364 BOOST_CHECK_EQUAL( s3->parentStart(), 1+2);
365 BOOST_CHECK_EQUAL( s3->parentStop(), 1+2+1);
368 // what happens if we set a stop past our actual end
369 BOOST_AUTO_TEST_CASE( seqspan_stop_past_end )
371 std::string seq_string("AAGGCCTT");
372 SeqSpanRef s1(new SeqSpan(seq_string));
373 std::string s2seq_string = seq_string.substr(3,3);
374 SeqSpanRef s2 = s1->subseq(3,3);
375 std::string s3seq_string = s2seq_string.substr(1,1);
376 SeqSpanRef s3 = s2->subseq(1,1);
378 // should be limited by our parent sequence
380 BOOST_CHECK_EQUAL( s3->size(), 2);
383 BOOST_CHECK_EQUAL( s2->size(), 5);
386 BOOST_CHECK_EQUAL( s3->size(), 4);
389 BOOST_AUTO_TEST_CASE( seqspan_strand_sameother )
391 SeqSpanRef seq1(new SeqSpan("AAAAAGGGGG"));
392 BOOST_CHECK_EQUAL(seq1->strand(), SeqSpan::PlusStrand);
394 SeqSpanRef seq2 = seq1->subseq(0,4,SeqSpan::SameStrand);
395 BOOST_CHECK_EQUAL(seq2->sequence(), "AAAA");
396 BOOST_CHECK_EQUAL(seq2->strand(), SeqSpan::PlusStrand);
397 SeqSpanRef seq3 = seq1->subseq(0,4,SeqSpan::OppositeStrand);
398 BOOST_CHECK_EQUAL(seq3->sequence(), "TTTT");
399 BOOST_CHECK_EQUAL(seq3->strand(), SeqSpan::MinusStrand);
401 // opposite of a plus strand should be minus
402 SeqSpanRef seq4 = seq2->subseq(0,4,SeqSpan::OppositeStrand);
403 BOOST_CHECK_EQUAL(seq4->sequence(), "TTTT");
404 BOOST_CHECK_EQUAL(seq4->strand(), SeqSpan::MinusStrand);
405 // opposite of a minus strand should be plus
406 SeqSpanRef seq5 = seq3->subseq(0,4,SeqSpan::OppositeStrand);
407 BOOST_CHECK_EQUAL(seq5->sequence(), "AAAA");
408 BOOST_CHECK_EQUAL(seq5->strand(), SeqSpan::PlusStrand);
411 BOOST_AUTO_TEST_CASE( seqspan_strand_plusminus )
413 SeqSpanRef seq1(new SeqSpan("AAAAAGGGGG"));
414 BOOST_CHECK_EQUAL(seq1->strand(), SeqSpan::PlusStrand);
416 SeqSpanRef seq2 = seq1->subseq(0,4,SeqSpan::PlusStrand);
417 BOOST_CHECK_EQUAL(seq2->sequence(), "AAAA");
418 SeqSpanRef seq3 = seq1->subseq(0,4,SeqSpan::MinusStrand);
419 BOOST_CHECK_EQUAL(seq3->sequence(), "TTTT");