From 80fa4901503f9eb884d102a605d883c91a2adb18 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Tue, 29 Aug 2006 22:12:31 +0000 Subject: [PATCH] put basepair alphabets into the Sequence class Thanks to Google, I found the proper way to initialize a static const string. I also added the protein 1-letter alphabet. At some point it should be possible to initialize different alphabets for a sequence. --- alg/sequence.cpp | 18 ++++++++---------- alg/sequence.hpp | 11 +++++++++++ 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/alg/sequence.cpp b/alg/sequence.cpp index 0d1594f..f606597 100644 --- a/alg/sequence.cpp +++ b/alg/sequence.cpp @@ -37,15 +37,6 @@ namespace spirit = boost::spirit; #include #include -// some standard dna alphabets -// \012 = nl -// \015 = cr -// this should make our sequence parsing end-of-line convention -// independent -static const char* dna_alphabet = "AaCcGgTtNn\012\015"; -static const char* rna_alphabet = "AaCcGgNnUu\012\015"; -static const char* iupac_alphabet = "AaCcGgTtUuRrYyMmKkSsWwBbDdHhVvNn\012\015"; - annot::annot() : begin(0), end(0), @@ -84,6 +75,13 @@ motif::~motif() { } +const std::string Sequence::dna_alphabet("AaCcGgTtNn\012\015"); +const std::string Sequence::rna_alphabet("AaCcGgNnUu\012\015"); + //! this is the general iupac alphabet for nucleotides +const std::string Sequence::nucleic_iupac_alphabet("AaCcGgTtUuRrYyMmKkSsWwBbDdHhVvNn\012\015"); + //! 
the protein alphabet +const std::string Sequence::protein_alphabet("AaCcDdEeFfGgHhIiKkLlMmNnPpQqRrSsTtVvWwYy\012\015"); + Sequence::Sequence() : seq(""), header(""), @@ -444,7 +442,7 @@ Sequence::parse_annot(std::string data, int start_index, int end_index) ((spirit::ch_p('>')|spirit::str_p(">")) >> (*(spirit::print_p))[spirit::assign_a(name)] >> spirit::eol_p >> - (+(spirit::chset<>(iupac_alphabet)))[spirit::assign_a(seq)] + (+(spirit::chset<>(nucleic_iupac_alphabet.c_str())))[spirit::assign_a(seq)] )[push_back_seq(query_seqs, name, seq)] ) >> *spirit::space_p diff --git a/alg/sequence.hpp b/alg/sequence.hpp index 67f6c9f..0e0e2e6 100644 --- a/alg/sequence.hpp +++ b/alg/sequence.hpp @@ -93,6 +93,17 @@ public: typedef std::string::size_type size_type; static const size_type npos = std::string::npos; + // some standard dna alphabets + // Include nl (\012), and cr (\015) to make sequence parsing eol + // convention independent. + + static const std::string dna_alphabet; + static const std::string rna_alphabet; + //! this is the general iupac alphabet for nucleotides + static const std::string nucleic_iupac_alphabet; + //! the protein alphabet + static const std::string protein_alphabet; + Sequence(); ~Sequence(); Sequence(const char* seq); -- 2.30.2