1 /*==========================================================================
2 SeqAn - The Library for Sequence Analysis
4 ============================================================================
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 3 of the License, or (at your option) any later version.
12 This library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 ============================================================================
18 $Id: find_score.h,v 1.1 2008/08/25 16:20:07 langmead Exp $
19 ==========================================================================*/
21 #ifndef SEQAN_HEADER_FIND_SCORE_H
22 #define SEQAN_HEADER_FIND_SCORE_H
24 namespace SEQAN_NAMESPACE_MAIN
27 //////////////////////////////////////////////////////////////////////////////
29 //////////////////////////////////////////////////////////////////////////////
31 template <typename TScore>
38 ..general:Class.Pattern
39 ..summary:A dynamic programming algorithm for approximate string-matching with a user-definable scoring function.
40 ..signature:Pattern<TNeedle, DPSearch<TScore> >
41 ..param.TNeedle:The needle type.
43 ..param.TScore:The scoring function.
45 ..remarks.text:The algorithm is based on the Sellers/Needleman-Wunsch dynamic progamming algorithm.
46 The $Pattern$ object only contains the right-most column of the dynamic programming matrix.
47 ...note:At the moment, the algorithm only works on linear gap costs.
50 ///.Class.Pattern.param.TSpec.type:Class.Score
53 template <typename TNeedle, typename TScore>
54 class Pattern<TNeedle, DPSearch<TScore> >
57 typedef typename Value<TScore>::Type TScoreValue;
59 Holder<TNeedle> data_needle;
61 TScoreValue data_limit;
62 String<TScoreValue> data_tab;
71 Pattern(TNeedle & _needle,
73 TScoreValue _limit = 0):
74 data_score(_score_func),
78 setHost(*this, _needle);
81 Pattern(TNeedle & _needle,
82 TScoreValue _limit = 0):
86 setHost(*this, _needle);
89 Pattern(TScoreValue _limit):
96 Pattern(Pattern const & other):
97 data_needle( other.data_needle ),
98 data_score( other.data_score ),
99 data_limit( other.data_limit ),
100 data_tab( other.data_tab )
106 operator = (Pattern const & other)
109 this->data_needle = other.data_needle;
110 this->data_score = other.data_score;
111 this->data_limit = other.data_limit;
112 this->data_tab = other.data_tab;
119 //////////////////////////////////////////////////////////////////////////////
121 //////////////////////////////////////////////////////////////////////////////
125 template <typename TNeedle, typename TScore>
126 struct Host< Pattern<TNeedle, DPSearch<TScore> > >
128 typedef TNeedle Type;
131 template <typename TNeedle, typename TScore>
132 struct Host< Pattern<TNeedle, DPSearch<TScore> > const>
134 typedef TNeedle const Type;
138 //////////////////////////////////////////////////////////////////////////////
141 template <typename TNeedle, typename TScore>
142 inline typename Host<Pattern<TNeedle, DPSearch<TScore> > >::Type &
143 host(Pattern<TNeedle, DPSearch<TScore> > & me)
146 return value(me.data_needle);
149 template <typename TNeedle, typename TScore>
150 inline typename Host<Pattern<TNeedle, DPSearch<TScore> > const>::Type &
151 host(Pattern<TNeedle, DPSearch<TScore> > const & me)
154 return value(me.data_needle);
158 //____________________________________________________________________________
160 template <typename TNeedle, typename TScore, typename TNeedle2>
162 setHost(Pattern<TNeedle, DPSearch<TScore> > & me,
165 me.data_needle = ndl;
168 template <typename TNeedle, typename TScore, typename TNeedle2>
170 setHost(Pattern<TNeedle, DPSearch<TScore> > & me,
171 TNeedle2 const & ndl)
173 me.data_needle = ndl;
178 //____________________________________________________________________________
180 /**.Function.scoringScheme
182 ..summary:The @glos:scoring scheme@ used for finding or aligning.
183 ..signature:scoringScheme(obj)
184 ..param.obj:Object that holds a @glos:scoring scheme@
185 ...type:Spec.DPSearch
186 ..returns:The @glos:scoring scheme@ used in $obj$
187 ..see:glos:scoring scheme
190 template <typename TNeedle, typename TScore>
191 inline TScore const &
192 scoringScheme(Pattern<TNeedle, DPSearch<TScore> > & me)
195 return me.data_score;
198 //____________________________________________________________________________
200 /**.Function.setScoringScheme
202 ..summary:Sets the @glos:scoring scheme@ used for finding or aligning.
203 ..signature:setScoringScheme(obj, score)
204 ..param.obj:Object that holds a @glos:scoring scheme@.
205 ...type:Spec.DPSearch
206 ..param.score:The new @glos:scoring scheme@ used by $obj$.
207 ..see:glos:scoring scheme
208 ..see:Function.scoringScheme
211 template <typename TNeedle, typename TScore, typename TScore2>
213 setScoringScheme(Pattern<TNeedle, DPSearch<TScore> > & me,
217 me.data_score = score;
220 template <typename TNeedle, typename TScore, typename TScore2>
222 setScoringScheme(Pattern<TNeedle, DPSearch<TScore> > & me,
223 TScore2 const & score)
226 me.data_score = score;
230 //____________________________________________________________________________
233 /**.Function.scoreLimit
235 ..summary:The minimal score a match must reach in approximate searching.
236 ..signature:scoreLimit(pattern)
237 ..param.pattern:A @Concept.Pattern|pattern@ that can be used for approximate searching.
238 ...type:Spec.DPSearch
239 ..returns:The current score limit of $pattern$.
242 template <typename TNeedle, typename TScore>
243 inline typename Value<TScore>::Type
244 scoreLimit(Pattern<TNeedle, DPSearch<TScore> > const & me)
247 return me.data_limit;
250 //____________________________________________________________________________
252 /**.Function.setScoreLimit
254 ..summary:Sets the minimal score a match must reach in approximate searching.
255 ..signature:setScoreLimit(pattern, limit)
256 ..param.pattern:A @Concept.Pattern|pattern@ that can be used for approximate searching.
257 ...type:Spec.DPSearch
258 ..param.limit:The new score limit.
259 ..see:Function.scoreLimit
262 template <typename TNeedle, typename TScore, typename TScoreValue>
264 setScoreLimit(Pattern<TNeedle, DPSearch<TScore> > & me,
268 me.data_limit = _limit;
271 //____________________________________________________________________________
272 // returns the score of the last hit position found (note:position = end of occurrence in haystack)
274 /**.Function.getScore
276 ..summary:Score of the last found match in approximate searching.
277 ..signature:getScore(pattern)
278 ..param.pattern:A @Concept.Pattern|pattern@ that can be used for approximate searching.
279 ...type:Spec.DPSearch
280 ..returns:The score of the last match found using $pattern$.
281 ...remarks:If no match was found, the value is undefined.
282 ..see:Function.scoreLimit
283 ..see:Function.setScoreLimit
287 template <typename TNeedle, typename TScore>
288 inline typename Value<TScore>::Type
289 getScore(Pattern<TNeedle, DPSearch<TScore> > & me)
291 return front(me.data_tab);
294 //////////////////////////////////////////////////////////////////////////////
297 template <typename TNeedle, typename TScore>
298 inline void _patternInit (Pattern<TNeedle, DPSearch<TScore> > & me)
300 typedef Pattern<TNeedle, DPSearch<TScore> > TPattern;
301 typedef typename Value<TScore>::Type TScoreValue;
303 typedef typename Size<TPattern>::Type TSize;
305 typedef String<TScoreValue> TTab;
306 typedef typename Iterator<TTab, Standard>::Type TIterator;
308 TScoreValue score_gap = scoreGapExtend(scoringScheme(me));
310 TTab & string_tab = me.data_tab;
312 //allocate enough memory for one column of DP matrix
313 TSize need_length = length(needle(me));
314 SEQAN_ASSERT(need_length);
316 TSize got_length = resize(string_tab, need_length);
317 SEQAN_ASSERT(got_length >= need_length);
319 // if (length(_dataNeedle(me)) < got_length) throw(0); //???TODO: Throw "not enough memory" exception
322 //note: The column is stored in reverse order
323 TIterator tab_end = begin(string_tab, Standard());
324 TIterator tab = end(string_tab, Standard());
326 TScoreValue x = score_gap;
328 while (tab > tab_end)
338 //////////////////////////////////////////////////////////////////////////////
340 //////////////////////////////////////////////////////////////////////////////
342 //proportional gap cost: Needleman-Wunsch
344 //???TODO: Ukkonen trick?
345 //???TODO: Finder for affine gap costs?
347 template <typename TFinder, typename TNeedle, typename TScore>
349 _find_score_simple_proportional(TFinder & finder, Pattern<TNeedle, DPSearch<TScore> > & me)
351 typedef typename Value<TScore>::Type TScoreValue;
352 typedef String<TScoreValue> TTab;
353 typedef typename Iterator<TTab, Standard>::Type TTabIterator;
354 typedef typename Iterator<TNeedle const, Standard>::Type TNeedleIterator;
355 typedef typename Value<typename Haystack<TFinder>::Type>::Type THaystackValue;
357 String<TScoreValue> & string_tab = me.data_tab;
359 TScoreValue score_gap = scoreGapExtend(scoringScheme(me));
360 TScoreValue score_match = scoreMatch(scoringScheme(me));
361 TScoreValue score_mismatch = scoreMismatch(scoringScheme(me));
368 _finderSetNonEmpty(finder);
375 if (! length(me.data_tab))
381 TTabIterator tab_begin = end(string_tab, Standard());
383 TNeedleIterator it_begin = begin(host(me), Standard());
384 TNeedleIterator it_end = end(host(me), Standard());
386 //for each character in haystack, do...
387 for (; !atEnd(finder); ++finder)
390 THaystackValue c = *finder;
392 //init some variables
393 TNeedleIterator it = it_begin;
394 TScoreValue * tab = tab_begin;
401 --tab; //note: the column is stored in "reverse order"
403 TScoreValue m2 = (c == *it) ? h + score_match : h + score_mismatch;
405 TScoreValue m1 = (h > v) ? h + score_gap : v + score_gap;
407 v = (m1 > m2) ? m1 : m2;
413 if (*tab >= scoreLimit(me) )
424 //////////////////////////////////////////////////////////////////////////////
426 template <typename TFinder, typename TNeedle, typename TScore>
428 find(TFinder & finder,
429 Pattern<TNeedle, DPSearch<TScore> > & me)
431 SEQAN_ASSERT(scoreGapOpen(scoringScheme(me)) == scoreGapExtend(scoringScheme(me))) //this finder is only defined for linear gap costs
432 return _find_score_simple_proportional(finder, me);
435 template <typename TFinder, typename TNeedle, typename TScore>
437 find(TFinder & finder,
438 Pattern<TNeedle, DPSearch<TScore> > & me,
441 SEQAN_ASSERT(scoreGapOpen(scoringScheme(me)) == scoreGapExtend(scoringScheme(me))) //this finder is only defined for linear gap costs
442 setScoreLimit(me, limit_);
443 return _find_score_simple_proportional(finder, me);
447 //////////////////////////////////////////////////////////////////////////////
449 }// namespace SEQAN_NAMESPACE_MAIN
451 #endif //#ifndef SEQAN_HEADER_...