Commit patch to not break on spaces.
[bowtie.git] / SeqAn-1.1 / seqan / index / shape_gapped.h
1  /*==========================================================================
2                 SeqAn - The Library for Sequence Analysis
3                           http://www.seqan.de 
4  ============================================================================
5   Copyright (C) 2007
6
7   This library is free software; you can redistribute it and/or
8   modify it under the terms of the GNU Lesser General Public
9   License as published by the Free Software Foundation; either
10   version 3 of the License, or (at your option) any later version.
11
12   This library is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15   Lesser General Public License for more details.
16
17  ============================================================================
18   $Id: shape_gapped.h,v 1.1 2008/08/25 16:20:05 langmead Exp $
19  ==========================================================================*/
20
21 #ifndef SEQAN_HEADER_SHAPE_GAPPED_H
22 #define SEQAN_HEADER_SHAPE_GAPPED_H
23
24 namespace SEQAN_NAMESPACE_MAIN
25 {
26
27 //////////////////////////////////////////////////////////////////////////////
28 // HardwiredShape allows compiler-time defined gapped shape
29
30 /**
31 .Class.HardwiredShape:
32 ..cat:Index
33 ..summary:A structure to define a fixed gapped shape.
34 ..signature:HardwiredShape<P1, P2, ..., Pn>
35 ..param.P1, P2, ..., Pn:Px is the distance of the x'th '1' to the next '1' in the shape.
36 ...remarks:At most 20 parameters are allowed, so the maximal shape weight is 21.
37 ..remarks:You can use this structure to define your one gapped shapes in conjunction with @Spec.FixedGappedShape@.
38 ..note:The shape $1100101$ corresponds to $HardwiredShape<1,3,2>$.
39 ..note:The following predefined shapes are already available in $seqan/index/shape_predefined.h$:
40 ..file:../projects/library/seqan/index/shape_predefined.h
41 */
42
43         // Pxx = spaces between '1's
44         template <
45                 int P00 = 0, int P01 = 0, int P02 = 0, int P03 = 0, int P04 = 0, 
46                 int P05 = 0, int P06 = 0, int P07 = 0, int P08 = 0, int P09 = 0,
47                 int P10 = 0, int P11 = 0, int P12 = 0, int P13 = 0, int P14 = 0,
48                 int P15 = 0, int P16 = 0, int P17 = 0, int P18 = 0, int P19 = 0 
49         >
50         struct HardwiredShape {
51                 static const int DIFFS[];
52         };
53
54         template <
55                 int P00, int P01, int P02, int P03, int P04, 
56                 int P05, int P06, int P07, int P08, int P09,
57                 int P10, int P11, int P12, int P13, int P14,
58                 int P15, int P16, int P17, int P18, int P19     
59         >
60         const int HardwiredShape<
61                 P00,P01,P02,P03,P04,
62                 P05,P06,P07,P08,P09,
63                 P10,P11,P12,P13,P14,
64                 P15,P16,P17,P18,P19 
65         >::DIFFS[] = {
66                 P00,P01,P02,P03,P04,
67                 P05,P06,P07,P08,P09,
68                 P10,P11,P12,P13,P14,
69                 P15,P16,P17,P18,P19, 0 
70         };
71
72
73 //////////////////////////////////////////////////////////////////////////////
74 // LENGTH meta-function for fixed gapped shapes
75
76         template <>
77         struct LENGTH< HardwiredShape<
78                 0,0,0,0,0,
79                 0,0,0,0,0,
80                 0,0,0,0,0,
81                 0,0,0,0,0> >
82         {
83                 enum { VALUE = 1 };
84         };
85
86         template <
87                 int P00, int P01, int P02, int P03, int P04, 
88                 int P05, int P06, int P07, int P08, int P09,
89                 int P10, int P11, int P12, int P13, int P14,
90                 int P15, int P16, int P17, int P18, int P19     
91         >
92         struct LENGTH< HardwiredShape<
93                 P00,P01,P02,P03,P04,
94                 P05,P06,P07,P08,P09,
95                 P10,P11,P12,P13,P14,
96                 P15,P16,P17,P18,P19> >
97         {
98                 enum { VALUE = LENGTH< HardwiredShape<
99                         P01,P02,P03,P04,P05,
100                         P06,P07,P08,P09,P10,
101                         P11,P12,P13,P14,P15,
102                         P16,P17,P18,P19, 0 > >::VALUE + P00 };
103         };
104
105         template <
106             typename TValue,
107                 int P00, int P01, int P02, int P03, int P04, 
108                 int P05, int P06, int P07, int P08, int P09,
109                 int P10, int P11, int P12, int P13, int P14,
110                 int P15, int P16, int P17, int P18, int P19     
111         >
112         struct LENGTH< Shape<TValue, FixedGappedShape< HardwiredShape<
113                 P00,P01,P02,P03,P04,
114                 P05,P06,P07,P08,P09,
115                 P10,P11,P12,P13,P14,
116                 P15,P16,P17,P18,P19 
117         > > > >:
118         LENGTH< HardwiredShape<
119                 P00,P01,P02,P03,P04,
120                 P05,P06,P07,P08,P09,
121                 P10,P11,P12,P13,P14,
122                 P15,P16,P17,P18,P19 
123         > > {};
124
125
126 //////////////////////////////////////////////////////////////////////////////
127 // WEIGHT meta-function for fixed gapped shapes
128
129         template <>
130         struct WEIGHT< HardwiredShape<
131                 0,0,0,0,0,
132                 0,0,0,0,0,
133                 0,0,0,0,0,
134                 0,0,0,0,0> >
135         {
136                 enum { VALUE = 1 };
137         };
138
139         template <
140                 int P00, int P01, int P02, int P03, int P04, 
141                 int P05, int P06, int P07, int P08, int P09,
142                 int P10, int P11, int P12, int P13, int P14,
143                 int P15, int P16, int P17, int P18, int P19     
144         >
145         struct WEIGHT< HardwiredShape<
146                 P00,P01,P02,P03,P04,
147                 P05,P06,P07,P08,P09,
148                 P10,P11,P12,P13,P14,
149                 P15,P16,P17,P18,P19> >
150         {
151                 enum { VALUE = WEIGHT< HardwiredShape<
152                         P01,P02,P03,P04,P05,
153                         P06,P07,P08,P09,P10,
154                         P11,P12,P13,P14,P15,
155                         P16,P17,P18,P19, 0 > >::VALUE + 1 };
156         };
157
158         template <
159             typename TValue,
160                 int P00, int P01, int P02, int P03, int P04, 
161                 int P05, int P06, int P07, int P08, int P09,
162                 int P10, int P11, int P12, int P13, int P14,
163                 int P15, int P16, int P17, int P18, int P19     
164         >
165         struct WEIGHT< Shape<TValue, FixedGappedShape< HardwiredShape<
166                 P00,P01,P02,P03,P04,
167                 P05,P06,P07,P08,P09,
168                 P10,P11,P12,P13,P14,
169                 P15,P16,P17,P18,P19 
170         > > > >:
171         WEIGHT< HardwiredShape<
172                 P00,P01,P02,P03,P04,
173                 P05,P06,P07,P08,P09,
174                 P10,P11,P12,P13,P14,
175                 P15,P16,P17,P18,P19 
176         > > {};
177
178
179 //////////////////////////////////////////////////////////////////////////////
180
181 /**
182 .Spec.GappedShape:
183 ..cat:Index
184 ..summary:A variable gapped shape.
185 ..general:Class.Shape
186 ..signature:Shape<TValue, GappedShape>
187 ..param.TValue:The @Metafunction.Value@ type of the string the shape is applied to (e.g. $Dna$).
188 ..remarks:A GappedShape must be initialized first with a valid shape. To do so, call @Function.stringToShape@.
189 ..see:Spec.FixedGappedShape
190 */
191
192         //////////////////////////////////////////////////////////////////////////////
193         // variable gapped shape
194         //////////////////////////////////////////////////////////////////////////////
195
196         template <typename TValue>
197         class Shape<TValue, GappedShape>
198         {
199         public:
200 //____________________________________________________________________________
201
202                 unsigned span;
203                 unsigned weight;
204                 String<int> diffs;
205         
206                 typename Value<Shape>::Type     hValue;         // current hash value
207 //____________________________________________________________________________
208
209 /**
210 .Memfunc.GappedShape#Shape:
211 ..class:Spec.GappedShape
212 ..summary:Constructor
213 ..signature:Shape<TValue, GappedShape> ()
214 ..signature:Shape<TValue, GappedShape> (q)
215 ..signature:Shape<TValue, GappedShape> (shape)
216 ..signature:Shape<TValue, GappedShape> (predefined)
217 ..param.q:Creates an ungapped q-gram.
218 ..param.shape:Any other gapped/ungapped shape.
219 ..param.predefined:Any instance of a predefined shape spec (e.g. $ShapePatternHunter$).
220 ..see:Class.HardwiredShape
221 */
222                 Shape() {}
223
224                 // c'tor for ungapped shapes
225                 Shape(unsigned _span):
226                         span(_span),
227                         weight(_span)
228                 {
229                 SEQAN_CHECKPOINT
230                         resize(diffs, _span);
231                         for(unsigned i = 0; i < _span; ++i)
232                                 diffs[i] = 1;
233                 }
234
235                 Shape(Shape const &other):
236                         span(other.span),
237                         weight(other.weight),
238                         diffs(other.diffs),
239                         hValue(other.hValue) {} 
240
241                 template <typename TSpec>
242                 Shape(Shape<TValue, TSpec> const &other)
243                 {
244                         *this = other;
245                 }       
246
247                 template <typename TSpec>
248                 Shape(FixedGappedShape<TSpec> const &other)
249                 {
250                         *this = other;
251                 }
252
253                 template <typename TStringValue, typename TSpec>
254                 Shape(String<TStringValue, TSpec> const &bitmap)
255                 {
256                         *this = bitmap;
257                 }       
258
259 //____________________________________________________________________________
260
261                 template <unsigned q>
262                 inline Shape &
263                 operator=(Shape<TValue, FixedShape<q> > const &other)
264                 {
265                         span = length(other);
266                         weight = weight(other);
267                         resize(diffs, weight);
268                         for(unsigned i = 1; i < weight; ++i)
269                                 diffs[i] = 1;
270                         hValue = other.hValue;
271                         return *this;
272                 }
273
274                 template <typename TSpec>
275                 inline Shape &
276                 operator=(Shape<TValue, FixedGappedShape<TSpec> > const &other)
277                 {
278                         span = other.span;
279                         weight = other.weight;
280                         diffs = other.diffs;
281                         hValue = other.hValue;
282                         return *this;
283                 }
284
285                 template <typename TSpec>
286                 inline Shape &
287                 operator=(FixedGappedShape<TSpec> const)
288                 {
289                         typedef Shape<TValue, FixedGappedShape<TSpec> > TShape;
290                         return *this = TShape();
291                 }
292
293                 template <typename TStringValue, typename TSpec>
294                 inline Shape &
295                 operator=(String<TStringValue, TSpec> const &bitmap)
296                 {
297                         stringToShape(*this, bitmap);
298                         return *this;
299                 }
300         };
301
302
303 //////////////////////////////////////////////////////////////////////////////
304
305 /**
306 .Spec.FixedGappedShape:
307 ..cat:Index
308 ..summary:A fixed gapped shape.
309 ..general:Class.Shape
310 ..signature:Shape<TValue, FixedGappedShape<TSpec> >
311 ..param.TValue:The @Metafunction.Value@ type of the string the shape is applied to (e.g. $Dna$).
312 ..param.TSpec:A structure to store the shape at compile-time.
313 ...type:Class.HardwiredShape
314 ..remarks:There are predefined shapes in $index/shape_predefined.h$.
315 You can simply use them with $Shape<TValue, ShapePatternHunter>$ for example.
316 ..see:Class.HardwiredShape
317 */
318
319         //////////////////////////////////////////////////////////////////////////////
320         // fixed gapped shape
321         //////////////////////////////////////////////////////////////////////////////
322
323         template <typename TValue, typename TSpec>
324         class Shape<TValue, FixedGappedShape<TSpec> >
325         {
326         public:
327 //____________________________________________________________________________
328
329                 typedef FixedGappedShape<TSpec> TShapeSpec;
330
331                 enum { span = LENGTH<Shape>::VALUE };
332                 enum { weight = WEIGHT<Shape>::VALUE };
333                 const int *diffs;
334         
335                 typename Value<Shape>::Type     hValue;         // current hash value
336 //____________________________________________________________________________
337
338                 Shape():
339                         diffs(TSpec::DIFFS) {}
340
341                 Shape(Shape const &other):
342                         diffs(other.diffs),     
343                         hValue(other.hValue) {}
344 //____________________________________________________________________________
345
346                 inline Shape &
347                 operator=(Shape const &other)
348                 {
349                         hValue = other.hValue;
350                 }
351         };
352
353 //////////////////////////////////////////////////////////////////////////////
354
355         template <typename TValue, typename TSpec>
356         inline typename Size< Shape<TValue, FixedGappedShape<TSpec> > >::Type
357         weight(Shape<TValue, FixedGappedShape<TSpec> > const & me)
358         {
359         SEQAN_CHECKPOINT
360                 return me.weight;
361         }
362
363 //____________________________________________________________________________
364
365         template <typename TValue, typename TIter>
366         inline typename Value< Shape<TValue, GappedShape> >::Type
367         hash(Shape<TValue, GappedShape> &me, TIter it)  
368         {
369         SEQAN_CHECKPOINT
370                 typedef typename Value< Shape<TValue, GappedShape> >::Type      THValue;
371                 typedef typename Size< Shape<TValue, GappedShape> >::Type       TSize;
372
373                 me.hValue = ordValue((TValue)*it);
374                 TSize iEnd = me.weight - 1;
375                 for(TSize i = 0; i < iEnd; ++i) {
376                         goFurther(it, me.diffs[i]);
377                         me.hValue = me.hValue * ValueSize<TValue>::VALUE + ordValue((TValue)*it);
378                 }
379                 return me.hValue;
380         }
381
382         template <typename TValue, typename TSpec, typename TIter, typename TSize>
383         inline typename Value< Shape<TValue, FixedGappedShape<TSpec> > >::Type
384         hash(Shape<TValue, FixedGappedShape<TSpec> > &me, TIter it, TSize charsLeft)
385         {
386         SEQAN_CHECKPOINT
387                 typedef typename Value< Shape<TValue, FixedGappedShape<TSpec> > >::Type THValue;
388
389                 TSize iEnd = me.weight;
390                 if (iEnd > charsLeft) iEnd = charsLeft;
391
392                 TSize i = 0;
393                 if (iEnd > 0) {
394                         me.hValue = ordValue((TValue)*it);
395                         --iEnd;
396                         for(; i < iEnd; ++i) {
397                                 goFurther(it, me.diffs[i]);
398                                 me.hValue = me.hValue * ValueSize<TValue>::VALUE + ordValue((TValue)*it);
399                         }
400                         ++i;
401                 } else
402                         return me.hValue = 0;
403
404                 // fill shape with zeros
405                 for(; i < (TSize)me.weight; ++i)
406                         me.hValue *= ValueSize<TValue>::VALUE;
407                 return me.hValue;
408         }
409
410         template <typename TValue, typename TSpec, typename TIter, typename TSize>
411         inline typename Value< Shape<TValue, FixedGappedShape<TSpec> > >::Type
412         hashUpper(Shape<TValue, FixedGappedShape<TSpec> > &me, TIter it, TSize charsLeft)
413         {
414         SEQAN_CHECKPOINT
415                 typedef typename Value< Shape<TValue, FixedGappedShape<TSpec> > >::Type THValue;
416
417                 TSize iEnd = me.weight;
418                 if (iEnd > charsLeft) iEnd = charsLeft;
419
420                 TSize i = 0;
421                 if (iEnd > 0) {
422                         me.hValue = ordValue((TValue)*it);
423                         --iEnd;
424                         for(; i < iEnd; ++i) {
425                                 goFurther(it, me.diffs[i]);
426                                 me.hValue = me.hValue * ValueSize<TValue>::VALUE + ordValue((TValue)*it);
427                         }
428                         ++i;
429                         ++me.hValue;
430                 } else
431                         return me.hValue = 1;
432
433                 // fill shape with zeros
434                 for(; i < (TSize)me.weight; ++i)
435                         me.hValue *= ValueSize<TValue>::VALUE;
436                 return me.hValue;
437         }
438
439 //____________________________________________________________________________
440
441         template <typename THValue, typename TValue, typename TIter>
442         inline THValue
443         _hashHardwiredShape(THValue hash, TIter &, TValue const, HardwiredShape<
444                 0,0,0,0,0,
445                 0,0,0,0,0,
446                 0,0,0,0,0,
447                 0,0,0,0,0 > const)
448         {
449                 return hash;
450         }
451
452         template <
453                          int P01, int P02, int P03, int P04,
454                 int P05, int P06, int P07, int P08, int P09,
455                 int P10, int P11, int P12, int P13, int P14,
456                 int P15, int P16, int P17, int P18, int P19,
457                 typename THValue, typename TValue, typename TIter
458         >
459         inline THValue
460         _hashHardwiredShape(THValue hash, TIter &it, TValue const, HardwiredShape<
461                  1 ,P01,P02,P03,P04,
462                 P05,P06,P07,P08,P09,
463                 P10,P11,P12,P13,P14,
464                 P15,P16,P17,P18,P19 > const)
465         {
466                 ++it;
467                 return _hashHardwiredShape(hash * ValueSize<TValue>::VALUE + ordValue((TValue)*it),
468                         it, TValue(), HardwiredShape<
469                                 P01,P02,P03,P04,P05,
470                                 P06,P07,P08,P09,P10,
471                                 P11,P12,P13,P14,P15,
472                                 P16,P17,P18,P19, 0 >());
473         }
474
475         template <
476                 int P00, int P01, int P02, int P03, int P04,
477                 int P05, int P06, int P07, int P08, int P09,
478                 int P10, int P11, int P12, int P13, int P14,
479                 int P15, int P16, int P17, int P18, int P19,
480                 typename THValue, typename TValue, typename TIter
481         >
482         inline THValue
483         _hashHardwiredShape(THValue hash, TIter &it, TValue const, HardwiredShape<
484                 P00,P01,P02,P03,P04,
485                 P05,P06,P07,P08,P09,
486                 P10,P11,P12,P13,P14,
487                 P15,P16,P17,P18,P19 > const)
488         {
489                 it += P00;
490                 return _hashHardwiredShape(hash * ValueSize<TValue>::VALUE + ordValue((TValue)*it),
491                         it, TValue(), HardwiredShape<
492                                 P01,P02,P03,P04,P05,
493                                 P06,P07,P08,P09,P10,
494                                 P11,P12,P13,P14,P15,
495                                 P16,P17,P18,P19, 0 >());
496         }
497
498         template <
499                 int P00, int P01, int P02, int P03, int P04,
500                 int P05, int P06, int P07, int P08, int P09,
501                 int P10, int P11, int P12, int P13, int P14,
502                 int P15, int P16, int P17, int P18, int P19,
503                 typename TValue, typename TIter
504         >
505         inline typename Value< Shape<TValue, FixedGappedShape< HardwiredShape<
506                 P00,P01,P02,P03,P04,
507                 P05,P06,P07,P08,P09,
508                 P10,P11,P12,P13,P14,
509                 P15,P16,P17,P18,P19 
510         > > > >::Type
511         hash(Shape<TValue, FixedGappedShape< HardwiredShape<
512                 P00,P01,P02,P03,P04,
513                 P05,P06,P07,P08,P09,
514                 P10,P11,P12,P13,P14,
515                 P15,P16,P17,P18,P19 
516         > > > &me, TIter it)
517         {
518         SEQAN_CHECKPOINT
519                 typedef HardwiredShape<
520                         P00,P01,P02,P03,P04,
521                         P05,P06,P07,P08,P09,
522                         P10,P11,P12,P13,P14,
523                         P15,P16,P17,P18,P19 >                                                           TSpec;
524                 typedef FixedGappedShape<TSpec>                                                 TShape;
525                 typedef typename Value< Shape<TValue, TShape> >::Type   THValue;
526
527                 me.hValue = (THValue)ordValue((TValue)*it);
528                 return me.hValue = _hashHardwiredShape(me.hValue, it, TValue(), TSpec());
529         }
530
531 //____________________________________________________________________________
532
533         template <typename TValue, typename TSpec, typename TIter>
534         inline typename Value< Shape<TValue, FixedGappedShape<TSpec> > >::Type
535         hashNext(Shape<TValue, FixedGappedShape<TSpec> > &me, TIter it) 
536         {
537         SEQAN_CHECKPOINT
538                 return hash(me, it);
539         }
540
541
542 //____________________________________________________________________________
543
544 /**.Function.stringToShape:
545 ..cat:Index
546 ..summary:Takes a shape given as a string of '1' (relevant position) and '0' 
547 (irrelevant position) and converts it into a Shape object.
548 ..signature:stringToShape(shape, bitmap)
549 ..param.shape:Shape object that is manipulated.
550 ...type:Spec.GappedShape
551 ..param.bitmap:A character string of '1' and '0' representing relevant and irrelevant positions (blanks) respectively.
552 ...remarks:This string must begin with a '1'.
553 ...type:Class.String
554 */
555
556         template <typename TValue, typename TSpec, typename TShapeString>
557         inline void
558         stringToShape(
559                 Shape<TValue, FixedGappedShape<TSpec> > &me, 
560                 TShapeString const &bitmap)
561         {
562         SEQAN_CHECKPOINT
563                 typedef typename Iterator<TShapeString const>::Type             TIter;
564                 typedef typename Iterator<String<int> >::Type                   TShapeIter;
565
566                 me.span = length(bitmap);
567
568                 unsigned oneCount = 0;
569                 TIter it = begin(bitmap, Standard());
570                 TIter itEnd = end(bitmap, Standard());
571                 for(; it != itEnd; ++it)
572                         if (*it == '1')
573                                 ++oneCount;
574
575                 me.weight = oneCount;
576                 resize(me.diffs, oneCount);
577
578                 unsigned diff = 0;
579                 it = begin(bitmap, Standard());
580                 TShapeIter itS = begin(me.diffs, Standard());
581                 for(; it != itEnd; ++it) {
582                         if (*it == '1') {
583                                 *itS = diff;
584                                 ++itS;
585                                 diff = 0;
586                         }
587                         ++diff;
588                 }
589         }
590
591 }       // namespace seqan
592
593 #endif