Uploading samtools_0.1.17-1_amd64.changes.
[samtools.git] / kaln.c
diff --git a/kaln.c b/kaln.c
index 9fa40d0be3999e66b885486d7a03e50994bfbfdd..9c0bbaa437ff5654d7a181b496bb9d58c9c30558 100644 (file)
--- a/kaln.c
+++ b/kaln.c
@@ -27,6 +27,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <stdint.h>
+#include <math.h>
 #include "kaln.h"
 
 #define FROM_M 0
@@ -72,8 +73,18 @@ int aln_sm_blast[] = {
        -2, -2, -2, -2, -2
 };
 
-ka_param_t ka_param_blast = {  5,  2,  2, aln_sm_blast, 5, 50 };
-ka_param_t ka_param_aa2aa = { 10,  2,  2, aln_sm_blosum62, 22, 50 };
+int aln_sm_qual[] = {
+         0, -23, -23, -23, 0,
+       -23,   0, -23, -23, 0,
+       -23, -23,   0, -23, 0,
+       -23, -23, -23,   0, 0,
+         0,   0,   0,   0, 0
+};
+
+ka_param_t ka_param_blast = {  5,  2,   5, 2, aln_sm_blast, 5, 50 };
+ka_param_t ka_param_aa2aa = { 10,  2,  10, 2, aln_sm_blosum62, 22, 50 };
+
+ka_param2_t ka_param2_qual  = { 37, 11, 37, 11, 37, 11, 0, 0, aln_sm_qual, 5, 50 };
 
 static uint32_t *ka_path2cigar32(const path_t *path, int path_len, int *n_cigar)
 {
@@ -141,13 +152,13 @@ static uint32_t *ka_path2cigar32(const path_t *path, int path_len, int *n_cigar)
 }
 #define set_end_I(II, cur, p)                                                  \
 {                                                                                                              \
-       if (gap_end >= 0) {                                                                     \
-               if ((p)->M - gap_open > (p)->I) {                               \
+       if (gap_end_ext >= 0) {                                                         \
+               if ((p)->M - gap_end_open > (p)->I) {                   \
                        (cur)->It = FROM_M;                                                     \
-                       (II) = (p)->M - gap_open - gap_end;                     \
+                       (II) = (p)->M - gap_end_open - gap_end_ext;     \
                } else {                                                                                \
                        (cur)->It = FROM_I;                                                     \
-                       (II) = (p)->I - gap_end;                                        \
+                       (II) = (p)->I - gap_end_ext;                            \
                }                                                                                               \
        } else set_I(II, cur, p);                                                       \
 }
@@ -163,19 +174,19 @@ static uint32_t *ka_path2cigar32(const path_t *path, int path_len, int *n_cigar)
 }
 #define set_end_D(DD, cur, p)                                                  \
 {                                                                                                              \
-       if (gap_end >= 0) {                                                                     \
-               if ((p)->M - gap_open > (p)->D) {                               \
+       if (gap_end_ext >= 0) {                                                         \
+               if ((p)->M - gap_end_open > (p)->D) {                   \
                        (cur)->Dt = FROM_M;                                                     \
-                       (DD) = (p)->M - gap_open - gap_end;                     \
+                       (DD) = (p)->M - gap_end_open - gap_end_ext;     \
                } else {                                                                                \
                        (cur)->Dt = FROM_D;                                                     \
-                       (DD) = (p)->D - gap_end;                                        \
+                       (DD) = (p)->D - gap_end_ext;                            \
                }                                                                                               \
        } else set_D(DD, cur, p);                                                       \
 }
 
 typedef struct {
-       uint8_t Mt:3, It:2, Dt:2;
+       uint8_t Mt:3, It:2, Dt:3;
 } dpcell_t;
 
 typedef struct {
@@ -195,18 +206,19 @@ uint32_t *ka_global_core(uint8_t *seq1, int len1, uint8_t *seq2, int len2, const
        uint8_t type, ctype;
        uint32_t *cigar = 0;
 
-       int gap_open, gap_ext, gap_end, b;
+       int gap_open, gap_ext, gap_end_open, gap_end_ext, b;
        int *score_matrix, N_MATRIX_ROW;
 
        /* initialize some align-related parameters. just for compatibility */
        gap_open = ap->gap_open;
        gap_ext = ap->gap_ext;
-       gap_end = ap->gap_end;
+       gap_end_open = ap->gap_end_open;
+       gap_end_ext = ap->gap_end_ext;
        b = ap->band_width;
        score_matrix = ap->matrix;
        N_MATRIX_ROW = ap->row;
 
-       *n_cigar = 0;
+       if (n_cigar) *n_cigar = 0;
        if (len1 == 0 || len2 == 0) return 0;
 
        /* calculate b1 and b2 */
@@ -368,3 +380,107 @@ uint32_t *ka_global_core(uint8_t *seq1, int len1, uint8_t *seq2, int len2, const
 
        return cigar;
 }
+
+typedef struct {
+       int M, I, D;
+} score_aux_t;
+
+#define MINUS_INF -0x40000000
+
+// matrix: len2 rows and len1 columns
+int ka_global_score(const uint8_t *_seq1, int len1, const uint8_t *_seq2, int len2, const ka_param2_t *ap)
+{
+       
+#define __score_aux(_p, _q0, _sc, _io, _ie, _do, _de) {                                        \
+               int t1, t2;                                                                                                             \
+               score_aux_t *_q;                                                                                                \
+               _q = _q0;                                                                                                               \
+               _p->M = _q->M >= _q->I? _q->M : _q->I;                                                  \
+               _p->M = _p->M >= _q->D? _p->M : _q->D;                                                  \
+               _p->M += (_sc);                                                                                                 \
+               ++_q;      t1 = _q->M - _io - _ie; t2 = _q->I - _ie; _p->I = t1 >= t2? t1 : t2; \
+               _q = _p-1; t1 = _q->M - _do - _de; t2 = _q->D - _de; _p->D = t1 >= t2? t1 : t2; \
+       }
+
+       int i, j, bw, scmat_size = ap->row, *scmat = ap->matrix, ret;
+       const uint8_t *seq1, *seq2;
+       score_aux_t *curr, *last, *swap;
+       bw = abs(len1 - len2) + ap->band_width;
+       i = len1 > len2? len1 : len2;
+       if (bw > i + 1) bw = i + 1;
+       seq1 = _seq1 - 1; seq2 = _seq2 - 1;
+       curr = calloc(len1 + 2, sizeof(score_aux_t));
+       last = calloc(len1 + 2, sizeof(score_aux_t));
+       { // the zero-th row
+               int x, end = len1;
+               score_aux_t *p;
+               j = 0;
+               x = j + bw; end = len1 < x? len1 : x; // band end
+               p = curr;
+               p->M = 0; p->I = p->D = MINUS_INF;
+               for (i = 1, p = &curr[1]; i <= end; ++i, ++p)
+                       p->M = p->I = MINUS_INF, p->D = -(ap->edo + ap->ede * i);
+               p->M = p->I = p->D = MINUS_INF;
+               swap = curr; curr = last; last = swap;
+       }
+       for (j = 1; j < len2; ++j) {
+               int x, beg = 0, end = len1, *scrow, col_end;
+               score_aux_t *p;
+               x = j - bw; beg =    0 > x?    0 : x; // band start
+               x = j + bw; end = len1 < x? len1 : x; // band end
+               if (beg == 0) { // from zero-th column
+                       p = curr;
+                       p->M = p->D = MINUS_INF; p->I = -(ap->eio + ap->eie * j);
+                       ++beg; // then beg = 1
+               }
+               scrow = scmat + seq2[j] * scmat_size;
+               if (end == len1) col_end = 1, --end;
+               else col_end = 0;
+               for (i = beg, p = &curr[beg]; i <= end; ++i, ++p)
+                       __score_aux(p, &last[i-1], scrow[(int)seq1[i]], ap->iio, ap->iie, ap->ido, ap->ide);
+               if (col_end) {
+                       __score_aux(p, &last[i-1], scrow[(int)seq1[i]], ap->eio, ap->eie, ap->ido, ap->ide);
+                       ++p;
+               }
+               p->M = p->I = p->D = MINUS_INF;
+//             for (i = 0; i <= len1; ++i) printf("(%d,%d,%d) ", curr[i].M, curr[i].I, curr[i].D); putchar('\n');
+               swap = curr; curr = last; last = swap;
+       }
+       { // the last row
+               int x, beg = 0, *scrow;
+               score_aux_t *p;
+               j = len2;
+               x = j - bw; beg = 0 > x?    0 : x; // band start
+               if (beg == 0) { // from zero-th column
+                       p = curr;
+                       p->M = p->D = MINUS_INF; p->I = -(ap->eio + ap->eie * j);
+                       ++beg; // then beg = 1
+               }
+               scrow = scmat + seq2[j] * scmat_size;
+               for (i = beg, p = &curr[beg]; i < len1; ++i, ++p)
+                       __score_aux(p, &last[i-1], scrow[(int)seq1[i]], ap->iio, ap->iie, ap->edo, ap->ede);
+               __score_aux(p, &last[i-1], scrow[(int)seq1[i]], ap->eio, ap->eie, ap->edo, ap->ede);
+//             for (i = 0; i <= len1; ++i) printf("(%d,%d,%d) ", curr[i].M, curr[i].I, curr[i].D); putchar('\n');
+       }
+       ret = curr[len1].M >= curr[len1].I? curr[len1].M : curr[len1].I;
+       ret = ret >= curr[len1].D? ret : curr[len1].D;
+       free(curr); free(last);
+       return ret;
+}
+
+#ifdef _MAIN
+int main(int argc, char *argv[])
+{
+//     int len1 = 35, len2 = 35;
+//     uint8_t *seq1 = (uint8_t*)"\0\0\3\3\2\0\0\0\1\0\2\1\2\1\3\2\3\3\3\0\2\3\2\1\1\3\3\3\2\3\3\1\0\0\1";
+//     uint8_t *seq2 = (uint8_t*)"\0\0\3\3\2\0\0\0\1\0\2\1\2\1\3\2\3\3\3\0\2\3\2\1\1\3\3\3\2\3\3\1\0\1\0";
+       int len1 = 4, len2 = 4;
+       uint8_t *seq1 = (uint8_t*)"\1\0\0\1";
+       uint8_t *seq2 = (uint8_t*)"\1\0\1\0";
+       int sc;
+//     ka_global_core(seq1, 2, seq2, 1, &ka_param_qual, &sc, 0);
+       sc = ka_global_score(seq1, len1, seq2, len2, &ka_param2_qual);
+       printf("%d\n", sc);
+       return 0;
+}
+#endif