X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=pysam.git;a=blobdiff_plain;f=samtools%2Fkaln.c;fp=samtools%2Fkaln.c;h=0000000000000000000000000000000000000000;hp=9fa40d0be3999e66b885486d7a03e50994bfbfdd;hb=bd0c3067c187d1f718004fb38acc093af8810a02;hpb=1b740fc70684c92a5e2293013217d5a2fd661d8a diff --git a/samtools/kaln.c b/samtools/kaln.c deleted file mode 100644 index 9fa40d0..0000000 --- a/samtools/kaln.c +++ /dev/null @@ -1,370 +0,0 @@ -/* The MIT License - - Copyright (c) 2003-2006, 2008, 2009, by Heng Li - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#include -#include -#include -#include -#include "kaln.h" - -#define FROM_M 0 -#define FROM_I 1 -#define FROM_D 2 - -typedef struct { - int i, j; - unsigned char ctype; -} path_t; - -int aln_sm_blosum62[] = { -/* A R N D C Q E G H I L K M F P S T W Y V * X */ - 4,-1,-2,-2, 0,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-3,-2, 0,-4, 0, - -1, 5, 0,-2,-3, 1, 0,-2, 0,-3,-2, 2,-1,-3,-2,-1,-1,-3,-2,-3,-4,-1, - -2, 0, 6, 1,-3, 0, 0, 0, 1,-3,-3, 0,-2,-3,-2, 1, 0,-4,-2,-3,-4,-1, - -2,-2, 1, 6,-3, 0, 2,-1,-1,-3,-4,-1,-3,-3,-1, 0,-1,-4,-3,-3,-4,-1, - 0,-3,-3,-3, 9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1,-4,-2, - -1, 1, 0, 0,-3, 5, 2,-2, 0,-3,-2, 1, 0,-3,-1, 0,-1,-2,-1,-2,-4,-1, - -1, 0, 0, 2,-4, 2, 5,-2, 0,-3,-3, 1,-2,-3,-1, 0,-1,-3,-2,-2,-4,-1, - 0,-2, 0,-1,-3,-2,-2, 6,-2,-4,-4,-2,-3,-3,-2, 0,-2,-2,-3,-3,-4,-1, - -2, 0, 1,-1,-3, 0, 0,-2, 8,-3,-3,-1,-2,-1,-2,-1,-2,-2, 2,-3,-4,-1, - -1,-3,-3,-3,-1,-3,-3,-4,-3, 4, 2,-3, 1, 0,-3,-2,-1,-3,-1, 3,-4,-1, - -1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,-2, 2, 0,-3,-2,-1,-2,-1, 1,-4,-1, - -1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,-1,-3,-1, 0,-1,-3,-2,-2,-4,-1, - -1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5, 0,-2,-1,-1,-1,-1, 1,-4,-1, - -2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,-4,-2,-2, 1, 3,-1,-4,-1, - -1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,-1,-1,-4,-3,-2,-4,-2, - 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4, 1,-3,-2,-2,-4, 0, - 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,-2,-2, 0,-4, 0, - -3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11, 2,-3,-4,-2, - -2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,-1,-4,-1, - 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,-4,-1, - -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 1,-4, - 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-4,-1 -}; - -int aln_sm_blast[] = { - 1, -3, -3, -3, -2, - -3, 1, -3, -3, -2, - -3, -3, 1, -3, -2, - -3, -3, -3, 1, -2, - -2, -2, -2, -2, -2 -}; - -ka_param_t ka_param_blast = { 5, 2, 2, aln_sm_blast, 5, 50 }; -ka_param_t ka_param_aa2aa = { 10, 2, 2, aln_sm_blosum62, 22, 50 }; - -static uint32_t *ka_path2cigar32(const path_t *path, int path_len, int *n_cigar) -{ - int i, n; - uint32_t *cigar; - unsigned char last_type; - - if (path_len == 0 || path == 0) { - *n_cigar = 0; - return 0; - } - - last_type = path->ctype; - for (i = n = 1; i < path_len; ++i) { - if (last_type != path[i].ctype) ++n; - last_type = path[i].ctype; - } - *n_cigar = n; - cigar = (uint32_t*)calloc(*n_cigar, 4); - - cigar[0] = 1u << 4 | path[path_len-1].ctype; - last_type = path[path_len-1].ctype; - for (i = path_len - 2, n = 0; i >= 0; --i) { - if (path[i].ctype == last_type) cigar[n] += 1u << 4; - else { - cigar[++n] = 1u << 4 | path[i].ctype; - last_type = path[i].ctype; - } - } - - return cigar; -} - -/***************************/ -/* START OF common_align.c */ -/***************************/ - -#define SET_INF(s) (s).M = (s).I = (s).D = MINOR_INF; - -#define set_M(MM, cur, p, sc) \ -{ \ - if ((p)->M >= (p)->I) { \ - if ((p)->M >= (p)->D) { \ - (MM) = (p)->M + (sc); (cur)->Mt = FROM_M; \ - } else { \ - (MM) = (p)->D + (sc); (cur)->Mt = FROM_D; \ - } \ - } else { \ - if ((p)->I > (p)->D) { \ - (MM) = (p)->I + (sc); (cur)->Mt = FROM_I; \ - } else { \ - (MM) = (p)->D + (sc); (cur)->Mt = FROM_D; \ - } \ - } \ -} -#define set_I(II, cur, p) \ -{ \ - if ((p)->M - gap_open > (p)->I) { \ - (cur)->It = FROM_M; \ - (II) = (p)->M - gap_open - gap_ext; \ - } else { \ - (cur)->It = FROM_I; \ - (II) = (p)->I - gap_ext; \ - } \ -} -#define set_end_I(II, cur, p) \ -{ \ - if (gap_end >= 0) { \ - if ((p)->M - gap_open > (p)->I) { \ - (cur)->It = FROM_M; \ - (II) = (p)->M - gap_open - gap_end; \ - } else { \ - (cur)->It = FROM_I; \ - (II) = (p)->I - gap_end; \ - } \ - } else set_I(II, cur, p); \ -} -#define set_D(DD, cur, p) \ -{ \ - if ((p)->M - gap_open > (p)->D) { \ - (cur)->Dt = FROM_M; \ - (DD) = (p)->M - gap_open - gap_ext; \ - } else { \ - (cur)->Dt = FROM_D; \ - (DD) = (p)->D - gap_ext; \ - } \ -} -#define set_end_D(DD, cur, p) \ -{ \ - if (gap_end >= 0) { \ - if ((p)->M - gap_open > (p)->D) { \ - (cur)->Dt = FROM_M; \ - (DD) = (p)->M - gap_open - gap_end; \ - } else { \ - (cur)->Dt = FROM_D; \ - (DD) = (p)->D - gap_end; \ - } \ - } else set_D(DD, cur, p); \ -} - -typedef struct { - uint8_t Mt:3, It:2, Dt:2; -} dpcell_t; - -typedef struct { - int M, I, D; -} dpscore_t; - -/*************************** - * banded global alignment * - ***************************/ -uint32_t *ka_global_core(uint8_t *seq1, int len1, uint8_t *seq2, int len2, const ka_param_t *ap, int *_score, int *n_cigar) -{ - int i, j; - dpcell_t **dpcell, *q; - dpscore_t *curr, *last, *s; - int b1, b2, tmp_end; - int *mat, end, max = 0; - uint8_t type, ctype; - uint32_t *cigar = 0; - - int gap_open, gap_ext, gap_end, b; - int *score_matrix, N_MATRIX_ROW; - - /* initialize some align-related parameters. just for compatibility */ - gap_open = ap->gap_open; - gap_ext = ap->gap_ext; - gap_end = ap->gap_end; - b = ap->band_width; - score_matrix = ap->matrix; - N_MATRIX_ROW = ap->row; - - *n_cigar = 0; - if (len1 == 0 || len2 == 0) return 0; - - /* calculate b1 and b2 */ - if (len1 > len2) { - b1 = len1 - len2 + b; - b2 = b; - } else { - b1 = b; - b2 = len2 - len1 + b; - } - if (b1 > len1) b1 = len1; - if (b2 > len2) b2 = len2; - --seq1; --seq2; - - /* allocate memory */ - end = (b1 + b2 <= len1)? (b1 + b2 + 1) : (len1 + 1); - dpcell = (dpcell_t**)malloc(sizeof(dpcell_t*) * (len2 + 1)); - for (j = 0; j <= len2; ++j) - dpcell[j] = (dpcell_t*)malloc(sizeof(dpcell_t) * end); - for (j = b2 + 1; j <= len2; ++j) - dpcell[j] -= j - b2; - curr = (dpscore_t*)malloc(sizeof(dpscore_t) * (len1 + 1)); - last = (dpscore_t*)malloc(sizeof(dpscore_t) * (len1 + 1)); - - /* set first row */ - SET_INF(*curr); curr->M = 0; - for (i = 1, s = curr + 1; i < b1; ++i, ++s) { - SET_INF(*s); - set_end_D(s->D, dpcell[0] + i, s - 1); - } - s = curr; curr = last; last = s; - - /* core dynamic programming, part 1 */ - tmp_end = (b2 < len2)? b2 : len2 - 1; - for (j = 1; j <= tmp_end; ++j) { - q = dpcell[j]; s = curr; SET_INF(*s); - set_end_I(s->I, q, last); - end = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1; - mat = score_matrix + seq2[j] * N_MATRIX_ROW; - ++s; ++q; - for (i = 1; i != end; ++i, ++s, ++q) { - set_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */ - set_I(s->I, q, last + i); - set_D(s->D, q, s - 1); - } - set_M(s->M, q, last + i - 1, mat[seq1[i]]); - set_D(s->D, q, s - 1); - if (j + b1 - 1 > len1) { /* bug fixed, 040227 */ - set_end_I(s->I, q, last + i); - } else s->I = MINOR_INF; - s = curr; curr = last; last = s; - } - /* last row for part 1, use set_end_D() instead of set_D() */ - if (j == len2 && b2 != len2 - 1) { - q = dpcell[j]; s = curr; SET_INF(*s); - set_end_I(s->I, q, last); - end = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1; - mat = score_matrix + seq2[j] * N_MATRIX_ROW; - ++s; ++q; - for (i = 1; i != end; ++i, ++s, ++q) { - set_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */ - set_I(s->I, q, last + i); - set_end_D(s->D, q, s - 1); - } - set_M(s->M, q, last + i - 1, mat[seq1[i]]); - set_end_D(s->D, q, s - 1); - if (j + b1 - 1 > len1) { /* bug fixed, 040227 */ - set_end_I(s->I, q, last + i); - } else s->I = MINOR_INF; - s = curr; curr = last; last = s; - ++j; - } - - /* core dynamic programming, part 2 */ - for (; j <= len2 - b2 + 1; ++j) { - SET_INF(curr[j - b2]); - mat = score_matrix + seq2[j] * N_MATRIX_ROW; - end = j + b1 - 1; - for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i != end; ++i, ++s, ++q) { - set_M(s->M, q, last + i - 1, mat[seq1[i]]); - set_I(s->I, q, last + i); - set_D(s->D, q, s - 1); - } - set_M(s->M, q, last + i - 1, mat[seq1[i]]); - set_D(s->D, q, s - 1); - s->I = MINOR_INF; - s = curr; curr = last; last = s; - } - - /* core dynamic programming, part 3 */ - for (; j < len2; ++j) { - SET_INF(curr[j - b2]); - mat = score_matrix + seq2[j] * N_MATRIX_ROW; - for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) { - set_M(s->M, q, last + i - 1, mat[seq1[i]]); - set_I(s->I, q, last + i); - set_D(s->D, q, s - 1); - } - set_M(s->M, q, last + len1 - 1, mat[seq1[i]]); - set_end_I(s->I, q, last + i); - set_D(s->D, q, s - 1); - s = curr; curr = last; last = s; - } - /* last row */ - if (j == len2) { - SET_INF(curr[j - b2]); - mat = score_matrix + seq2[j] * N_MATRIX_ROW; - for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) { - set_M(s->M, q, last + i - 1, mat[seq1[i]]); - set_I(s->I, q, last + i); - set_end_D(s->D, q, s - 1); - } - set_M(s->M, q, last + len1 - 1, mat[seq1[i]]); - set_end_I(s->I, q, last + i); - set_end_D(s->D, q, s - 1); - s = curr; curr = last; last = s; - } - - *_score = last[len1].M; - if (n_cigar) { /* backtrace */ - path_t *p, *path = (path_t*)malloc(sizeof(path_t) * (len1 + len2 + 2)); - i = len1; j = len2; - q = dpcell[j] + i; - s = last + len1; - max = s->M; type = q->Mt; ctype = FROM_M; - if (s->I > max) { max = s->I; type = q->It; ctype = FROM_I; } - if (s->D > max) { max = s->D; type = q->Dt; ctype = FROM_D; } - - p = path; - p->ctype = ctype; p->i = i; p->j = j; /* bug fixed 040408 */ - ++p; - do { - switch (ctype) { - case FROM_M: --i; --j; break; - case FROM_I: --j; break; - case FROM_D: --i; break; - } - q = dpcell[j] + i; - ctype = type; - switch (type) { - case FROM_M: type = q->Mt; break; - case FROM_I: type = q->It; break; - case FROM_D: type = q->Dt; break; - } - p->ctype = ctype; p->i = i; p->j = j; - ++p; - } while (i || j); - cigar = ka_path2cigar32(path, p - path - 1, n_cigar); - free(path); - } - - /* free memory */ - for (j = b2 + 1; j <= len2; ++j) - dpcell[j] += j - b2; - for (j = 0; j <= len2; ++j) - free(dpcell[j]); - free(dpcell); - free(curr); free(last); - - return cigar; -}