X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=pysam.git;a=blobdiff_plain;f=samtools%2Fbam_import.c.pysam.c;h=da25f036504eb75f2617ea0d837f3f3df73c796d;hp=36a8024c286a71b7b0bfc3eeecb91645b97befb1;hb=e1756c41e7a1d7cc01fb95e42df9dd04da2d2991;hpb=ca46ef4ba4a883c57cea62d5bf1bc021f1185109 diff --git a/samtools/bam_import.c.pysam.c b/samtools/bam_import.c.pysam.c index 36a8024..da25f03 100644 --- a/samtools/bam_import.c.pysam.c +++ b/samtools/bam_import.c.pysam.c @@ -16,7 +16,7 @@ #include "kseq.h" #include "khash.h" -KSTREAM_INIT(gzFile, gzread, 16384) +KSTREAM_INIT(gzFile, gzread, 8192) KHASH_MAP_INIT_STR(ref, uint64_t) void bam_init_header_hash(bam_header_t *header); @@ -294,22 +294,20 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) z += str->l + 1; if (str->s[0] != '*') { for (s = str->s; *s; ++s) { - if ((isalpha(*s)) || (*s=='=')) ++c->n_cigar; + if (isalpha(*s)) ++c->n_cigar; else if (!isdigit(*s)) parse_error(fp->n_lines, "invalid CIGAR character"); } b->data = alloc_data(b, doff + c->n_cigar * 4); for (i = 0, s = str->s; i != c->n_cigar; ++i) { x = strtol(s, &t, 10); op = toupper(*t); - if (op == 'M') op = BAM_CMATCH; + if (op == 'M' || op == '=' || op == 'X') op = BAM_CMATCH; else if (op == 'I') op = BAM_CINS; else if (op == 'D') op = BAM_CDEL; else if (op == 'N') op = BAM_CREF_SKIP; else if (op == 'S') op = BAM_CSOFT_CLIP; else if (op == 'H') op = BAM_CHARD_CLIP; else if (op == 'P') op = BAM_CPAD; - else if (op == '=') op = BAM_CEQUAL; - else if (op == 'X') op = BAM_CDIFF; else parse_error(fp->n_lines, "invalid CIGAR operation"); s = t + 1; bam1_cigar(b)[i] = x << BAM_CIGAR_SHIFT | op; @@ -341,11 +339,8 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) z += str->l + 1; if (strcmp(str->s, "*")) { c->l_qseq = strlen(str->s); - if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b))) { - fprintf(pysamerr, "Line %ld, sequence length %i vs %i from CIGAR\n", - (long)fp->n_lines, c->l_qseq, (int32_t)bam_cigar2qlen(c, bam1_cigar(b))); - parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent"); - } + if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b))) + parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent"); p = (uint8_t*)alloc_data(b, doff + c->l_qseq + (c->l_qseq+1)/2) + doff; memset(p, 0, (c->l_qseq+1)/2); for (i = 0; i < c->l_qseq; ++i) @@ -452,7 +447,7 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) else if (str->s[5] == 'S') while (p < str->s + str->l) ((uint16_t*)s)[k++] = (uint16_t)strtol(p, &p, 0), ++p; else if (str->s[5] == 'i') while (p < str->s + str->l) ((int32_t*)s)[k++] = (int32_t)strtol(p, &p, 0), ++p; else if (str->s[5] == 'I') while (p < str->s + str->l) ((uint32_t*)s)[k++] = (uint32_t)strtol(p, &p, 0), ++p; - else if (str->s[5] == 'f') while (p < str->s + str->l) ((float*)s)[k++] = (float)strtod(p, &p), ++p; + else if (str->s[5] == 'f') while (p < str->s + str->l) ((float*)s)[k++] = (float)strtof(p, &p), ++p; else parse_error(fp->n_lines, "unrecognized array type"); s += Bsize * n; doff += size; } else parse_error(fp->n_lines, "unrecognized type");