X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=samtools.git;a=blobdiff_plain;f=bam_aux.c;h=2247bdfe9ba15d39d5a1dcf193da9d68cdfab8bf;hp=7482500ef08a53f3471858793112d9c72263e33a;hb=5075acdfc387c56ca8411cde3e419e867da9804d;hpb=b27e00385f41769d03a8cca4dbd71275fc9fa906 diff --git a/bam_aux.c b/bam_aux.c index 7482500..2247bdf 100644 --- a/bam_aux.c +++ b/bam_aux.c @@ -25,24 +25,39 @@ uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]) return bam_aux_get(b, tag); } +#define __skip_tag(s) do { \ + int type = toupper(*(s)); \ + ++(s); \ + if (type == 'Z' || type == 'H') { while (*(s)) ++(s); ++(s); } \ + else if (type == 'B') (s) += 5 + bam_aux_type2size(*(s)) * (*(int32_t*)((s)+1)); \ + else (s) += bam_aux_type2size(type); \ + } while(0) + uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]) { uint8_t *s; int y = tag[0]<<8 | tag[1]; s = bam1_aux(b); while (s < b->data + b->data_len) { - int type, x = (int)s[0]<<8 | s[1]; + int x = (int)s[0]<<8 | s[1]; s += 2; if (x == y) return s; - type = toupper(*s); ++s; - if (type == 'C') ++s; - else if (type == 'S') s += 2; - else if (type == 'I' || type == 'F') s += 4; - else if (type == 'D') s += 8; - else if (type == 'Z' || type == 'H') { while (*s) ++s; ++s; } + __skip_tag(s); } return 0; } +// s MUST BE returned by bam_aux_get() +int bam_aux_del(bam1_t *b, uint8_t *s) +{ + uint8_t *p, *aux; + aux = bam1_aux(b); + p = s - 2; + __skip_tag(s); + memmove(p, s, b->l_aux - (s - aux)); + b->data_len -= s - p; + b->l_aux -= s - p; + return 0; +} void bam_init_header_hash(bam_header_t *header) { @@ -72,47 +87,56 @@ int32_t bam_get_tid(const bam_header_t *header, const char *seq_name) return k == kh_end(h)? -1 : kh_value(h, k); } -int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end) +int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *beg, int *end) { - char *s, *p; - int i, l, k; + char *s; + int i, l, k, name_end; khiter_t iter; khash_t(s) *h; bam_init_header_hash(header); h = (khash_t(s)*)header->hash; - l = strlen(str); - p = s = (char*)malloc(l+1); - /* squeeze out "," */ - for (i = k = 0; i != l; ++i) - if (str[i] != ',' && !isspace(str[i])) s[k++] = str[i]; - s[k] = 0; - for (i = 0; i != k; ++i) if (s[i] == ':') break; - s[i] = 0; - iter = kh_get(s, h, s); /* get the ref_id */ - if (iter == kh_end(h)) { // name not found - *ref_id = -1; free(s); - return -1; - } - *ref_id = kh_value(h, iter); - if (i == k) { /* dump the whole sequence */ - *begin = 0; *end = 1<<29; free(s); - return -1; - } - for (p = s + i + 1; i != k; ++i) if (s[i] == '-') break; - *begin = atoi(p); - if (i < k) { - p = s + i + 1; - *end = atoi(p); - } else *end = 1<<29; - if (*begin > 0) --*begin; + *ref_id = *beg = *end = -1; + name_end = l = strlen(str); + s = (char*)malloc(l+1); + // remove space + for (i = k = 0; i < l; ++i) + if (!isspace(str[i])) s[k++] = str[i]; + s[k] = 0; l = k; + // determine the sequence name + for (i = l - 1; i >= 0; --i) if (s[i] == ':') break; // look for colon from the end + if (i >= 0) name_end = i; + if (name_end < l) { // check if this is really the end + int n_hyphen = 0; + for (i = name_end + 1; i < l; ++i) { + if (s[i] == '-') ++n_hyphen; + else if (!isdigit(s[i]) && s[i] != ',') break; + } + if (i < l || n_hyphen > 1) name_end = l; // malformated region string; then take str as the name + s[name_end] = 0; + iter = kh_get(s, h, s); + if (iter == kh_end(h)) { // cannot find the sequence name + iter = kh_get(s, h, str); // try str as the name + if (iter == kh_end(h)) { + if (bam_verbose >= 2) fprintf(stderr, "[%s] fail to determine the sequence name.\n", __func__); + free(s); return -1; + } else s[name_end] = ':', name_end = l; + } + } else iter = kh_get(s, h, str); + *ref_id = kh_val(h, iter); + // parse the interval + if (name_end < l) { + for (i = k = name_end + 1; i < l; ++i) + if (s[i] != ',') s[k++] = s[i]; + s[k] = 0; + *beg = atoi(s + name_end + 1); + for (i = name_end + 1; i != k; ++i) if (s[i] == '-') break; + *end = i < k? atoi(s + i + 1) : 1<<29; + if (*beg > 0) --*beg; + } else *beg = 0, *end = 1<<29; free(s); - if (*begin > *end) { - fprintf(stderr, "[bam_parse_region] invalid region.\n"); - return -1; - } - return 0; + return *beg <= *end? 0 : -1; } int32_t bam_aux2i(const uint8_t *s) @@ -164,69 +188,9 @@ char *bam_aux2Z(const uint8_t *s) else return 0; } -/****************** - * rg2lib related * - ******************/ - -int bam_strmap_put(void *rg2lib, const char *rg, const char *lib) -{ - int ret; - khint_t k; - khash_t(r2l) *h = (khash_t(r2l)*)rg2lib; - char *key; - if (h == 0) return 1; - key = strdup(rg); - k = kh_put(r2l, h, key, &ret); - if (ret) kh_val(h, k) = strdup(lib); - else { - fprintf(stderr, "[bam_rg2lib_put] duplicated @RG ID: %s\n", rg); - free(key); - } - return 0; -} - -const char *bam_strmap_get(const void *rg2lib, const char *rg) +#ifdef _WIN32 +double drand48() { - const khash_t(r2l) *h = (const khash_t(r2l)*)rg2lib; - khint_t k; - if (h == 0) return 0; - k = kh_get(r2l, h, rg); - if (k != kh_end(h)) return (const char*)kh_val(h, k); - else return 0; -} - -void *bam_strmap_dup(const void *rg2lib) -{ - const khash_t(r2l) *h = (const khash_t(r2l)*)rg2lib; - khash_t(r2l) *g; - khint_t k, l; - int ret; - if (h == 0) return 0; - g = kh_init(r2l); - for (k = kh_begin(h); k < kh_end(h); ++k) { - if (kh_exist(h, k)) { - char *key = strdup(kh_key(h, k)); - l = kh_put(r2l, g, key, &ret); - kh_val(g, l) = strdup(kh_val(h, k)); - } - } - return g; -} - -void *bam_strmap_init() -{ - return (void*)kh_init(r2l); -} - -void bam_strmap_destroy(void *rg2lib) -{ - khash_t(r2l) *h = (khash_t(r2l)*)rg2lib; - khint_t k; - if (h == 0) return; - for (k = kh_begin(h); k < kh_end(h); ++k) { - if (kh_exist(h, k)) { - free((char*)kh_key(h, k)); free(kh_val(h, k)); - } - } - kh_destroy(r2l, h); + return (double)rand() / RAND_MAX; } +#endif