X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=samtools.git;a=blobdiff_plain;f=bam_aux.c;h=28b22e3059273da57dd8395fba5e29287dc3228b;hp=fbcd9822b233dab64f6c861fb332846acb951ce7;hb=9a36c994ed991f79cc191ece6cbc5f1bf6410da2;hpb=67801bf17aa6495879da63ce77a2014759fabc16 diff --git a/bam_aux.c b/bam_aux.c index fbcd982..28b22e3 100644 --- a/bam_aux.c +++ b/bam_aux.c @@ -26,14 +26,12 @@ uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]) } #define __skip_tag(s) do { \ - int type = toupper(*(s)); \ - ++(s); \ - if (type == 'C' || type == 'A') ++(s); \ - else if (type == 'S') (s) += 2; \ - else if (type == 'I' || type == 'F') (s) += 4; \ - else if (type == 'D') (s) += 8; \ - else if (type == 'Z' || type == 'H') { while (*(s)) ++(s); ++(s); } \ - } while (0) + int type = toupper(*(s)); \ + ++(s); \ + if (type == 'Z' || type == 'H') { while (*(s)) ++(s); ++(s); } \ + else if (type == 'B') (s) += 5 + bam_aux_type2size(*(s)) * (*(int32_t*)((s)+1)); \ + else (s) += bam_aux_type2size(type); \ + } while(0) uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]) { @@ -61,6 +59,23 @@ int bam_aux_del(bam1_t *b, uint8_t *s) return 0; } +int bam_aux_drop_other(bam1_t *b, uint8_t *s) +{ + if (s) { + uint8_t *p, *aux; + aux = bam1_aux(b); + p = s - 2; + __skip_tag(s); + memmove(aux, p, s - p); + b->data_len -= b->l_aux - (s - p); + b->l_aux = s - p; + } else { + b->data_len -= b->l_aux; + b->l_aux = 0; + } + return 0; +} + void bam_init_header_hash(bam_header_t *header) { if (header->hash == 0) { @@ -89,47 +104,56 @@ int32_t bam_get_tid(const bam_header_t *header, const char *seq_name) return k == kh_end(h)? -1 : kh_value(h, k); } -int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end) +int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *beg, int *end) { - char *s, *p; - int i, l, k; + char *s; + int i, l, k, name_end; khiter_t iter; khash_t(s) *h; bam_init_header_hash(header); h = (khash_t(s)*)header->hash; - l = strlen(str); - p = s = (char*)malloc(l+1); - /* squeeze out "," */ - for (i = k = 0; i != l; ++i) - if (str[i] != ',' && !isspace(str[i])) s[k++] = str[i]; - s[k] = 0; - for (i = 0; i != k; ++i) if (s[i] == ':') break; - s[i] = 0; - iter = kh_get(s, h, s); /* get the ref_id */ - if (iter == kh_end(h)) { // name not found - *ref_id = -1; free(s); - return -1; - } - *ref_id = kh_value(h, iter); - if (i == k) { /* dump the whole sequence */ - *begin = 0; *end = 1<<29; free(s); - return 0; - } - for (p = s + i + 1; i != k; ++i) if (s[i] == '-') break; - *begin = atoi(p); - if (i < k) { - p = s + i + 1; - *end = atoi(p); - } else *end = 1<<29; - if (*begin > 0) --*begin; + *ref_id = *beg = *end = -1; + name_end = l = strlen(str); + s = (char*)malloc(l+1); + // remove space + for (i = k = 0; i < l; ++i) + if (!isspace(str[i])) s[k++] = str[i]; + s[k] = 0; l = k; + // determine the sequence name + for (i = l - 1; i >= 0; --i) if (s[i] == ':') break; // look for colon from the end + if (i >= 0) name_end = i; + if (name_end < l) { // check if this is really the end + int n_hyphen = 0; + for (i = name_end + 1; i < l; ++i) { + if (s[i] == '-') ++n_hyphen; + else if (!isdigit(s[i]) && s[i] != ',') break; + } + if (i < l || n_hyphen > 1) name_end = l; // malformated region string; then take str as the name + s[name_end] = 0; + iter = kh_get(s, h, s); + if (iter == kh_end(h)) { // cannot find the sequence name + iter = kh_get(s, h, str); // try str as the name + if (iter == kh_end(h)) { + if (bam_verbose >= 2) fprintf(stderr, "[%s] fail to determine the sequence name.\n", __func__); + free(s); return -1; + } else s[name_end] = ':', name_end = l; + } + } else iter = kh_get(s, h, str); + *ref_id = kh_val(h, iter); + // parse the interval + if (name_end < l) { + for (i = k = name_end + 1; i < l; ++i) + if (s[i] != ',') s[k++] = s[i]; + s[k] = 0; + *beg = atoi(s + name_end + 1); + for (i = name_end + 1; i != k; ++i) if (s[i] == '-') break; + *end = i < k? atoi(s + i + 1) : 1<<29; + if (*beg > 0) --*beg; + } else *beg = 0, *end = 1<<29; free(s); - if (*begin > *end) { - fprintf(stderr, "[bam_parse_region] invalid region.\n"); - return -1; - } - return 0; + return *beg <= *end? 0 : -1; } int32_t bam_aux2i(const uint8_t *s) @@ -180,3 +204,10 @@ char *bam_aux2Z(const uint8_t *s) if (type == 'Z' || type == 'H') return (char*)s; else return 0; } + +#ifdef _WIN32 +double drand48() +{ + return (double)rand() / RAND_MAX; +} +#endif