Merge commit 'upstream/0.1.18'
[samtools.git] / sam.c
diff --git a/sam.c b/sam.c
index 45cb05cf3dd6f8a04acfd4dd92c8d250835855a2..f026bc80864dbc75557bf632c760978e54acc453 100644 (file)
--- a/sam.c
+++ b/sam.c
@@ -1,4 +1,6 @@
 #include <string.h>
+#include <unistd.h>
+#include "faidx.h"
 #include "sam.h"
 
 #define TYPE_BAM  1
@@ -10,7 +12,7 @@ bam_header_t *bam_header_dup(const bam_header_t *h0)
        int i;
        h = bam_header_init();
        *h = *h0;
-       h->hash = 0;
+       h->hash = h->dict = h->rg2lib = 0;
        h->text = (char*)calloc(h->l_text + 1, 1);
        memcpy(h->text, h0->text, h->l_text);
        h->target_len = (uint32_t*)calloc(h->n_targets, 4);
@@ -19,7 +21,6 @@ bam_header_t *bam_header_dup(const bam_header_t *h0)
                h->target_len[i] = h0->target_len[i];
                h->target_name[i] = strdup(h0->target_name[i]);
        }
-       if (h0->rg2lib) h->rg2lib = bam_strmap_dup(h0->rg2lib);
        return h;
 }
 static void append_header_text(bam_header_t *header, char* text, int len)
@@ -39,9 +40,9 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
 {
        samfile_t *fp;
        fp = (samfile_t*)calloc(1, sizeof(samfile_t));
-       if (mode[0] == 'r') { // read
+       if (strchr(mode, 'r')) { // read
                fp->type |= TYPE_READ;
-               if (mode[1] == 'b') { // binary
+               if (strchr(mode, 'b')) { // binary
                        fp->type |= TYPE_BAM;
                        fp->x.bam = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
                        if (fp->x.bam == 0) goto open_err_ret;
@@ -54,19 +55,23 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
                                if (aux) { // check if aux is present
                                        bam_header_t *textheader = fp->header;
                                        fp->header = sam_header_read2((const char*)aux);
+                                       if (fp->header == 0) goto open_err_ret;
                                        append_header_text(fp->header, textheader->text, textheader->l_text);
                                        bam_header_destroy(textheader);
                                }
-                               if (fp->header->n_targets == 0)
+                               if (fp->header->n_targets == 0 && bam_verbose >= 1)
                                        fprintf(stderr, "[samopen] no @SQ lines in the header.\n");
-                       } else fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets);
+                       } else if (bam_verbose >= 2) fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets);
                }
-               sam_header_parse_rg(fp->header);
-       } else if (mode[0] == 'w') { // write
+       } else if (strchr(mode, 'w')) { // write
                fp->header = bam_header_dup((const bam_header_t*)aux);
-               if (mode[1] == 'b') { // binary
+               if (strchr(mode, 'b')) { // binary
                        char bmode[3];
-                       bmode[0] = 'w'; bmode[1] = strstr(mode, "u")? 'u' : 0; bmode[2] = 0;
+                       int i, compress_level = -1;
+                       for (i = 0; mode[i]; ++i) if (mode[i] >= '0' && mode[i] <= '9') break;
+                       if (mode[i]) compress_level = mode[i] - '0';
+                       if (strchr(mode, 'u')) compress_level = 0;
+                       bmode[0] = 'w'; bmode[1] = compress_level < 0? 0 : compress_level + '0'; bmode[2] = 0;
                        fp->type |= TYPE_BAM;
                        fp->x.bam = strcmp(fn, "-")? bam_open(fn, bmode) : bam_dopen(fileno(stdout), bmode);
                        if (fp->x.bam == 0) goto open_err_ret;
@@ -75,8 +80,11 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
                        // open file
                        fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout;
                        if (fp->x.tamr == 0) goto open_err_ret;
+                       if (strchr(mode, 'X')) fp->type |= BAM_OFSTR<<2;
+                       else if (strchr(mode, 'x')) fp->type |= BAM_OFHEX<<2;
+                       else fp->type |= BAM_OFDEC<<2;
                        // write header
-                       if (strstr(mode, "h")) {
+                       if (strchr(mode, 'h')) {
                                int i;
                                bam_header_t *alt;
                                // parse the header text 
@@ -85,10 +93,10 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
                                sam_header_parse(alt);
                                alt->l_text = 0; alt->text = 0;
                                // check if there are @SQ lines in the header
-                               fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw);
+                               fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw); // FIXME: better to skip the trailing NULL
                                if (alt->n_targets) { // then write the header text without dumping ->target_{name,len}
-                                       if (alt->n_targets != fp->header->n_targets)
-                                               fprintf(stderr, "[samopen] inconsistent number of target sequences.\n");
+                                       if (alt->n_targets != fp->header->n_targets && bam_verbose >= 1)
+                                               fprintf(stderr, "[samopen] inconsistent number of target sequences. Output the text header.\n");
                                } else { // then dump ->target_{name,len}
                                        for (i = 0; i < fp->header->n_targets; ++i)
                                                fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]);
@@ -126,7 +134,7 @@ int samwrite(samfile_t *fp, const bam1_t *b)
        if (fp == 0 || (fp->type & TYPE_READ)) return -1; // not open for writing
        if (fp->type & TYPE_BAM) return bam_write1(fp->x.bam, b);
        else {
-               char *s = bam_format1(fp->header, b);
+               char *s = bam_format1_core(fp->header, b, fp->type>>2&3);
                int l = strlen(s);
                fputs(s, fp->x.tamw); fputc('\n', fp->x.tamw);
                free(s);
@@ -149,3 +157,23 @@ int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *func_data)
        bam_destroy1(b);
        return 0;
 }
+
+// Return a calloc'ed "<fn_ref>.fai" path (caller frees); 0 if fn_ref is 0, allocation fails, or the index cannot be built.
+char *samfaipath(const char *fn_ref)
+{
+       char *fn_list = fn_ref? calloc(strlen(fn_ref) + 5, 1) : 0; // +5: ".fai" plus the terminating NUL
+       if (fn_list == 0) return 0; // no reference name given, or out of memory (never strcpy into NULL)
+       strcat(strcpy(fn_list, fn_ref), ".fai");
+       if (access(fn_list, R_OK) == -1) { // fn_list is unreadable
+               if (access(fn_ref, R_OK) == -1) { // reference unreadable too: only report it; the path is still returned
+                       fprintf(stderr, "[samfaipath] fail to read file %s.\n", fn_ref);
+               } else {
+                       if (bam_verbose >= 3) fprintf(stderr, "[samfaipath] build FASTA index...\n");
+                       if (fai_build(fn_ref) == -1) {
+                               fprintf(stderr, "[samfaipath] fail to build FASTA index.\n");
+                               free(fn_list); fn_list = 0;
+                       }
+               }
+       }
+       return fn_list;
+}