From 478c2e84f597b7c4ca655e73ba26e7e6f9d99170 Mon Sep 17 00:00:00 2001 From: BenLangmead Date: Tue, 17 Oct 2017 16:37:00 -0400 Subject: [PATCH] print features for qtip --- bwamem.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++- bwamem.h | 8 ++++++- bwamem_pair.c | 15 +++++++++++++ 3 files changed, 80 insertions(+), 2 deletions(-) diff --git a/bwamem.c b/bwamem.c index eeaf4f43..fc328613 100644 --- a/bwamem.c +++ b/bwamem.c @@ -505,6 +505,8 @@ static void mem_mark_primary_se_core(const mem_opt_t *opt, int n, mem_alnreg_t * int e_min = a[j].qe < a[i].qe? a[j].qe : a[i].qe; if (e_min > b_max) { // have overlap int min_l = a[i].qe - a[i].qb < a[j].qe - a[j].qb? a[i].qe - a[i].qb : a[j].qe - a[j].qb; + a[j].osub = a[j].osub < a[i].score ? a[i].score : a[j].osub; + a[j].osub_n++; if (e_min - b_max >= min_l * opt->mask_level) { // significant overlap if (a[j].sub == 0) a[j].sub = a[i].score; if (a[j].score - a[i].score <= tmp && (a[j].is_alt || !a[i].is_alt)) @@ -942,6 +944,52 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq for (i = tmp; i < str->l; ++i) // replace TAB in the comment to SPACE if (str->s[i] == '\t') str->s[i] = ' '; } + if ((p->flag & 4) == 0) { + kputsn("\tZT:Z:", 6, str); + kputw(p->score, str); // same as AS:i, ztz0 + kputc(',', str); + if(p->n_cigar) { + kputw(p->NM, str); // ztz1 + } else { + kputsn("NA", 2, str); + } + kputc(',', str); + if(p->sub >= 0) { + kputw(p->score - p->sub, str); // ztz2 + } else { + kputsn("NA", 2, str); + } + kputc(',', str); + if(p->osub >= 0) { + kputw(p->score - p->osub, str); // ztz3 + } else { + kputsn("NA", 2, str); + } + kputc(',', str); + kputw(p->pair_score, str); // ztz4 + kputc(',', str); + if(p->pair_sub >= 0) { + kputw(p->pair_score - p->pair_sub, str); // ztz5 + } else { + kputsn("NA", 2, str); + } + kputc(',', str); + kputw(p->seedlen0, str); // ztz6 + kputc(',', str); + kputw(p->sub_n, str); // ztz7 + kputc(',', str); + kputw(p->osub_n, str); // ztz8 + kputc(',', str); + kputw(p->pair_nsub, str); // ztz9 + kputc(',', str); + kputw((int)(p->frac_rep * 1000.0), str); // ztz10 + kputc(',', str); + kputw((int)(p->pair_frac_rep * 1000.0), str); // ztz11 + kputc(',', str); + kputw((int)(p->seedcov * 1000.0), str); // ztz12 + kputc(',', str); + kputw((int)(p->pair_seedcov * 1000.0), str); // ztz13 + } kputc('\n', str); } @@ -1152,8 +1200,17 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t * a.rid = bns_pos2rid(bns, pos); assert(a.rid == ar->rid); a.pos = pos - bns->anns[a.rid].offset; - a.score = ar->score; a.sub = ar->sub > ar->csub? ar->sub : ar->csub; + a.score = ar->score; + a.sub = ar->sub > ar->csub? ar->sub : ar->csub; + a.osub = ar->osub; a.is_alt = ar->is_alt; a.alt_sc = ar->alt_sc; + a.seedlen0 = ar->seedlen0; + a.n_comp = ar->n_comp; + a.seedcov = ((float)ar->seedcov) / l_query; + a.frac_rep = ar->frac_rep; + a.sub_n = ar->sub_n; a.osub_n = ar->osub_n; + a.pair_score = a.pair_sub = a.pair_nsub = 0; + a.pair_frac_rep = a.pair_seedcov = 0.; free(query); return a; } diff --git a/bwamem.h b/bwamem.h index fb572b28..3e4a4978 100644 --- a/bwamem.h +++ b/bwamem.h @@ -63,9 +63,11 @@ typedef struct { int score; // best local SW score int truesc; // actual score corresponding to the aligned region; possibly smaller than $score int sub; // 2nd best SW score + int osub; // more liberal 2nd best SW score int alt_sc; int csub; // SW score of a tandem hit int sub_n; // approximate number of suboptimal hits + int osub_n; // more liberal approximate number of suboptimal hits int w; // actual band width used in extension int seedcov; // length of regions coverged by seeds int secondary; // index of the parent hit shadowing the current hit; <0 if primary @@ -93,7 +95,11 @@ typedef struct { // This struct is only used for the convenience of API. uint32_t *cigar; // CIGAR in the BAM encoding: opLen<<4|op; op to integer mapping: MIDSH=>01234 char *XA; // alternative mappings - int score, sub, alt_sc; + int score, sub, osub, alt_sc; + float seedcov, pair_seedcov; + int sub_n, osub_n, pair_score, pair_sub, pair_nsub; + float frac_rep, pair_frac_rep; + int seedlen0, n_comp; } mem_aln_t; #ifdef __cplusplus diff --git a/bwamem_pair.c b/bwamem_pair.c index 9beb84b6..ea6f3c89 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -351,6 +351,21 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co aa[i][n_aa[i]++] = g[i]; } } + // Might be less work if we installed these earlier + h[0].pair_score = h[1].pair_score = o; + h[0].pair_sub = h[1].pair_sub = subo; + h[0].pair_nsub = h[1].pair_nsub = n_sub; + h[0].pair_frac_rep = h[1].pair_frac_rep = a[0].a[0].frac_rep + a[1].a[0].frac_rep; + h[0].pair_seedcov = h[1].pair_seedcov = ((float)(a[0].a[0].seedcov + a[1].a[0].seedcov)) / (s[0].l_seq + s[1].l_seq); + for (j = 0; j < 2; ++j) { + for (i = 0; i < n_aa[j]; ++i) { + aa[j][i].pair_score = h[j].pair_score; + aa[j][i].pair_sub = h[j].pair_sub; + aa[j][i].pair_nsub = h[j].pair_nsub; + aa[j][i].pair_frac_rep = h[j].pair_frac_rep; + aa[j][i].pair_seedcov = h[j].pair_seedcov; + } + } for (i = 0; i < n_aa[0]; ++i) mem_aln2sam(opt, bns, &str, &s[0], n_aa[0], aa[0], i, &h[1]); // write read1 hits s[0].sam = strdup(str.s); str.l = 0;