Skip to content

Commit

Permalink
Make bcf_hdr_seqnames() work with gapped chromosome ids
Browse files Browse the repository at this point in the history
The bcf_hdr_remove() call can create gaps in tid blocks which fail
assertion in bcf_hdr_seqnames().

This problem was encountered in samtools#1533, but is only a partial fix
of the problem
  • Loading branch information
pd3 committed Dec 14, 2022
1 parent ccefe6c commit 54a7d45
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 7 deletions.
6 changes: 5 additions & 1 deletion htslib/vcf.h
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,11 @@ set to one of BCF_ERR* codes and must be checked before calling bcf_write().
HTSLIB_EXPORT
bcf_hdr_t *bcf_hdr_subset(const bcf_hdr_t *h0, int n, char *const* samples, int *imap);

/** Creates a list of sequence names. It is up to the caller to free the list (but not the sequence names) */
/**
* Creates a list of sequence names. It is up to the caller to free the list (but not the sequence names).
* NB: sequence name indexes returned by bcf_hdr_seqnames() may not correspond to bcf1_t.rid, use
* bcf_hdr_id2name() or bcf_seqname() instead.
*/
HTSLIB_EXPORT
const char **bcf_hdr_seqnames(const bcf_hdr_t *h, int *nseqs);

Expand Down
36 changes: 30 additions & 6 deletions vcf.c
Original file line number Diff line number Diff line change
Expand Up @@ -2214,20 +2214,44 @@ char *bcf_hdr_fmt_text(const bcf_hdr_t *hdr, int is_bcf, int *len)
const char **bcf_hdr_seqnames(const bcf_hdr_t *h, int *n)
{
vdict_t *d = (vdict_t*)h->dict[BCF_DT_CTG];
int tid, m = kh_size(d);
int i, tid, m = kh_size(d);
const char **names = (const char**) calloc(m,sizeof(const char*));
if ( !names )
{
hts_log_error("Failed to allocate memory");
*n = 0;
return NULL;
}
khint_t k;
for (k=kh_begin(d); k<kh_end(d); k++)
{
if ( !kh_exist(d,k) ) continue;
if ( !kh_val(d, k).hrec[0] ) continue; // removed via bcf_hdr_remove
tid = kh_val(d,k).id;
assert( tid<m );
if ( tid >= m )
{
// This can happen after a contig has been removed from BCF header via bcf_hdr_remove()
if ( hts_resize(const char*, tid + 1, &m, &names, HTS_RESIZE_CLEAR)<0 )
{
hts_log_error("Failed to allocate memory");
*n = 0;
free(names);
return NULL;
}
m = tid + 1;
}
names[tid] = kh_key(d,k);
}
// sanity check: there should be no gaps
for (tid=0; tid<m; tid++)
assert(names[tid]);
*n = m;
// ensure there are no gaps
for (i=0,tid=0; tid<m; i++,tid++)
{
while ( tid<m && !names[tid] ) tid++;
if ( tid==m ) break;
if ( i==tid ) continue;
names[i] = names[tid];
names[tid] = 0;
}
*n = i;
return names;
}

Expand Down

0 comments on commit 54a7d45

Please sign in to comment.