Skip to content

Commit ceab74f

Browse files
committed
fix: fix substitution for nonsense variant
1 parent 813c12b commit ceab74f

File tree

2 files changed

+16
-7
lines changed

2 files changed

+16
-7
lines changed

src/varity/vcf_to_hgvs/protein.clj

+15-7
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,7 @@
417417
:reverse (protein-position pos alt-rg))
418418
(count alt-prot-seq*))])
419419
[pref-only palt-only offset _] (diff-bases pref palt)
420+
npref (count pref)
420421
nprefo (count pref-only)
421422
npalto (count palt-only)
422423
[unit ref-repeat alt-repeat] (repeat-info* ref-prot-seq
@@ -436,13 +437,13 @@
436437
:unknown
437438

438439
(or (= ref-prot-rest alt-prot-rest)
439-
(and prefer-extension-for-initial-codon-alt?
440-
(not= (first ref-prot-seq) (first alt-prot-seq*))))
440+
(and prefer-extension-for-initial-codon-alt?
441+
(not= (first ref-prot-seq) (first alt-prot-seq*))))
441442
:extension
442443

443444
:else
444445
:frame-shift)
445-
(and (pos? nprefo) (= (first palt-only) \*)) :substitution
446+
(and (pos? npref) (= (first palt-only) \*)) :substitution
446447
(not= ref-prot-rest alt-prot-rest) (cond
447448
(or (and (= (first alt-prot-rest) \*)
448449
(>= nprefo npalto)
@@ -480,12 +481,19 @@
480481
palt)})))
481482

482483
(defn- protein-substitution
483-
[ppos pref palt]
484+
[ppos pref palt {:keys [ref-prot-seq alt-prot-seq]}]
484485
(let [[s-ref s-alt offset _] (diff-bases pref palt)]
485-
(if (and (empty? s-ref) (empty? s-alt))
486+
(cond
487+
(and (empty? s-ref) (empty? s-alt))
486488
(mut/protein-substitution (mut/->long-amino-acid (last pref))
487489
(coord/protein-coordinate ppos)
488490
(mut/->long-amino-acid (last palt)))
491+
(empty? s-ref)
492+
(let [{:keys [ppos pref palt]} (get-first-diff-aa-info ppos ref-prot-seq alt-prot-seq)]
493+
(mut/protein-substitution (mut/->long-amino-acid pref)
494+
(coord/protein-coordinate ppos)
495+
(mut/->long-amino-acid palt)))
496+
:else
489497
(mut/protein-substitution (mut/->long-amino-acid (first s-ref))
490498
(coord/protein-coordinate (+ ppos offset))
491499
(mut/->long-amino-acid (first s-alt))))))
@@ -555,7 +563,7 @@
555563
(subs (dec (+ ppos offset)))
556564
(string/index-of "*"))]
557565
(if (= alt \*)
558-
(protein-substitution (+ ppos offset) (str ref) (str alt)) ; eventually fs-ter-substitution
566+
(protein-substitution (+ ppos offset) (str ref) (str alt) seq-info) ; eventually fs-ter-substitution
559567
(mut/protein-frame-shift (mut/->long-amino-acid ref)
560568
(coord/protein-coordinate (+ ppos offset))
561569
(mut/->long-amino-acid alt)
@@ -662,7 +670,7 @@
662670
pvariant)
663671
seq-info (merge seq-info options)]
664672
(case (:type pvariant)
665-
:substitution (protein-substitution ppos pref palt)
673+
:substitution (protein-substitution ppos pref palt seq-info)
666674
:deletion (protein-deletion ppos pref palt)
667675
:duplication (protein-duplication ppos pref palt)
668676
:insertion (protein-insertion ppos pref palt seq-info)

test/varity/vcf_to_hgvs_test.clj

+1
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@
197197
"p.A35=") ; cf. rs786201577 (synonymous)
198198
"chr6" 33086236 "TA" "T" '("p.*259=") ; cf. rs67523850 (deletion in border of UTR)
199199
"chr7" 152247986 "G" "GT" '("p.Y816*") ; cf. rs150073007 (-, nonsense mutation)
200+
"chr18" 51048782 "C" "CAGT" '("p.Y117*") ; cf. not actual example (+, inframe nonsense mutation)
200201
"chr17" 31159027 "TGC" "T" '("p.A75*") ; not actual example (+, nonsense in del case)
201202
"chr2" 47478341 "TG" "T" '("p.L762*" "p.L696*") ;; rs786204050 (+) frameshift with termination
202203
"chr17" 7676202 "T" "TGTCCCTTAGTCTT" '("p.P58*" "p.P19*") ; cf. not actual example (-, frameshift with termination)

0 commit comments

Comments
 (0)