Skip to content

Commit 1663bc4

Browse files
committed
return no-effect when ter site is inserted around ter site
1 parent 007a8d0 commit 1663bc4

File tree

2 files changed

+46
-40
lines changed

2 files changed

+46
-40
lines changed

src/varity/vcf_to_hgvs/protein.clj

+45-40
Original file line number | Diff line number | Diff line change
@@ -179,8 +179,11 @@
179179

180180
(defn- ter-site-same-pos?
181181
[ref-prot-seq alt-prot-seq]
182-
(let [ter-site-pos (dec (count ref-prot-seq))]
183-
(= \* (get alt-prot-seq ter-site-pos))))
182+
(let [ref-ter-pos (count ref-prot-seq)
183+
alt-ter-pos (inc (count (first (string/split alt-prot-seq #"\*"))))]
184+
(and (string/includes? ref-prot-seq "*")
185+
(string/includes? alt-prot-seq "*")
186+
(= ref-ter-pos alt-ter-pos))))
184187

185188
(defn- cds-start-upstream?
186189
[cds-start pos ref alt]
@@ -443,7 +446,9 @@
443446

444447
:else
445448
:frame-shift)
446-
(and (pos? npref) (= (first palt-only) \*)) :substitution
449+
(and (pos? npref) (= (first palt-only) \*)) (if (ter-site-same-pos? ref-prot-seq alt-prot-seq*)
450+
:no-effect
451+
:substitution)
447452
(not= ref-prot-rest alt-prot-rest) (cond
448453
(or (and (= (first alt-prot-rest) \*)
449454
(>= nprefo npalto)
@@ -572,45 +577,45 @@
572577
(coord/unknown-coordinate))))))))
573578

574579
(defn- protein-extension
575-
[ppos pref palt {:keys [ref-prot-seq alt-tx-prot-seq c-ter-adjusted-alt-prot-seq ini-offset prefer-extension-for-initial-codon-alt?] :as seq-info}]
576-
(cond
577-
(and (not= ppos 1)
578-
(ter-site-same-pos? ref-prot-seq c-ter-adjusted-alt-prot-seq))
579-
(mut/protein-no-effect)
580+
[ppos pref palt {:keys [ref-prot-seq alt-tx-prot-seq ini-offset prefer-extension-for-initial-codon-alt?] :as seq-info}]
581+
(let [alt-prot-seq* (format-alt-prot-seq seq-info)]
582+
(cond
583+
(and (not= ppos 1)
584+
(ter-site-same-pos? ref-prot-seq alt-prot-seq*))
585+
(mut/protein-no-effect)
580586

581-
(and (= ppos 1) (not prefer-extension-for-initial-codon-alt?))
582-
(mut/protein-unknown-mutation)
587+
(and (= ppos 1) (not prefer-extension-for-initial-codon-alt?))
588+
(mut/protein-unknown-mutation)
583589

584-
:else
585-
(let [[_ ins offset _] (diff-bases (or pref "") (or palt ""))
586-
alt-prot-seq* (format-alt-prot-seq seq-info)
587-
ini-site ((comp str first) ref-prot-seq)
588-
first-diff-aa-info (if (= ppos 1)
589-
{:ppos 1
590-
:pref ini-site}
591-
(get-first-diff-aa-info ppos
592-
ref-prot-seq
593-
alt-prot-seq*))
594-
rest-seq (if (= ppos 1)
595-
(-> alt-tx-prot-seq
596-
(subs 0 ini-offset)
597-
reverse
598-
(#(apply str %)))
599-
(subs alt-prot-seq* (:ppos first-diff-aa-info)))
600-
alt-aa (mut/->long-amino-acid (if (= ppos 1)
601-
(or (last ins) (first rest-seq))
602-
(:palt first-diff-aa-info)))
603-
alt-aa-offset (if (and (= ppos 1) (nil? (last ins))) -1 0)
604-
new-aa-pos (some-> (string/index-of rest-seq (:pref first-diff-aa-info)) inc (+ alt-aa-offset))]
605-
(if (and (= ppos 1) (= alt-aa "Ter"))
606-
(mut/protein-unknown-mutation)
607-
(mut/protein-extension (if (= ppos 1) (mut/->long-amino-acid ini-site) "Ter")
608-
(coord/protein-coordinate (if (= ppos 1) 1 (+ ppos offset)))
609-
alt-aa
610-
(if (= ppos 1) :upstream :downstream)
611-
(if new-aa-pos
612-
(coord/protein-coordinate new-aa-pos)
613-
(coord/unknown-coordinate)))))))
590+
:else
591+
(let [[_ ins offset _] (diff-bases (or pref "") (or palt ""))
592+
ini-site ((comp str first) ref-prot-seq)
593+
first-diff-aa-info (if (= ppos 1)
594+
{:ppos 1
595+
:pref ini-site}
596+
(get-first-diff-aa-info ppos
597+
ref-prot-seq
598+
alt-prot-seq*))
599+
rest-seq (if (= ppos 1)
600+
(-> alt-tx-prot-seq
601+
(subs 0 ini-offset)
602+
reverse
603+
(#(apply str %)))
604+
(subs alt-prot-seq* (:ppos first-diff-aa-info)))
605+
alt-aa (mut/->long-amino-acid (if (= ppos 1)
606+
(or (last ins) (first rest-seq))
607+
(:palt first-diff-aa-info)))
608+
alt-aa-offset (if (and (= ppos 1) (nil? (last ins))) -1 0)
609+
new-aa-pos (some-> (string/index-of rest-seq (:pref first-diff-aa-info)) inc (+ alt-aa-offset))]
610+
(if (and (= ppos 1) (= alt-aa "Ter"))
611+
(mut/protein-unknown-mutation)
612+
(mut/protein-extension (if (= ppos 1) (mut/->long-amino-acid ini-site) "Ter")
613+
(coord/protein-coordinate (if (= ppos 1) 1 (+ ppos offset)))
614+
alt-aa
615+
(if (= ppos 1) :upstream :downstream)
616+
(if new-aa-pos
617+
(coord/protein-coordinate new-aa-pos)
618+
(coord/unknown-coordinate))))))))
614619

615620
(defn- protein-indel
616621
[ppos pref palt {:keys [ref-prot-seq c-ter-adjusted-alt-prot-seq

test/varity/vcf_to_hgvs_test.clj

+1
Original file line number | Diff line number | Diff line change
@@ -301,6 +301,7 @@
301301
"chr11" 14279340 "G" "A" '("p.=") ; not actual example (-)
302302
"chr7" 55019277 "G" "GTC" '("p.=") ; not actual example (+)
303303
"chr17" 21042835 "T" "TG" '("p.=") ; not actual example (-)
304+
"chr13" 24421121 "A" "ATTA" '("p.=") ; not actual example (-)
304305

305306
;; unknown
306307
"chr12" 40393453 "G" "A" '("p.?") ; not actual example (+)

0 commit comments

Comments
 (0)