Skip to content

Commit bf3dff0

Browse files
authored
Merge pull request #115 from chrovis/fix/fix-ter-site-ins-around-ter-site
2 parents: 007a8d0 + aaf40df; commit: bf3dff0

File tree

2 files changed

+54
-44
lines changed

2 files changed

+54
-44
lines changed

src/varity/vcf_to_hgvs/protein.clj

+51 -44
Original file line number | Diff line number | Diff line change
@@ -179,8 +179,11 @@
179179

180180
(defn- ter-site-same-pos?
181181
[ref-prot-seq alt-prot-seq]
182-
(let [ter-site-pos (dec (count ref-prot-seq))]
183-
(= \* (get alt-prot-seq ter-site-pos))))
182+
(and (string/includes? ref-prot-seq "*")
183+
(string/includes? alt-prot-seq "*")
184+
(let [ref-ter-pos (count ref-prot-seq)
185+
alt-ter-pos (inc (count (first (string/split alt-prot-seq #"\*"))))]
186+
(= ref-ter-pos alt-ter-pos))))
184187

185188
(defn- cds-start-upstream?
186189
[cds-start pos ref alt]
@@ -425,8 +428,12 @@
425428
pref-only
426429
palt-only)
427430
ini-site-affected (ini-site-affected? ref-exon-seq alt-exon-seq)
431+
first-diff-aa-is-ter-site (first-diff-aa-is-ter-site? base-ppos
432+
ref-prot-seq
433+
alt-prot-seq*)
428434
t (cond
429-
ref-include-from-ter-start-and-over-ter-end :frame-shift
435+
(and ref-include-from-ter-start-and-over-ter-end
436+
(not first-diff-aa-is-ter-site)) :frame-shift
430437
(= (+ base-ppos offset) (count ref-prot-seq)) (if (and (= "" pref-only palt-only)
431438
(ter-site-same-pos? ref-prot-seq alt-prot-seq*))
432439
:no-effect
@@ -443,16 +450,16 @@
443450

444451
:else
445452
:frame-shift)
446-
(and (pos? npref) (= (first palt-only) \*)) :substitution
453+
(and (pos? npref) (= (first palt-only) \*)) (if (ter-site-same-pos? ref-prot-seq alt-prot-seq*)
454+
:no-effect
455+
:substitution)
447456
(not= ref-prot-rest alt-prot-rest) (cond
448457
(or (and (= (first alt-prot-rest) \*)
449458
(>= nprefo npalto)
450459
(= palt (subs pref 0 (count palt))))
451460
(= (first palt-only) \*)) :fs-ter-substitution
452461
ref-include-ter-site :indel
453-
(first-diff-aa-is-ter-site? base-ppos
454-
ref-prot-seq
455-
alt-prot-seq*) :extension
462+
first-diff-aa-is-ter-site :extension
456463
:else :frame-shift)
457464
(or (and (zero? nprefo) (zero? npalto))
458465
(and (= nprefo 1) (= npalto 1))) :substitution
@@ -572,45 +579,45 @@
572579
(coord/unknown-coordinate))))))))
573580

574581
(defn- protein-extension
575-
[ppos pref palt {:keys [ref-prot-seq alt-tx-prot-seq c-ter-adjusted-alt-prot-seq ini-offset prefer-extension-for-initial-codon-alt?] :as seq-info}]
576-
(cond
577-
(and (not= ppos 1)
578-
(ter-site-same-pos? ref-prot-seq c-ter-adjusted-alt-prot-seq))
579-
(mut/protein-no-effect)
582+
[ppos pref palt {:keys [ref-prot-seq alt-tx-prot-seq ini-offset prefer-extension-for-initial-codon-alt?] :as seq-info}]
583+
(let [alt-prot-seq* (format-alt-prot-seq seq-info)]
584+
(cond
585+
(and (not= ppos 1)
586+
(ter-site-same-pos? ref-prot-seq alt-prot-seq*))
587+
(mut/protein-no-effect)
580588

581-
(and (= ppos 1) (not prefer-extension-for-initial-codon-alt?))
582-
(mut/protein-unknown-mutation)
589+
(and (= ppos 1) (not prefer-extension-for-initial-codon-alt?))
590+
(mut/protein-unknown-mutation)
583591

584-
:else
585-
(let [[_ ins offset _] (diff-bases (or pref "") (or palt ""))
586-
alt-prot-seq* (format-alt-prot-seq seq-info)
587-
ini-site ((comp str first) ref-prot-seq)
588-
first-diff-aa-info (if (= ppos 1)
589-
{:ppos 1
590-
:pref ini-site}
591-
(get-first-diff-aa-info ppos
592-
ref-prot-seq
593-
alt-prot-seq*))
594-
rest-seq (if (= ppos 1)
595-
(-> alt-tx-prot-seq
596-
(subs 0 ini-offset)
597-
reverse
598-
(#(apply str %)))
599-
(subs alt-prot-seq* (:ppos first-diff-aa-info)))
600-
alt-aa (mut/->long-amino-acid (if (= ppos 1)
601-
(or (last ins) (first rest-seq))
602-
(:palt first-diff-aa-info)))
603-
alt-aa-offset (if (and (= ppos 1) (nil? (last ins))) -1 0)
604-
new-aa-pos (some-> (string/index-of rest-seq (:pref first-diff-aa-info)) inc (+ alt-aa-offset))]
605-
(if (and (= ppos 1) (= alt-aa "Ter"))
606-
(mut/protein-unknown-mutation)
607-
(mut/protein-extension (if (= ppos 1) (mut/->long-amino-acid ini-site) "Ter")
608-
(coord/protein-coordinate (if (= ppos 1) 1 (+ ppos offset)))
609-
alt-aa
610-
(if (= ppos 1) :upstream :downstream)
611-
(if new-aa-pos
612-
(coord/protein-coordinate new-aa-pos)
613-
(coord/unknown-coordinate)))))))
592+
:else
593+
(let [[_ ins offset _] (diff-bases (or pref "") (or palt ""))
594+
ini-site ((comp str first) ref-prot-seq)
595+
first-diff-aa-info (if (= ppos 1)
596+
{:ppos 1
597+
:pref ini-site}
598+
(get-first-diff-aa-info ppos
599+
ref-prot-seq
600+
alt-prot-seq*))
601+
rest-seq (if (= ppos 1)
602+
(-> alt-tx-prot-seq
603+
(subs 0 ini-offset)
604+
reverse
605+
(#(apply str %)))
606+
(subs alt-prot-seq* (:ppos first-diff-aa-info)))
607+
alt-aa (mut/->long-amino-acid (if (= ppos 1)
608+
(or (last ins) (first rest-seq))
609+
(:palt first-diff-aa-info)))
610+
alt-aa-offset (if (and (= ppos 1) (nil? (last ins))) -1 0)
611+
new-aa-pos (some-> (string/index-of rest-seq (:pref first-diff-aa-info)) inc (+ alt-aa-offset))]
612+
(if (and (= ppos 1) (= alt-aa "Ter"))
613+
(mut/protein-unknown-mutation)
614+
(mut/protein-extension (if (= ppos 1) (mut/->long-amino-acid ini-site) "Ter")
615+
(coord/protein-coordinate (if (= ppos 1) 1 (+ ppos offset)))
616+
alt-aa
617+
(if (= ppos 1) :upstream :downstream)
618+
(if new-aa-pos
619+
(coord/protein-coordinate new-aa-pos)
620+
(coord/unknown-coordinate))))))))
614621

615622
(defn- protein-indel
616623
[ppos pref palt {:keys [ref-prot-seq c-ter-adjusted-alt-prot-seq

test/varity/vcf_to_hgvs_test.clj

+3
Original file line number | Diff line number | Diff line change
@@ -280,6 +280,8 @@
280280
"chr11" 125655318 "TGA" "TAT" '("p.*477Yext*17" "p.*443Yext*17" "p.*477Yext*24")
281281
"chr10" 8074014 "C" "CATGGGTT" '("p.*445Yext*64" "p.*444Yext*64") ; not actual example (+)
282282
"chr10" 87965468 "TC" "T" '("p.*404Eext*11" "p.*577Eext*11" "p.*207Eext*11") ; not actual example (+)
283+
"chrX" 15823239 "ATAA" "A" '("p.*483Text*?") ; not actual example (+)
284+
"chr13" 24421118 "CTTA" "C" '("p.*1725Vext*2") ; not actual example (-)
283285
;; NOTE: There are very few correct examples...
284286

285287
;; Extension without termination site
@@ -301,6 +303,7 @@
301303
"chr11" 14279340 "G" "A" '("p.=") ; not actual example (-)
302304
"chr7" 55019277 "G" "GTC" '("p.=") ; not actual example (+)
303305
"chr17" 21042835 "T" "TG" '("p.=") ; not actual example (-)
306+
"chr13" 24421121 "A" "ATTA" '("p.=") ; not actual example (-)
304307

305308
;; unknown
306309
"chr12" 40393453 "G" "A" '("p.?") ; not actual example (+)

0 commit comments

Comments
 (0)