|
179 | 179 |
|
(defn- ter-site-same-pos?
  "Returns true when both protein sequences contain a termination symbol
  (`*`) and the first `*` in `alt-prot-seq` sits at the 1-based position
  equal to the length of `ref-prot-seq`; returns false otherwise.

  The reference termination position is taken to be the last residue of
  `ref-prot-seq` (its length), i.e. the reference is presumed to end with
  `*` -- NOTE(review): confirm against callers. Anything following the
  first `*` in `alt-prot-seq` is ignored."
  [ref-prot-seq alt-prot-seq]
  (let [alt-ter-idx (string/index-of alt-prot-seq "*")]
    (and (string/includes? ref-prot-seq "*")
         ;; nil index means alt has no termination symbol at all
         (some? alt-ter-idx)
         ;; index-of is 0-based, so inc yields the 1-based Ter position
         (= (count ref-prot-seq) (inc alt-ter-idx)))))
184 | 187 |
|
185 | 188 | (defn- cds-start-upstream?
|
186 | 189 | [cds-start pos ref alt]
|
|
425 | 428 | pref-only
|
426 | 429 | palt-only)
|
427 | 430 | ini-site-affected (ini-site-affected? ref-exon-seq alt-exon-seq)
|
| 431 | + first-diff-aa-is-ter-site (first-diff-aa-is-ter-site? base-ppos |
| 432 | + ref-prot-seq |
| 433 | + alt-prot-seq*) |
428 | 434 | t (cond
|
429 |
| - ref-include-from-ter-start-and-over-ter-end :frame-shift |
| 435 | + (and ref-include-from-ter-start-and-over-ter-end |
| 436 | + (not first-diff-aa-is-ter-site)) :frame-shift |
430 | 437 | (= (+ base-ppos offset) (count ref-prot-seq)) (if (and (= "" pref-only palt-only)
|
431 | 438 | (ter-site-same-pos? ref-prot-seq alt-prot-seq*))
|
432 | 439 | :no-effect
|
|
443 | 450 |
|
444 | 451 | :else
|
445 | 452 | :frame-shift)
|
446 |
| - (and (pos? npref) (= (first palt-only) \*)) :substitution |
| 453 | + (and (pos? npref) (= (first palt-only) \*)) (if (ter-site-same-pos? ref-prot-seq alt-prot-seq*) |
| 454 | + :no-effect |
| 455 | + :substitution) |
447 | 456 | (not= ref-prot-rest alt-prot-rest) (cond
|
448 | 457 | (or (and (= (first alt-prot-rest) \*)
|
449 | 458 | (>= nprefo npalto)
|
450 | 459 | (= palt (subs pref 0 (count palt))))
|
451 | 460 | (= (first palt-only) \*)) :fs-ter-substitution
|
452 | 461 | ref-include-ter-site :indel
|
453 |
| - (first-diff-aa-is-ter-site? base-ppos |
454 |
| - ref-prot-seq |
455 |
| - alt-prot-seq*) :extension |
| 462 | + first-diff-aa-is-ter-site :extension |
456 | 463 | :else :frame-shift)
|
457 | 464 | (or (and (zero? nprefo) (zero? npalto))
|
458 | 465 | (and (= nprefo 1) (= npalto 1))) :substitution
|
|
572 | 579 | (coord/unknown-coordinate))))))))
|
573 | 580 |
|
(defn- protein-extension
  "Builds the protein-level mutation for a candidate extension at protein
  position `ppos`, given the differing reference/alternate residues `pref`
  and `palt` (either may be nil) and the `seq-info` map.

  Outcomes, in order of precedence:
  - `ppos` is not 1 and the termination site is at the same position in the
    reference and the formatted alternate sequence -> `mut/protein-no-effect`.
  - `ppos` is 1 and `prefer-extension-for-initial-codon-alt?` is falsy
    -> `mut/protein-unknown-mutation`.
  - `ppos` is 1 and the replacing amino acid is \"Ter\"
    -> `mut/protein-unknown-mutation`.
  - otherwise -> `mut/protein-extension`, `:upstream` when `ppos` is 1 and
    `:downstream` otherwise, with the new site coordinate unknown when it
    cannot be located in the remaining alternate sequence."
  [ppos pref palt {:keys [ref-prot-seq alt-tx-prot-seq ini-offset prefer-extension-for-initial-codon-alt?] :as seq-info}]
  (let [alt-prot-seq* (format-alt-prot-seq seq-info)
        ;; position 1 selects the upstream (initial-codon) handling below
        ini-codon? (= ppos 1)]
    (cond
      (and (not ini-codon?)
           (ter-site-same-pos? ref-prot-seq alt-prot-seq*))
      (mut/protein-no-effect)

      (and ini-codon? (not prefer-extension-for-initial-codon-alt?))
      (mut/protein-unknown-mutation)

      :else
      (let [[_ ins offset _] (diff-bases (or pref "") (or palt ""))
            ini-site ((comp str first) ref-prot-seq)
            first-diff-aa-info (if ini-codon?
                                 {:ppos 1
                                  :pref ini-site}
                                 (get-first-diff-aa-info ppos
                                                         ref-prot-seq
                                                         alt-prot-seq*))
            ;; upstream: the first `ini-offset` residues of the alternate
            ;; transcript translation, reversed; downstream: the alternate
            ;; sequence after the first differing residue
            rest-seq (if ini-codon?
                       (-> alt-tx-prot-seq
                           (subs 0 ini-offset)
                           reverse
                           (#(apply str %)))
                       (subs alt-prot-seq* (:ppos first-diff-aa-info)))
            alt-aa (mut/->long-amino-acid (if ini-codon?
                                            (or (last ins) (first rest-seq))
                                            (:palt first-diff-aa-info)))
            ;; when no inserted residue supplied alt-aa it was taken from
            ;; rest-seq itself, so the located position is shifted back by one
            alt-aa-offset (if (and ini-codon? (nil? (last ins))) -1 0)
            new-aa-pos (some-> (string/index-of rest-seq (:pref first-diff-aa-info)) inc (+ alt-aa-offset))]
        (if (and ini-codon? (= alt-aa "Ter"))
          (mut/protein-unknown-mutation)
          (mut/protein-extension (if ini-codon? (mut/->long-amino-acid ini-site) "Ter")
                                 (coord/protein-coordinate (if ini-codon? 1 (+ ppos offset)))
                                 alt-aa
                                 (if ini-codon? :upstream :downstream)
                                 (if new-aa-pos
                                   (coord/protein-coordinate new-aa-pos)
                                   (coord/unknown-coordinate))))))))
614 | 621 |
|
615 | 622 | (defn- protein-indel
|
616 | 623 | [ppos pref palt {:keys [ref-prot-seq c-ter-adjusted-alt-prot-seq
|
|
0 commit comments