|
177 | 177 | (and (= pos ter-start-pos) (<= ter-end-pos pos-end))
|
178 | 178 | (and (= pos-end ter-start-pos) (<= pos ter-end-pos)))))
|
179 | 179 |
|
(defn- ref-include-from-ter-upstream-and-over-ter-end?
  "Returns true when the bases removed by the variant begin strictly on one
  side of a three-base window at the end of the CDS and finish strictly on
  the other side of it.

  The window is `cds-end - 2 .. cds-end` for a `:forward` strand and
  `cds-start .. cds-start + 2` for any other strand (presumably the
  termination codon -- confirm against callers).

  `pos`, `ref` and `alt` describe the variant. The span that is tested starts
  at `pos` plus the third value returned by `diff-bases` and covers as many
  positions as the first value returned by `diff-bases` has elements; when
  that first value is empty the span collapses to the single start position."
  [{:keys [strand cds-start cds-end]} pos ref alt]
  (let [[deleted _ left-offset _] (diff-bases ref alt)
        span-start (+ pos left-offset)
        ;; `count` is never negative, so clamping `(dec n)` at zero gives a
        ;; zero extension for both empty and single-base deletions.
        span-end (+ span-start (max 0 (dec (count deleted))))]
    (if (= strand :forward)
      (let [ter-first (- cds-end 2)]
        (and (< span-start ter-first)
             (< cds-end span-end)))
      (let [ter-first (+ cds-start 2)]
        (and (< ter-first span-end)
             (< span-start cds-start))))))
| 195 | + |
180 | 196 | (defn- ter-site-same-pos?
|
181 | 197 | [ref-prot-seq alt-prot-seq]
|
182 | 198 | (and (string/includes? ref-prot-seq "*")
|
|
260 | 276 | :else
|
261 | 277 | pos-start*)))
|
262 | 278 |
|
(defn- in-frame?
  "Returns true when the variant given by `pos`, `ref` and `alt` changes the
  length of the coding sequence by a multiple of three.

  `diff-bases` is destructured as `[del ins offset]` (presumably the removed
  bases, the added bases and the length of the shared prefix -- confirm
  against its definition). The length change is `deleted - inserted`, where:

  - a one-base-for-one-base replacement is always treated as in frame;
  - when the deleted span runs over the CDS boundary (`cds-end` on a
    `:forward` strand, `cds-start` otherwise) only the deleted bases up to and
    including that boundary are counted;
  - a variant that deletes nothing is never treated as running over the
    boundary, so a pure insertion is judged on its inserted length alone.

  The fourth argument is a map providing `:cds-start`, `:cds-end` and
  `:strand`."
  [pos ref alt {:keys [cds-start cds-end strand] :as _rg}]
  (let [[del ins offset] (diff-bases ref alt)
        ndel (count del)
        nins (count ins)
        forward? (= strand :forward)
        pos* (+ pos offset)
        pos-end (+ pos offset (dec ndel))
        ;; With nothing deleted `pos-end` is `pos* - 1`, yet the range test
        ;; below can still succeed when the shared prefix straddles the
        ;; boundary; that used to yield a zero or negative deletion length.
        ;; Require at least one deleted base before applying the range test.
        ;; The lower bound intentionally stays the un-offset `pos`, as before.
        over-ter-site? (and (pos? ndel)
                            (if forward?
                              (< pos cds-end pos-end)
                              (< pos cds-start pos-end)))
        ;; Deleted bases counted for the frame check: the whole deletion, or
        ;; only the part up to and including the CDS boundary.
        ndel* (cond
                (not over-ter-site?) ndel
                forward? (inc (- cds-end pos*))
                :else (inc (- pos-end cds-start)))]
    (or (= ndel nins 1)
        (= 0 (rem (- ndel* nins) 3)))))
| 297 | + |
263 | 298 | (defn- apply-offset
|
264 | 299 | [pos ref alt cds-start cds-end exon-ranges pos*]
|
265 | 300 | (let [[del ins offset _] (diff-bases ref alt)
|
|
288 | 323 | ref-include-utr-ini-site-boundary (include-utr-ini-site-boundary? rg pos ref alt)
|
289 | 324 | ref-include-ter-site (include-ter-site? rg pos ref alt)
|
290 | 325 | ref-include-from-ter-start-and-over-ter-end (ref-include-from-ter-start-and-over-ter-end? rg pos ref alt)
|
| 326 | + ref-include-from-ter-upstream-and-over-ter-end (ref-include-from-ter-upstream-and-over-ter-end? rg pos ref alt) |
291 | 327 | frameshift-within-cds (frameshift-within-cds? rg pos ref alt)
|
292 | 328 | alt-seq (common/alt-sequence ref-seq tx-start pos ref alt)
|
293 | 329 | alt-exon-ranges* (alt-exon-ranges exon-ranges pos ref alt)
|
|
301 | 337 | alt-up-exon-seq (make-alt-up-exon-seq alt-up-exon-seq tx-start (dec alt-cds-start) alt-exon-ranges* strand)
|
302 | 338 | alt-down-exon-seq (make-alt-down-exon-seq alt-down-exon-seq (inc alt-cds-end) alt-tx-end alt-exon-ranges* strand)
|
303 | 339 | ter-site-adjusted-alt-seq (make-ter-site-adjusted-alt-seq alt-cds-exon-seq alt-up-exon-seq alt-down-exon-seq
|
304 |
| - strand cds-start cds-end pos ref ref-include-ter-site)] |
| 340 | + strand cds-start cds-end pos ref ref-include-ter-site) |
| 341 | + in-frame (in-frame? pos ref alt rg)] |
305 | 342 | {:ref-exon-seq ref-cds-exon-seq
|
306 | 343 | :ref-prot-seq (codon/amino-acid-sequence (cond-> ref-cds-exon-seq
|
307 | 344 | (= strand :reverse) util-seq/revcomp))
|
|
327 | 364 | :ref-include-utr-ini-site-boundary ref-include-utr-ini-site-boundary
|
328 | 365 | :ref-include-ter-site ref-include-ter-site
|
329 | 366 | :ref-include-from-ter-start-and-over-ter-end ref-include-from-ter-start-and-over-ter-end
|
| 367 | + :ref-include-from-ter-upstream-and-over-ter-end ref-include-from-ter-upstream-and-over-ter-end |
330 | 368 | :frameshift-within-cds frameshift-within-cds
|
331 |
| - :utr-variant (utr-variant? cds-start cds-end pos ref alt)}))) |
| 369 | + :utr-variant (utr-variant? cds-start cds-end pos ref alt) |
| 370 | + :in-frame in-frame}))) |
332 | 371 |
|
333 | 372 | (defn- protein-position
|
334 | 373 | "Converts genomic position to protein position. If pos is outside of CDS,
|
|
621 | 660 |
|
622 | 661 | (defn- protein-indel
|
623 | 662 | [ppos pref palt {:keys [ref-prot-seq c-ter-adjusted-alt-prot-seq
|
624 |
| - ref-include-ter-site frameshift-within-cds] :as seq-info}] |
| 663 | + ref-include-ter-site frameshift-within-cds |
| 664 | + ref-include-from-ter-upstream-and-over-ter-end in-frame] :as seq-info}] |
625 | 665 | (let [[pref* palt* ppos*] (if ref-include-ter-site
|
626 | 666 | (let [{adjusted-ppos :ppos} (get-first-diff-aa-info ppos ref-prot-seq c-ter-adjusted-alt-prot-seq)
|
627 | 667 | ppos (or adjusted-ppos ppos)
|
|
658 | 698 |
|
659 | 699 | (empty? ins)
|
660 | 700 | (protein-deletion ppos* pref* palt*)
|
| 701 | + |
| 702 | + (and ref-include-from-ter-upstream-and-over-ter-end |
| 703 | + (not in-frame)) |
| 704 | + (protein-frame-shift ppos* seq-info) |
661 | 705 |
|
662 | 706 | alt-retain-ter-site?
|
663 | 707 | (mut/protein-indel (mut/->long-amino-acid (first del))
|
|
0 commit comments