|
14 | 14 | [varity.ref-gene :as rg]
|
15 | 15 | [varity.vcf-to-hgvs.common :refer [diff-bases] :as common]))
|
16 | 16 |
|
(defn- overlap-exon-intron-boundary?
  "Returns true if the reference allele of a variant straddles the start or
  the end of any range in exon-ranges, false otherwise.

  exon-ranges is a sequence of [start end] pairs, pos is the position of the
  first base of ref, and ref / alt are the allele sequences (only their
  lengths are used here). Positions are presumably inclusive genomic
  coordinates -- confirm against callers.

  The result is always false when
  - both ref and alt have length 1 (single-base substitution), or
  - exon-ranges contains exactly one range.
  Ranges whose start equals their end are ignored.

  The result is coerced to a strict boolean so that callers which store it
  (e.g. in a map value) never see a mix of nil and false."
  [exon-ranges pos ref alt]
  (let [nref (count ref)
        nalt (count alt)
        ;; Position of the last base covered by ref; invariant across exons,
        ;; so it is computed once instead of inside the predicate below.
        ref-end (+ pos nref -1)]
    (boolean
     (and (not (= 1 nref nalt))
          (not= 1 (count exon-ranges))
          (some (fn [[s e]]
                  (and (not= s e)
                       ;; ref begins before the exon start and reaches into it
                       (or (and (< pos s) (<= s ref-end))
                           ;; ref begins at/before the exon end and runs past it
                           (and (<= pos e) (< e ref-end)))))
                exon-ranges)))))
| 28 | + |
17 | 29 | (defn alt-exon-ranges
|
18 | 30 | "Returns exon ranges a variant applied."
|
19 | 31 | [exon-ranges pos ref alt]
|
|
25 | 37 | :else :same)
|
26 | 38 | tpos (+ pos (min nref nalt))
|
27 | 39 | d (Math/abs (- nref nalt))]
|
28 |
| - (when (and (not (= 1 nref nalt)) |
29 |
| - (not= 1 (count exon-ranges)) |
30 |
| - (some (fn [[s e]] |
31 |
| - (and (not= s e) |
32 |
| - (or (and (< pos s) (<= s (+ pos nref -1))) |
33 |
| - (and (<= pos e) (< e (+ pos nref -1)))))) |
34 |
| - exon-ranges)) |
35 |
| - (throw |
36 |
| - (ex-info |
37 |
| - "Variants overlapping a boundary of exon/intron are unsupported" |
38 |
| - {:exon-ranges exon-ranges, :pos pos, :ref ref, :alt alt}))) |
39 |
| - (->> exon-ranges |
40 |
| - (keep (fn [[s e]] |
41 |
| - (case typ |
42 |
| - :ins (cond |
43 |
| - (< tpos s) [(+ s d) (+ e d)] |
44 |
| - (<= s tpos e) [s (+ e d)] |
45 |
| - :else [s e]) |
46 |
| - :del (let [dels tpos |
47 |
| - dele (dec (+ tpos d))] |
48 |
| - (cond |
49 |
| - (< dele s) [(- s d) (- e d)] |
50 |
| - (<= dels s) (when (< dele e) [dels (- e d)]) |
51 |
| - (<= dels e) (if (< dele e) |
52 |
| - [s (- e d)] |
53 |
| - [s (dec dels)]) |
54 |
| - :else [s e])) |
55 |
| - :same [s e]))) |
56 |
| - vec))) |
| 40 | + (if (overlap-exon-intron-boundary? exon-ranges pos ref alt) |
| 41 | + (do (log/warn "Variants overlapping a boundary of exon/intron are unsupported") |
| 42 | + nil) |
| 43 | + (->> exon-ranges |
| 44 | + (keep (fn [[s e]] |
| 45 | + (case typ |
| 46 | + :ins (cond |
| 47 | + (< tpos s) [(+ s d) (+ e d)] |
| 48 | + (<= s tpos e) [s (+ e d)] |
| 49 | + :else [s e]) |
| 50 | + :del (let [dels tpos |
| 51 | + dele (dec (+ tpos d))] |
| 52 | + (cond |
| 53 | + (< dele s) [(- s d) (- e d)] |
| 54 | + (<= dels s) (when (< dele e) [dels (- e d)]) |
| 55 | + (<= dels e) (if (< dele e) |
| 56 | + [s (- e d)] |
| 57 | + [s (dec dels)]) |
| 58 | + :else [s e])) |
| 59 | + :same [s e]))) |
| 60 | + vec)))) |
57 | 61 |
|
58 | 62 | (defn exon-sequence
|
59 | 63 | "Extracts bases in exon from supplied sequence, returning the sequence of
|
|
197 | 201 | :c-ter-adjusted-alt-prot-seq (codon/amino-acid-sequence
|
198 | 202 | (cond-> ter-site-adjusted-alt-seq
|
199 | 203 | (= strand :reverse) util-seq/revcomp))
|
200 |
| - :alt-rg (-> rg |
201 |
| - (assoc :exon-ranges alt-exon-ranges*) |
202 |
| - (update :cds-start apply-offset*) |
203 |
| - (update :cds-end apply-offset*) |
204 |
| - (update :tx-end apply-offset*)) |
205 |
| - :ref-include-ter-site ref-include-ter-site})) |
| 204 | + :alt-rg (when alt-exon-ranges* |
| 205 | + (-> rg |
| 206 | + (assoc :exon-ranges alt-exon-ranges*) |
| 207 | + (update :cds-start apply-offset*) |
| 208 | + (update :cds-end apply-offset*) |
| 209 | + (update :tx-end apply-offset*))) |
| 210 | + :ref-include-ter-site ref-include-ter-site |
| 211 | + :overlap-exon-intron-boundary (overlap-exon-intron-boundary? exon-ranges pos ref alt)})) |
206 | 212 |
|
207 | 213 | (defn- protein-position
|
208 | 214 | "Converts genomic position to protein position. If pos is outside of CDS,
|
|
255 | 261 | (= ref-exon-seq alt-exon-seq)
|
256 | 262 | {:type :no-effect, :pos 1, :ref nil, :alt nil}
|
257 | 263 |
|
| 264 | + (:overlap-exon-intron-boundary seq-info) |
| 265 | + {:type :overlap-exon-intron-boundary, :pos nil, :ref nil, :alt nil} |
| 266 | + |
258 | 267 | (pos? (mod (count ref-exon-seq) 3))
|
259 | 268 | (do (log/warnf "CDS length is indivisible by 3: %d (%s, %s)"
|
260 | 269 | (count ref-exon-seq) (:name rg) (:name2 rg))
|
|
459 | 468 | (let [seq-info (read-sequence-info seq-rdr rg pos ref alt)]
|
460 | 469 | (when-let [pvariant (->protein-variant rg pos ref alt seq-info options)]
|
461 | 470 | (let [{ppos :pos, pref :ref, palt :alt}
|
462 |
| - (if-not (#{:no-effect :unknown} (:type pvariant)) |
| 471 | + (if-not (#{:no-effect :unknown :overlap-exon-intron-boundary} (:type pvariant)) |
463 | 472 | (common/apply-3'-rule pvariant (:ref-prot-seq seq-info))
|
464 | 473 | pvariant)]
|
465 | 474 | (case (:type pvariant)
|
|
472 | 481 | :frame-shift (protein-frame-shift ppos seq-info)
|
473 | 482 | :extension (protein-extension ppos pref palt seq-info)
|
474 | 483 | :no-effect (mut/protein-no-effect)
|
475 |
| - :unknown (mut/protein-unknown-mutation)))))) |
| 484 | + :unknown (mut/protein-unknown-mutation) |
| 485 | + :overlap-exon-intron-boundary nil))))) |
476 | 486 |
|
477 | 487 | (defn ->hgvs
|
478 | 488 | ([variant seq-rdr rg]
|
|
0 commit comments