diff --git a/Cargo.lock b/Cargo.lock index 1a8cca442a..c28729acfa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1256,15 +1256,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "atomic" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340" -dependencies = [ - "bytemuck", -] - [[package]] name = "atomic-waker" version = "1.1.2" @@ -1588,7 +1579,7 @@ dependencies = [ "http 0.2.12", "http 1.4.0", "http-body 0.4.6", - "lru 0.12.5", + "lru", "percent-encoding", "regex-lite", "sha2", @@ -2907,15 +2898,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" -[[package]] -name = "castaway" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" -dependencies = [ - "rustversion", -] - [[package]] name = "cbc" version = "0.1.2" @@ -3348,20 +3330,6 @@ dependencies = [ "unicode-width 0.2.2", ] -[[package]] -name = "compact_str" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" -dependencies = [ - "castaway", - "cfg-if", - "itoa", - "rustversion", - "ryu", - "static_assertions", -] - [[package]] name = "compression-codecs" version = "0.4.37" @@ -3857,13 +3825,9 @@ checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" dependencies = [ "bitflags 2.11.0", "crossterm_winapi", - "derive_more 2.1.1", "document-features", - "mio", "parking_lot", "rustix 1.1.3", - "signal-hook", - "signal-hook-mio", "winapi", ] @@ -3916,16 +3880,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "csscolorparser" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb2a7d3066da2de787b7f032c736763eb7ae5d355f81a68bab2675a96008b0bf" -dependencies = [ - "lab", - "phf 0.11.3", -] - [[package]] name = "cssparser" version = "0.29.6" @@ -4393,12 +4347,6 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26bf8fc351c5ed29b5c2f0cbbac1b209b74f60ecd62e675a998df72c49af5204" -[[package]] -name = "deltae" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5729f5117e208430e437df2f4843f5e5952997175992d1414f94c57d61e270b4" - [[package]] name = "deno_core" version = "0.338.0" @@ -5512,16 +5460,6 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" -[[package]] -name = "fancy-regex" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2" -dependencies = [ - "bit-set 0.5.3", - "regex", -] - [[package]] name = "fancy-regex" version = "0.16.2" @@ -5685,17 +5623,6 @@ dependencies = [ "flate2", ] -[[package]] -name = "filedescriptor" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e40758ed24c9b2eeb76c35fb0aebc66c626084edd827e07e1552279814c6682d" -dependencies = [ - "libc", - "thiserror 1.0.69", - "winapi", -] - [[package]] name = "filetime" version = "0.2.27" @@ -5725,18 +5652,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "finl_unicode" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9844ddc3a6e533d62bba727eb6c28b5d360921d5175e9ff0f1e621a5c590a4d5" - -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - [[package]] name = "fixedbitset" version = "0.5.7" @@ -9427,19 +9342,6 @@ dependencies = [ "tempfile", ] -[[package]] -name = "instability" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357b7205c6cd18dd2c86ed312d1e70add149aea98e7ef72b9fdf0270e555c11d" -dependencies = [ - "darling 0.23.0", - "indoc", - "proc-macro2", - "quote", - "syn 2.0.117", -] - [[package]] name = "instant" version = "0.1.13" @@ -9820,17 +9722,6 @@ dependencies = [ "mutate_once", ] -[[package]] -name = "kasuari" -version = "0.4.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fe90c1150662e858c7d5f945089b7517b0a80d8bf7ba4b1b5ffc984e7230a5b" -dependencies = [ - "hashbrown 0.16.1", - "portable-atomic", - "thiserror 2.0.18", -] - [[package]] name = "keyboard-types" version = "0.7.0" @@ -9987,12 +9878,6 @@ dependencies = [ "smallvec 1.15.1", ] -[[package]] -name = "lab" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf36173d4167ed999940f804952e6b08197cae5ad5d572eb4db150ce8ad5d58f" - [[package]] name = "lago" version = "0.1.0" @@ -10359,15 +10244,6 @@ dependencies = [ "wayland-protocols-wlr", ] -[[package]] -name = "line-clipping" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4de44e98ddbf09375cbf4d17714d18f39195f4f4894e8524501726fd9a8a4a" -dependencies = [ - "bitflags 2.11.0", -] - [[package]] name = "linked-hash-map" version = "0.5.6" @@ -10528,15 +10404,6 @@ dependencies = [ "hashbrown 0.15.5", ] -[[package]] -name = "lru" -version = "0.16.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1dc47f592c06f33f8e3aea9591776ec7c9f9e4124778ff8a3c3b87159f7e593" -dependencies = [ - "hashbrown 0.16.1", -] - [[package]] name = "lru-slab" version = "0.1.2" @@ -10585,16 +10452,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" -[[package]] -name = "mac_address" -version = "1.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0aeb26bf5e836cc1c341c8106051b573f1766dfa05aa87f0b98be5e51b02303" -dependencies = [ - "nix 0.29.0", - "winapi", -] - [[package]] name = "mac_address2" version = "2.0.2" @@ -10826,12 +10683,6 @@ dependencies = [ "libc", ] -[[package]] -name = "memmem" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a64a92489e2744ce060c349162be1c5f33c6969234104dbd99ddb5feb08b8c15" - [[package]] name = "memo-map" version = "0.3.3" @@ -11199,19 +11050,6 @@ dependencies = [ "memoffset", ] -[[package]] -name = "nix" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" -dependencies = [ - "bitflags 2.11.0", - "cfg-if", - "cfg_aliases 0.2.1", - "libc", - "memoffset", -] - [[package]] name = "nix" version = "0.30.1" @@ -12216,15 +12054,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" -[[package]] -name = "ordered-float" -version = "4.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" -dependencies = [ - "num-traits", -] - [[package]] name = "ordered-float" version = "5.1.0" @@ -12709,7 +12538,7 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ - "fixedbitset 0.5.7", + "fixedbitset", "indexmap 2.13.0", ] @@ -12719,7 +12548,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ - "fixedbitset 0.5.7", + "fixedbitset", "hashbrown 0.15.5", "indexmap 2.13.0", ] @@ -14008,91 +13837,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "ratatui" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1ce67fb8ba4446454d1c8dbaeda0557ff5e94d39d5e5ed7f10a65eb4c8266bc" -dependencies = [ - "instability", - "ratatui-core", - "ratatui-crossterm", - "ratatui-macros", - "ratatui-termwiz", - "ratatui-widgets", -] - -[[package]] -name = "ratatui-core" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef8dea09a92caaf73bff7adb70b76162e5937524058a7e5bff37869cbbec293" -dependencies = [ - "bitflags 2.11.0", - "compact_str", - "hashbrown 0.16.1", - "indoc", - "itertools 0.14.0", - "kasuari", - "lru 0.16.3", - "strum 0.27.2", - "thiserror 2.0.18", - "unicode-segmentation", - "unicode-truncate", - "unicode-width 0.2.2", -] - -[[package]] -name = "ratatui-crossterm" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "577c9b9f652b4c121fb25c6a391dd06406d3b092ba68827e6d2f09550edc54b3" -dependencies = [ - "cfg-if", - "crossterm", - "instability", - "ratatui-core", -] - -[[package]] -name = "ratatui-macros" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7f1342a13e83e4bb9d0b793d0ea762be633f9582048c892ae9041ef39c936f4" -dependencies = [ - "ratatui-core", - "ratatui-widgets", -] - -[[package]] -name = "ratatui-termwiz" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f76fe0bd0ed4295f0321b1676732e2454024c15a35d01904ddb315afd3d545c" -dependencies = [ - "ratatui-core", - "termwiz", -] - -[[package]] -name = "ratatui-widgets" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7dbfa023cd4e604c2553483820c5fe8aa9d71a42eea5aa77c6e7f35756612db" -dependencies = [ - "bitflags 2.11.0", - "hashbrown 0.16.1", - "indoc", - "instability", - "itertools 0.14.0", - "line-clipping", - "ratatui-core", - "strum 0.27.2", - "time", - "unicode-segmentation", - "unicode-width 0.2.2", -] - [[package]] name = "rav1e" version = "0.8.1" @@ -16028,17 +15772,6 @@ dependencies = [ "signal-hook-registry", ] -[[package]] -name = "signal-hook-mio" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc" -dependencies = [ - "libc", - "mio", - "signal-hook", -] - [[package]] name = "signal-hook-registry" version = "1.4.8" @@ -17459,7 +17192,7 @@ dependencies = [ "itertools 0.14.0", "levenshtein_automata", "log", - "lru 0.12.5", + "lru", "lz4_flex", "measure_time", "memmap2", @@ -18338,7 +18071,7 @@ dependencies = [ "intercept", "language", "mac 0.1.0", - "ordered-float 5.1.0", + "ordered-float", "owhisper-client", "owhisper-interface", "quickcheck", @@ -18365,6 +18098,7 @@ dependencies = [ "tokio-stream", "tokio-util", "tracing", + "transcript", "url", "uuid", "vad-ext", @@ -18395,6 +18129,7 @@ dependencies = [ "tokio", "tokio-stream", "tracing", + "transcript", ] [[package]] @@ -19340,75 +19075,12 @@ dependencies = [ "unicode-segmentation", ] -[[package]] -name = "terminfo" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4ea810f0692f9f51b382fff5893887bb4580f5fa246fde546e0b13e7fcee662" -dependencies = [ - "fnv", - "nom 7.1.3", - "phf 0.11.3", - "phf_codegen 0.11.3", -] - -[[package]] -name = "termios" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "411c5bf740737c7918b8b1fe232dca4dc9f8e754b8ad5e20966814001ed0ac6b" -dependencies = [ - "libc", -] - [[package]] name = "termtree" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" -[[package]] -name = "termwiz" -version = "0.23.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4676b37242ccbd1aabf56edb093a4827dc49086c0ffd764a5705899e0f35f8f7" -dependencies = [ - "anyhow", - "base64 0.22.1", - "bitflags 2.11.0", - "fancy-regex 0.11.0", - "filedescriptor", - "finl_unicode", - "fixedbitset 0.4.2", - "hex", - "lazy_static", - "libc", - "log", - "memmem", - "nix 0.29.0", - "num-derive", - "num-traits", - "ordered-float 4.6.0", - "pest", - "pest_derive", - "phf 0.11.3", - "sha2", - "signal-hook", - "siphasher 1.0.2", - "terminfo", - "termios", - "thiserror 1.0.69", - "ucd-trie", - "unicode-segmentation", - "vtparse", - "wezterm-bidi", - "wezterm-blob-leases", - "wezterm-color-types", - "wezterm-dynamic", - "wezterm-input-types", - "winapi", -] - [[package]] name = "testcontainers" version = "0.24.0" @@ -20351,26 +20023,9 @@ dependencies = [ name = "transcript" version = "0.1.0" dependencies = [ - "audio", - "audio-utils", - "axum 0.8.8", - "bytes", - "clap", - "crossterm", - "data", - "futures-util", - "libc", - "owhisper-client", "owhisper-interface", - "ratatui", "serde", - "serde_json", "specta", - "strum 0.27.2", - "tokio", - "tokio-stream", - "tower 0.5.3", - "transcribe-cactus", "uuid", ] @@ -21120,17 +20775,6 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" -[[package]] -name = "unicode-truncate" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b380a1238663e5f8a691f9039c73e1cdae598a30e9855f541d29b08b53e9a5" -dependencies = [ - "itertools 0.14.0", - "unicode-segmentation", - "unicode-width 0.2.2", -] - [[package]] name = "unicode-vo" version = "0.1.0" @@ -21327,7 +20971,6 @@ version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ - "atomic", "getrandom 0.4.1", "js-sys", "serde_core", @@ -21532,15 +21175,6 @@ dependencies = [ "libc", ] -[[package]] -name = "vtparse" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d9b2acfb050df409c972a37d3b8e08cdea3bddb0c09db9d53137e504cfabed0" -dependencies = [ - "utf8parse", -] - [[package]] name = "walkdir" version = "2.5.0" @@ -22043,78 +21677,6 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a28ac98ddc8b9274cb41bb4d9d4d5c425b6020c50c46f25559911905610b4a88" -[[package]] -name = "wezterm-bidi" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c0a6e355560527dd2d1cf7890652f4f09bb3433b6aadade4c9b5ed76de5f3ec" -dependencies = [ - "log", - "wezterm-dynamic", -] - -[[package]] -name = "wezterm-blob-leases" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "692daff6d93d94e29e4114544ef6d5c942a7ed998b37abdc19b17136ea428eb7" -dependencies = [ - "getrandom 0.3.4", - "mac_address", - "sha2", - "thiserror 1.0.69", - "uuid", -] - -[[package]] -name = "wezterm-color-types" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7de81ef35c9010270d63772bebef2f2d6d1f2d20a983d27505ac850b8c4b4296" -dependencies = [ - "csscolorparser", - "deltae", - "lazy_static", - "wezterm-dynamic", -] - -[[package]] -name = "wezterm-dynamic" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f2ab60e120fd6eaa68d9567f3226e876684639d22a4219b313ff69ec0ccd5ac" -dependencies = [ - "log", - "ordered-float 4.6.0", - "strsim", - "thiserror 1.0.69", - "wezterm-dynamic-derive", -] - -[[package]] -name = "wezterm-dynamic-derive" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c0cf2d539c645b448eaffec9ec494b8b19bd5077d9e58cb1ae7efece8d575b" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "wezterm-input-types" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7012add459f951456ec9d6c7e6fc340b1ce15d6fc9629f8c42853412c029e57e" -dependencies = [ - "bitflags 1.3.2", - "euclid", - "lazy_static", - "serde", - "wezterm-dynamic", -] - [[package]] name = "which" version = "4.4.2" diff --git a/apps/desktop/src/components/main/body/sessions/note-input/transcript/shared/index.tsx b/apps/desktop/src/components/main/body/sessions/note-input/transcript/shared/index.tsx index c4edda3503..2472900c26 100644 --- a/apps/desktop/src/components/main/body/sessions/note-input/transcript/shared/index.tsx +++ b/apps/desktop/src/components/main/body/sessions/note-input/transcript/shared/index.tsx @@ -2,7 +2,6 @@ import { type RefObject, useCallback, useMemo, useRef, useState } from "react"; import { useHotkeys } from "react-hotkeys-hook"; import type { DegradedError } from "@hypr/plugin-listener"; -import type { RuntimeSpeakerHint } from "@hypr/transcript"; import { DancingSticks } from "@hypr/ui/components/ui/dancing-sticks"; import { cn } from "@hypr/utils"; @@ -46,42 +45,12 @@ export function TranscriptContainer({ const partialWordsByChannel = useListener( (state) => state.partialWordsByChannel, ); - const partialHintsByChannel = useListener( - (state) => state.partialHintsByChannel, - ); const partialWords = useMemo( () => Object.values(partialWordsByChannel).flat(), [partialWordsByChannel], ); - const partialHints = useMemo(() => { - const channelIndices = Object.keys(partialWordsByChannel) - .map(Number) - .sort((a, b) => a - b); - - const offsetByChannel = new Map(); - let currentOffset = 0; - for (const channelIndex of channelIndices) { - offsetByChannel.set(channelIndex, currentOffset); - currentOffset += partialWordsByChannel[channelIndex]?.length ?? 0; - } - - const reindexedHints: RuntimeSpeakerHint[] = []; - for (const channelIndex of channelIndices) { - const hints = partialHintsByChannel[channelIndex] ?? []; - const offset = offsetByChannel.get(channelIndex) ?? 0; - for (const hint of hints) { - reindexedHints.push({ - ...hint, - wordIndex: hint.wordIndex + offset, - }); - } - } - - return reindexedHints; - }, [partialWordsByChannel, partialHintsByChannel]); - const containerRef = useRef(null); const [scrollElement, setScrollElement] = useState( null, @@ -169,11 +138,6 @@ export function TranscriptContainer({ ? partialWords : [] } - partialHints={ - index === transcriptIds.length - 1 && currentActive - ? partialHints - : [] - } operations={operations} /> {index < transcriptIds.length - 1 && } diff --git a/apps/desktop/src/components/main/body/sessions/note-input/transcript/shared/render-transcript.tsx b/apps/desktop/src/components/main/body/sessions/note-input/transcript/shared/render-transcript.tsx index a1f6c34233..a77b72d3c5 100644 --- a/apps/desktop/src/components/main/body/sessions/note-input/transcript/shared/render-transcript.tsx +++ b/apps/desktop/src/components/main/body/sessions/note-input/transcript/shared/render-transcript.tsx @@ -1,10 +1,6 @@ import { memo, useEffect, useMemo } from "react"; -import type { - PartialWord, - RuntimeSpeakerHint, - Segment, -} from "@hypr/transcript"; +import type { PartialWord, Segment } from "@hypr/transcript"; import { cn } from "@hypr/utils"; import * as main from "../../../../../../../store/tinybase/store/main"; @@ -31,7 +27,6 @@ export function RenderTranscript({ editable, transcriptId, partialWords, - partialHints, operations, }: { scrollElement: HTMLDivElement | null; @@ -40,7 +35,6 @@ export function RenderTranscript({ editable: boolean; transcriptId: string; partialWords: PartialWord[]; - partialHints: RuntimeSpeakerHint[]; operations?: Operations; }) { const finalWords = useFinalWords(transcriptId); @@ -54,14 +48,7 @@ export function RenderTranscript({ ) as string | undefined; const numSpeakers = useSessionSpeakers(sessionId); - const allSpeakerHints = useMemo(() => { - const finalWordsCount = finalWords.length; - const adjustedPartialHints = partialHints.map((hint) => ({ - ...hint, - wordIndex: finalWordsCount + hint.wordIndex, - })); - return [...finalSpeakerHints, ...adjustedPartialHints]; - }, [finalWords.length, finalSpeakerHints, partialHints]); + const allSpeakerHints = useMemo(() => finalSpeakerHints, [finalSpeakerHints]); const segments = useStableSegments( finalWords, diff --git a/apps/desktop/src/hooks/useRunBatch.ts b/apps/desktop/src/hooks/useRunBatch.ts index 159b2a7bd4..ce670ea409 100644 --- a/apps/desktop/src/hooks/useRunBatch.ts +++ b/apps/desktop/src/hooks/useRunBatch.ts @@ -5,13 +5,7 @@ import type { BatchParams } from "@hypr/plugin-listener2"; import { useConfigValue } from "../config/use-config"; import { useListener } from "../contexts/listener"; import * as main from "../store/tinybase/store/main"; -import type { SpeakerHintWithId, WordWithId } from "../store/transcript/types"; -import { - parseTranscriptHints, - parseTranscriptWords, - updateTranscriptHints, - updateTranscriptWords, -} from "../store/transcript/utils"; +import { makePersistCallback } from "../store/transcript/utils"; import type { HandlePersistCallback } from "../store/zustand/listener/transcript"; import { type Tab, useTabs } from "../store/zustand/tabs"; import { id } from "../utils"; @@ -99,71 +93,8 @@ export const useRunBatch = (sessionId: string) => { speaker_hints: "[]", }); - const handlePersist: HandlePersistCallback | undefined = - options?.handlePersist; - - const persist = - handlePersist ?? - ((words, hints) => { - if (words.length === 0) { - return; - } - - const existingWords = parseTranscriptWords(store, transcriptId); - const existingHints = parseTranscriptHints(store, transcriptId); - - const newWords: WordWithId[] = []; - const newWordIds: string[] = []; - - words.forEach((word) => { - const wordId = id(); - - newWords.push({ - id: wordId, - text: word.text, - start_ms: word.start_ms, - end_ms: word.end_ms, - channel: word.channel, - }); - - newWordIds.push(wordId); - }); - - const newHints: SpeakerHintWithId[] = []; - - hints.forEach((hint) => { - if (hint.data.type !== "provider_speaker_index") { - return; - } - - const wordId = newWordIds[hint.wordIndex]; - const word = words[hint.wordIndex]; - - if (!wordId || !word) { - return; - } - - newHints.push({ - id: id(), - word_id: wordId, - type: "provider_speaker_index", - value: JSON.stringify({ - provider: hint.data.provider ?? conn.provider, - channel: hint.data.channel ?? word.channel, - speaker_index: hint.data.speaker_index, - }), - }); - }); - - updateTranscriptWords(store, transcriptId, [ - ...existingWords, - ...newWords, - ]); - updateTranscriptHints(store, transcriptId, [ - ...existingHints, - ...newHints, - ]); - }); + const persist: HandlePersistCallback = + options?.handlePersist ?? makePersistCallback(store, transcriptId); const params: BatchParams = { session_id: sessionId, diff --git a/apps/desktop/src/hooks/useStartListening.ts b/apps/desktop/src/hooks/useStartListening.ts index ca3c1f1d19..463a2d3e50 100644 --- a/apps/desktop/src/hooks/useStartListening.ts +++ b/apps/desktop/src/hooks/useStartListening.ts @@ -5,14 +5,7 @@ import { commands as analyticsCommands } from "@hypr/plugin-analytics"; import { useConfigValue } from "../config/use-config"; import { useListener } from "../contexts/listener"; import * as main from "../store/tinybase/store/main"; -import type { SpeakerHintWithId, WordWithId } from "../store/transcript/types"; -import { - parseTranscriptHints, - parseTranscriptWords, - updateTranscriptHints, - updateTranscriptWords, -} from "../store/transcript/utils"; -import type { HandlePersistCallback } from "../store/zustand/listener/transcript"; +import { makePersistCallback } from "../store/transcript/utils"; import { id } from "../utils"; import { getSessionEventById } from "../utils/session-event"; import { useKeywords } from "./useKeywords"; @@ -55,70 +48,6 @@ export function useStartListening(sessionId: string) { stt_model: conn.model, }); - const handlePersist: HandlePersistCallback = (words, hints) => { - if (words.length === 0) { - return; - } - - store.transaction(() => { - const existingWords = parseTranscriptWords(store, transcriptId); - const existingHints = parseTranscriptHints(store, transcriptId); - - const newWords: WordWithId[] = []; - const newWordIds: string[] = []; - - words.forEach((word) => { - const wordId = id(); - - newWords.push({ - id: wordId, - text: word.text, - start_ms: word.start_ms, - end_ms: word.end_ms, - channel: word.channel, - }); - - newWordIds.push(wordId); - }); - - const newHints: SpeakerHintWithId[] = []; - - if (conn.provider === "deepgram") { - hints.forEach((hint) => { - if (hint.data.type !== "provider_speaker_index") { - return; - } - - const wordId = newWordIds[hint.wordIndex]; - const word = words[hint.wordIndex]; - if (!wordId || !word) { - return; - } - - newHints.push({ - id: id(), - word_id: wordId, - type: "provider_speaker_index", - value: JSON.stringify({ - provider: hint.data.provider ?? conn.provider, - channel: hint.data.channel ?? word.channel, - speaker_index: hint.data.speaker_index, - }), - }); - }); - } - - updateTranscriptWords(store, transcriptId, [ - ...existingWords, - ...newWords, - ]); - updateTranscriptHints(store, transcriptId, [ - ...existingHints, - ...newHints, - ]); - }); - }; - start( { session_id: sessionId, @@ -131,7 +60,7 @@ export function useStartListening(sessionId: string) { keywords, }, { - handlePersist, + handlePersist: makePersistCallback(store, transcriptId), }, ); }, [ diff --git a/apps/desktop/src/store/transcript/types.ts b/apps/desktop/src/store/transcript/types.ts index 4135a7df76..0857e20736 100644 --- a/apps/desktop/src/store/transcript/types.ts +++ b/apps/desktop/src/store/transcript/types.ts @@ -1,4 +1,7 @@ import type { SpeakerHintStorage, WordStorage } from "@hypr/store"; -export type WordWithId = WordStorage & { id: string }; +export type WordWithId = WordStorage & { + id: string; + state?: "final" | "pending"; +}; export type SpeakerHintWithId = SpeakerHintStorage & { id: string }; diff --git a/apps/desktop/src/store/transcript/utils.ts b/apps/desktop/src/store/transcript/utils.ts index 32e9f0e248..9c61da88c4 100644 --- a/apps/desktop/src/store/transcript/utils.ts +++ b/apps/desktop/src/store/transcript/utils.ts @@ -1,3 +1,7 @@ +import type { TranscriptDelta } from "@hypr/plugin-listener"; + +import { id } from "../../utils"; +import type { HandlePersistCallback } from "../zustand/listener/transcript"; import type { SpeakerHintWithId, WordWithId } from "./types"; interface TranscriptStore { @@ -12,6 +16,7 @@ interface TranscriptStore { cellId: "words" | "speaker_hints", value: string, ): void; + transaction(fn: () => T): T; } export function parseTranscriptWords( @@ -66,3 +71,68 @@ export function updateTranscriptHints( JSON.stringify(hints), ); } + +export function replaceTranscriptWords( + store: TranscriptStore, + transcriptId: string, + replacedIds: Set, + newWords: WordWithId[], +): void { + const existing = parseTranscriptWords(store, transcriptId).filter( + (w) => !replacedIds.has(w.id), + ); + const existingHints = parseTranscriptHints(store, transcriptId).filter( + (h) => h.word_id == null || !replacedIds.has(h.word_id), + ); + updateTranscriptWords(store, transcriptId, [...existing, ...newWords]); + updateTranscriptHints(store, transcriptId, existingHints); +} + +export function makePersistCallback( + store: TranscriptStore, + transcriptId: string, +): HandlePersistCallback { + return (delta: TranscriptDelta) => { + if (delta.new_words.length === 0 && delta.replaced_ids.length === 0) { + return; + } + + store.transaction(() => { + const newWords: WordWithId[] = delta.new_words.map((w) => ({ + id: w.id, + text: w.text, + start_ms: w.start_ms, + end_ms: w.end_ms, + channel: w.channel, + state: w.state, + })); + + const newHints: SpeakerHintWithId[] = delta.hints.map((h) => ({ + id: id(), + word_id: h.word_id, + type: "provider_speaker_index" as const, + value: JSON.stringify({ speaker_index: h.speaker_index }), + })); + + if (delta.replaced_ids.length > 0) { + replaceTranscriptWords( + store, + transcriptId, + new Set(delta.replaced_ids), + newWords, + ); + } else { + const existing = parseTranscriptWords(store, transcriptId); + updateTranscriptWords(store, transcriptId, [...existing, ...newWords]); + } + + if (newHints.length > 0) { + const existingHints = parseTranscriptHints(store, transcriptId); + updateTranscriptHints(store, transcriptId, [ + ...existingHints, + ...newHints, + ]); + } + }); + }; +} diff --git a/apps/desktop/src/store/zustand/listener/batch.ts b/apps/desktop/src/store/zustand/listener/batch.ts index 2f7ef0f9c5..b002478d68 100644 --- a/apps/desktop/src/store/zustand/listener/batch.ts +++ b/apps/desktop/src/store/zustand/listener/batch.ts @@ -1,14 +1,8 @@ import type { StoreApi } from "zustand"; -import type { BatchResponse, StreamResponse } from "@hypr/plugin-listener2"; +import type { BatchEvent } from "@hypr/plugin-listener2"; -import { - ChannelProfile, - type RuntimeSpeakerHint, - type WordLike, -} from "../../../utils/segment"; import type { HandlePersistCallback } from "./transcript"; -import { transformWordEntries } from "./utils"; export type BatchPhase = "importing" | "transcribing"; @@ -27,12 +21,7 @@ export type BatchState = { export type BatchActions = { handleBatchStarted: (sessionId: string, phase?: BatchPhase) => void; - handleBatchResponse: (sessionId: string, response: BatchResponse) => void; - handleBatchResponseStreamed: ( - sessionId: string, - response: StreamResponse, - percentage: number, - ) => void; + handleBatchEvent: (sessionId: string, event: BatchEvent) => void; handleBatchFailed: (sessionId: string, error: string) => void; clearBatchSession: (sessionId: string) => void; setBatchPersist: (sessionId: string, callback: HandlePersistCallback) => void; @@ -60,62 +49,29 @@ export const createBatchSlice = ( })); }, - handleBatchResponse: (sessionId, response) => { + handleBatchEvent: (sessionId, event) => { const persist = get().batchPersist[sessionId]; - const [words, hints] = transformBatch(response); - if (!words.length) { - return; - } - - persist?.(words, hints); + if (event.type === "batchProgress") { + const { delta, percentage } = event; + const isComplete = percentage >= 1; - set((state) => { - if (!state.batch[sessionId]) { - return state; + if (delta.new_words.length > 0 || delta.replaced_ids.length > 0) { + persist?.(delta); } - const { [sessionId]: _, ...rest } = state.batch; - return { + set((state) => ({ ...state, - batch: rest, - }; - }); - }, - - handleBatchResponseStreamed: (sessionId, response, percentage) => { - const persist = get().batchPersist[sessionId]; - - if (persist && response.type === "Results") { - const channelIndex = response.channel_index[0]; - const alternative = response.channel.alternatives[0]; - - if (channelIndex !== undefined && alternative) { - const [words, hints] = transformWordEntries( - alternative.words, - alternative.transcript, - channelIndex, - ); - - if (words.length > 0) { - persist(words, hints); - } - } - } - - const isComplete = response.type === "Results" && response.from_finalize; - - set((state) => ({ - ...state, - batch: { - ...state.batch, - [sessionId]: { - percentage, - isComplete: isComplete || false, - phase: "transcribing", + batch: { + ...state.batch, + [sessionId]: { + percentage, + isComplete: isComplete || false, + phase: "transcribing", + }, }, - }, - })); + })); + } }, handleBatchFailed: (sessionId, error) => { @@ -137,12 +93,8 @@ export const createBatchSlice = ( if (!(sessionId in state.batch)) { return state; } - const { [sessionId]: _, ...rest } = state.batch; - return { - ...state, - batch: rest, - }; + return { ...state, batch: rest }; }); }, @@ -161,44 +113,8 @@ export const createBatchSlice = ( if (!(sessionId in state.batchPersist)) { return state; } - const { [sessionId]: _, ...rest } = state.batchPersist; - return { - ...state, - batchPersist: rest, - }; + return { ...state, batchPersist: rest }; }); }, }); - -function transformBatch( - response: BatchResponse, -): [WordLike[], RuntimeSpeakerHint[]] { - const allWords: WordLike[] = []; - const allHints: RuntimeSpeakerHint[] = []; - let wordOffset = 0; - - response.results.channels.forEach((channel) => { - const alternative = channel.alternatives[0]; - if (!alternative || !alternative.words || !alternative.words.length) { - return; - } - - const [words, hints] = transformWordEntries( - alternative.words, - alternative.transcript, - ChannelProfile.MixedCapture, - ); - - hints.forEach((hint) => { - allHints.push({ - ...hint, - wordIndex: hint.wordIndex + wordOffset, - }); - }); - allWords.push(...words); - wordOffset += words.length; - }); - - return [allWords, allHints]; -} diff --git a/apps/desktop/src/store/zustand/listener/general.test.ts b/apps/desktop/src/store/zustand/listener/general.test.ts index 6ed7331a69..94dfdef2a4 100644 --- a/apps/desktop/src/store/zustand/listener/general.test.ts +++ b/apps/desktop/src/store/zustand/listener/general.test.ts @@ -37,76 +37,29 @@ describe("General Listener Slice", () => { test("getSessionMode returns running_batch when session is in batch", () => { const sessionId = "session-456"; - const { handleBatchResponseStreamed, getSessionMode } = store.getState(); + const { handleBatchEvent, getSessionMode } = store.getState(); - const mockResponse = { - type: "Results" as const, - start: 0, - duration: 5, - is_final: false, - speech_final: false, - from_finalize: false, - channel: { - alternatives: [ - { - transcript: "test", - words: [], - confidence: 0.9, - }, - ], - }, - metadata: { - request_id: "test-request", - model_info: { - name: "test-model", - version: "1.0", - arch: "test-arch", - }, - model_uuid: "test-uuid", - }, - channel_index: [0], - }; - - handleBatchResponseStreamed(sessionId, mockResponse, 0.5); + handleBatchEvent(sessionId, { + type: "batchProgress", + session_id: sessionId, + delta: { new_words: [], hints: [], replaced_ids: [], partials: [] }, + percentage: 0.5, + }); expect(getSessionMode(sessionId)).toBe("running_batch"); }); }); describe("Batch State", () => { - test("handleBatchResponseStreamed tracks progress per session", () => { + test("handleBatchEvent tracks progress per session", () => { const sessionId = "session-progress"; - const { handleBatchResponseStreamed, clearBatchSession } = - store.getState(); + const { handleBatchEvent, clearBatchSession } = store.getState(); - const mockResponse = { - type: "Results" as const, - start: 0, - duration: 5, - is_final: false, - speech_final: false, - from_finalize: false, - channel: { - alternatives: [ - { - transcript: "test", - words: [], - confidence: 0.9, - }, - ], - }, - metadata: { - request_id: "test-request", - model_info: { - name: "test-model", - version: "1.0", - arch: "test-arch", - }, - model_uuid: "test-uuid", - }, - channel_index: [0], - }; - - handleBatchResponseStreamed(sessionId, mockResponse, 0.5); + handleBatchEvent(sessionId, { + type: "batchProgress", + session_id: sessionId, + delta: { new_words: [], hints: [], replaced_ids: [], partials: [] }, + percentage: 0.5, + }); expect(store.getState().batch[sessionId]).toEqual({ percentage: 0.5, isComplete: false, diff --git a/apps/desktop/src/store/zustand/listener/general.ts b/apps/desktop/src/store/zustand/listener/general.ts index 0936839c9d..4430702cb6 100644 --- a/apps/desktop/src/store/zustand/listener/general.ts +++ b/apps/desktop/src/store/zustand/listener/general.ts @@ -15,7 +15,6 @@ import { type SessionLifecycleEvent, type SessionParams, type SessionProgressEvent, - type StreamResponse, } from "@hypr/plugin-listener"; import { type BatchParams, @@ -311,9 +310,8 @@ export const createGeneralSlice = < }; }), ); - } else if (payload.type === "stream_response") { - const response = payload.response; - get().handleTranscriptResponse(response as unknown as StreamResponse); + } else if (payload.type === "transcript_delta") { + get().handleTranscriptDelta(payload.delta); } else if (payload.type === "mic_muted") { set((state) => mutate(state, (draft) => { @@ -526,21 +524,6 @@ export const createGeneralSlice = < return; } - if (payload.type === "batchProgress") { - get().handleBatchResponseStreamed( - sessionId, - payload.response, - payload.percentage, - ); - - const batchState = get().batch[sessionId]; - if (batchState?.isComplete) { - cleanup(); - resolve(); - } - return; - } - if (payload.type === "batchFailed") { get().handleBatchFailed(sessionId, payload.error); cleanup(false); @@ -548,16 +531,16 @@ export const createGeneralSlice = < return; } - if (payload.type !== "batchResponse") { - return; - } - try { - get().handleBatchResponse(sessionId, payload.response); - cleanup(); - resolve(); + get().handleBatchEvent(sessionId, payload); + + const batchState = get().batch[sessionId]; + if (batchState?.isComplete) { + cleanup(); + resolve(); + } } catch (error) { - console.error("[runBatch] error handling batch response", error); + console.error("[runBatch] error handling batch event", error); const errorMessage = error instanceof Error ? error.message : String(error); get().handleBatchFailed(sessionId, errorMessage); diff --git a/apps/desktop/src/store/zustand/listener/transcript.test.ts b/apps/desktop/src/store/zustand/listener/transcript.test.ts index 3f2ae661b6..882bd85158 100644 --- a/apps/desktop/src/store/zustand/listener/transcript.test.ts +++ b/apps/desktop/src/store/zustand/listener/transcript.test.ts @@ -1,9 +1,8 @@ -import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; +import { beforeEach, describe, expect, test, vi } from "vitest"; import { createStore } from "zustand"; -import type { StreamResponse, StreamWord } from "@hypr/plugin-listener"; +import type { TranscriptDelta } from "@hypr/plugin-listener"; -import type { RuntimeSpeakerHint, WordLike } from "../../../utils/segment"; import { createTranscriptSlice, type TranscriptActions, @@ -16,64 +15,15 @@ const createTranscriptStore = () => { ); }; -describe("transcript slice", () => { - const defaultWords: StreamWord[] = [ - { - word: "another", - punctuated_word: "Another", - start: 0, - end: 1, - confidence: 1, - speaker: 0, - language: "en", - }, - { - word: "problem", - punctuated_word: "problem", - start: 1, - end: 2, - confidence: 1, - speaker: 1, - language: "en", - }, - ]; - - const createResponse = ({ - words, - transcript, - isFinal, - channelIndex = 0, - }: { - words: StreamWord[]; - transcript: string; - isFinal: boolean; - channelIndex?: number; - }): StreamResponse => { - return { - type: "Results", - start: 0, - duration: 0, - is_final: isFinal, - speech_final: isFinal, - from_finalize: false, - channel_index: [channelIndex], - channel: { - alternatives: [ - { - transcript, - confidence: 1, - words, - }, - ], - }, - metadata: { - request_id: "test", - model_info: { name: "model", version: "1", arch: "cpu" }, - model_uuid: "model", - }, - } satisfies StreamResponse; - }; +const makeDelta = (partial: Partial): TranscriptDelta => ({ + new_words: [], + hints: [], + replaced_ids: [], + partials: [], + ...partial, +}); +describe("transcript slice", () => { type TranscriptStore = ReturnType; let store: TranscriptStore; @@ -81,197 +31,101 @@ describe("transcript slice", () => { store = createTranscriptStore(); }); - afterEach(() => { - vi.useRealTimers(); - }); - - test("stores partial words and hints from streaming updates", () => { - const initialPartial = createResponse({ - words: defaultWords, - transcript: "Another problem", - isFinal: false, - }); - - store.getState().handleTranscriptResponse(initialPartial); - - const stateAfterFirst = store.getState(); - const firstChannelWords = stateAfterFirst.partialWordsByChannel[0]; - expect(firstChannelWords).toHaveLength(2); - expect(firstChannelWords?.map((word) => word.text)).toEqual([ - " Another", - " problem", - ]); - expect(stateAfterFirst.partialHintsByChannel[0]).toHaveLength(2); - expect(stateAfterFirst.partialHintsByChannel[0]?.[0]?.wordIndex).toBe(0); - expect(stateAfterFirst.partialHintsByChannel[0]?.[1]?.wordIndex).toBe(1); + test("stores partials grouped by channel", () => { + store.getState().handleTranscriptDelta( + makeDelta({ + partials: [ + { text: " Hello", start_ms: 0, end_ms: 500, channel: 0 }, + { text: " world", start_ms: 500, end_ms: 1000, channel: 0 }, + { text: " hi", start_ms: 0, end_ms: 300, channel: 1 }, + ], + }), + ); - const extendedPartial = createResponse({ - words: [ - ...defaultWords, - { - word: "exists", - punctuated_word: "exists", - start: 2, - end: 3, - confidence: 1, - speaker: 1, - language: "en", - }, - ], - transcript: "Another problem exists", - isFinal: false, - }); + const state = store.getState(); + expect(state.partialWordsByChannel[0]).toHaveLength(2); + expect(state.partialWordsByChannel[1]).toHaveLength(1); + }); - store.getState().handleTranscriptResponse(extendedPartial); + test("replaces partials snapshot on each delta", () => { + store.getState().handleTranscriptDelta( + makeDelta({ + partials: [{ text: " Hello", start_ms: 0, end_ms: 500, channel: 0 }], + }), + ); + store.getState().handleTranscriptDelta( + makeDelta({ + partials: [ + { text: " Hello", start_ms: 0, end_ms: 500, channel: 0 }, + { text: " world", start_ms: 500, end_ms: 1000, channel: 0 }, + ], + }), + ); - const stateAfterSecond = store.getState(); - const updatedWords = stateAfterSecond.partialWordsByChannel[0]; - expect(updatedWords).toHaveLength(3); - expect(updatedWords?.map((word) => word.text)).toEqual([ - " Another", - " problem", - " exists", - ]); - const channelHints = stateAfterSecond.partialHintsByChannel[0] ?? []; - expect(channelHints).toHaveLength(3); - const lastPartialHint = channelHints[channelHints.length - 1]; - expect(lastPartialHint?.wordIndex).toBe(2); + expect(store.getState().partialWordsByChannel[0]).toHaveLength(2); }); - test("persists only new final words", () => { + test("calls persist when new_words are present", () => { const persist = vi.fn(); store.getState().setTranscriptPersist(persist); - const finalResponse = createResponse({ - words: [ - { - word: "hello", - punctuated_word: "Hello", - start: 0, - end: 0.5, - confidence: 1, - speaker: 0, - language: "en", - }, - { - word: "world", - punctuated_word: "world", - start: 0.5, - end: 1.5, - confidence: 1, - speaker: null, - language: "en", - }, - ], - transcript: "Hello world", - isFinal: true, - }); - - store.getState().handleTranscriptResponse(finalResponse); - expect(persist).toHaveBeenCalledTimes(1); - - const [words, hints] = persist.mock.calls[0] as [ - WordLike[], - RuntimeSpeakerHint[], - ]; - expect(words.map((word) => word.text)).toEqual([" Hello", " world"]); - expect(words.map((word) => word.end_ms)).toEqual([500, 1500]); - expect(hints).toEqual([ - { - data: { type: "provider_speaker_index", speaker_index: 0 }, - wordIndex: 0, - }, - ]); + store.getState().handleTranscriptDelta( + makeDelta({ + new_words: [ + { + id: "1", + text: " Hello", + start_ms: 0, + end_ms: 500, + channel: 0, + state: "final", + }, + ], + }), + ); - store.getState().handleTranscriptResponse(finalResponse); expect(persist).toHaveBeenCalledTimes(1); - expect(store.getState().finalWordsMaxEndMsByChannel[0]).toBe(1500); + const delta = persist.mock.calls[0][0] as TranscriptDelta; + expect(delta.new_words).toHaveLength(1); + expect(delta.new_words[0]?.text).toBe(" Hello"); }); - test("adjusts partial hint indices after filtering partial words", () => { + test("calls persist when replaced_ids are present", () => { const persist = vi.fn(); store.getState().setTranscriptPersist(persist); - const partialResponse = createResponse({ - words: [ - { - word: "hello", - punctuated_word: "Hello", - start: 0, - end: 0.5, - confidence: 1, - speaker: 0, - language: "en", - }, - { - word: "world", - punctuated_word: "world", - start: 0.5, - end: 1.0, - confidence: 1, - speaker: 1, - language: "en", - }, - { - word: "test", - punctuated_word: "test", - start: 1.1, - end: 1.5, - confidence: 1, - speaker: 0, - language: "en", - }, - ], - transcript: "Hello world test", - isFinal: false, - }); + store + .getState() + .handleTranscriptDelta(makeDelta({ replaced_ids: ["old-id-1"] })); - store.getState().handleTranscriptResponse(partialResponse); - - const stateAfterPartial = store.getState(); - expect(stateAfterPartial.partialWordsByChannel[0]).toHaveLength(3); - expect(stateAfterPartial.partialHintsByChannel[0]).toHaveLength(3); + expect(persist).toHaveBeenCalledTimes(1); + }); - const finalResponse = createResponse({ - words: [ - { - word: "hello", - punctuated_word: "Hello", - start: 0, - end: 0.5, - confidence: 1, - speaker: 0, - language: "en", - }, - { - word: "world", - punctuated_word: "world", - start: 0.5, - end: 1.0, - confidence: 1, - speaker: 1, - language: "en", - }, - ], - transcript: "Hello world", - isFinal: true, - }); + test("does not call persist for partial-only deltas", () => { + const persist = vi.fn(); + store.getState().setTranscriptPersist(persist); - store.getState().handleTranscriptResponse(finalResponse); + store.getState().handleTranscriptDelta( + makeDelta({ + partials: [{ text: " Hello", start_ms: 0, end_ms: 500, channel: 0 }], + }), + ); - const stateAfterFinal = store.getState(); - const remainingPartialWords = stateAfterFinal.partialWordsByChannel[0]; - const remainingHints = stateAfterFinal.partialHintsByChannel[0] ?? []; + expect(persist).not.toHaveBeenCalled(); + }); - expect(remainingPartialWords).toHaveLength(1); - expect(remainingPartialWords?.[0]?.text).toBe(" test"); + test("clears state on resetTranscript", () => { + store.getState().setTranscriptPersist(vi.fn()); + store.getState().handleTranscriptDelta( + makeDelta({ + partials: [{ text: " Hello", start_ms: 0, end_ms: 500, channel: 0 }], + }), + ); - expect(remainingHints).toHaveLength(1); - expect(remainingHints[0]?.wordIndex).toBe(0); + store.getState().resetTranscript(); - const hintedWord = - remainingPartialWords?.[remainingHints[0]?.wordIndex ?? -1]; - expect(hintedWord).toBeDefined(); - expect(hintedWord?.text).toBe(" test"); + const state = store.getState(); + expect(state.partialWordsByChannel).toEqual({}); + expect(state.handlePersist).toBeUndefined(); }); }); diff --git a/apps/desktop/src/store/zustand/listener/transcript.ts b/apps/desktop/src/store/zustand/listener/transcript.ts index 6c7858dd56..7ab35e2147 100644 --- a/apps/desktop/src/store/zustand/listener/transcript.ts +++ b/apps/desktop/src/store/zustand/listener/transcript.ts @@ -1,35 +1,25 @@ import { create as mutate } from "mutative"; import type { StoreApi } from "zustand"; -import type { StreamResponse } from "@hypr/plugin-listener"; +import type { PartialWord, TranscriptDelta } from "@hypr/plugin-listener"; -import type { RuntimeSpeakerHint, WordLike } from "../../../utils/segment"; -import { transformWordEntries } from "./utils"; +type PartialWordsByChannel = Record; -type WordsByChannel = Record; - -export type HandlePersistCallback = ( - words: WordLike[], - hints: RuntimeSpeakerHint[], -) => void; +export type HandlePersistCallback = (delta: TranscriptDelta) => void; export type TranscriptState = { - finalWordsMaxEndMsByChannel: Record; - partialWordsByChannel: WordsByChannel; - partialHintsByChannel: Record; + partialWordsByChannel: PartialWordsByChannel; handlePersist?: HandlePersistCallback; }; export type TranscriptActions = { setTranscriptPersist: (callback?: HandlePersistCallback) => void; - handleTranscriptResponse: (response: StreamResponse) => void; + handleTranscriptDelta: (delta: TranscriptDelta) => void; resetTranscript: () => void; }; const initialState: TranscriptState = { - finalWordsMaxEndMsByChannel: {}, partialWordsByChannel: {}, - partialHintsByChannel: {}, handlePersist: undefined, }; @@ -38,192 +28,39 @@ export const createTranscriptSlice = < >( set: StoreApi["setState"], get: StoreApi["getState"], -): TranscriptState & TranscriptActions => { - const handleFinalWords = ( - channelIndex: number, - words: WordLike[], - hints: RuntimeSpeakerHint[], - ): void => { - const { - partialWordsByChannel, - partialHintsByChannel, - handlePersist, - finalWordsMaxEndMsByChannel, - } = get(); - - const lastPersistedEndMs = finalWordsMaxEndMsByChannel[channelIndex] ?? 0; - const lastEndMs = getLastEndMs(words); - - const firstNewWordIndex = words.findIndex( - (word) => word.end_ms > lastPersistedEndMs, - ); - if (firstNewWordIndex === -1) { - return; - } - - const newWords = words.slice(firstNewWordIndex); - const newHints = hints - .filter((hint) => hint.wordIndex >= firstNewWordIndex) - .map((hint) => ({ - ...hint, - wordIndex: hint.wordIndex - firstNewWordIndex, - })); - - const existingPartialWords = partialWordsByChannel[channelIndex] ?? []; - const remainingPartialWords = existingPartialWords.filter( - (word) => word.start_ms > lastEndMs, +): TranscriptState & TranscriptActions => ({ + ...initialState, + setTranscriptPersist: (callback) => { + set((state) => + mutate(state, (draft) => { + draft.handlePersist = callback; + }), ); - - const oldToNewIndex = new Map(); - let newIdx = 0; - for (let oldIdx = 0; oldIdx < existingPartialWords.length; oldIdx++) { - if (existingPartialWords[oldIdx].start_ms > lastEndMs) { - oldToNewIndex.set(oldIdx, newIdx); - newIdx++; - } + }, + handleTranscriptDelta: (delta) => { + const partialWordsByChannel: PartialWordsByChannel = {}; + for (const word of delta.partials) { + const ch = word.channel; + partialWordsByChannel[ch] ??= []; + partialWordsByChannel[ch].push(word); } - const existingPartialHints = partialHintsByChannel[channelIndex] ?? []; - const remainingPartialHints = existingPartialHints - .filter((hint) => oldToNewIndex.has(hint.wordIndex)) - .map((hint) => ({ - ...hint, - wordIndex: oldToNewIndex.get(hint.wordIndex)!, - })); - set((state) => mutate(state, (draft) => { - draft.partialWordsByChannel[channelIndex] = remainingPartialWords; - draft.partialHintsByChannel[channelIndex] = remainingPartialHints; - draft.finalWordsMaxEndMsByChannel[channelIndex] = lastEndMs; + draft.partialWordsByChannel = partialWordsByChannel; }), ); - handlePersist?.(newWords, newHints); - }; - - const handlePartialWords = ( - channelIndex: number, - words: WordLike[], - hints: RuntimeSpeakerHint[], - ): void => { - const { partialWordsByChannel, partialHintsByChannel } = get(); - const existing = partialWordsByChannel[channelIndex] ?? []; - - const firstStartMs = getFirstStartMs(words); - const lastEndMs = getLastEndMs(words); - - const [before, after] = [ - existing.filter((word) => word.end_ms <= firstStartMs), - existing.filter((word) => word.start_ms >= lastEndMs), - ]; - - const newWords = [...before, ...words, ...after]; - - const hintsWithAdjustedIndices = hints.map((hint) => ({ - ...hint, - wordIndex: before.length + hint.wordIndex, - })); - - const existingHints = partialHintsByChannel[channelIndex] ?? []; - const filteredOldHints = existingHints.filter((hint) => { - const word = existing[hint.wordIndex]; - return ( - word && (word.end_ms <= firstStartMs || word.start_ms >= lastEndMs) - ); - }); - + if (delta.new_words.length > 0 || delta.replaced_ids.length > 0) { + get().handlePersist?.(delta); + } + }, + resetTranscript: () => { set((state) => mutate(state, (draft) => { - draft.partialWordsByChannel[channelIndex] = newWords; - draft.partialHintsByChannel[channelIndex] = [ - ...filteredOldHints, - ...hintsWithAdjustedIndices, - ]; + draft.partialWordsByChannel = {}; + draft.handlePersist = undefined; }), ); - }; - - return { - ...initialState, - setTranscriptPersist: (callback) => { - set((state) => - mutate(state, (draft) => { - draft.handlePersist = callback; - }), - ); - }, - handleTranscriptResponse: (response) => { - if (response.type !== "Results") { - return; - } - - const channelIndex = response.channel_index[0]; - const alternative = response.channel.alternatives[0]; - if (channelIndex === undefined || !alternative) { - return; - } - - const [words, hints] = transformWordEntries( - alternative.words, - alternative.transcript, - channelIndex, - ); - if (!words.length) { - return; - } - - if (response.is_final) { - handleFinalWords(channelIndex, words, hints); - } else { - handlePartialWords(channelIndex, words, hints); - } - }, - resetTranscript: () => { - const { partialWordsByChannel, partialHintsByChannel, handlePersist } = - get(); - - const remainingWords = Object.values(partialWordsByChannel).flat(); - - const channelIndices = Object.keys(partialWordsByChannel) - .map(Number) - .sort((a, b) => a - b); - - const offsetByChannel = new Map(); - let currentOffset = 0; - for (const channelIndex of channelIndices) { - offsetByChannel.set(channelIndex, currentOffset); - currentOffset += partialWordsByChannel[channelIndex]?.length ?? 0; - } - - const remainingHints: RuntimeSpeakerHint[] = []; - for (const channelIndex of channelIndices) { - const hints = partialHintsByChannel[channelIndex] ?? []; - const offset = offsetByChannel.get(channelIndex) ?? 0; - for (const hint of hints) { - remainingHints.push({ - ...hint, - wordIndex: hint.wordIndex + offset, - }); - } - } - - if (remainingWords.length > 0) { - handlePersist?.(remainingWords, remainingHints); - } - - set((state) => - mutate(state, (draft) => { - draft.partialWordsByChannel = {}; - draft.partialHintsByChannel = {}; - draft.finalWordsMaxEndMsByChannel = {}; - draft.handlePersist = undefined; - }), - ); - }, - }; -}; - -const getLastEndMs = (words: WordLike[]): number => - words[words.length - 1]?.end_ms ?? 0; -const getFirstStartMs = (words: WordLike[]): number => words[0]?.start_ms ?? 0; + }, +}); diff --git a/apps/desktop/src/store/zustand/listener/utils.test.ts b/apps/desktop/src/store/zustand/listener/utils.test.ts deleted file mode 100644 index 216d8352b2..0000000000 --- a/apps/desktop/src/store/zustand/listener/utils.test.ts +++ /dev/null @@ -1,38 +0,0 @@ -import { describe, expect, test } from "vitest"; - -import { fixSpacingForWords } from "./utils"; - -describe("fixSpacingForWords", () => { - const testCases = [ - { - transcript: "Hello", - input: ["Hello"], - output: [" Hello"], - }, - { - transcript: "Yes. Because we", - input: ["Yes.", "Because", "we"], - output: [" Yes.", " Because", " we"], - }, - { - transcript: "shouldn't", - input: ["shouldn", "'t"], - output: [" shouldn", "'t"], - }, - { - transcript: "Yes. Because we shouldn't be false.", - input: ["Yes.", "Because", "we", "shouldn", "'t", "be", "false."], - output: [" Yes.", " Because", " we", " shouldn", "'t", " be", " false."], - }, - ]; - - test.each(testCases)( - "transcript: $transcript", - ({ transcript, input, output }) => { - expect(output.join("")).toEqual(` ${transcript}`); - - const actual = fixSpacingForWords(input, transcript); - expect(actual).toEqual(output); - }, - ); -}); diff --git a/apps/desktop/src/store/zustand/listener/utils.ts b/apps/desktop/src/store/zustand/listener/utils.ts deleted file mode 100644 index 67c62a20eb..0000000000 --- a/apps/desktop/src/store/zustand/listener/utils.ts +++ /dev/null @@ -1,77 +0,0 @@ -import type { RuntimeSpeakerHint, WordLike } from "../../../utils/segment"; - -export function fixSpacingForWords( - words: string[], - transcript: string, -): string[] { - const result: string[] = []; - let pos = 0; - - for (const [i, word] of words.entries()) { - const trimmed = word.trim(); - - if (!trimmed) { - result.push(word); - continue; - } - - const foundAt = transcript.indexOf(trimmed, pos); - if (foundAt === -1) { - result.push(word); - continue; - } - - const prefix = i === 0 ? " " : transcript.slice(pos, foundAt); - result.push(prefix + trimmed); - pos = foundAt + trimmed.length; - } - - return result; -} - -export type WordEntry = { - word: string; - punctuated_word?: string | null; - start: number; - end: number; - speaker?: number | null; -}; - -export function transformWordEntries( - wordEntries: WordEntry[] | null | undefined, - transcript: string, - channel: number, -): [WordLike[], RuntimeSpeakerHint[]] { - const words: WordLike[] = []; - const hints: RuntimeSpeakerHint[] = []; - - const entries = wordEntries ?? []; - const textsWithSpacing = fixSpacingForWords( - entries.map((w) => w.punctuated_word ?? w.word), - transcript, - ); - - for (let i = 0; i < entries.length; i++) { - const word = entries[i]; - const text = textsWithSpacing[i]; - - words.push({ - text, - start_ms: Math.round(word.start * 1000), - end_ms: Math.round(word.end * 1000), - channel, - }); - - if (typeof word.speaker === "number") { - hints.push({ - wordIndex: i, - data: { - type: "provider_speaker_index", - speaker_index: word.speaker, - }, - }); - } - } - - return [words, hints]; -} diff --git a/crates/transcript/Cargo.toml b/crates/transcript/Cargo.toml index 0e530cb862..d4462370cc 100644 --- a/crates/transcript/Cargo.toml +++ b/crates/transcript/Cargo.toml @@ -3,28 +3,6 @@ name = "transcript" version = "0.1.0" edition = "2024" -[dev-dependencies] -hypr-data = { workspace = true } -libc = "0.2" - -serde_json = { workspace = true } -strum = { workspace = true, features = ["derive"] } - -clap = { workspace = true, features = ["derive", "env"] } -crossterm = "0.29" -ratatui = { version = "0.30", features = ["crossterm_0_29"] } - -axum = { workspace = true } -bytes = { workspace = true } -futures-util = { workspace = true } -hypr-audio = { workspace = true } -hypr-audio-utils = { workspace = true } -hypr-transcribe-cactus = { workspace = true } -owhisper-client = { workspace = true } -tokio = { workspace = true, features = ["rt", "macros", "sync", "net"] } -tokio-stream = { workspace = true } -tower = { workspace = true } - [dependencies] owhisper-interface = { workspace = true } diff --git a/crates/transcript/examples/cli/app.rs b/crates/transcript/examples/cli/app.rs deleted file mode 100644 index c91081868b..0000000000 --- a/crates/transcript/examples/cli/app.rs +++ /dev/null @@ -1,282 +0,0 @@ -use crossterm::event::{KeyCode, MouseButton, MouseEvent, MouseEventKind}; -use owhisper_interface::stream::StreamResponse; -use ratatui::style::Style; -use transcript::SequentialIdGen; -use transcript::input::TranscriptInput; -use transcript::postprocess::PostProcessUpdate; -use transcript::types::{PartialWord, SpeakerHint, TranscriptWord}; -use transcript::view::{ProcessOutcome, TranscriptView}; - -use crate::feed::TranscriptFeed; -use crate::logger::LogBuffer; -use crate::renderer::debug::DebugSection; -use crate::renderer::{LayoutInfo, WordRegion}; -use crate::viewport::ViewportState; - -fn lookup_word(region: &WordRegion, view: &TranscriptView) -> Option { - let frame = view.frame(); - let dbg = view.pipeline_debug(); - if region.is_final { - let word = frame.final_words.get(region.index)?.clone(); - let speaker = frame - .speaker_hints - .iter() - .find(|h| h.word_id == word.id) - .cloned(); - Some(SelectedWord::Final { word, speaker }) - } else { - let word = frame.partial_words.get(region.index)?.clone(); - let stability = dbg - .partial_stability - .iter() - .find(|(text, _)| *text == word.text) - .map(|(_, count)| *count); - Some(SelectedWord::Partial { word, stability }) - } -} - -#[derive(Clone, Copy, PartialEq, Eq)] -pub enum LastEvent { - Final, - Partial, - Correction, - Skipped, -} - -pub enum KeyAction { - Quit, - Continue { reset_tick: bool }, -} - -pub enum SelectedWord { - Final { - word: TranscriptWord, - speaker: Option, - }, - Partial { - word: PartialWord, - stability: Option, - }, -} - -pub struct App { - source: Box, - source_debug: Vec, - pub position: usize, - pub paused: bool, - pub speed_ms: u64, - pub view: TranscriptView, - pub source_name: String, - pub last_event: LastEvent, - pub last_postprocess: Option, - pub viewport: ViewportState, - pub selected_word: Option, - pub log_buffer: LogBuffer, -} - -impl App { - pub fn new( - source: Box, - source_debug: Vec, - speed_ms: u64, - source_name: String, - log_buffer: LogBuffer, - ) -> Self { - let paused = !source.is_live(); - Self { - source, - source_debug, - position: 0, - paused, - speed_ms, - view: TranscriptView::with_config(SequentialIdGen::new()), - source_name, - last_event: LastEvent::Skipped, - last_postprocess: None, - viewport: ViewportState::new(), - selected_word: None, - log_buffer, - } - } - - pub fn source_debug_sections(&self) -> Vec { - let mut sections = self.source_debug.clone(); - sections.extend(self.source.debug_sections()); - sections - } - - pub fn source_word_style(&self, channel: i32, start_ms: i64, end_ms: i64) -> Option