Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions .github/workflows/local_stt_e2e.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# End-to-end test of the local speech-to-text path (transcribe-cactus).
# Runs on manual dispatch, and on pushes/PRs that touch the cactus crates.
on:
  workflow_dispatch:
  push:
    branches: [main]
    paths:
      - crates/transcribe-cactus/**
      - crates/cactus/**
      - crates/cactus-sys/**
  pull_request:
    paths:
      - crates/transcribe-cactus/**
      - crates/cactus/**
      - crates/cactus-sys/**

jobs:
  local-stt-e2e:
    # ARM runner: matches the on-device target the cactus backend is built for.
    runs-on: depot-ubuntu-24.04-arm-8
    strategy:
      matrix:
        model:
          - name: whisper-small
            repo: openai/whisper-small
    defaults:
      run:
        shell: bash
    steps:
      # Submodules are required: vendor/cactus is consumed below.
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: ./.github/actions/rust_install
        with:
          platform: linux
      # Native build prerequisites for the cactus C++ bindings.
      - run: |
          sudo apt-get update
          sudo apt-get install -y cmake build-essential libcurl4-openssl-dev libclang-dev
      # Install the vendored cactus CLI (provides `cactus download`).
      - run: |
          pip3 install --break-system-packages huggingface-hub
          pip3 install --break-system-packages -e vendor/cactus/python/ --no-deps
      # Cache downloaded model weights keyed by model name; bump `-v1` to invalidate.
      - uses: actions/cache@v4
        with:
          path: vendor/cactus/weights/
          key: cactus-models-${{ matrix.model.name }}-arm-v1
      # NOTE(review): runs even on a cache hit — presumably a no-op when weights
      # already exist; confirm, or guard with the cache step's `cache-hit` output.
      - run: cactus download ${{ matrix.model.repo }}
      # The e2e tests are `#[ignore]`d by default; enable them explicitly here.
      - run: cargo test -p transcribe-cactus -- --ignored --nocapture
        env:
          CACTUS_STT_MODEL: ${{ github.workspace }}/vendor/cactus/weights/${{ matrix.model.name }}
          CACTUS_CLOUD_API_KEY: ${{ secrets.CACTUS_CLOUD_API_KEY }}
          E2E_AUDIO_SECS: 20
17 changes: 17 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions crates/cactus/src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::error::{Error, Result};
pub struct Model {
handle: NonNull<std::ffi::c_void>,
inference_lock: Mutex<()>,
is_moonshine: bool,
}

unsafe impl Send for Model {}
Expand Down Expand Up @@ -39,9 +40,16 @@ impl ModelBuilder {
let handle =
NonNull::new(raw).ok_or_else(|| Error::Init("cactus_init returned null".into()))?;

let is_moonshine = self
.model_path
.to_string_lossy()
.to_lowercase()
.contains("moonshine");

Ok(Model {
handle,
inference_lock: Mutex::new(()),
is_moonshine,
})
}
}
Expand All @@ -57,6 +65,10 @@ impl Model {
Self::builder(model_path).build()
}

pub fn is_moonshine(&self) -> bool {
self.is_moonshine
}

/// Cancel an in-progress inference. Safe to call concurrently — only sets an
/// atomic flag on the C++ side.
pub fn stop(&self) {
Expand Down
7 changes: 6 additions & 1 deletion crates/cactus/src/stt/batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@ impl Model {
options: &TranscribeOptions,
) -> Result<TranscriptionResult> {
let guard = self.lock_inference();
let prompt_c = CString::new(build_whisper_prompt(options))?;
let prompt = if self.is_moonshine() {
String::new()
} else {
build_whisper_prompt(options)
};
let prompt_c = CString::new(prompt)?;
let options_c = CString::new(serde_json::to_string(options)?)?;
let mut buf = vec![0u8; RESPONSE_BUF_SIZE];

Expand Down
9 changes: 9 additions & 0 deletions crates/transcribe-cactus/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,14 @@ tower = { workspace = true }
tracing = { workspace = true }

[dev-dependencies]
# Workspace crates used only by the integration / e2e tests.
hypr-audio-utils = { workspace = true }
hypr-cactus = { workspace = true }
hypr-data = { workspace = true }

# Test infrastructure: in-process websocket server (axum) plus a
# tungstenite client to exercise the streaming endpoint end to end.
axum = { workspace = true, features = ["ws"] }
futures-util = { workspace = true }
reqwest = { workspace = true, features = ["json"] }
sequential-test = "0.2"
serde_json = { workspace = true }
tokio = { workspace = true }
tokio-tungstenite = { workspace = true }
4 changes: 2 additions & 2 deletions crates/transcribe-cactus/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#[derive(Clone, Debug)]
pub struct CactusConfig {
pub cloud_handoff: bool,
pub cloud: hypr_cactus::CloudConfig,
pub min_chunk_sec: f32,
}

impl Default for CactusConfig {
fn default() -> Self {
Self {
cloud_handoff: true,
cloud: hypr_cactus::CloudConfig::default(),
min_chunk_sec: 2.5,
}
}
Expand Down
9 changes: 7 additions & 2 deletions crates/transcribe-cactus/src/service/batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,9 +320,14 @@ mod tests {
println!("{}", alternative.transcript.trim());
println!("--- END (confidence={:.2}) ---\n", alternative.confidence);

let transcript = alternative.transcript.trim().to_lowercase();
assert!(!transcript.is_empty(), "expected non-empty transcript");
assert!(
!alternative.transcript.trim().is_empty(),
"expected non-empty transcript"
transcript.contains("maybe")
|| transcript.contains("this")
|| transcript.contains("talking"),
"transcript looks like a hallucination (got: {:?})",
transcript
);
assert!(
alternative.confidence.is_finite(),
Expand Down
121 changes: 121 additions & 0 deletions crates/transcribe-cactus/src/service/streaming/message.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,124 @@ pub(super) fn process_incoming_message(msg: &Message, channels: u8) -> IncomingM
_ => IncomingMessage::Audio(AudioExtract::Empty),
}
}

#[cfg(test)]
mod tests {
    use axum::extract::ws::Message;
    use owhisper_interface::ControlMessage;

    use super::*;

    #[test]
    fn control_message_finalize_parsed() {
        let msg = Message::Text(r#"{"type":"Finalize"}"#.into());
        let parsed = process_incoming_message(&msg, 1);
        match parsed {
            IncomingMessage::Control(ControlMessage::Finalize) => {}
            unexpected => panic!(
                "expected Finalize, got {:?}",
                std::mem::discriminant(&unexpected)
            ),
        }
    }

    #[test]
    fn control_message_keep_alive_parsed() {
        let msg = Message::Text(r#"{"type":"KeepAlive"}"#.into());
        let parsed = process_incoming_message(&msg, 1);
        match parsed {
            IncomingMessage::Control(ControlMessage::KeepAlive) => {}
            unexpected => panic!(
                "expected KeepAlive, got {:?}",
                std::mem::discriminant(&unexpected)
            ),
        }
    }

    #[test]
    fn control_message_close_stream_parsed() {
        let msg = Message::Text(r#"{"type":"CloseStream"}"#.into());
        let parsed = process_incoming_message(&msg, 1);
        match parsed {
            IncomingMessage::Control(ControlMessage::CloseStream) => {}
            unexpected => panic!(
                "expected CloseStream, got {:?}",
                std::mem::discriminant(&unexpected)
            ),
        }
    }

    #[test]
    fn audio_chunk_parsed_over_control() {
        // A serialized ListenInputChunk must win over control-message parsing.
        let chunk = owhisper_interface::ListenInputChunk::End;
        let payload = serde_json::to_string(&chunk).unwrap();
        let msg = Message::Text(payload.into());
        match process_incoming_message(&msg, 1) {
            IncomingMessage::Audio(AudioExtract::End) => {}
            unexpected => panic!(
                "expected Audio(End), got {:?}",
                std::mem::discriminant(&unexpected)
            ),
        }
    }

    #[test]
    fn close_frame_yields_end() {
        let msg = Message::Close(None);
        match process_incoming_message(&msg, 1) {
            IncomingMessage::Audio(AudioExtract::End) => {}
            unexpected => panic!(
                "expected Audio(End), got {:?}",
                std::mem::discriminant(&unexpected)
            ),
        }
    }

    #[test]
    fn binary_single_channel_yields_mono() {
        // Three little-endian i16 samples packed into a binary frame.
        let mut data = Vec::new();
        for sample in [1000i16, 2000, 3000] {
            data.extend_from_slice(&sample.to_le_bytes());
        }
        let msg = Message::Binary(data.into());
        match process_incoming_message(&msg, 1) {
            IncomingMessage::Audio(AudioExtract::Mono(decoded)) => assert!(!decoded.is_empty()),
            unexpected => panic!(
                "expected Audio(Mono), got {:?}",
                std::mem::discriminant(&unexpected)
            ),
        }
    }

    #[test]
    fn binary_dual_channel_yields_dual() {
        // Two interleaved frames: ch0 samples positive, ch1 samples negative.
        let mut data = Vec::new();
        for sample in [1000i16, -1000, 2000, -2000] {
            data.extend_from_slice(&sample.to_le_bytes());
        }
        let msg = Message::Binary(data.into());
        match process_incoming_message(&msg, 2) {
            IncomingMessage::Audio(AudioExtract::Dual { ch0, ch1 }) => {
                assert_eq!(ch0.len(), 2);
                assert_eq!(ch1.len(), 2);
                assert!(ch0[0] > 0.0);
                assert!(ch1[0] < 0.0);
            }
            unexpected => panic!(
                "expected Audio(Dual), got {:?}",
                std::mem::discriminant(&unexpected)
            ),
        }
    }

    #[test]
    fn dual_audio_json_yields_dual() {
        // DualAudio JSON carries mic/speaker byte buffers; both become channels.
        let chunk = owhisper_interface::ListenInputChunk::DualAudio {
            mic: vec![0x00, 0x10],
            speaker: vec![0x00, 0x20],
        };
        let payload = serde_json::to_string(&chunk).unwrap();
        let msg = Message::Text(payload.into());
        match process_incoming_message(&msg, 1) {
            IncomingMessage::Audio(AudioExtract::Dual { .. }) => {}
            unexpected => panic!(
                "expected Audio(Dual), got {:?}",
                std::mem::discriminant(&unexpected)
            ),
        }
    }
}
3 changes: 0 additions & 3 deletions crates/transcribe-cactus/src/service/streaming/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@ mod message;
mod response;
mod session;

#[cfg(test)]
mod tests;

use std::{
future::Future,
path::PathBuf,
Expand Down
Loading
Loading