Skip to content

Commit 8cbbbf9

Browse files
committed
audio processing with examples
1 parent 7ef8af5 commit 8cbbbf9

File tree

5 files changed

+375
-15
lines changed

5 files changed

+375
-15
lines changed

examples/process_audio_example.py

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Audio Effects Processor using StreamProcessor
4+
5+
This example demonstrates real audio modification with PyTrickle, including:
6+
- Volume adjustment
7+
- Low-pass filtering
8+
- Echo/reverb effects
9+
- Channel manipulation
10+
"""
11+
12+
import logging
13+
import numpy as np
14+
from scipy import signal
15+
from pytrickle import StreamProcessor
16+
from pytrickle.frames import AudioFrame
17+
import time
18+
from typing import List
19+
20+
logging.basicConfig(level=logging.INFO)
21+
logger = logging.getLogger(__name__)
22+
23+
# Global state
24+
volume = 1.0
25+
echo_delay = 0.1 # seconds
26+
echo_decay = 0.3 # echo strength (0.0 to 1.0)
27+
lowpass_cutoff = 8000 # Hz
28+
enable_effects = True
29+
delay = 0.0
30+
ready = False
31+
32+
# Echo buffer for storing previous samples
33+
echo_buffer = {} # Will store buffers per sample rate
34+
35+
def load_model(**kwargs):
36+
"""Initialize audio processor state - called during model loading phase."""
37+
global volume, echo_delay, echo_decay, lowpass_cutoff, enable_effects, ready
38+
39+
logger.info(f"load_model called with kwargs: {kwargs}")
40+
41+
# Set processor variables from kwargs or use defaults
42+
volume = max(0.0, min(2.0, kwargs.get('volume', 1.0)))
43+
echo_delay = max(0.0, min(1.0, kwargs.get('echo_delay', 0.1)))
44+
echo_decay = max(0.0, min(1.0, kwargs.get('echo_decay', 0.3)))
45+
lowpass_cutoff = max(100, min(20000, kwargs.get('lowpass_cutoff', 8000)))
46+
enable_effects = kwargs.get('enable_effects', True)
47+
48+
ready = True
49+
logger.info(f"✅ Audio effects processor ready:")
50+
logger.info(f" 📢 Volume: {volume:.2f}")
51+
logger.info(f" 🔉 Echo delay: {echo_delay:.2f}s, decay: {echo_decay:.2f}")
52+
logger.info(f" 🎛️ Low-pass cutoff: {lowpass_cutoff} Hz")
53+
logger.info(f" ⚡ Effects enabled: {enable_effects}")
54+
55+
async def process_audio(frame: AudioFrame) -> List[AudioFrame]:
56+
"""Apply audio effects including volume, echo, and filtering."""
57+
global volume, echo_delay, echo_decay, lowpass_cutoff, enable_effects, ready, delay, echo_buffer
58+
59+
# Simulated processing time
60+
if delay > 0:
61+
time.sleep(delay)
62+
63+
if not ready or not enable_effects:
64+
# Pass through unchanged
65+
return [frame]
66+
67+
try:
68+
# Get audio samples - they're already numpy arrays
69+
samples = frame.samples.copy().astype(np.float32)
70+
sample_rate = frame.rate
71+
72+
# Normalize integer samples to float range [-1, 1] if needed
73+
if frame.format in ['s16', 's16p']:
74+
samples = samples / 32768.0
75+
elif frame.format in ['s32', 's32p']:
76+
samples = samples / 2147483648.0
77+
# float formats are already in [-1, 1] range
78+
79+
# Handle different audio layouts
80+
if samples.ndim == 1:
81+
# Mono audio
82+
channels = 1
83+
samples = samples.reshape(1, -1)
84+
elif samples.ndim == 2:
85+
# Multi-channel audio
86+
if frame.format.endswith('p'):
87+
# Planar format: (channels, samples)
88+
channels = samples.shape[0]
89+
else:
90+
# Packed format: (samples, channels) -> transpose to (channels, samples)
91+
samples = samples.T
92+
channels = samples.shape[0]
93+
else:
94+
logger.warning(f"Unexpected audio shape: {samples.shape}")
95+
return [frame]
96+
97+
# Initialize echo buffer for this sample rate if needed
98+
buffer_key = f"{sample_rate}_{channels}"
99+
if buffer_key not in echo_buffer:
100+
# Create buffer to store echo_delay seconds of audio
101+
buffer_size = int(sample_rate * echo_delay)
102+
echo_buffer[buffer_key] = np.zeros((channels, buffer_size), dtype=np.float32)
103+
104+
current_buffer = echo_buffer[buffer_key]
105+
106+
# Process each channel
107+
processed_samples = np.zeros_like(samples)
108+
109+
for ch in range(channels):
110+
channel_samples = samples[ch]
111+
112+
# Apply volume adjustment
113+
channel_samples = channel_samples * volume
114+
115+
# Apply echo effect
116+
if echo_decay > 0 and current_buffer.shape[1] > 0:
117+
# Add delayed samples from buffer
118+
buffer_samples = current_buffer[ch]
119+
echo_samples = buffer_samples * echo_decay
120+
121+
# Mix echo with current samples
122+
mix_length = min(len(channel_samples), len(echo_samples))
123+
channel_samples[:mix_length] += echo_samples[:mix_length]
124+
125+
# Update buffer with current samples for next frame
126+
if len(channel_samples) >= len(buffer_samples):
127+
# Current samples are longer than buffer
128+
current_buffer[ch] = channel_samples[-len(buffer_samples):]
129+
else:
130+
# Shift buffer and add new samples
131+
shift_amount = len(channel_samples)
132+
current_buffer[ch] = np.roll(current_buffer[ch], -shift_amount)
133+
current_buffer[ch][-shift_amount:] = channel_samples
134+
135+
# Apply low-pass filter
136+
if lowpass_cutoff < sample_rate / 2:
137+
# Design Butterworth low-pass filter
138+
nyquist = sample_rate / 2
139+
normalized_cutoff = lowpass_cutoff / nyquist
140+
b, a = signal.butter(4, normalized_cutoff, btype='low')
141+
142+
# Apply filter
143+
channel_samples = signal.filtfilt(b, a, channel_samples)
144+
145+
# Clip to prevent overflow
146+
channel_samples = np.clip(channel_samples, -1.0, 1.0)
147+
148+
processed_samples[ch] = channel_samples
149+
150+
# Convert back to original format
151+
if frame.format in ['s16', 's16p']:
152+
processed_samples = (processed_samples * 32767).astype(np.int16)
153+
elif frame.format in ['s32', 's32p']:
154+
processed_samples = (processed_samples * 2147483647).astype(np.int32)
155+
# float formats stay as float32
156+
157+
# Convert back to original layout
158+
if frame.format.endswith('p'):
159+
# Keep planar format: (channels, samples)
160+
final_samples = processed_samples
161+
else:
162+
# Convert back to packed format: (samples, channels)
163+
if channels == 1:
164+
final_samples = processed_samples.squeeze(0) # Remove channel dimension for mono
165+
else:
166+
final_samples = processed_samples.T
167+
168+
# Create new AudioFrame with modified samples
169+
# We'll create it manually since there's no replace_samples method
170+
new_frame = AudioFrame.__new__(AudioFrame)
171+
new_frame.samples = final_samples
172+
new_frame.nb_samples = frame.nb_samples
173+
new_frame.format = frame.format
174+
new_frame.rate = frame.rate
175+
new_frame.layout = frame.layout
176+
new_frame.timestamp = frame.timestamp
177+
new_frame.time_base = frame.time_base
178+
new_frame.log_timestamps = frame.log_timestamps.copy()
179+
new_frame.side_data = frame.side_data
180+
181+
logger.debug(f"🎵 Processed audio: {channels} channels, {len(final_samples)} samples, "
182+
f"volume={volume:.2f}, echo={echo_decay:.2f}, lpf={lowpass_cutoff}Hz")
183+
184+
return [new_frame]
185+
186+
except Exception as e:
187+
logger.error(f"Error in audio processing: {e}")
188+
# Return original frame on error
189+
return [frame]
190+
191+
def update_params(params: dict):
192+
"""Update audio effect parameters."""
193+
global volume, echo_delay, echo_decay, lowpass_cutoff, enable_effects, delay, echo_buffer
194+
195+
if "volume" in params:
196+
old = volume
197+
volume = max(0.0, min(2.0, float(params["volume"])))
198+
if old != volume:
199+
logger.info(f"📢 Volume: {old:.2f}{volume:.2f}")
200+
201+
if "echo_delay" in params:
202+
old = echo_delay
203+
echo_delay = max(0.0, min(1.0, float(params["echo_delay"])))
204+
if old != echo_delay:
205+
logger.info(f"⏱️ Echo delay: {old:.2f}s → {echo_delay:.2f}s")
206+
# Clear echo buffers when delay changes
207+
echo_buffer.clear()
208+
209+
if "echo_decay" in params:
210+
old = echo_decay
211+
echo_decay = max(0.0, min(1.0, float(params["echo_decay"])))
212+
if old != echo_decay:
213+
logger.info(f"🔉 Echo decay: {old:.2f}{echo_decay:.2f}")
214+
215+
if "lowpass_cutoff" in params:
216+
old = lowpass_cutoff
217+
lowpass_cutoff = max(100, min(20000, int(params["lowpass_cutoff"])))
218+
if old != lowpass_cutoff:
219+
logger.info(f"🎛️ Low-pass cutoff: {old} Hz → {lowpass_cutoff} Hz")
220+
221+
if "enable_effects" in params:
222+
old = enable_effects
223+
enable_effects = bool(params["enable_effects"])
224+
if old != enable_effects:
225+
logger.info(f"⚡ Effects: {'ON' if enable_effects else 'OFF'}")
226+
227+
if "delay" in params:
228+
old = delay
229+
delay = max(0.0, float(params["delay"]))
230+
if old != delay:
231+
logger.info(f"⏳ Processing delay: {old:.2f}s → {delay:.2f}s")
232+
233+
if "clear_echo_buffer" in params and params["clear_echo_buffer"]:
234+
echo_buffer.clear()
235+
logger.info("🧹 Echo buffer cleared")
236+
237+
# Create and run StreamProcessor
238+
if __name__ == "__main__":
239+
processor = StreamProcessor(
240+
audio_processor=process_audio,
241+
model_loader=load_model,
242+
param_updater=update_params,
243+
name="audio-effects-processor",
244+
port=8000
245+
)
246+
247+
logger.info("🚀 Starting audio effects processor...")
248+
logger.info("🎵 Available effects: volume, echo, low-pass filter")
249+
logger.info("🔧 Update parameters via /api/update_params:")
250+
logger.info(" - volume: 0.0 to 2.0 (1.0 = normal)")
251+
logger.info(" - echo_delay: 0.0 to 1.0 seconds")
252+
logger.info(" - echo_decay: 0.0 to 1.0 (echo strength)")
253+
logger.info(" - lowpass_cutoff: 100 to 20000 Hz")
254+
logger.info(" - enable_effects: true/false")
255+
logger.info(" - delay: processing delay in seconds")
256+
logger.info(" - clear_echo_buffer: true to reset echo buffer")
257+
258+
processor.run()

examples/requirements.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
1-
# Optional: Additional video processing libraries for examples
2-
opencv-python
1+
# Optional: Additional processing libraries for examples
2+
opencv-python
3+
scipy

0 commit comments

Comments
 (0)