|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Audio Effects Processor using StreamProcessor |
| 4 | +
|
| 5 | +This example demonstrates real audio modification with PyTrickle, including: |
| 6 | +- Volume adjustment |
| 7 | +- Low-pass filtering |
| 8 | +- Echo/reverb effects |
| 9 | +- Channel manipulation |
| 10 | +""" |
| 11 | + |
| 12 | +import logging |
| 13 | +import numpy as np |
| 14 | +from scipy import signal |
| 15 | +from pytrickle import StreamProcessor |
| 16 | +from pytrickle.frames import AudioFrame |
| 17 | +import time |
| 18 | +from typing import List |
| 19 | + |
| 20 | +logging.basicConfig(level=logging.INFO) |
| 21 | +logger = logging.getLogger(__name__) |
| 22 | + |
| 23 | +# Global state |
| 24 | +volume = 1.0 |
| 25 | +echo_delay = 0.1 # seconds |
| 26 | +echo_decay = 0.3 # echo strength (0.0 to 1.0) |
| 27 | +lowpass_cutoff = 8000 # Hz |
| 28 | +enable_effects = True |
| 29 | +delay = 0.0 |
| 30 | +ready = False |
| 31 | + |
| 32 | +# Echo buffer for storing previous samples |
| 33 | +echo_buffer = {} # Will store buffers per sample rate |
| 34 | + |
| 35 | +def load_model(**kwargs): |
| 36 | + """Initialize audio processor state - called during model loading phase.""" |
| 37 | + global volume, echo_delay, echo_decay, lowpass_cutoff, enable_effects, ready |
| 38 | + |
| 39 | + logger.info(f"load_model called with kwargs: {kwargs}") |
| 40 | + |
| 41 | + # Set processor variables from kwargs or use defaults |
| 42 | + volume = max(0.0, min(2.0, kwargs.get('volume', 1.0))) |
| 43 | + echo_delay = max(0.0, min(1.0, kwargs.get('echo_delay', 0.1))) |
| 44 | + echo_decay = max(0.0, min(1.0, kwargs.get('echo_decay', 0.3))) |
| 45 | + lowpass_cutoff = max(100, min(20000, kwargs.get('lowpass_cutoff', 8000))) |
| 46 | + enable_effects = kwargs.get('enable_effects', True) |
| 47 | + |
| 48 | + ready = True |
| 49 | + logger.info(f"✅ Audio effects processor ready:") |
| 50 | + logger.info(f" 📢 Volume: {volume:.2f}") |
| 51 | + logger.info(f" 🔉 Echo delay: {echo_delay:.2f}s, decay: {echo_decay:.2f}") |
| 52 | + logger.info(f" 🎛️ Low-pass cutoff: {lowpass_cutoff} Hz") |
| 53 | + logger.info(f" ⚡ Effects enabled: {enable_effects}") |
| 54 | + |
| 55 | +async def process_audio(frame: AudioFrame) -> List[AudioFrame]: |
| 56 | + """Apply audio effects including volume, echo, and filtering.""" |
| 57 | + global volume, echo_delay, echo_decay, lowpass_cutoff, enable_effects, ready, delay, echo_buffer |
| 58 | + |
| 59 | + # Simulated processing time |
| 60 | + if delay > 0: |
| 61 | + time.sleep(delay) |
| 62 | + |
| 63 | + if not ready or not enable_effects: |
| 64 | + # Pass through unchanged |
| 65 | + return [frame] |
| 66 | + |
| 67 | + try: |
| 68 | + # Get audio samples - they're already numpy arrays |
| 69 | + samples = frame.samples.copy().astype(np.float32) |
| 70 | + sample_rate = frame.rate |
| 71 | + |
| 72 | + # Normalize integer samples to float range [-1, 1] if needed |
| 73 | + if frame.format in ['s16', 's16p']: |
| 74 | + samples = samples / 32768.0 |
| 75 | + elif frame.format in ['s32', 's32p']: |
| 76 | + samples = samples / 2147483648.0 |
| 77 | + # float formats are already in [-1, 1] range |
| 78 | + |
| 79 | + # Handle different audio layouts |
| 80 | + if samples.ndim == 1: |
| 81 | + # Mono audio |
| 82 | + channels = 1 |
| 83 | + samples = samples.reshape(1, -1) |
| 84 | + elif samples.ndim == 2: |
| 85 | + # Multi-channel audio |
| 86 | + if frame.format.endswith('p'): |
| 87 | + # Planar format: (channels, samples) |
| 88 | + channels = samples.shape[0] |
| 89 | + else: |
| 90 | + # Packed format: (samples, channels) -> transpose to (channels, samples) |
| 91 | + samples = samples.T |
| 92 | + channels = samples.shape[0] |
| 93 | + else: |
| 94 | + logger.warning(f"Unexpected audio shape: {samples.shape}") |
| 95 | + return [frame] |
| 96 | + |
| 97 | + # Initialize echo buffer for this sample rate if needed |
| 98 | + buffer_key = f"{sample_rate}_{channels}" |
| 99 | + if buffer_key not in echo_buffer: |
| 100 | + # Create buffer to store echo_delay seconds of audio |
| 101 | + buffer_size = int(sample_rate * echo_delay) |
| 102 | + echo_buffer[buffer_key] = np.zeros((channels, buffer_size), dtype=np.float32) |
| 103 | + |
| 104 | + current_buffer = echo_buffer[buffer_key] |
| 105 | + |
| 106 | + # Process each channel |
| 107 | + processed_samples = np.zeros_like(samples) |
| 108 | + |
| 109 | + for ch in range(channels): |
| 110 | + channel_samples = samples[ch] |
| 111 | + |
| 112 | + # Apply volume adjustment |
| 113 | + channel_samples = channel_samples * volume |
| 114 | + |
| 115 | + # Apply echo effect |
| 116 | + if echo_decay > 0 and current_buffer.shape[1] > 0: |
| 117 | + # Add delayed samples from buffer |
| 118 | + buffer_samples = current_buffer[ch] |
| 119 | + echo_samples = buffer_samples * echo_decay |
| 120 | + |
| 121 | + # Mix echo with current samples |
| 122 | + mix_length = min(len(channel_samples), len(echo_samples)) |
| 123 | + channel_samples[:mix_length] += echo_samples[:mix_length] |
| 124 | + |
| 125 | + # Update buffer with current samples for next frame |
| 126 | + if len(channel_samples) >= len(buffer_samples): |
| 127 | + # Current samples are longer than buffer |
| 128 | + current_buffer[ch] = channel_samples[-len(buffer_samples):] |
| 129 | + else: |
| 130 | + # Shift buffer and add new samples |
| 131 | + shift_amount = len(channel_samples) |
| 132 | + current_buffer[ch] = np.roll(current_buffer[ch], -shift_amount) |
| 133 | + current_buffer[ch][-shift_amount:] = channel_samples |
| 134 | + |
| 135 | + # Apply low-pass filter |
| 136 | + if lowpass_cutoff < sample_rate / 2: |
| 137 | + # Design Butterworth low-pass filter |
| 138 | + nyquist = sample_rate / 2 |
| 139 | + normalized_cutoff = lowpass_cutoff / nyquist |
| 140 | + b, a = signal.butter(4, normalized_cutoff, btype='low') |
| 141 | + |
| 142 | + # Apply filter |
| 143 | + channel_samples = signal.filtfilt(b, a, channel_samples) |
| 144 | + |
| 145 | + # Clip to prevent overflow |
| 146 | + channel_samples = np.clip(channel_samples, -1.0, 1.0) |
| 147 | + |
| 148 | + processed_samples[ch] = channel_samples |
| 149 | + |
| 150 | + # Convert back to original format |
| 151 | + if frame.format in ['s16', 's16p']: |
| 152 | + processed_samples = (processed_samples * 32767).astype(np.int16) |
| 153 | + elif frame.format in ['s32', 's32p']: |
| 154 | + processed_samples = (processed_samples * 2147483647).astype(np.int32) |
| 155 | + # float formats stay as float32 |
| 156 | + |
| 157 | + # Convert back to original layout |
| 158 | + if frame.format.endswith('p'): |
| 159 | + # Keep planar format: (channels, samples) |
| 160 | + final_samples = processed_samples |
| 161 | + else: |
| 162 | + # Convert back to packed format: (samples, channels) |
| 163 | + if channels == 1: |
| 164 | + final_samples = processed_samples.squeeze(0) # Remove channel dimension for mono |
| 165 | + else: |
| 166 | + final_samples = processed_samples.T |
| 167 | + |
| 168 | + # Create new AudioFrame with modified samples |
| 169 | + # We'll create it manually since there's no replace_samples method |
| 170 | + new_frame = AudioFrame.__new__(AudioFrame) |
| 171 | + new_frame.samples = final_samples |
| 172 | + new_frame.nb_samples = frame.nb_samples |
| 173 | + new_frame.format = frame.format |
| 174 | + new_frame.rate = frame.rate |
| 175 | + new_frame.layout = frame.layout |
| 176 | + new_frame.timestamp = frame.timestamp |
| 177 | + new_frame.time_base = frame.time_base |
| 178 | + new_frame.log_timestamps = frame.log_timestamps.copy() |
| 179 | + new_frame.side_data = frame.side_data |
| 180 | + |
| 181 | + logger.debug(f"🎵 Processed audio: {channels} channels, {len(final_samples)} samples, " |
| 182 | + f"volume={volume:.2f}, echo={echo_decay:.2f}, lpf={lowpass_cutoff}Hz") |
| 183 | + |
| 184 | + return [new_frame] |
| 185 | + |
| 186 | + except Exception as e: |
| 187 | + logger.error(f"Error in audio processing: {e}") |
| 188 | + # Return original frame on error |
| 189 | + return [frame] |
| 190 | + |
| 191 | +def update_params(params: dict): |
| 192 | + """Update audio effect parameters.""" |
| 193 | + global volume, echo_delay, echo_decay, lowpass_cutoff, enable_effects, delay, echo_buffer |
| 194 | + |
| 195 | + if "volume" in params: |
| 196 | + old = volume |
| 197 | + volume = max(0.0, min(2.0, float(params["volume"]))) |
| 198 | + if old != volume: |
| 199 | + logger.info(f"📢 Volume: {old:.2f} → {volume:.2f}") |
| 200 | + |
| 201 | + if "echo_delay" in params: |
| 202 | + old = echo_delay |
| 203 | + echo_delay = max(0.0, min(1.0, float(params["echo_delay"]))) |
| 204 | + if old != echo_delay: |
| 205 | + logger.info(f"⏱️ Echo delay: {old:.2f}s → {echo_delay:.2f}s") |
| 206 | + # Clear echo buffers when delay changes |
| 207 | + echo_buffer.clear() |
| 208 | + |
| 209 | + if "echo_decay" in params: |
| 210 | + old = echo_decay |
| 211 | + echo_decay = max(0.0, min(1.0, float(params["echo_decay"]))) |
| 212 | + if old != echo_decay: |
| 213 | + logger.info(f"🔉 Echo decay: {old:.2f} → {echo_decay:.2f}") |
| 214 | + |
| 215 | + if "lowpass_cutoff" in params: |
| 216 | + old = lowpass_cutoff |
| 217 | + lowpass_cutoff = max(100, min(20000, int(params["lowpass_cutoff"]))) |
| 218 | + if old != lowpass_cutoff: |
| 219 | + logger.info(f"🎛️ Low-pass cutoff: {old} Hz → {lowpass_cutoff} Hz") |
| 220 | + |
| 221 | + if "enable_effects" in params: |
| 222 | + old = enable_effects |
| 223 | + enable_effects = bool(params["enable_effects"]) |
| 224 | + if old != enable_effects: |
| 225 | + logger.info(f"⚡ Effects: {'ON' if enable_effects else 'OFF'}") |
| 226 | + |
| 227 | + if "delay" in params: |
| 228 | + old = delay |
| 229 | + delay = max(0.0, float(params["delay"])) |
| 230 | + if old != delay: |
| 231 | + logger.info(f"⏳ Processing delay: {old:.2f}s → {delay:.2f}s") |
| 232 | + |
| 233 | + if "clear_echo_buffer" in params and params["clear_echo_buffer"]: |
| 234 | + echo_buffer.clear() |
| 235 | + logger.info("🧹 Echo buffer cleared") |
| 236 | + |
| 237 | +# Create and run StreamProcessor |
| 238 | +if __name__ == "__main__": |
| 239 | + processor = StreamProcessor( |
| 240 | + audio_processor=process_audio, |
| 241 | + model_loader=load_model, |
| 242 | + param_updater=update_params, |
| 243 | + name="audio-effects-processor", |
| 244 | + port=8000 |
| 245 | + ) |
| 246 | + |
| 247 | + logger.info("🚀 Starting audio effects processor...") |
| 248 | + logger.info("🎵 Available effects: volume, echo, low-pass filter") |
| 249 | + logger.info("🔧 Update parameters via /api/update_params:") |
| 250 | + logger.info(" - volume: 0.0 to 2.0 (1.0 = normal)") |
| 251 | + logger.info(" - echo_delay: 0.0 to 1.0 seconds") |
| 252 | + logger.info(" - echo_decay: 0.0 to 1.0 (echo strength)") |
| 253 | + logger.info(" - lowpass_cutoff: 100 to 20000 Hz") |
| 254 | + logger.info(" - enable_effects: true/false") |
| 255 | + logger.info(" - delay: processing delay in seconds") |
| 256 | + logger.info(" - clear_echo_buffer: true to reset echo buffer") |
| 257 | + |
| 258 | + processor.run() |
0 commit comments