diff --git a/main.c b/main.c index 2905cdf..3a337ab 100644 --- a/main.c +++ b/main.c @@ -18,15 +18,21 @@ #include "stretch.h" +#define SILENCE_THRESHOLD_DB -40 +#define AUDIO_WINDOW_MS 25 + static const char *sign_on = "\n" -" AUDIO-STRETCH Time Domain Harmonic Scaling Demo Version 0.3\n" +" AUDIO-STRETCH Time Domain Harmonic Scaling Demo Version 0.4\n" " Copyright (c) 2022 David Bryant. All Rights Reserved.\n\n"; static const char *usage = " Usage: AUDIO-STRETCH [-options] infile.wav outfile.wav\n\n" " Options: -r = stretch ratio (0.25 to 4.0, default = 1.0)\n" +" -g = gap/silence stretch ratio (if different)\n" " -u = upper freq period limit (default = 333 Hz)\n" " -l = lower freq period limit (default = 55 Hz)\n" +" -b = audio buffer/window length (ms, default = 25)\n" +" -t = gap/silence threshold (dB re FS, default = -40)\n" " -c = cycle through all ratios, starting higher\n" " -cc = cycle through all ratios, starting lower\n" " -d = force dual instance even for shallow ratios\n" @@ -68,23 +74,23 @@ typedef struct { #define WAVE_FORMAT_EXTENSIBLE 0xfffe static int write_pcm_wav_header (FILE *outfile, uint32_t num_samples, int num_channels, int bytes_per_sample, uint32_t sample_rate); - -#define BUFFER_SAMPLES 1024 +double rms_level_dB (int16_t *audio, int samples, int channels); static int verbose_mode, quiet_mode; int main (argc, argv) int argc; char **argv; { int asked_help = 0, overwrite = 0, scale_rate = 0, force_fast = 0, force_normal = 0, force_dual = 0, cycle_ratio = 0; - int buffer_samples = BUFFER_SAMPLES, upper_frequency = 333, lower_frequency = 55, min_period, max_period; + float ratio = 1.0, silence_ratio = 0.0, silence_threshold_dB = SILENCE_THRESHOLD_DB; uint32_t samples_to_process, insamples = 0, outsamples = 0; + int upper_frequency = 333, lower_frequency = 55; char *infilename = NULL, *outfilename = NULL; + int audio_window_ms = AUDIO_WINDOW_MS; RiffChunkHeader riff_chunk_header; WaveHeader WaveHeader = { 0 }; ChunkHeader chunk_header; StretchHandle stretcher; FILE *infile, *outfile; - float ratio = 1.0; // loop through command-line arguments @@ -119,6 +125,17 @@ int main (argc, argv) int argc; char **argv; --*argv; break; + case 'B': case 'b': + audio_window_ms = strtol (++*argv, argv, 10); + + if (audio_window_ms < 1 || audio_window_ms > 100) { + fprintf (stderr, "\naudio window is from 1 to 100 ms!\n"); + return -1; + } + + --*argv; + break; + case 'R': case 'r': ratio = strtod (++*argv, argv); @@ -130,6 +147,28 @@ int main (argc, argv) int argc; char **argv; --*argv; break; + case 'G': case 'g': + silence_ratio = strtod (++*argv, argv); + + if (silence_ratio < 0.25 || silence_ratio > 4.0) { + fprintf (stderr, "\ngap/silence ratio must be from 0.25 to 4.0!\n"); + return -1; + } + + --*argv; + break; + + case 'T': case 't': + silence_threshold_dB = strtod (++*argv, argv); + + if (silence_threshold_dB < -70 || silence_threshold_dB > -10) { + fprintf (stderr, "\nsilence threshold must be from -10 to -70 dB!\n"); + return -1; + } + + --*argv; + break; + case 'S': case 's': scale_rate = 1; break; @@ -311,22 +350,28 @@ int main (argc, argv) int argc; char **argv; return 1; } - min_period = WaveHeader.SampleRate / upper_frequency; - max_period = WaveHeader.SampleRate / lower_frequency; - int flags = 0; + int flags = 0, silence_mode = silence_ratio && !cycle_ratio && silence_ratio != ratio; + int buffer_samples = WaveHeader.SampleRate * (audio_window_ms / 1000.0); + int min_period = WaveHeader.SampleRate / upper_frequency; + int max_period = WaveHeader.SampleRate / lower_frequency; + float max_ratio = ratio; - if (force_dual || ratio < 0.5 || ratio > 2.0) - flags |= STRETCH_DUAL_FLAG; + if (force_dual || ratio < 0.5 || ratio > 2.0 || + (silence_mode && (silence_ratio < 0.5 || silence_ratio > 2.0))) + flags |= STRETCH_DUAL_FLAG; if ((force_fast || WaveHeader.SampleRate >= 32000) && !force_normal) flags |= STRETCH_FAST_FLAG; - if (verbose_mode) - fprintf (stderr, "initializing stretch library with period range = %d to %d, %d channels, %s, %s\n", + if (verbose_mode) { + fprintf (stderr, "file sample rate is %lu Hz (%s), buffer size is %d samples\n", + (unsigned long) WaveHeader.SampleRate, WaveHeader.NumChannels == 2 ? "stereo" : "mono", buffer_samples); + fprintf (stderr, "stretch period range = %d to %d, %d channels, %s, %s\n", min_period, max_period, WaveHeader.NumChannels, (flags & STRETCH_FAST_FLAG) ? "fast mode" : "normal mode", (flags & STRETCH_DUAL_FLAG) ? "dual instance" : "single instance"); + } - if (!quiet_mode && ratio == 1.0 && !cycle_ratio) + if (!quiet_mode && ratio == 1.0 && !silence_mode && !cycle_ratio) fprintf (stderr, "warning: a ratio of 1.0 will do nothing but copy the WAV file!\n"); if (!quiet_mode && ratio != 1.0 && cycle_ratio && !scale_rate) @@ -350,65 +395,127 @@ int main (argc, argv) int argc; char **argv; write_pcm_wav_header (outfile, 0, WaveHeader.NumChannels, 2, scaled_rate); if (cycle_ratio) - ratio = (flags & STRETCH_DUAL_FLAG) ? 4.0 : 2.0; + max_ratio = (flags & STRETCH_DUAL_FLAG) ? 4.0 : 2.0; + else if (silence_mode && silence_ratio > max_ratio) + max_ratio = silence_ratio; - int max_expected_samples = stretch_output_capacity (stretcher, buffer_samples, ratio); + int max_expected_samples = stretch_output_capacity (stretcher, buffer_samples, max_ratio); + int16_t *inbuffer = malloc (buffer_samples * WaveHeader.BlockAlign), *prebuffer = NULL; int16_t *outbuffer = malloc (max_expected_samples * WaveHeader.BlockAlign); - int16_t *inbuffer = malloc (buffer_samples * WaveHeader.BlockAlign); + int non_silence_frames = 0, silence_frames = 0, used_silence_frames = 0; int max_generated_stretch = 0, max_generated_flush = 0; + int samples_to_stretch = 0, consecutive_silence_frames = 1; - if (!inbuffer || !outbuffer) { + /* in the gap/silence mode we need an additional buffer to scan the "next" buffer for level */ + + if (silence_mode) + prebuffer = malloc (buffer_samples * WaveHeader.BlockAlign); + + if (!inbuffer || !outbuffer || (silence_mode && !prebuffer)) { fprintf (stderr, "can't allocate required memory!\n"); fclose (infile); return 1; } + /* read the entire file in frames and process with stretch */ + while (1) { - int samples_read = fread (inbuffer, WaveHeader.BlockAlign, + int samples_read = fread (silence_mode ? prebuffer : inbuffer, WaveHeader.BlockAlign, samples_to_process >= buffer_samples ? buffer_samples : samples_to_process, infile); - int samples_generated; + + if (!silence_mode && !samples_read) + break; insamples += samples_read; samples_to_process -= samples_read; + /* this is where we scan the frame we just read to see if it's below the silence threshold */ + + if (silence_mode) { + if (samples_read) { + double level = rms_level_dB (prebuffer, samples_read, WaveHeader.NumChannels); + + if (level > silence_threshold_dB) { + consecutive_silence_frames = 0; + non_silence_frames++; + } + else { + consecutive_silence_frames++; + silence_frames++; + } + } + } + else + samples_to_stretch = samples_read; + if (cycle_ratio) { if (flags & STRETCH_DUAL_FLAG) - ratio = (sin ((double) outsamples / WaveHeader.SampleRate) * (cycle_ratio & 1 ? 1.875 : -1.875)) + 2.125; + ratio = (sin ((double) outsamples / WaveHeader.SampleRate / 2.0) * (cycle_ratio & 1 ? 1.875 : -1.875)) + 2.125; else ratio = (sin ((double) outsamples / WaveHeader.SampleRate) * (cycle_ratio & 1 ? 0.75 : -0.75)) + 1.25; } - if (samples_read) { - samples_generated = stretch_samples (stretcher, inbuffer, samples_read, outbuffer, ratio); + if (samples_to_stretch) { + int samples_generated; - if (samples_generated > max_generated_stretch) - max_generated_stretch = samples_generated; - } - else { - samples_generated = stretch_flush (stretcher, outbuffer); + /* we use the gap/silence stretch ratio if the current frame, and the ones on either side, measure below the threshold */ - if (samples_generated > max_generated_flush) - max_generated_flush = samples_generated; - } + if (consecutive_silence_frames >= 3) { + samples_generated = stretch_samples (stretcher, inbuffer, samples_to_stretch, outbuffer, silence_ratio); + used_silence_frames++; + } + else + samples_generated = stretch_samples (stretcher, inbuffer, samples_to_stretch, outbuffer, ratio); - if (samples_generated) { - fwrite (outbuffer, WaveHeader.BlockAlign, samples_generated, outfile); - outsamples += samples_generated; + if (samples_generated) { + if (samples_generated > max_generated_stretch) + max_generated_stretch = samples_generated; - if (samples_generated > max_expected_samples) { - fprintf (stderr, "%s: generated samples (%d) exceeded expected (%d)!\n", samples_read ? "stretch" : "flush", - samples_generated, max_expected_samples); - fclose (infile); - return 1; + fwrite (outbuffer, WaveHeader.BlockAlign, samples_generated, outfile); + outsamples += samples_generated; + + if (samples_generated > max_expected_samples) { + fprintf (stderr, "stretch: generated samples (%d) exceeded expected (%d)!\n", samples_generated, max_expected_samples); + fclose (infile); + return 1; + } + } + } + + if (silence_mode) { + if (samples_read) { + memcpy (inbuffer, prebuffer, samples_read * WaveHeader.BlockAlign); + samples_to_stretch = samples_read; } + else + break; } + } + + /* next call the stretch flush function until it returns zero */ + + while (1) { + int samples_flushed = stretch_flush (stretcher, outbuffer); - if (!samples_read && !samples_generated) + if (!samples_flushed) break; + + if (samples_flushed > max_generated_flush) + max_generated_flush = samples_flushed; + + fwrite (outbuffer, WaveHeader.BlockAlign, samples_flushed, outfile); + outsamples += samples_flushed; + + if (samples_flushed > max_expected_samples) { + fprintf (stderr, "flush: generated samples (%d) exceeded expected (%d)!\n", samples_flushed, max_expected_samples); + fclose (infile); + return 1; + } } free (inbuffer); free (outbuffer); + free (prebuffer); stretch_deinit (stretcher); fclose (infile); @@ -425,6 +532,12 @@ int main (argc, argv) int argc; char **argv; (unsigned long) WaveHeader.SampleRate, (unsigned long) scaled_rate); fprintf (stderr, "max expected samples = %d, actually seen = %d stretch, %d flush\n", max_expected_samples, max_generated_stretch, max_generated_flush); + if (silence_frames || non_silence_frames) { + int total_frames = silence_frames + non_silence_frames; + fprintf (stderr, "%d silence frames detected (%.2f%%), %d actually used (%.2f%%)\n", + silence_frames, silence_frames * 100.0 / total_frames, + used_silence_frames, used_silence_frames * 100.0 / total_frames); + } } return 0; @@ -462,3 +575,20 @@ static int write_pcm_wav_header (FILE *outfile, uint32_t num_samples, int num_ch fwrite (&wavhdr, wavhdrsize, 1, outfile) && fwrite (&datahdr, sizeof (datahdr), 1, outfile); } + +double rms_level_dB (int16_t *audio, int samples, int channels) +{ + double rms_sum = 0.0; + int i; + + if (channels == 1) + for (i = 0; i < samples; ++i) + rms_sum += (double) audio [i] * audio [i]; + else + for (i = 0; i < samples; ++i) { + double average = (audio [i * 2] + audio [i * 2 + 1]) / 2.0; + rms_sum += average * average; + } + + return log10 (rms_sum / samples / (32768.0 * 32767.0 * 0.5)) * 10.0; +}