From c0576b103d2a13975e7e2644fa9b0a76020603ec Mon Sep 17 00:00:00 2001 From: David Bryant Date: Sun, 16 Oct 2022 20:57:51 -0700 Subject: [PATCH] issue #5: refinement and bug fixes on cascaded instances --- main.c | 58 ++++++++++++++++++++++++------- stretch.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++-------- stretch.h | 1 + 3 files changed, 134 insertions(+), 27 deletions(-) diff --git a/main.c b/main.c index a7e214c..2905cdf 100644 --- a/main.c +++ b/main.c @@ -24,11 +24,12 @@ static const char *sign_on = "\n" static const char *usage = " Usage: AUDIO-STRETCH [-options] infile.wav outfile.wav\n\n" -" Options: -r = stretch ratio (0.5 to 2.0, default = 1.0)\n" +" Options: -r = stretch ratio (0.25 to 4.0, default = 1.0)\n" " -u = upper freq period limit (default = 333 Hz)\n" " -l = lower freq period limit (default = 55 Hz)\n" " -c = cycle through all ratios, starting higher\n" " -cc = cycle through all ratios, starting lower\n" +" -d = force dual instance even for shallow ratios\n" " -s = scale rate to preserve duration (not pitch)\n" " -f = fast pitch detection (default >= 32 kHz)\n" " -n = normal pitch detection (default < 32 kHz)\n" @@ -74,8 +75,8 @@ static int verbose_mode, quiet_mode; int main (argc, argv) int argc; char **argv; { - int asked_help = 0, overwrite = 0, scale_rate = 0, force_fast = 0, force_normal = 0, cycle_ratio = 0; - int upper_frequency = 333, lower_frequency = 55, min_period, max_period; + int asked_help = 0, overwrite = 0, scale_rate = 0, force_fast = 0, force_normal = 0, force_dual = 0, cycle_ratio = 0; + int buffer_samples = BUFFER_SAMPLES, upper_frequency = 333, lower_frequency = 55, min_period, max_period; uint32_t samples_to_process, insamples = 0, outsamples = 0; char *infilename = NULL, *outfilename = NULL; RiffChunkHeader riff_chunk_header; @@ -137,6 +138,10 @@ int main (argc, argv) int argc; char **argv; cycle_ratio++; break; + case 'D': case 'd': + force_dual = 1; + break; + case 'F': case 'f': force_fast = 1; break; @@ -310,15 +315,16 @@ int main (argc, argv) int argc; char **argv; max_period = WaveHeader.SampleRate / lower_frequency; int flags = 0; - if (ratio < 0.5 || ratio > 2.0) + if (force_dual || ratio < 0.5 || ratio > 2.0) flags |= STRETCH_DUAL_FLAG; if ((force_fast || WaveHeader.SampleRate >= 32000) && !force_normal) flags |= STRETCH_FAST_FLAG; if (verbose_mode) - fprintf (stderr, "initializing stretch library with period range = %d to %d, %d channels, %s\n", - min_period, max_period, WaveHeader.NumChannels, (flags & STRETCH_FAST_FLAG) ? "fast mode" : "normal mode"); + fprintf (stderr, "initializing stretch library with period range = %d to %d, %d channels, %s, %s\n", + min_period, max_period, WaveHeader.NumChannels, (flags & STRETCH_FAST_FLAG) ? "fast mode" : "normal mode", + (flags & STRETCH_DUAL_FLAG) ? "dual instance" : "single instance"); if (!quiet_mode && ratio == 1.0 && !cycle_ratio) fprintf (stderr, "warning: a ratio of 1.0 will do nothing but copy the WAV file!\n"); @@ -343,8 +349,13 @@ int main (argc, argv) int argc; char **argv; uint32_t scaled_rate = scale_rate ? (uint32_t)(WaveHeader.SampleRate * ratio + 0.5) : WaveHeader.SampleRate; write_pcm_wav_header (outfile, 0, WaveHeader.NumChannels, 2, scaled_rate); - int16_t *inbuffer = malloc (BUFFER_SAMPLES * WaveHeader.BlockAlign); - int16_t *outbuffer = malloc ((BUFFER_SAMPLES * 4 + max_period * 8) * WaveHeader.BlockAlign); + if (cycle_ratio) + ratio = (flags & STRETCH_DUAL_FLAG) ? 4.0 : 2.0; + + int max_expected_samples = stretch_output_capacity (stretcher, buffer_samples, ratio); + int16_t *outbuffer = malloc (max_expected_samples * WaveHeader.BlockAlign); + int16_t *inbuffer = malloc (buffer_samples * WaveHeader.BlockAlign); + int max_generated_stretch = 0, max_generated_flush = 0; if (!inbuffer || !outbuffer) { fprintf (stderr, "can't allocate required memory!\n"); @@ -354,23 +365,42 @@ int main (argc, argv) int argc; char **argv; while (1) { int samples_read = fread (inbuffer, WaveHeader.BlockAlign, - samples_to_process >= BUFFER_SAMPLES ? BUFFER_SAMPLES : samples_to_process, infile); + samples_to_process >= buffer_samples ? buffer_samples : samples_to_process, infile); int samples_generated; insamples += samples_read; samples_to_process -= samples_read; - if (cycle_ratio) - ratio = (sin ((double) outsamples / WaveHeader.SampleRate) * (cycle_ratio & 1 ? 0.75 : -0.75)) + 1.25; + if (cycle_ratio) { + if (flags & STRETCH_DUAL_FLAG) + ratio = (sin ((double) outsamples / WaveHeader.SampleRate) * (cycle_ratio & 1 ? 1.875 : -1.875)) + 2.125; + else + ratio = (sin ((double) outsamples / WaveHeader.SampleRate) * (cycle_ratio & 1 ? 0.75 : -0.75)) + 1.25; + } - if (samples_read) + if (samples_read) { samples_generated = stretch_samples (stretcher, inbuffer, samples_read, outbuffer, ratio); - else + + if (samples_generated > max_generated_stretch) + max_generated_stretch = samples_generated; + } + else { samples_generated = stretch_flush (stretcher, outbuffer); + if (samples_generated > max_generated_flush) + max_generated_flush = samples_generated; + } + if (samples_generated) { fwrite (outbuffer, WaveHeader.BlockAlign, samples_generated, outfile); outsamples += samples_generated; + + if (samples_generated > max_expected_samples) { + fprintf (stderr, "%s: generated samples (%d) exceeded expected (%d)!\n", samples_read ? "stretch" : "flush", + samples_generated, max_expected_samples); + fclose (infile); + return 1; + } } if (!samples_read && !samples_generated) @@ -393,6 +423,8 @@ int main (argc, argv) int argc; char **argv; if (scale_rate) fprintf (stderr, "sample rate changed from %lu Hz to %lu Hz\n", (unsigned long) WaveHeader.SampleRate, (unsigned long) scaled_rate); + fprintf (stderr, "max expected samples = %d, actually seen = %d stretch, %d flush\n", + max_expected_samples, max_generated_stretch, max_generated_flush); } return 0; diff --git a/stretch.c b/stretch.c index 669a30d..dbb692a 100644 --- a/stretch.c +++ b/stretch.c @@ -69,10 +69,12 @@ static int find_period (struct stretch_cnxt *cnxt, int16_t *samples); StretchHandle stretch_init (int shortest_period, int longest_period, int num_channels, int flags) { struct stretch_cnxt *cnxt; + int max_periods = 3; if (flags & STRETCH_FAST_FLAG) { longest_period = (longest_period + 1) & ~1; shortest_period &= ~1; + max_periods = 4; } if (longest_period <= shortest_period || shortest_period < MIN_PERIOD || longest_period > MAX_PERIOD) { @@ -83,7 +85,7 @@ StretchHandle stretch_init (int shortest_period, int longest_period, int num_cha cnxt = (struct stretch_cnxt *) calloc (1, sizeof (struct stretch_cnxt)); if (cnxt) { - cnxt->inbuff_samples = longest_period * num_channels * 6; + cnxt->inbuff_samples = longest_period * num_channels * max_periods; cnxt->inbuff = calloc (cnxt->inbuff_samples, sizeof (*cnxt->inbuff)); if (num_channels == 2 || (flags & STRETCH_FAST_FLAG)) @@ -105,7 +107,7 @@ StretchHandle stretch_init (int shortest_period, int longest_period, int num_cha if (flags & STRETCH_DUAL_FLAG) { cnxt->next = stretch_init (shortest_period, longest_period, num_channels, flags & ~STRETCH_DUAL_FLAG); - cnxt->intermediate = calloc (longest_period * num_channels * 4, sizeof (*cnxt->intermediate)); + cnxt->intermediate = calloc (longest_period * num_channels * max_periods, sizeof (*cnxt->intermediate)); } return (StretchHandle) cnxt; @@ -119,12 +121,48 @@ StretchHandle stretch_init (int shortest_period, int longest_period, int num_cha void stretch_reset (StretchHandle handle) { struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle; + cnxt->head = cnxt->tail = cnxt->longest; + memset (cnxt->inbuff, 0, cnxt->tail * sizeof (*cnxt->inbuff)); if (cnxt->next) - cnxt->next->head = cnxt->next->tail = cnxt->next->longest; + stretch_reset (cnxt->next); } +/* + * Determine how many samples (per channel) should be reserved in 'output'-array + * for stretch_samples() and stretch_flush(). max_num_samples is the maximum for + * 'num_samples' when calling stretch_samples(). + */ + +int stretch_output_capacity (StretchHandle handle, int max_num_samples, float max_ratio) +{ + struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle; + int max_period = cnxt->longest / cnxt->num_chans; + int max_expected_samples; + float next_ratio; + + if (cnxt->next) { + if (max_ratio < 0.5) { + next_ratio = max_ratio / 0.5; + max_ratio = 0.5; + } + else if (max_ratio > 2.0) { + next_ratio = max_ratio / 2.0; + max_ratio = 2.0; + } + else + next_ratio = 1.0; + } + + max_expected_samples = (int) ceil (max_num_samples * ceil (max_ratio * 2.0) / 2.0) + + max_period * (cnxt->fast_mode ? 4 : 3); + + if (cnxt->next) + max_expected_samples = stretch_output_capacity (cnxt->next, max_expected_samples, next_ratio); + + return max_expected_samples; +} /* * Process the specified samples with the given ratio (which is clipped to the @@ -140,10 +178,21 @@ int stretch_samples (StretchHandle handle, const int16_t *samples, int num_sampl struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle; int out_samples = 0, next_samples = 0; int16_t *outbuf = output; + float next_ratio; if (cnxt->next) { outbuf = cnxt->intermediate; - ratio = sqrt (ratio); + + if (ratio < 0.5) { + next_ratio = ratio / 0.5; + ratio = 0.5; + } + else if (ratio > 2.0) { + next_ratio = ratio / 2.0; + ratio = 2.0; + } + else + next_ratio = 1.0; } num_samples *= cnxt->num_chans; @@ -173,9 +222,16 @@ int stretch_samples (StretchHandle handle, const int16_t *samples, int num_sampl /* while there are enough samples to process, do so */ while (cnxt->tail >= cnxt->longest && cnxt->head - cnxt->tail >= cnxt->longest * (cnxt->fast_mode ? 3 : 2)) { - int period = cnxt->fast_mode ? find_period_fast (cnxt, cnxt->inbuff + cnxt->tail) : - find_period (cnxt, cnxt->inbuff + cnxt->tail); float process_ratio; + int period; + + if (ratio != 1.0 || cnxt->outsamples_error) + period = cnxt->fast_mode ? find_period_fast (cnxt, cnxt->inbuff + cnxt->tail) : + find_period (cnxt, cnxt->inbuff + cnxt->tail); + else + period = cnxt->longest; + + // printf ("%d\n", period / cnxt->num_chans); /* * Once we have calculated the best-match period, there are 4 possible transformations @@ -202,7 +258,12 @@ int stretch_samples (StretchHandle handle, const int16_t *samples, int num_sampl } else if (process_ratio == 1.0) { memcpy (outbuf + out_samples, cnxt->inbuff + cnxt->tail, period * 2 * sizeof (cnxt->inbuff [0])); - cnxt->outsamples_error += (period * 2.0) - (period * 2.0 * ratio); + + if (ratio != 1.0) + cnxt->outsamples_error += (period * 2.0) - (period * 2.0 * ratio); + else + cnxt->outsamples_error = 0; + out_samples += period * 2; cnxt->tail += period * 2; } @@ -236,7 +297,7 @@ int stretch_samples (StretchHandle handle, const int16_t *samples, int num_sampl fprintf (stderr, "stretch_samples: fatal programming error: process_ratio == %g\n", process_ratio); if (cnxt->next) { - next_samples += stretch_samples (cnxt->next, outbuf, out_samples / cnxt->num_chans, output + next_samples * cnxt->num_chans, ratio); + next_samples += stretch_samples (cnxt->next, outbuf, out_samples / cnxt->num_chans, output + next_samples * cnxt->num_chans, next_ratio); out_samples = 0; } } @@ -258,17 +319,28 @@ int stretch_samples (StretchHandle handle, const int16_t *samples, int num_sampl return cnxt->next ? next_samples : out_samples / cnxt->num_chans; } -/* flush any leftover samples out at normal speed */ +/* + * Flush any leftover samples out at normal speed. For cascaded dual instances this must be called + * twice to completely flush, or simply call it until it returns zero samples + */ int stretch_flush (StretchHandle handle, int16_t *output) { struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle; - int samples_to_copy = (cnxt->head - cnxt->tail) / cnxt->num_chans; + int samples_leftover = (cnxt->head - cnxt->tail) / cnxt->num_chans; + int samples_flushed; + + if (cnxt->next && samples_leftover) + samples_flushed = stretch_samples (cnxt->next, cnxt->inbuff + cnxt->tail, samples_leftover, output, 1.0); + else if (cnxt->next) + samples_flushed = stretch_flush (cnxt->next, output); + else { + memcpy (output, cnxt->inbuff + cnxt->tail, samples_leftover * cnxt->num_chans * sizeof (*output)); + samples_flushed = samples_leftover; + } - memcpy (output, cnxt->inbuff + cnxt->tail, samples_to_copy * cnxt->num_chans * sizeof (*output)); cnxt->tail = cnxt->head; - - return samples_to_copy; + return samples_flushed; } /* free handle */ @@ -281,8 +353,10 @@ void stretch_deinit (StretchHandle handle) free (cnxt->results); free (cnxt->inbuff); - if (cnxt->next) + if (cnxt->next) { stretch_deinit (cnxt->next); + free (cnxt->intermediate); + } free (cnxt); } diff --git a/stretch.h b/stretch.h index 82d56ae..bbeb3d6 100644 --- a/stretch.h +++ b/stretch.h @@ -37,6 +37,7 @@ extern "C" { typedef void *StretchHandle; StretchHandle stretch_init (int shortest_period, int longest_period, int num_chans, int flags); +int stretch_output_capacity (StretchHandle handle, int max_num_samples, float max_ratio); int stretch_samples (StretchHandle handle, const int16_t *samples, int num_samples, int16_t *output, float ratio); int stretch_flush (StretchHandle handle, int16_t *output); void stretch_reset (StretchHandle handle);