diff --git a/AudioAnalysis.sln.DotSettings b/AudioAnalysis.sln.DotSettings index 0664e38ad..5cadf81a8 100644 --- a/AudioAnalysis.sln.DotSettings +++ b/AudioAnalysis.sln.DotSettings @@ -300,6 +300,7 @@ True True True + True True True True diff --git a/src/Acoustics.Shared/Acoustics.Shared.csproj b/src/Acoustics.Shared/Acoustics.Shared.csproj index 123de157e..642a851b3 100644 --- a/src/Acoustics.Shared/Acoustics.Shared.csproj +++ b/src/Acoustics.Shared/Acoustics.Shared.csproj @@ -1,4 +1,4 @@ - + netcoreapp3.1 Acoustics.Shared @@ -28,6 +28,7 @@ + diff --git a/src/Acoustics.Shared/ColorScales/Palette.cs b/src/Acoustics.Shared/ColorScales/Palette.cs index ae876690f..6729a3070 100644 --- a/src/Acoustics.Shared/ColorScales/Palette.cs +++ b/src/Acoustics.Shared/ColorScales/Palette.cs @@ -16,6 +16,6 @@ public class Palette public List Colors { get; internal set; } - public Color[] ForClassCount(int classCount) => this.Colors.FirstOrDefault(x => x.Length == classCount); + public Color[] ForClassCount(int classCount) => this.Colors.FirstOrDefault(x => x.Length >= classCount); } } \ No newline at end of file diff --git a/src/Acoustics.Shared/Extensions/ArrayExtensions.cs b/src/Acoustics.Shared/Extensions/ArrayExtensions.cs index 04de278b0..21c844ea4 100644 --- a/src/Acoustics.Shared/Extensions/ArrayExtensions.cs +++ b/src/Acoustics.Shared/Extensions/ArrayExtensions.cs @@ -10,7 +10,12 @@ // ReSharper disable once CheckNamespace namespace System { + //using MoreLinq; + using static MoreLinq.Extensions.RepeatExtension; + using System.Collections.Generic; using System.Diagnostics; + using System.Linq; + using Acoustics.Shared.Extensions; public static class ArrayExtensions { @@ -105,6 +110,43 @@ public static T[] Print(this T[] array) return array; } + /// + /// Prints a multi-dimensional matrix as a C# literal. + /// + /// Assumes all ranks have a lower bound of zero. + /// Cast element items to T before toString is called. + /// The source array to print. + public static string PrintAsLiteral(this Array array) + { + var dimensions = Enumerable.Range(0, array.Rank); + var sizes = dimensions.Select(array.GetLength).ToArray(); + //var dimensionsWithSize = dimensions.Zip(sizes); + + var last = Enumerable.Range(0, sizes[^1]).ToArray(); + + // the last dimension we iterate across to generate the literal values + var result = sizes[..^1] + .Select(s => Enumerable.Range(0, s)) + .MultiCartesian(FormatValue) + .Join(", ") + .WordWrap(leftPadding: 4, splitOn: "},", keepSplit: true); + + return @$"{{ + {result} + }}"; + + string FormatValue(IEnumerable indices) + { + var depth = sizes.Length; + var lineDepth = depth - 1; + var start = "{ ".Repeat(lineDepth).Join(); + var end = " }".Repeat(lineDepth).Join(); + var value = last.Select(x => (T)array.GetValue(indices.Append(x).ToArray())).Join(", "); + + return start + value + end; + } + } + /// /// Compares two arrays, matching each element in order using the default Equals method for the array type T. /// diff --git a/src/Acoustics.Shared/Extensions/CartesianExtension.cs b/src/Acoustics.Shared/Extensions/CartesianExtension.cs new file mode 100644 index 000000000..f0c17a490 --- /dev/null +++ b/src/Acoustics.Shared/Extensions/CartesianExtension.cs @@ -0,0 +1,77 @@ +// +// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group). +// + +namespace Acoustics.Shared.Extensions +{ + using System; + using System.Collections.Generic; + using System.Linq; + using MoreLinq.Extensions; + + /// + /// Extensions to the MoreLinq.Cartesian function. + /// + public static class CartesianExtension + { + /// + /// Returns the Cartesian product of multiple sequences by combining each element of every set with every other element + /// and applying the user-defined projection to the pair. + /// + /// The type of the elements of . + /// The type of the elements of the result sequence. + /// The sequence of sequences of element.s + /// A projection function that combines elements from both sequences. + /// A sequence representing the Cartesian product of the source sequences. + public static IEnumerable MultiCartesian( + this IEnumerable> enumerables, + Func, TResult> resultSelector) + { + if (enumerables == null) + { + throw new ArgumentNullException(nameof(enumerables)); + } + + if (resultSelector == null) + { + throw new ArgumentNullException(nameof(resultSelector)); + } + + var enumerators = enumerables + .Select(e => e?.GetEnumerator() ?? throw new ArgumentException("One of the enumerables is null")) + .Pipe(e => e.MoveNext()) + .ToArray(); + + do + { + yield return resultSelector(enumerators.Select(e => e.Current)); + } while (MoveNext()); + + foreach (var enumerator in enumerators) + { + enumerator.Dispose(); + } + + bool MoveNext() + { + for (var i = enumerators.Length - 1; i >= 0; i--) + { + if (enumerators[i].MoveNext()) + { + return true; + } + + if (i == 0) + { + continue; + } + + enumerators[i].Reset(); + enumerators[i].MoveNext(); + } + + return false; + } + } + } +} diff --git a/src/Acoustics.Shared/Extensions/ExtensionsString.cs b/src/Acoustics.Shared/Extensions/ExtensionsString.cs index d13427a81..e2438217b 100644 --- a/src/Acoustics.Shared/Extensions/ExtensionsString.cs +++ b/src/Acoustics.Shared/Extensions/ExtensionsString.cs @@ -203,13 +203,15 @@ public static string Truncate(this string text, int length, string ellipsis, boo return text + ellipsis; } - public static string WordWrap(this string text, int wrapThreshold = 120, int leftPadding = 0) + public static string WordWrap(this string text, int wrapThreshold = 120, int leftPadding = 0, string splitOn = " ", bool keepSplit = false) { if (string.IsNullOrEmpty(text)) { return text; } + int splitLength = splitOn.Length; + string leftPad = string.Empty.PadRight(leftPadding); // wrap lines @@ -229,16 +231,16 @@ public static string WordWrap(this string text, int wrapThreshold = 120, int lef while (currentLine.Length > wrapThreshold) { - int splitPoint = currentLine.Substring(0, wrapThreshold).LastIndexOf(' '); + int splitPoint = currentLine.Substring(0, wrapThreshold).LastIndexOf(splitOn); if (splitPoint < 0) { splitPoint = wrapThreshold; // cuts through a word } - result.AppendLine(leftPad + currentLine.Substring(0, splitPoint)); + result.AppendLine(leftPad + currentLine.Substring(0, splitPoint + (keepSplit ? splitLength : 0))); - currentLine = currentLine.Substring(splitPoint + 1); + currentLine = currentLine.Substring(splitPoint + splitLength); } if (currentLine.IsNotWhitespace()) diff --git a/src/AnalysisConfigFiles/Towsey.SpectrogramGenerator.yml b/src/AnalysisConfigFiles/Towsey.SpectrogramGenerator.yml index f92d2d737..ba03f0a00 100644 --- a/src/AnalysisConfigFiles/Towsey.SpectrogramGenerator.yml +++ b/src/AnalysisConfigFiles/Towsey.SpectrogramGenerator.yml @@ -26,7 +26,10 @@ Images: - DecibelSpectrogramNoiseReduced - Experimental - DifferenceSpectrogram + - MelScaleSpectrogram - CepstralSpectrogram + - OctaveScaleSpectrogram + - RibbonSpectrogram - AmplitudeSpectrogramLocalContrastNormalization diff --git a/src/AnalysisPrograms/PlanesTrainsAndAutomobiles.cs b/src/AnalysisPrograms/PlanesTrainsAndAutomobiles.cs index f0d230a43..4ec60cdd0 100644 --- a/src/AnalysisPrograms/PlanesTrainsAndAutomobiles.cs +++ b/src/AnalysisPrograms/PlanesTrainsAndAutomobiles.cs @@ -185,18 +185,23 @@ public static Tuple> Dete TimeSpan duration = recording.Duration; NoiseReductionType nrt = SNR.KeyToNoiseReductionType("STANDARD"); - var sonogram = (BaseSonogram)SpectrogramStandard.GetSpectralSonogram( - recording.BaseName, - windowSize, - windowOverlap, - bitsPerSample, - windowPower, - sr, - duration, - nrt, - matrix); - - sonogram.DecibelsNormalised = new double[rowCount]; + //Set the default values config + SonogramConfig sonoConfig = new SonogramConfig + { + SourceFName = recording.BaseName, + WindowSize = windowSize, + WindowOverlap = windowOverlap, + NoiseReductionType = nrt, + epsilon = Math.Pow(0.5, bitsPerSample - 1), + WindowPower = windowPower, + SampleRate = sr, + Duration = duration, + }; + + var sonogram = new SpectrogramStandard(sonoConfig, matrix) + { + DecibelsNormalised = new double[rowCount], + }; //foreach frame or time step for (int i = 0; i < rowCount; i++) @@ -205,7 +210,7 @@ public static Tuple> Dete } sonogram.DecibelsNormalised = DataTools.normalise(sonogram.DecibelsNormalised); - return Tuple.Create(sonogram, hits, scoreArray, predictedEvents); - } //end Execute_HDDetect + return Tuple.Create((BaseSonogram)sonogram, hits, scoreArray, predictedEvents); + } } -} //end class PlanesTrainsAndAutomobiles \ No newline at end of file +} \ No newline at end of file diff --git a/src/AnalysisPrograms/SpectrogramGenerator/SpectrogramGenerator.Core.cs b/src/AnalysisPrograms/SpectrogramGenerator/SpectrogramGenerator.Core.cs index ff8a23007..7a03e682c 100644 --- a/src/AnalysisPrograms/SpectrogramGenerator/SpectrogramGenerator.Core.cs +++ b/src/AnalysisPrograms/SpectrogramGenerator/SpectrogramGenerator.Core.cs @@ -28,8 +28,12 @@ public partial class SpectrogramGenerator static SpectrogramGenerator() { var values = (SpectrogramImageType[])Enum.GetValues(typeof(SpectrogramImageType)); + + // This next line is to do with adding a colour tag to images so that they can be more easily identified in unit tests. + // Need a pallette that is large enough to include all the number of images produced. + // Check the ColorBrewer class and change the pallette set if need to. ImageTags = values - .Zip(ColorBrewer.Qualitative.Dark.ForClassCount(values.Length)) + .Zip(ColorBrewer.Qualitative.Set3.ForClassCount(values.Length)) .ToImmutableDictionary(x => x.First, x => x.Second); } @@ -38,7 +42,10 @@ static SpectrogramGenerator() /// Waveform. /// DecibelSpectrogram. /// DecibelSpectrogramNoiseReduced. + /// MelScaleSpectrogram /// CepstralSpectrogram. + /// OctaveScaleSpectrogram + /// RibbonSpectrogram. /// DifferenceSpectrogram. /// AmplitudeSpectrogramLocalContrastNormalization. /// Experimental. @@ -75,6 +82,7 @@ public static AudioToSonogramResult GenerateSpectrogramImages( // EXTRACT ENVELOPE and SPECTROGRAM FROM RECORDING SEGMENT var dspOutput1 = DSP_Frames.ExtractEnvelopeAndFfts(recordingSegment, frameSize, frameStep); + // This constructor initializes default values for Melscale and Mfcc spectrograms and other parameters. var sonoConfig = new SonogramConfig() { epsilon = recordingSegment.Epsilon, @@ -190,7 +198,16 @@ public static AudioToSonogramResult GenerateSpectrogramImages( } } - // IMAGE 6) Cepstral Spectrogram + // IMAGE 6) Mel-frequency Spectrogram + // The default spectrogram has 64 frequency bands. + if (@do.Contains(MelScaleSpectrogram)) + { + images.Add( + MelScaleSpectrogram, + GetMelScaleSpectrogram(sonoConfig, recordingSegment, sourceRecordingName)); + } + + // IMAGE 7) Cepstral Spectrogram if (@do.Contains(CepstralSpectrogram)) { images.Add( @@ -198,7 +215,59 @@ public static AudioToSonogramResult GenerateSpectrogramImages( GetCepstralSpectrogram(sonoConfig, recordingSegment, sourceRecordingName)); } - // IMAGE 7) AmplitudeSpectrogram_LocalContrastNormalization + // IMAGE 8) Octave-frequency scale Spectrogram + if (@do.Contains(OctaveScaleSpectrogram)) + { + //Create new config because calling the octave spectrogram changes it. + var octaveConfig = new SonogramConfig() + { + epsilon = recordingSegment.Epsilon, + SampleRate = sampleRate, + WindowSize = frameSize, + WindowStep = frameStep, + WindowOverlap = frameOverlap, + WindowPower = dspOutput1.WindowPower, + Duration = recordingSegment.Duration, + NoiseReductionType = NoiseReductionType.Standard, + NoiseReductionParameter = bgNoiseThreshold, + }; + + //var type = FreqScaleType.OctaveCustom; + var type = FreqScaleType.OctaveStandard; + int nyquist = sampleRate / 2; + int linearBound = 1000; + int octaveToneCount = 31; // This value is ignored for OctaveStandard type. + int hertzGridInterval = 1000; + var scale = new FrequencyScale(type, nyquist, frameSize, linearBound, octaveToneCount, hertzGridInterval); + + images.Add( + OctaveScaleSpectrogram, + GetOctaveScaleSpectrogram(octaveConfig, scale, recordingSegment, sourceRecordingName)); + } + + // IMAGE 9) RibbonSpectrogram + if (@do.Contains(RibbonSpectrogram)) + { + //Create new config because calling the octave spectrogram changes it. + var octaveConfig = new SonogramConfig() + { + epsilon = recordingSegment.Epsilon, + SampleRate = sampleRate, + WindowSize = frameSize, + WindowStep = frameStep, + WindowOverlap = frameOverlap, + WindowPower = dspOutput1.WindowPower, + Duration = recordingSegment.Duration, + NoiseReductionType = NoiseReductionType.Standard, + NoiseReductionParameter = bgNoiseThreshold, + }; + + images.Add( + RibbonSpectrogram, + GetRibbonSpectrograms(octaveConfig, recordingSegment, sourceRecordingName)); + } + + // IMAGE 10) AmplitudeSpectrogram_LocalContrastNormalization if (@do.Contains(AmplitudeSpectrogramLocalContrastNormalization)) { var neighborhoodSeconds = config.NeighborhoodSeconds; @@ -340,6 +409,28 @@ public static Image GetDecibelSpectrogram_Ridges( return image; } + public static Image GetMelScaleSpectrogram( + SonogramConfig sonoConfig, + AudioRecording recording, + string sourceRecordingName) + { + // TODO at present noise reduction type must be set = Standard. + sonoConfig.NoiseReductionType = NoiseReductionType.Standard; + sonoConfig.NoiseReductionParameter = 3.0; + var melFreqGram = new SpectrogramMelScale(sonoConfig, recording.WavReader); + var image = melFreqGram.GetImage(); + var titleBar = BaseSonogram.DrawTitleBarOfGrayScaleSpectrogram( + "MEL-FREQUENCY SPECTROGRAM " + sourceRecordingName, + image.Width, + ImageTags[MelScaleSpectrogram]); + var startTime = TimeSpan.Zero; + var xAxisTicInterval = TimeSpan.FromSeconds(1); + TimeSpan xAxisPixelDuration = TimeSpan.FromSeconds(sonoConfig.WindowStep / (double)sonoConfig.SampleRate); + var labelInterval = TimeSpan.FromSeconds(5); + image = BaseSonogram.FrameSonogram(image, titleBar, startTime, xAxisTicInterval, xAxisPixelDuration, labelInterval); + return image; + } + public static Image GetCepstralSpectrogram( SonogramConfig sonoConfig, AudioRecording recording, @@ -362,6 +453,95 @@ public static Image GetCepstralSpectrogram( return image; } + public static Image GetOctaveScaleSpectrogram( + SonogramConfig sgConfig, + FrequencyScale freqScale, + AudioRecording recording, + string sourceRecordingName) + { + // ensure that the freq scale and the spectrogram config are consistent. + sgConfig.WindowSize = freqScale.WindowSize; + freqScale.WindowStep = sgConfig.WindowStep; + sgConfig.WindowOverlap = SonogramConfig.CalculateFrameOverlap(freqScale.WindowSize, freqScale.WindowStep); + + // TODO at present noise reduction type must be set = Standard. + sgConfig.NoiseReductionType = NoiseReductionType.Standard; + sgConfig.NoiseReductionParameter = 3.0; + + var octaveScaleGram = new SpectrogramOctaveScale(sgConfig, freqScale, recording.WavReader); + var image = octaveScaleGram.GetImage(); + var title = "OCTAVE-SCALE SPECTROGRAM " + sourceRecordingName; + + //var titleBar = BaseSonogram.DrawTitleBarOfGrayScaleSpectrogram(title, image.Width, ImageTags[OctaveScaleSpectrogram]); + //var startTime = TimeSpan.Zero; + //var xAxisTicInterval = TimeSpan.FromSeconds(1); + //TimeSpan xAxisPixelDuration = TimeSpan.FromSeconds(sgConfig.WindowStep / (double)sgConfig.SampleRate); + //var labelInterval = TimeSpan.FromSeconds(5); + //image = BaseSonogram.FrameSonogram(image, titleBar, startTime, xAxisTicInterval, xAxisPixelDuration, labelInterval); + image = octaveScaleGram.GetImageFullyAnnotated(image, title, freqScale.GridLineLocations, ImageTags[OctaveScaleSpectrogram]); + return image; + } + + public static Image GetRibbonSpectrograms( + SonogramConfig sgConfig, + AudioRecording recording, + string sourceRecordingName) + { + var octaveScaleGram = GetOctaveReducedSpectrogram(sgConfig, recording); + var image1 = octaveScaleGram.GetImage(); + + var linearScaleGram = GetLinearReducedSpectrogram(sgConfig, recording); + var image2 = linearScaleGram.GetImage(); + var spacer = new Image(image1.Width, 5); + + var imageList = new List> { image2, spacer, image1 }; + + var combinedImage = ImageTools.CombineImagesVertically(imageList); + var title = "RIBBON SPECTROGRAMS-Linear32 & Octave19: " + sourceRecordingName; + var image = octaveScaleGram.GetImageFullyAnnotated(combinedImage, title, null, ImageTags[RibbonSpectrogram]); + return image; + } + + public static SpectrogramOctaveScale GetOctaveReducedSpectrogram(SonogramConfig sgConfig, AudioRecording recording) + { + var type = FreqScaleType.OctaveDataReduction; + var freqScale = new FrequencyScale(type); + + // ensure that the freq scale and the spectrogram config are consistent. + sgConfig.WindowSize = freqScale.WindowSize; + freqScale.WindowStep = sgConfig.WindowStep; + sgConfig.WindowOverlap = SonogramConfig.CalculateFrameOverlap(freqScale.WindowSize, freqScale.WindowStep); + + // TODO at present noise reduction type must be set = Standard. + sgConfig.NoiseReductionType = NoiseReductionType.Standard; + sgConfig.NoiseReductionParameter = 3.0; + + var octaveScaleGram = new SpectrogramOctaveScale(sgConfig, freqScale, recording.WavReader); + return octaveScaleGram; + } + + public static SpectrogramStandard GetLinearReducedSpectrogram(SonogramConfig sgConfig, AudioRecording recording) + { + int sampleRate = recording.SampleRate; + var type = FreqScaleType.Linear; + int nyquist = sampleRate / 2; + int finalBinCount = 32; + int frameSize = 512; + int hertzGridInterval = 11000; + var freqScale = new FrequencyScale(type, nyquist, frameSize, finalBinCount, hertzGridInterval); + + // ensure that the freq scale and the spectrogram config are consistent. + sgConfig.WindowSize = freqScale.WindowSize; + freqScale.WindowStep = sgConfig.WindowStep; + sgConfig.WindowOverlap = SonogramConfig.CalculateFrameOverlap(freqScale.WindowSize, freqScale.WindowStep); + + sgConfig.NoiseReductionType = NoiseReductionType.Standard; + sgConfig.NoiseReductionParameter = 3.0; + + var spectrogram = new SpectrogramStandard(sgConfig, freqScale, recording.WavReader); + return spectrogram; + } + public static Image GetLcnSpectrogram( SonogramConfig sonoConfig, AudioRecording recordingSegment, @@ -369,23 +549,23 @@ public static Image GetLcnSpectrogram( double neighbourhoodSeconds, double lcnContrastLevel) { - BaseSonogram sonogram = new AmplitudeSonogram(sonoConfig, recordingSegment.WavReader); - int neighbourhoodFrames = (int)(sonogram.FramesPerSecond * neighbourhoodSeconds); - LoggedConsole.WriteLine("LCN: FramesPerSecond (Prior to LCN) = {0}", sonogram.FramesPerSecond); + BaseSonogram spectrogram = new AmplitudeSonogram(sonoConfig, recordingSegment.WavReader); + int neighbourhoodFrames = (int)(spectrogram.FramesPerSecond * neighbourhoodSeconds); + LoggedConsole.WriteLine("LCN: FramesPerSecond (Prior to LCN) = {0}", spectrogram.FramesPerSecond); LoggedConsole.WriteLine("LCN: Neighbourhood of {0} seconds = {1} frames", neighbourhoodSeconds, neighbourhoodFrames); // subtract the lowest 20% of frames. This is first step in LCN noise removal. Sets the baseline. const int lowPercentile = 20; - sonogram.Data = - NoiseRemoval_Briggs.NoiseReduction_byLowestPercentileSubtraction(sonogram.Data, lowPercentile); - sonogram.Data = - NoiseRemoval_Briggs.NoiseReduction_byLCNDivision(sonogram.Data, neighbourhoodFrames, lcnContrastLevel); + spectrogram.Data = + NoiseRemoval_Briggs.NoiseReduction_byLowestPercentileSubtraction(spectrogram.Data, lowPercentile); + spectrogram.Data = + NoiseRemoval_Briggs.NoiseReduction_byLCNDivision(spectrogram.Data, neighbourhoodFrames, lcnContrastLevel); - //Matrix normalisation - //MatrixTools.PercentileCutoffs(sonogram.Data, 10.0, 90, out double minCut, out double maxCut); - //NoiseRemoval_Briggs.NoiseReduction_byLowestPercentileSubtraction(sonogram.Data, lowPercentile); + // Finally background noise removal. This step is optional. + double[] spectralDecibelBgn = NoiseProfile.CalculateBackgroundNoise(spectrogram.Data); + spectrogram.Data = SNR.TruncateBgNoiseFromSpectrogram(spectrogram.Data, spectralDecibelBgn); - var image = sonogram.GetImageFullyAnnotated( + var image = spectrogram.GetImageFullyAnnotated( "AMPLITUDE SPECTROGRAM with freq bin Local Contrast Normalization - " + sourceRecordingName, ImageTags[AmplitudeSpectrogramLocalContrastNormalization]); return image; diff --git a/src/AnalysisPrograms/SpectrogramGenerator/SpectrogramImageType.cs b/src/AnalysisPrograms/SpectrogramGenerator/SpectrogramImageType.cs index 6b04cdcaa..d7b73ae6f 100644 --- a/src/AnalysisPrograms/SpectrogramGenerator/SpectrogramImageType.cs +++ b/src/AnalysisPrograms/SpectrogramGenerator/SpectrogramImageType.cs @@ -11,7 +11,10 @@ public enum SpectrogramImageType DecibelSpectrogramNoiseReduced = 2, Experimental = 3, DifferenceSpectrogram = 4, - CepstralSpectrogram = 5, - AmplitudeSpectrogramLocalContrastNormalization = 6, + MelScaleSpectrogram = 5, + CepstralSpectrogram = 6, + OctaveScaleSpectrogram = 7, + RibbonSpectrogram = 8, + AmplitudeSpectrogramLocalContrastNormalization = 9, } } \ No newline at end of file diff --git a/src/AudioAnalysisTools/DSP/DSP_Filters.cs b/src/AudioAnalysisTools/DSP/DSP_Filters.cs index 130a22dc0..4128018a8 100644 --- a/src/AudioAnalysisTools/DSP/DSP_Filters.cs +++ b/src/AudioAnalysisTools/DSP/DSP_Filters.cs @@ -20,118 +20,6 @@ public static partial class DspFilters { public const double Pi = Math.PI; - public static void TestMethod_GenerateSignal1() - { - int sampleRate = 22050; - double duration = 20; // signal duration in seconds - int[] harmonics = { 500, 1000, 2000, 4000, 8000 }; - int windowSize = 512; - var freqScale = new FrequencyScale(sampleRate / 2, windowSize, 1000); - string path = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\SineSignal1.png"; - - var recording = GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Cosine); - var sonoConfig = new SonogramConfig - { - WindowSize = freqScale.WindowSize, - WindowOverlap = 0.0, - SourceFName = "Signal1", - NoiseReductionType = NoiseReductionType.Standard, - NoiseReductionParameter = 0.12, - }; - var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); - - // pick a row, any row - var oneSpectrum = MatrixTools.GetRow(sonogram.Data, 40); - oneSpectrum = DataTools.normalise(oneSpectrum); - var peaks = DataTools.GetPeaks(oneSpectrum, 0.5); - for (int i = 2; i < peaks.Length - 2; i++) - { - if (peaks[i]) - { - LoggedConsole.WriteLine($"bin ={freqScale.BinBounds[i, 0]}, Herz={freqScale.BinBounds[i, 1]}-{freqScale.BinBounds[i + 1, 1]} "); - } - } - - if (peaks[11] && peaks[22] && peaks[45] && peaks[92] && peaks[185]) - { - LoggedConsole.WriteSuccessLine("Spectral Peaks found at correct places"); - } - else - { - LoggedConsole.WriteErrorLine("Spectral Peaks found at INCORRECT places"); - } - - foreach (int h in harmonics) - { - LoggedConsole.WriteLine($"Harmonic {h}Herz should be in bin {freqScale.GetBinIdForHerzValue(h)}"); - } - - // spectrogram without framing, annotation etc - var image = sonogram.GetImage(); - string title = $"Spectrogram of Harmonics: {DataTools.Array2String(harmonics)} SR={sampleRate} Window={windowSize}"; - image = sonogram.GetImageFullyAnnotated(image, title, freqScale.GridLineLocations); - image.Save(path); - } - - public static void TestMethod_GenerateSignal2() - { - int sampleRate = 64000; - double duration = 30; // signal duration in seconds - int[] harmonics = { 500, 1000, 2000, 4000, 8000 }; - var freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000); - string path = @"C:\SensorNetworks\Output\Sonograms\UnitTestSonograms\SineSignal2.png"; - var recording = GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Cosine); - - // init the default sonogram config - var sonoConfig = new SonogramConfig - { - WindowSize = freqScale.WindowSize, - WindowOverlap = 0.2, - SourceFName = "Signal2", - NoiseReductionType = NoiseReductionType.None, - NoiseReductionParameter = 0.0, - }; - var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); - sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale); - - // pick a row, any row - var oneSpectrum = MatrixTools.GetRow(sonogram.Data, 40); - oneSpectrum = DataTools.normalise(oneSpectrum); - var peaks = DataTools.GetPeaks(oneSpectrum, 0.5); - - var peakIds = new List(); - for (int i = 5; i < peaks.Length - 5; i++) - { - if (peaks[i]) - { - int peakId = freqScale.BinBounds[i, 0]; - peakIds.Add(peakId); - LoggedConsole.WriteLine($"Spectral peak located in bin {peakId}, Herz={freqScale.BinBounds[i, 1]}"); - } - } - - //if (peaks[129] && peaks[257] && peaks[513] && peaks[1025] && peaks[2049]) - if (peakIds[0] == 129 && peakIds[1] == 257 && peakIds[2] == 513 && peakIds[3] == 1025 && peakIds[4] == 2049) - { - LoggedConsole.WriteSuccessLine("Spectral Peaks found at correct places"); - } - else - { - LoggedConsole.WriteErrorLine("Spectral Peaks found at INCORRECT places"); - } - - foreach (int h in harmonics) - { - LoggedConsole.WriteLine($"Harmonic {h}Hertz should be in bin {freqScale.GetBinIdForHerzValue(h)}"); - } - - // spectrogram without framing, annotation etc - var image = sonogram.GetImage(); - string title = $"Spectrogram of Harmonics: {DataTools.Array2String(harmonics)} SR={sampleRate} Window={freqScale.WindowSize}"; - image = sonogram.GetImageFullyAnnotated(image, title, freqScale.GridLineLocations); - image.Save(path); - } - public static AudioRecording GenerateTestRecording(int sampleRate, double duration, int[] harmonics, WaveType waveType) { var signal = GenerateTestSignal(sampleRate, duration, harmonics, waveType); diff --git a/src/AudioAnalysisTools/DSP/DSP_Frames.cs b/src/AudioAnalysisTools/DSP/DSP_Frames.cs index 66ba41ae6..88ddcd894 100644 --- a/src/AudioAnalysisTools/DSP/DSP_Frames.cs +++ b/src/AudioAnalysisTools/DSP/DSP_Frames.cs @@ -272,7 +272,7 @@ public static EnvelopeAndFft ExtractEnvelopeAndAmplSpectrogram( { maxAbsValue = absValue; } - } // end of frame + } // end frame double frameDc = frameSum / frameSize; minValues[i] = frameMin; @@ -292,14 +292,7 @@ public static EnvelopeAndFft ExtractEnvelopeAndAmplSpectrogram( // generate the spectra of FFT AMPLITUDES - NOTE: f[0]=DC; f[64]=Nyquist var f1 = fft.InvokeDotNetFFT(signalMinusAv); - // Previous alternative call to do the FFT and return amplitude spectrum - //f1 = fft.Invoke(window); - // Smooth spectrum to reduce variance - // In the early days (pre-2010), we used to smooth the spectra to reduce sonogram variance. This is statistically correct thing to do. - // Later, we stopped this for standard sonograms but kept it for calculating acoustic indices. - // As of 28 March 2017, we are merging the two codes and keeping spectrum smoothing. - // Will need to check the effect on spectrograms. int smoothingWindow = 3; f1 = DataTools.filterMovingAverage(f1, smoothingWindow); @@ -310,7 +303,7 @@ public static EnvelopeAndFft ExtractEnvelopeAndAmplSpectrogram( } } // end frames - // Remove the DC column ie column zero from amplitude spectrogram. + // Remove the DC column (ie column zero) from amplitude spectrogram. double[,] amplitudeSpectrogram = MatrixTools.Submatrix(spectrogram, 0, 1, spectrogram.GetLength(0) - 1, spectrogram.GetLength(1) - 1); // check the envelope for clipping. Accept a clip if two consecutive frames have max value = 1,0 diff --git a/src/AudioAnalysisTools/DSP/FrequencyScale.cs b/src/AudioAnalysisTools/DSP/FrequencyScale.cs index 31698481e..f3fb8e459 100644 --- a/src/AudioAnalysisTools/DSP/FrequencyScale.cs +++ b/src/AudioAnalysisTools/DSP/FrequencyScale.cs @@ -5,15 +5,11 @@ namespace AudioAnalysisTools.DSP { using System; - using System.IO; using Acoustics.Shared.ImageSharp; using AudioAnalysisTools.StandardSpectrograms; - using AudioAnalysisTools.WavTools; using SixLabors.ImageSharp; using SixLabors.ImageSharp.PixelFormats; using SixLabors.ImageSharp.Processing; - using TowseyLibrary; - using Path = System.IO.Path; // IMPORTANT NOTE: If you are converting Hertz scale from LINEAR to MEL or OCTAVE, this conversion MUST be done BEFORE noise reduction @@ -25,13 +21,9 @@ public enum FreqScaleType { Linear = 0, Mel = 1, - Linear62Octaves7Tones31Nyquist11025 = 2, - Linear125Octaves6Tones30Nyquist11025 = 3, - Octaves24Nyquist32000 = 4, - Linear125Octaves7Tones28Nyquist32000 = 5, - - // alias Octave to predefined choice - Octave = Linear125Octaves7Tones28Nyquist32000, + OctaveCustom = 2, + OctaveStandard = 3, + OctaveDataReduction = 4, } public class FrequencyScale @@ -39,7 +31,7 @@ public class FrequencyScale /// /// Initializes a new instance of the class. /// CONSTRUCTOR - /// Calling this constructor assumes a full-scale linear freq scale is required. + /// Calling this constructor assumes the standard linear 0-nyquist freq scale is required. /// public FrequencyScale(int nyquist, int frameSize, int hertzGridInterval) { @@ -53,6 +45,23 @@ public FrequencyScale(int nyquist, int frameSize, int hertzGridInterval) this.GridLineLocations = GetLinearGridLineLocations(nyquist, this.HertzGridInterval, this.FinalBinCount); } + /// + /// Initializes a new instance of the class. + /// CONSTRUCTOR + /// Call this constructor when want to change freq scale but keep it linear. + /// + public FrequencyScale(int nyquist, int frameSize, int finalBinCount, int hertzGridInterval) + { + this.ScaleType = FreqScaleType.Linear; + this.Nyquist = nyquist; + this.WindowSize = frameSize; + this.FinalBinCount = finalBinCount; + this.HertzGridInterval = hertzGridInterval; + this.LinearBound = nyquist; + this.BinBounds = this.GetLinearBinBounds(); + this.GridLineLocations = GetLinearGridLineLocations(nyquist, this.HertzGridInterval, this.FinalBinCount); + } + /// /// Initializes a new instance of the class. /// CONSTRUCTOR @@ -60,17 +69,20 @@ public FrequencyScale(int nyquist, int frameSize, int hertzGridInterval) /// public FrequencyScale(FreqScaleType type, int nyquist, int frameSize, int finalBinCount, int hertzGridInterval) { - this.ScaleType = type; this.Nyquist = nyquist; this.WindowSize = frameSize; this.FinalBinCount = finalBinCount; this.HertzGridInterval = hertzGridInterval; if (type == FreqScaleType.Mel) { - this.BinBounds = this.GetMelBinBounds(); - this.GridLineLocations = GetMelGridLineLocations(this.HertzGridInterval, nyquist, this.FinalBinCount); + this.BinBounds = MFCCStuff.GetMelBinBounds(this.Nyquist, this.FinalBinCount); + this.GridLineLocations = SpectrogramMelScale.GetMelGridLineLocations(this.HertzGridInterval, nyquist, this.FinalBinCount); this.LinearBound = 1000; } + else if (type == FreqScaleType.OctaveStandard || type == FreqScaleType.OctaveCustom) + { + throw new Exception("Wrong Constructor. Use the Octave scale constructor."); + } else { // linear is the default @@ -80,6 +92,51 @@ public FrequencyScale(FreqScaleType type, int nyquist, int frameSize, int finalB } } + /// + /// Initializes a new instance of the class. + /// CONSTRUCTION OF OCTAVE Frequency Scales + /// IMPORTANT NOTE: If you are converting Herz scale from LINEAR to OCTAVE, this conversion MUST be done BEFORE noise reduction + /// WARNING!: Changing the constants for the octave scales will have undefined effects. + /// The options below have been debugged to give what is required. + /// However other values have not been debugged - so user should check the output to ensure it is what is required. + /// NOTE: octaveToneCount = the number of fractional Hz steps within one octave. A piano octave contains 12 steps per octave. + /// NOTE: Not all combinations of parameter values are effective. nor have they all been tested. + /// The following have been tested: + /// nyquist=11025, linearBound=125 t0 1000, octaveToneCount=31, 32. + /// nyquist=11025, linearBound=125, octaveToneCount=31. + /// nyquist=32000, linearBound=125, octaveToneCount=28. + /// + /// Scale type must be an OCTAVE type. + /// sr / 2. + /// Assumes that frame size is power of 2. + /// The bound (Hz value) that divides lower linear and upper octave parts of the frequency scale. + /// Number of fractional Hz steps within one octave. This is ignored in case of custom scale. + /// Only used where appropriate to draw frequency gridlines. + public FrequencyScale(FreqScaleType type, int nyquist, int frameSize, int linearBound, int octaveToneCount, int hertzGridInterval) + { + this.ScaleType = type; + this.Nyquist = nyquist; + this.WindowSize = frameSize; + this.LinearBound = linearBound; + + if (type == FreqScaleType.OctaveStandard) + { + // this scale automatically sets the octave tone count. + OctaveFreqScale.GetStandardOctaveScale(this); + } + else if (type == FreqScaleType.OctaveCustom) + { + this.ToneCount = octaveToneCount; + this.BinBounds = OctaveFreqScale.LinearToSplitLinearOctaveScale(nyquist, frameSize, linearBound, nyquist, octaveToneCount); + this.FinalBinCount = this.BinBounds.GetLength(0); + this.GridLineLocations = OctaveFreqScale.GetGridLineLocations(nyquist, linearBound, this.BinBounds); + } + else + { + throw new Exception("Unknown Octave scale."); + } + } + /// /// Initializes a new instance of the class. /// CONSTRUCTOR. @@ -101,18 +158,26 @@ public FrequencyScale(FreqScaleType fst) } else if (fst == FreqScaleType.Mel) { - LoggedConsole.WriteErrorLine("WARNING: Assigning DEFAULT parameters for MEL FREQUENCY SCALE."); - this.Nyquist = 11025; - this.WindowSize = 512; - this.FinalBinCount = 128; - this.HertzGridInterval = 1000; - this.LinearBound = this.Nyquist; - this.GridLineLocations = GetMelGridLineLocations(this.HertzGridInterval, this.Nyquist, this.FinalBinCount); + // WARNING: Making this call will return a standard Mel scale where sr = 22050 and frameSize = 512 + // MEL SCALE spectrograms are available by direct call to SpectrogramGenerator.Core.GetMelScaleSpectrogram() + SpectrogramMelScale.GetStandardMelScale(this); + } + else if (fst == FreqScaleType.OctaveDataReduction) + { + // This spectral conversion is for data reduction purposes. + // It is a split linear-octave frequency scale. + OctaveFreqScale.GetDataReductionScale(this); + + //The data reduction Octave Scale does not require grid lines. + this.GridLineLocations = new int[6, 2]; + } + else if (fst == FreqScaleType.OctaveStandard || fst == FreqScaleType.OctaveCustom) + { + throw new Exception("Not enough info. Use an Octave scale constructor."); } else { - // assume octave scale is only other option - OctaveFreqScale.GetOctaveScale(this); + throw new Exception("Unknown frequency scale type."); } } @@ -129,7 +194,7 @@ public FrequencyScale(FreqScaleType fst) /// /// Gets or sets step size for the FFT window. /// - public int FrameStep { get; set; } + public int WindowStep { get; set; } /// /// Gets or sets number of frequency bins in the final spectrogram. @@ -151,11 +216,6 @@ public FrequencyScale(FreqScaleType fst) /// public int LinearBound { get; set; } - /// - /// Gets or sets number of octave to appear above the linear part of scale. - /// - public int OctaveCount { get; set; } - /// /// Gets or sets number of bands or tones per octave. /// @@ -173,24 +233,17 @@ public FrequencyScale(FreqScaleType fst) public int[,] GridLineLocations { get; set; } /// - /// returns the binId for the grid line closest to the passed frequency. + /// returns the binId for freq bin closest to the passed Hertz value. /// - public int GetBinIdForHerzValue(int herzValue) + public int GetBinIdForHerzValue(int hertzValue) { int binId = 0; - int binCount = this.BinBounds.GetLength(0); - - for (int i = 1; i < binCount; i++) + while (this.BinBounds[binId, 1] < hertzValue) { - if (this.BinBounds[i, 1] > herzValue) - { - binId = this.BinBounds[i, 0]; - break; - } + binId++; } - // subtract 1 because have actually extracted the upper bin bound - return binId - 1; + return binId; } /// @@ -220,11 +273,13 @@ public int GetBinIdInReducedSpectrogramForHerzValue(int herzValue) public int[,] GetLinearBinBounds() { double herzInterval = this.Nyquist / (double)this.FinalBinCount; + double scaleFactor = this.WindowSize / 2 / (double)this.FinalBinCount; + var binBounds = new int[this.FinalBinCount, 2]; for (int i = 0; i < this.FinalBinCount; i++) { - binBounds[i, 0] = i; + binBounds[i, 0] = (int)Math.Round(i * scaleFactor); binBounds[i, 1] = (int)Math.Round(i * herzInterval); } @@ -232,24 +287,28 @@ public int GetBinIdInReducedSpectrogramForHerzValue(int herzValue) } /// - /// Returns an [N, 2] matrix with bin ID in column 1 and lower Herz bound in column 2 but on Mel scale. + /// THis method assumes that the frameSize will be power of 2 + /// FOR DEBUG PURPOSES, when sr = 22050 and frame size = 8192, the following Hz are located at index: + /// Hz Index + /// 15 6 + /// 31 12 + /// 62 23 + /// 125 46 + /// 250 93 + /// 500 186 + /// 1000 372. /// - public int[,] GetMelBinBounds() + public static double[] GetLinearFreqScale(int nyquist, int binCount) { - double maxMel = (int)MFCCStuff.Mel(this.Nyquist); - int melBinCount = this.FinalBinCount; - double melPerBin = maxMel / melBinCount; - - var binBounds = new int[this.FinalBinCount, 2]; + double freqStep = nyquist / (double)binCount; + double[] linearFreqScale = new double[binCount]; - for (int i = 0; i < melBinCount; i++) + for (int i = 0; i < binCount; i++) { - binBounds[i, 0] = i; - double mel = i * melPerBin; - binBounds[i, 1] = (int)MFCCStuff.InverseMel(mel); + linearFreqScale[i] = freqStep * i; } - return binBounds; + return linearFreqScale; } /// @@ -273,65 +332,9 @@ public int GetBinIdInReducedSpectrogramForHerzValue(int herzValue) return gridLineLocations; } - /// - /// This method is only called from Basesonogram.GetImage_ReducedSonogram(int factor, bool drawGridLines) - /// when drawing a reduced sonogram. - /// - public static int[] CreateLinearYaxis(int herzInterval, int nyquistFreq, int imageHt) - { - int minFreq = 0; - int maxFreq = nyquistFreq; - int freqRange = maxFreq - minFreq + 1; - double pixelPerHz = imageHt / (double)freqRange; - int[] vScale = new int[imageHt]; - - for (int f = minFreq + 1; f < maxFreq; f++) - { - // convert freq value to pixel id - if (f % 1000 == 0) - { - int hzOffset = f - minFreq; - int pixelId = (int)(hzOffset * pixelPerHz) + 1; - if (pixelId >= imageHt) - { - pixelId = imageHt - 1; - } - - // LoggedConsole.WriteLine("f=" + f + " hzOffset=" + hzOffset + " pixelID=" + pixelID); - vScale[pixelId] = 1; - } - } - - return vScale; - } - - /// - /// THIS METHOD NEEDS TO BE DEBUGGED. HAS NOT BEEN USED IN YEARS! - /// Use this method to generate grid lines for mel scale image - /// Currently this method is only called from BaseSonogram.GetImage() when bool doMelScale = true; - /// Frequencyscale.Draw1kHzLines(Image{Rgb24} bmp, bool doMelScale, int nyquist, double freqBinWidth). - /// - public static int[,] GetMelGridLineLocations(int gridIntervalInHertz, int nyquistFreq, int melBinCount) + public static void DrawFrequencyLinesOnImage(Image bmp, FrequencyScale freqScale, bool includeLabels) { - double maxMel = (int)MFCCStuff.Mel(nyquistFreq); - double melPerBin = maxMel / melBinCount; - int gridCount = nyquistFreq / gridIntervalInHertz; - - var gridLines = new int[gridCount, 2]; - - for (int f = 1; f <= gridCount; f++) - { - int herz = f * 1000; - int melValue = (int)MFCCStuff.Mel(herz); - int melBinId = (int)(melValue / melPerBin); - if (melBinId < melBinCount) - { - gridLines[f - 1, 0] = melBinId; - gridLines[f - 1, 1] = herz; - } - } - - return gridLines; + DrawFrequencyLinesOnImage(bmp, freqScale.GridLineLocations, includeLabels); } public static void DrawFrequencyLinesOnImage(Image bmp, int[,] gridLineLocations, bool includeLabels) @@ -369,6 +372,12 @@ public static void DrawFrequencyLinesOnImage(Image bmp, int[,] gridLineLo int height = bmp.Height; int bandCount = gridLineLocations.GetLength(0); + if (gridLineLocations == null || bmp.Height < 50) + { + // there is no point placing gridlines on a narrow image. It obscures too much spectrogram. + return; + } + // draw the grid line for each frequency band for (int b = 0; b < bandCount; b++) { @@ -408,290 +417,6 @@ public static void DrawFrequencyLinesOnImage(Image bmp, int[,] gridLineLo } } }); - } //end AddHzGridLines() - - public static void DrawFrequencyLinesOnImage(Image bmp, FrequencyScale freqScale, bool includeLabels) - { - DrawFrequencyLinesOnImage(bmp, freqScale.GridLineLocations, includeLabels); - } - - // **************************************************************************************************************************** - // ******** BELOW ARE SET OF TEST METHODS FOR THE VARIOUS FREQUENCY SCALES - // ******** They should probably be deleted as they have been replaced by proper VS Unit Testing methods in DSP.FrequencyScaletests.cs. - - /// - /// METHOD TO CHECK IF Default linear FREQ SCALE IS WORKING - /// Check it on standard one minute recording. - /// - public static void TESTMETHOD_LinearFrequencyScaleDefault() - { - var recordingPath = @"C:\SensorNetworks\SoftwareTests\TestRecordings\BAC2_20071008-085040.wav"; - var outputDir = @"C:\SensorNetworks\SoftwareTests\TestFrequencyScale".ToDirectoryInfo(); - var expectedResultsDir = Path.Combine(outputDir.FullName, TestTools.ExpectedResultsDir).ToDirectoryInfo(); - var outputImagePath = Path.Combine(outputDir.FullName, "linearScaleSonogram_default.png"); - var opFileStem = "BAC2_20071008"; - - var recording = new AudioRecording(recordingPath); - - // default linear scale - var fst = FreqScaleType.Linear; - var freqScale = new FrequencyScale(fst); - - var sonoConfig = new SonogramConfig - { - WindowSize = freqScale.FinalBinCount * 2, - WindowOverlap = 0.2, - SourceFName = recording.BaseName, - - //NoiseReductionType = NoiseReductionType.Standard, - NoiseReductionType = NoiseReductionType.None, - NoiseReductionParameter = 0.0, - }; - - var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); - sonogram.Configuration.WindowSize = freqScale.WindowSize; - - // DO NOISE REDUCTION - var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data); - sonogram.Data = dataMatrix; - - var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); - image.Save(outputImagePath); - - // DO FILE EQUALITY TEST - string testName = "testName"; - var expectedTestFile = new FileInfo(Path.Combine(expectedResultsDir.FullName, "FrequencyDefaultScaleTest.EXPECTED.json")); - var resultFile = new FileInfo(Path.Combine(outputDir.FullName, opFileStem + "FrequencyDefaultScaleTestResults.json")); - Acoustics.Shared.Csv.Csv.WriteMatrixToCsv(resultFile, freqScale.GridLineLocations); - TestTools.FileEqualityTest(testName, resultFile, expectedTestFile); - - LoggedConsole.WriteLine("Completed Default Linear Frequency Scale test"); - Console.WriteLine("\n\n"); - } - - /// - /// METHOD TO CHECK IF SPECIFIED linear FREQ SCALE IS WORKING - /// Check it on standard one minute recording. - /// - public static void TESTMETHOD_LinearFrequencyScale() - { - var recordingPath = @"C:\SensorNetworks\SoftwareTests\TestRecordings\BAC2_20071008-085040.wav"; - var outputDir = @"C:\SensorNetworks\SoftwareTests\TestFrequencyScale".ToDirectoryInfo(); - var expectedResultsDir = Path.Combine(outputDir.FullName, TestTools.ExpectedResultsDir).ToDirectoryInfo(); - var outputImagePath = Path.Combine(outputDir.FullName, "linearScaleSonogram.png"); - var opFileStem = "BAC2_20071008"; - - var recording = new AudioRecording(recordingPath); - - // specfied linear scale - int nyquist = 11025; - int frameSize = 1024; - int hertzInterval = 1000; - var freqScale = new FrequencyScale(nyquist, frameSize, hertzInterval); - var fst = freqScale.ScaleType; - - var sonoConfig = new SonogramConfig - { - WindowSize = freqScale.FinalBinCount * 2, - WindowOverlap = 0.2, - SourceFName = recording.BaseName, - - //NoiseReductionType = NoiseReductionType.Standard, - NoiseReductionType = NoiseReductionType.None, - NoiseReductionParameter = 0.0, - }; - - var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); - - // DO NOISE REDUCTION - var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data); - sonogram.Data = dataMatrix; - sonogram.Configuration.WindowSize = freqScale.WindowSize; - - var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); - image.Save(outputImagePath); - - // DO FILE EQUALITY TEST - string testName = "testName"; - var expectedTestFile = new FileInfo(Path.Combine(expectedResultsDir.FullName, "FrequencyLinearScaleTest.EXPECTED.json")); - var resultFile = new FileInfo(Path.Combine(outputDir.FullName, opFileStem + "FrequencyLinearScaleTestResults.json")); - Acoustics.Shared.Csv.Csv.WriteMatrixToCsv(resultFile, freqScale.GridLineLocations); - TestTools.FileEqualityTest(testName, resultFile, expectedTestFile); - - LoggedConsole.WriteLine("Completed Linear Frequency Scale test"); - Console.WriteLine("\n\n"); - } - - /// - /// METHOD TO CHECK IF SPECIFIED MEL FREQ SCALE IS WORKING - /// Check it on standard one minute recording. - /// - public static void TESTMETHOD_MelFrequencyScale() - { - var recordingPath = @"C:\SensorNetworks\SoftwareTests\TestRecordings\BAC2_20071008-085040.wav"; - var outputDir = @"C:\SensorNetworks\SoftwareTests\TestFrequencyScale".ToDirectoryInfo(); - var expectedResultsDir = Path.Combine(outputDir.FullName, TestTools.ExpectedResultsDir).ToDirectoryInfo(); - var outputImagePath = Path.Combine(outputDir.FullName, "melScaleSonogram.png"); - var opFileStem = "BAC2_20071008"; - - var recording = new AudioRecording(recordingPath); - - int nyquist = recording.Nyquist; - int frameSize = 1024; - int finalBinCount = 256; - int hertzInterval = 1000; - FreqScaleType scaleType = FreqScaleType.Mel; - var freqScale = new FrequencyScale(scaleType, nyquist, frameSize, finalBinCount, hertzInterval); - var fst = freqScale.ScaleType; - - var sonoConfig = new SonogramConfig - { - WindowSize = frameSize, - WindowOverlap = 0.2, - SourceFName = recording.BaseName, - DoMelScale = (scaleType == FreqScaleType.Mel) ? true : false, - MelBinCount = (scaleType == FreqScaleType.Mel) ? finalBinCount : frameSize / 2, - - //NoiseReductionType = NoiseReductionType.Standard, - NoiseReductionType = NoiseReductionType.None, - NoiseReductionParameter = 0.0, - }; - - var sonogram = new SpectrogramStandard(sonoConfig, recording.WavReader); - - // DRAW SPECTROGRAM - var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); - image.Save(outputImagePath); - - // DO FILE EQUALITY TEST - string testName = "MelTest"; - var expectedTestFile = new FileInfo(Path.Combine(expectedResultsDir.FullName, "MelFrequencyScaleTest.EXPECTED.json")); - var resultFile = new FileInfo(Path.Combine(outputDir.FullName, opFileStem + "MelFrequencyLinearScaleTestResults.json")); - Acoustics.Shared.Csv.Csv.WriteMatrixToCsv(resultFile, freqScale.GridLineLocations); - TestTools.FileEqualityTest(testName, resultFile, expectedTestFile); - - LoggedConsole.WriteLine("Completed Mel Frequency Scale test"); - Console.WriteLine("\n\n"); - } - - /// - /// METHOD TO CHECK IF Octave FREQ SCALE IS WORKING - /// Check it on standard one minute recording, SR=22050. - /// - public static void TESTMETHOD_OctaveFrequencyScale1() - { - var recordingPath = @"G:\SensorNetworks\WavFiles\LewinsRail\FromLizZnidersic\Lewinsrail_TasmanIs_Tractor_SM304253_0151119_0640_1minMono.wav"; - var outputDir = @"C:\SensorNetworks\Output\LewinsRail\LewinsRail_ThreeCallTypes".ToDirectoryInfo(); - - //var recordingPath = @"C:\SensorNetworks\SoftwareTests\TestRecordings\BAC\BAC2_20071008-085040.wav"; - //var outputDir = @"C:\SensorNetworks\SoftwareTests\TestFrequencyScale".ToDirectoryInfo(); - //var expectedResultsDir = Path.Combine(outputDir.FullName, TestTools.ExpectedResultsDir).ToDirectoryInfo(); - var outputImagePath = Path.Combine(outputDir.FullName, "octaveFrequencyScale1NoNoiseReduciton.png"); - - //var opFileStem = "Lewinsrail_TasmanIs_Tractor"; - - var recording = new AudioRecording(recordingPath); - - // default octave scale - var fst = FreqScaleType.Linear125Octaves6Tones30Nyquist11025; - var freqScale = new FrequencyScale(fst); - - var sonoConfig = new SonogramConfig - { - WindowSize = freqScale.WindowSize, - WindowOverlap = 0.75, - SourceFName = recording.BaseName, - NoiseReductionType = NoiseReductionType.None, - NoiseReductionParameter = 0.0, - }; - - // Generate amplitude sonogram and then conver to octave scale - var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); - sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale); - - // DO NOISE REDUCTION - //var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data); - //sonogram.Data = dataMatrix; - sonogram.Configuration.WindowSize = freqScale.WindowSize; - - var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); - image.Save(outputImagePath); - - // DO FILE EQUALITY TEST - //string testName = "test1"; - //var expectedTestFile = new FileInfo(Path.Combine(expectedResultsDir.FullName, "FrequencyOctaveScaleTest1.EXPECTED.json")); - //var resultFile = new FileInfo(Path.Combine(outputDir.FullName, opFileStem + "FrequencyOctaveScaleTest1Results.json")); - //Acoustics.Shared.Csv.Csv.WriteMatrixToCsv(resultFile, freqScale.GridLineLocations); - //TestTools.FileEqualityTest(testName, resultFile, expectedTestFile); - - LoggedConsole.WriteLine("Completed Octave Frequency Scale test 1"); - Console.WriteLine("\n\n"); - } - - /// - /// METHOD TO CHECK IF Octave FREQ SCALE IS WORKING - /// Check it on MARINE RECORDING from JASCO, SR=64000. - /// 24 BIT JASCO RECORDINGS from GBR must be converted to 16 bit. - /// ffmpeg -i source_file.wav -sample_fmt s16 out_file.wav - /// e.g. ". C:\Work\Github\audio-analysis\Extra Assemblies\ffmpeg\ffmpeg.exe" -i "C:\SensorNetworks\WavFiles\MarineRecordings\JascoGBR\AMAR119-00000139.00000139.Chan_1-24bps.1375012796.2013-07-28-11-59-56.wav" -sample_fmt s16 "C:\SensorNetworks\Output\OctaveFreqScale\JascoeMarineGBR116bit.wav" - /// ffmpeg binaries are in C:\Work\Github\audio-analysis\Extra Assemblies\ffmpeg. - /// - public static void TESTMETHOD_OctaveFrequencyScale2() - { - var recordingPath = @"C:\SensorNetworks\SoftwareTests\TestRecordings\MarineJasco_AMAR119-00000139.00000139.Chan_1-24bps.1375012796.2013-07-28-11-59-56-16bit.wav"; - var outputDir = @"C:\SensorNetworks\SoftwareTests\TestFrequencyScale".ToDirectoryInfo(); - var expectedResultsDir = Path.Combine(outputDir.FullName, TestTools.ExpectedResultsDir).ToDirectoryInfo(); - var outputImagePath = Path.Combine(outputDir.FullName, "JascoMarineGBR1.png"); - var opFileStem = "JascoMarineGBR1"; - - var recording = new AudioRecording(recordingPath); - var fst = FreqScaleType.Linear125Octaves7Tones28Nyquist32000; - var freqScale = new FrequencyScale(fst); - - var sonoConfig = new SonogramConfig - { - WindowSize = freqScale.WindowSize, - WindowOverlap = 0.2, - SourceFName = recording.BaseName, - NoiseReductionType = NoiseReductionType.None, - NoiseReductionParameter = 0.0, - }; - - var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); - sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale); - - // DO NOISE REDUCTION - var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data); - sonogram.Data = dataMatrix; - sonogram.Configuration.WindowSize = freqScale.WindowSize; - - var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); - image.Save(outputImagePath); - - // DO FILE EQUALITY TEST - string testName = "test2"; - var expectedTestFile = new FileInfo(Path.Combine(expectedResultsDir.FullName, "FrequencyOctaveScaleTest2.EXPECTED.json")); - var resultFile = new FileInfo(Path.Combine(outputDir.FullName, opFileStem + "FrequencyOctaveScaleTest2Results.json")); - Acoustics.Shared.Csv.Csv.WriteMatrixToCsv(resultFile, freqScale.GridLineLocations); - TestTools.FileEqualityTest(testName, resultFile, expectedTestFile); - - LoggedConsole.WriteLine("Completed Octave Frequency Scale " + testName); - Console.WriteLine("\n\n"); - } - - public static void TESTMETHOD_DrawFrequencyLinesOnImage() - { - string filename = @"C:\SensorNetworks\SoftwareTests\TestFrequencyScale\Clusters50.bmp"; - string outputFile = @"C:\SensorNetworks\SoftwareTests\TestFrequencyScale\Clusters50WithGrid.bmp"; - var bmp = Image.Load(filename); - - int nyquist = 11025; - int frameSize = 1024; - int finalBinCount = 128; - int gridInterval = 1000; - var freqScale = new FrequencyScale(FreqScaleType.Mel, nyquist, frameSize, finalBinCount, gridInterval); - DrawFrequencyLinesOnImage((Image)bmp, freqScale, includeLabels: false); - bmp.Save(outputFile); } } } \ No newline at end of file diff --git a/src/AudioAnalysisTools/DSP/MFCCStuff.cs b/src/AudioAnalysisTools/DSP/MFCCStuff.cs index 4aab320c7..85aef0fd8 100644 --- a/src/AudioAnalysisTools/DSP/MFCCStuff.cs +++ b/src/AudioAnalysisTools/DSP/MFCCStuff.cs @@ -212,6 +212,7 @@ public static double Mel(double f) /// /// Converts a Mel value to Herz. + /// NOTE: By default this Mel scale is linear to 1000 Hz. /// /// the Herz value. public static double InverseMel(double mel) @@ -322,6 +323,26 @@ public static double InverseHerzTranform(double m, double c, double div) return outData; } + /// + /// Returns an [N, 2] matrix with bin ID in column 1 and lower Herz bound in column 2 but on Mel scale. + /// + public static int[,] GetMelBinBounds(int nyquist, int melBinCount) + { + double maxMel = (int)MFCCStuff.Mel(nyquist); + double melPerBin = maxMel / melBinCount; + + var binBounds = new int[melBinCount, 2]; + + for (int i = 0; i < melBinCount; i++) + { + binBounds[i, 0] = i; + double mel = i * melPerBin; + binBounds[i, 1] = (int)MFCCStuff.InverseMel(mel); + } + + return binBounds; + } + /// /// Does MelFilterBank for passed sonogram matrix. /// IMPORTANT !!!!! Assumes that min freq of passed sonogram matrix = 0 Hz and maxFreq = Nyquist. diff --git a/src/AudioAnalysisTools/DSP/NoiseProfile.cs b/src/AudioAnalysisTools/DSP/NoiseProfile.cs index cfbb74809..9f21e2bac 100644 --- a/src/AudioAnalysisTools/DSP/NoiseProfile.cs +++ b/src/AudioAnalysisTools/DSP/NoiseProfile.cs @@ -176,7 +176,7 @@ public static NoiseProfile CalculatePercentileNoiseProfile(double[,] matrix, int /// (1) MODAL METHOD /// Calculates the modal background noise for each freqeuncy bin. /// Return the smoothed modal profile. - /// By default set the number of SDs = 0. + /// Set the default zero value for number of SDs. /// /// Assumes the passed spectrogram is oriented as: rows=frames, cols=freq bins. public static double[] CalculateBackgroundNoise(double[,] spectrogram) diff --git a/src/AudioAnalysisTools/DSP/OctaveFreqScale.cs b/src/AudioAnalysisTools/DSP/OctaveFreqScale.cs index 84fb92bd5..3897a6c6d 100644 --- a/src/AudioAnalysisTools/DSP/OctaveFreqScale.cs +++ b/src/AudioAnalysisTools/DSP/OctaveFreqScale.cs @@ -7,131 +7,97 @@ namespace AudioAnalysisTools.DSP using System; using System.Collections.Generic; using AudioAnalysisTools.StandardSpectrograms; - using AudioAnalysisTools.WavTools; using MathNet.Numerics; using TowseyLibrary; public static class OctaveFreqScale { - /// IMPORTANT NOTE: If you are converting Herz scale from LINEAR to OCTAVE, this conversion MUST be done BEFORE noise reduction /// - /// CONSTRUCTION OF Frequency Scales - /// WARNING!: Changing the constants for the octave scales will have undefined effects. - /// The options below have been debugged to give what is required. - /// However other values have not been debugged - so user should check the output to ensure it is what is required. + /// Calculates the parameters for a standard mixed linear-octave frequency scale. + /// IMPORTANT: It assumes that the nyquist, frame size and linear bound have already been set to valid values. + /// What makes this scale "standard" is that the number of octaveDivsions/tones (T) is set equal to number of linear bins. + /// The remainder of the spectrum will be reduced over T-tone octaves. + /// Sensible values for linearUpperBound are 125, 250, 500, 1000. + /// Note that when linearUpperBound = 500, the resulting spectrogram is very similar to the default MelScale. + /// When nyquist=11025 and frameSize = 512, the default MelScale has 64 frequency bins and Linear500-octave has 66 frequency bands. /// - public static void GetOctaveScale(FrequencyScale scale) + public static FrequencyScale GetStandardOctaveScale(FrequencyScale scale) { - int finalBinCount = 256; - int sr, frameSize, octaveDivisions; + int sr = scale.Nyquist * 2; + var binWidth = sr / (double)scale.WindowSize; + int linearUpperBound = scale.LinearBound; - // NOTE: octaveDivisions = the number of fractional Hz steps within one octave. Piano octave contains 12 steps per octave. - - FreqScaleType fst = scale.ScaleType; - - switch (fst) + if (linearUpperBound < 32 || linearUpperBound > scale.Nyquist - 64) { - case FreqScaleType.Linear62Octaves7Tones31Nyquist11025: - // constants required for split linear-octave scale when sr = 22050 - sr = 22050; - frameSize = 8192; - scale.OctaveCount = 7; - octaveDivisions = 31; // tone steps within one octave. Note: piano = 12 steps per octave. - scale.LinearBound = 62; - scale.Nyquist = 11025; - break; + throw new ArgumentException("WARNING: Invalid LinearBound value for method GetStandardOctaveScale()."); + } - case FreqScaleType.Linear125Octaves6Tones30Nyquist11025: - // constants required for split linear-octave scale when sr = 22050 - sr = 22050; - frameSize = 8192; - scale.OctaveCount = 6; - octaveDivisions = 32; // tone steps within one octave. Note: piano = 12 steps per octave. - scale.LinearBound = 125; - scale.Nyquist = 11025; - break; + // init tone steps within one octave. Note: piano octave = 12 steps per octave. + scale.ToneCount = (int)Math.Round(scale.LinearBound / binWidth); + scale.BinBounds = LinearToSplitLinearOctaveScale(scale.Nyquist, scale.WindowSize, scale.LinearBound, scale.Nyquist, scale.ToneCount); + scale.FinalBinCount = scale.BinBounds.GetLength(0); - case FreqScaleType.Octaves24Nyquist32000: - //// constants required for full octave scale when sr = 64000 - sr = 64000; - frameSize = 16384; - scale.OctaveCount = 8; - octaveDivisions = 24; // tone steps within one octave. Note: piano = 12 steps per octave. - scale.LinearBound = 15; - scale.Nyquist = 32000; - break; + // These only work for case where linearUpperScale = 1000 Hz + int topLinearIndex = (int)Math.Round(linearUpperBound / binWidth); - case FreqScaleType.Linear125Octaves7Tones28Nyquist32000: - // constants required for split linear-octave scale when sr = 64000 - sr = 64000; - frameSize = 16384; // = 2*8192 or 4*4096; - scale.OctaveCount = 7; - octaveDivisions = 28; // tone steps within one octave. Note: piano = 12 steps per octave. - scale.LinearBound = 125; - scale.Nyquist = 32000; - break; + var gridLineLocations = new int[4, 2]; + gridLineLocations[0, 0] = topLinearIndex; + gridLineLocations[0, 1] = 1000; + gridLineLocations[1, 0] = topLinearIndex * 2; + gridLineLocations[1, 1] = 2000; - default: - LoggedConsole.WriteErrorLine("WARNING: UNKNOWN OCTAVE SCALE."); - return; + if (linearUpperBound > 256) + { + gridLineLocations[2, 0] = topLinearIndex * 3; + gridLineLocations[2, 1] = 4000; + gridLineLocations[3, 0] = topLinearIndex * 4; + gridLineLocations[3, 1] = 8000; } - scale.WindowSize = frameSize; // = 2*8192 or 4*4096 - scale.FinalBinCount = finalBinCount; - scale.ToneCount = octaveDivisions; - scale.BinBounds = LinearToSplitLinearOctaveScale(sr, frameSize, finalBinCount, scale.LinearBound, scale.Nyquist, scale.ToneCount); - scale.GridLineLocations = GetGridLineLocations(fst, scale.BinBounds); + scale.GridLineLocations = gridLineLocations; + return scale; } /// - /// This method takes an audio recording and returns an octave scale spectrogram. - /// At the present time it only works for recordings with 64000 sample rate and returns a 256 bin sonogram. - /// TODO: generalise this method for other recordings and octave scales. + /// This method is octave frequency scale equivalent of MFCCStuff.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon) + /// The MFCCStuff method is the proper way to convert amplitude spectrogram to decibels. + /// It converts an amplitude spectrogram to a power spectrogram having specified frequency scale. + /// It transforms the amplitude spectrogram in the following steps: + /// (1) It removes the DC row or bin 0 iff there is odd number of spectrogram bins. ASSUMPTION: Bin count should be power of 2 from FFT. + /// (2) Then reduce the linear scale to an octave scale depending on the sr and required number of bins or filters. + /// (3) It converts spectral amplitudes to power, normalising for window power and sample rate. + /// The window contributes power to the signal which must subsequently be removed from the spectral power. Calculate power per sample. + /// See notes in the MFCCStuff.DecibelSpectra for further exp[lanaitons. These normalisations were adapted from MatLab MFCC code. + /// (4) It converts the power spectrogram to decibels. /// - public static BaseSonogram ConvertRecordingToOctaveScaleSonogram(AudioRecording recording, FreqScaleType fst) + /// The amplitude spectrogram. + /// FFT window power comes from DSP_Frames.WindowPower. + /// of the original signal. + /// dependson bit-rate of the original signal. + /// In this case an octave frquency scale. + /// The decibel spectrogram. + public static double[,] ConvertAmplitudeSpectrogramToFreqScaledDecibels(double[,] amplitudeM, double windowPower, int sampleRate, double epsilon, FrequencyScale freqScale) { - var freqScale = new FrequencyScale(fst); - double windowOverlap = 0.75; - var sonoConfig = new SonogramConfig + int frameCount = amplitudeM.GetLength(0); + int binCount = amplitudeM.GetLength(1); + + if (binCount.IsOdd()) { - WindowSize = freqScale.WindowSize, - WindowOverlap = windowOverlap, - SourceFName = recording.BaseName, - NoiseReductionType = NoiseReductionType.None, - NoiseReductionParameter = 0.0, - }; - - // Generate amplitude sonogram and then conver to octave scale - var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); - - // THIS IS THE CRITICAL LINE. - // TODO: SHOULD DEVELOP A SEPARATE UNIT TEST for this method - sonogram.Data = ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale); - - // DO NOISE REDUCTION - var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data); - sonogram.Data = dataMatrix; - int windowSize = freqScale.FinalBinCount * 2; - sonogram.Configuration.WindowSize = windowSize; - sonogram.Configuration.WindowStep = (int)Math.Round(windowSize * (1 - windowOverlap)); - return sonogram; - } + // remove the DC freq bin 0. + amplitudeM = MatrixTools.Submatrix(amplitudeM, 0, 1, frameCount - 1, binCount - 1); + } - public static double[,] ConvertAmplitudeSpectrogramToDecibelOctaveScale(double[,] inputSpgram, FrequencyScale freqScale) - { - //var dataMatrix = MatrixTools.Submatrix(inputSpgram, 0, 1, inputSpgram.GetLength(0) - 1, inputSpgram.GetLength(1) - 1); - //square the values to produce power spectrogram - var dataMatrix = MatrixTools.SquareValues(inputSpgram); - - //convert spectrogram to octave scale - dataMatrix = ConvertLinearSpectrogramToOctaveFreqScale(dataMatrix, freqScale); - dataMatrix = MatrixTools.Power2DeciBels(dataMatrix, out var min, out var max); - return dataMatrix; + var octaveScaleM = ConvertLinearSpectrogramToOctaveFreqScale(amplitudeM, freqScale); + var powerSpectrogram = ConvertAmplitudeToPowerSpectrogram(octaveScaleM, windowPower, sampleRate); + + // Convert the power values to log using: dB = 10*log(power) + var powerEpsilon = epsilon * epsilon / windowPower / sampleRate; + var decibelSpectrogram = MatrixTools.SpectrogramPower2DeciBels(powerSpectrogram, powerEpsilon, out var min, out var max); + return decibelSpectrogram; } /// /// Converts a spectrogram having linear freq scale to one having an Octave freq scale. - /// Note that the sample rate (sr) and the frame size both need to be apporpriate to the choice of FreqScaleType. /// TODO: SHOULD DEVELOP A SEPARATE UNIT TEST for this method. /// public static double[,] ConvertLinearSpectrogramToOctaveFreqScale(double[,] inputSpgram, FrequencyScale freqScale) @@ -149,9 +115,6 @@ public static BaseSonogram ConvertRecordingToOctaveScaleSonogram(AudioRecording // get the octave bin bounds for this octave scale type var octaveBinBounds = freqScale.BinBounds; - - //var octaveBinBounds = GetOctaveScale(freqScale.ScaleType); - int newBinCount = octaveBinBounds.GetLength(0); // set up the new octave spectrogram @@ -166,7 +129,8 @@ public static BaseSonogram ConvertRecordingToOctaveScaleSonogram(AudioRecording var linearSpectrum = MatrixTools.GetRow(inputSpgram, row); // convert the spectrum to its octave form - var octaveSpectrum = OctaveSpectrum(octaveBinBounds, linearSpectrum); + //var octaveSpectrum = ConvertLinearSpectrumToOctaveScale(octaveBinBounds, linearSpectrum); + var octaveSpectrum = SpectrogramTools.RescaleSpectrumUsingFilterbank(octaveBinBounds, linearSpectrum); //return the spectrum to output spectrogram. MatrixTools.SetRow(octaveSpectrogram, row, octaveSpectrum); @@ -175,220 +139,202 @@ public static BaseSonogram ConvertRecordingToOctaveScaleSonogram(AudioRecording return octaveSpectrogram; } - public static double[,] DecibelSpectra(double[,] amplitudeM, double windowPower, int sampleRate, double epsilon, FrequencyScale freqScale) - { - double[,] powerSpectra = PowerSpectra(amplitudeM, windowPower, sampleRate, epsilon, freqScale); - - // Convert the power values to log using: dB = 10*log(power) - var decibelSpectra = MatrixTools.Power2DeciBels(powerSpectra, out var min, out var max); - return decibelSpectra; - } - - public static double[,] AmplitudeSpectra(double[,] amplitudeM, double windowPower, int sampleRate, double epsilon, FrequencyScale freqScale) - { - double[,] powerSpectra = PowerSpectra(amplitudeM, windowPower, sampleRate, epsilon, freqScale); - - // Convert the power values back to amplitude by taking the square root. - var amplitudeSpectra = MatrixTools.SquareRootOfValues(powerSpectra); - return amplitudeSpectra; - } - /// - /// Converts an amplitude spectrogram to a power spectrogram having an octave frequency scale. - /// This method has been copied from a method of same name in the class MFCCStuff.cs and adapted to produce an octave freq scale. - /// It transforms the amplitude spectrogram in the following steps: - /// (1) It removes the DC row or bin 0 iff there is odd number of spectrogram bins. ASSUMPTION: Bin count should be power of 2 from FFT. - /// (1) It converts spectral amplitudes to power, normalising for window power and sample rate. - /// The window contributes power to the signal which must subsequently be removed from the spectral power. Calculate power per sample. - /// See notes in the MFCCStuff.DecibelSpectra for further exp[lanaitons. These normalisations were adapted from MatLab MFCC code. - /// (2) Then reduce the linear scale toan octave scale depending on the sr and required number of bins or filters. + /// Converts Amplitude Spectrogram to Power Spectrogram. + /// Square the amplitude values to get power. + /// Power values must be adjusted for the power in the FFT window and also for the sample rate. + /// Must divide by the window power to remove its contribution to amplitude values. + /// Must divide by sample rate to get average power per signal sample. + /// NOTE: Multiply by 2 to accomodate two spectral components, ie positive and neg freq. + /// BUT the last nyquist bin does not require a factor of two. + /// However this method is called only by octave reduced frequency scales where the nyquist bin is just one of several. /// - /// the amplitude spectra. - /// value for window power normalisation. - /// to NormaliseMatrixValues for the sampling rate. - /// small value to avoid log of zero. - /// the kind of frequency scale. - public static double[,] PowerSpectra(double[,] amplitudeM, double windowPower, int sampleRate, double epsilon, FrequencyScale freqScale) + /// The frequency scaled amplitude spectrogram. + /// Power of the FFT window. + /// The sample rate of the original recording. + /// The Power Spectrogram as matrix. Each spectrum is a matrix row. + public static double[,] ConvertAmplitudeToPowerSpectrogram(double[,] amplitudeM, double windowPower, int sampleRate) { int frameCount = amplitudeM.GetLength(0); int binCount = amplitudeM.GetLength(1); - double minPow = epsilon * epsilon / windowPower / sampleRate; - double minPow2 = epsilon * epsilon * 2 / windowPower / sampleRate; - - if (binCount.IsOdd()) - { - // remove the DC freq bin 0. - amplitudeM = MatrixTools.Submatrix(amplitudeM, 0, 1, frameCount - 1, binCount - 1); - } - - // init the spectrogram as a matrix of spectra - double[,] powerSpectra = new double[frameCount, binCount]; + // init the octave scaled spectrogram as a matrix of spectra + double[,] powerSpectrogram = new double[frameCount, binCount]; - // first square the values to calculate power. + // Square the values to calculate power. // Must multiply by 2 to accomodate two spectral components, ie positive and neg freq. - for (int j = 0; j < binCount - 1; j++) + for (int j = 0; j < binCount; j++) { //foreach time step or frame for (int i = 0; i < frameCount; i++) { - if (amplitudeM[i, j] < epsilon) - { - powerSpectra[i, j] = minPow2; - } - else - { - powerSpectra[i, j] = amplitudeM[i, j] * amplitudeM[i, j] * 2 / windowPower / sampleRate; - } - } //end of all frames - } //end of all freq bins + powerSpectrogram[i, j] = amplitudeM[i, j] * amplitudeM[i, j] * 2 / windowPower / sampleRate; + } + } + /* //calculate power of the Nyquist freq bin - last column of matrix //foreach time step or frame for (int i = 0; i < frameCount; i++) { - //calculate power of the DC value - if (amplitudeM[i, binCount - 1] < epsilon) - { - powerSpectra[i, binCount - 1] = minPow; - } - else - { - powerSpectra[i, binCount - 1] = amplitudeM[i, binCount - 1] * amplitudeM[i, binCount - 1] / windowPower / sampleRate; - } + powerSpectra[i, binCount - 1] = amplitudeM[i, binCount - 1] * amplitudeM[i, binCount - 1] / windowPower / sampleRate; } + */ - powerSpectra = ConvertLinearSpectrogramToOctaveFreqScale(powerSpectra, freqScale); - return powerSpectra; + return powerSpectrogram; } - public static int[,] GetGridLineLocations(FreqScaleType ost, int[,] octaveBinBounds) + public static int[,] GetGridLineLocations(int nyquist, int linearBound, int[,] octaveBinBounds) { - int[,] gridLineLocations = null; + int[] gridLocationsHertz = { 62, 125, 250, 500, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000 }; + int binCount = octaveBinBounds.GetLength(0); + var gridLineLocations = new List(); + var upperBound = nyquist * 0.75; + + // get location index of the first required gridline. + int glIndex = 0; + while (gridLocationsHertz[glIndex] < linearBound) + { + glIndex++; + } - switch (ost) + int glHertz = gridLocationsHertz[glIndex]; + + for (int i = 0; i < binCount; i++) { - case FreqScaleType.Linear62Octaves7Tones31Nyquist11025: - gridLineLocations = new int[8, 2]; - LoggedConsole.WriteErrorLine("This Octave Scale does not currently have grid data provided."); - break; + if (octaveBinBounds[i, 1] < linearBound) + { + continue; + } - case FreqScaleType.Linear125Octaves6Tones30Nyquist11025: - gridLineLocations = new int[7, 2]; - gridLineLocations[0, 0] = 46; // 125 Hz - gridLineLocations[1, 0] = 79; // 250 - gridLineLocations[2, 0] = 111; // 500 - gridLineLocations[3, 0] = 143; // 1000 - gridLineLocations[4, 0] = 175; // 2000 - gridLineLocations[5, 0] = 207; // 4000 - gridLineLocations[6, 0] = 239; // 8000 - - // enter the Hz value - gridLineLocations[0, 1] = 125; // 125 Hz - gridLineLocations[1, 1] = 250; // 250 - gridLineLocations[2, 1] = 500; // 500 - gridLineLocations[3, 1] = 1000; // 1000 - gridLineLocations[4, 1] = 2000; // 2000 - gridLineLocations[5, 1] = 4000; // 4000 - gridLineLocations[6, 1] = 8000; // 8000 - break; - case FreqScaleType.Octaves24Nyquist32000: - gridLineLocations = new int[8, 2]; - LoggedConsole.WriteErrorLine("This Octave Scale does not currently have grid data provided."); + if (octaveBinBounds[i, 1] > upperBound) + { break; + } - case FreqScaleType.Linear125Octaves7Tones28Nyquist32000: - gridLineLocations = new int[9, 2]; - gridLineLocations[0, 0] = 34; // 125 Hz - gridLineLocations[1, 0] = 62; // 250 - gridLineLocations[2, 0] = 89; // 500 - gridLineLocations[3, 0] = 117; // 1000 - gridLineLocations[4, 0] = 145; // 2000 - gridLineLocations[5, 0] = 173; // 4000 - gridLineLocations[6, 0] = 201; // 8000 - gridLineLocations[7, 0] = 229; //16000 - gridLineLocations[8, 0] = 256; //32000 - - // enter the Hz values - gridLineLocations[0, 1] = 125; // 125 Hz - gridLineLocations[1, 1] = 250; // 250 - gridLineLocations[2, 1] = 500; // 500 - gridLineLocations[3, 1] = 1000; // 1000 - gridLineLocations[4, 1] = 2000; // 2000 - gridLineLocations[5, 1] = 4000; // 4000 - gridLineLocations[6, 1] = 8000; // 8000 - gridLineLocations[7, 1] = 16000; //16000 - gridLineLocations[8, 1] = 32000; //32000 + // if get to a grid location + if (octaveBinBounds[i, 1] >= glHertz) + { + var intArray = new int[2]; + intArray[0] = i; + intArray[1] = glHertz; // octaveBinBounds[i, 1]; + gridLineLocations.Add(intArray); - break; - default: - LoggedConsole.WriteErrorLine("Not a valid Octave Scale."); - break; + glIndex++; + glHertz = gridLocationsHertz[glIndex]; + } } - return gridLineLocations; + // there is better way to do this. + var returnMatrix = new int[gridLineLocations.Count, 2]; + for (int gl = 0; gl < gridLineLocations.Count; gl++) + { + var location = gridLineLocations[gl]; + returnMatrix[gl, 0] = location[0]; + returnMatrix[gl, 1] = location[1]; + } + + return returnMatrix; } /// - /// Converts a single linear spectrum to octave scale spectrum. + /// Returns a matrix that is used to transform a spectrum having linear Hz scale into a spectrum having an octave freq scale. + /// The returned matrix is size N x 2, where N = the length of the output spectrum. + /// In fact the op spectrum has a split Herz scale - bottom part linear, top part octave scaled. + /// Column 0 of the returned matrix contains the index into linear spectrum. + /// Column 1 of the returned matrix contains the Hertz value of the corresponding index into the linear spectrum. /// - public static double[] OctaveSpectrum(int[,] octaveBinBounds, double[] linearSpectrum) + public static int[,] LinearToSplitLinearOctaveScale(int nyquist, int frameSize, int lowerFreqBound, int upperFreqBound, int octaveDivisions) { - int length = octaveBinBounds.GetLength(0); - var octaveSpectrum = new double[length]; - for (int i = 1; i < length - 1; i++) + // Get the linear freq scale. + int inputSpectrumSize = frameSize / 2; + var linearFreqScale = FrequencyScale.GetLinearFreqScale(nyquist, inputSpectrumSize); + + // Get the octave freq scale. + var octaveBandsLowerBounds = GetFractionalOctaveBands(lowerFreqBound, upperFreqBound, octaveDivisions); + + double freqStep = nyquist / (double)inputSpectrumSize; + int topLinearIndex = (int)Math.Round(lowerFreqBound / freqStep); + int opSpectrumSize = topLinearIndex + octaveBandsLowerBounds.GetLength(0); + + // set up the transform matrix. + var splitLinearOctaveIndexBounds = new int[opSpectrumSize, 2]; + + // fill in the linear part of the freq scale + for (int i = 0; i <= topLinearIndex; i++) + { + splitLinearOctaveIndexBounds[i, 0] = i; + splitLinearOctaveIndexBounds[i, 1] = (int)Math.Round(linearFreqScale[i]); + } + + for (int i = topLinearIndex + 1; i < opSpectrumSize; i++) { - int lowIndex = octaveBinBounds[i - 1, 0]; - int centreIndex = octaveBinBounds[i, 0]; - int highIndex = octaveBinBounds[i + 1, 0]; - if (highIndex >= linearSpectrum.Length) + for (int j = 0; j < linearFreqScale.Length; j++) { - highIndex = linearSpectrum.Length - 1; + if (linearFreqScale[j] > octaveBandsLowerBounds[i - topLinearIndex]) + { + splitLinearOctaveIndexBounds[i, 0] = j; + splitLinearOctaveIndexBounds[i, 1] = (int)Math.Round(linearFreqScale[j]); + break; + } } - - octaveSpectrum[i] = FilterbankIntegral(linearSpectrum, lowIndex, centreIndex, highIndex); } - // now fill in the first value of the octave spectrum - int lowIndex1 = octaveBinBounds[0, 0]; - int centreIndex1 = octaveBinBounds[0, 0]; - int highIndex1 = octaveBinBounds[1, 0]; - octaveSpectrum[0] = FilterbankIntegral(linearSpectrum, lowIndex1, centreIndex1, highIndex1); - - // now fill in the last value of the octave spectrum - int lowIndex2 = octaveBinBounds[length - 2, 0]; - int centreIndex2 = octaveBinBounds[length - 1, 0]; - int highIndex2 = octaveBinBounds[length - 1, 0]; - octaveSpectrum[length - 1] = FilterbankIntegral(linearSpectrum, lowIndex2, centreIndex2, highIndex2); - return octaveSpectrum; + // make sure last index extends to last bin of the linear spectrum. + splitLinearOctaveIndexBounds[opSpectrumSize - 1, 0] = linearFreqScale.Length - 1; + splitLinearOctaveIndexBounds[opSpectrumSize - 1, 1] = (int)Math.Round(linearFreqScale[linearFreqScale.Length - 1]); + + return splitLinearOctaveIndexBounds; } /// - /// Returns the index bounds for a split herz scale - bottom part linear, top part octave scaled. + /// This method assumes that the linear spectrum is derived from a 512 frame with sr = 22050. + /// It is a split linear-octave scale. + /// The linear part is from 0-2 kHz with reduction by factor of 8. + /// The octave part is obtained by setting octave divisions or tone count = 5. /// - public static int[,] LinearToSplitLinearOctaveScale(int sr, int frameSize, int finalBinCount, int lowerFreqBound, int upperFreqBound, int octaveDivisions) + /// a frequency scale for spectral-data reduction purposes. + public static FrequencyScale GetDataReductionScale(FrequencyScale scale) { - var bandBounds = GetFractionalOctaveBands(lowerFreqBound, upperFreqBound, octaveDivisions); - int nyquist = sr / 2; - int binCount = frameSize / 2; - var linearFreqScale = GetLinearFreqScale(nyquist, binCount); + int sr = 22050; + int frameSize = 512; + scale.Nyquist = sr / 2; + scale.WindowSize = frameSize; + + // linear reduction of the lower spectrum from 0 - 2 kHz. + scale.LinearBound = 2000; + int linearReductionFactor = 8; + // Reduction of upper spectrum 2-11 kHz: Octave count and tone steps within one octave. + scale.ToneCount = 5; + + var octaveBandsLowerBounds = GetFractionalOctaveBands(scale.LinearBound, scale.Nyquist, scale.ToneCount); + int spectrumBinCount = frameSize / 2; + var linearFreqScale = FrequencyScale.GetLinearFreqScale(scale.Nyquist, spectrumBinCount); + + double linearBinWidth = scale.Nyquist / (double)spectrumBinCount; + int topLinearIndex = (int)Math.Round(scale.LinearBound / linearBinWidth); + + // calculate number of bins in linear portion. +1 because going to finish up at end of linear portion. + int linearReducedBinCount = (topLinearIndex / linearReductionFactor) + 1; + int finalBinCount = linearReducedBinCount + octaveBandsLowerBounds.Length; var splitLinearOctaveIndexBounds = new int[finalBinCount, 2]; - double freqStep = nyquist / (double)binCount; - int topLinearIndex = (int)Math.Round(lowerFreqBound / freqStep) + 1; // fill in the linear part of the freq scale - for (int i = 0; i < topLinearIndex; i++) + int z = 1; + while (splitLinearOctaveIndexBounds[z - 1, 1] < scale.LinearBound) { - splitLinearOctaveIndexBounds[i, 0] = i; - splitLinearOctaveIndexBounds[i, 1] = (int)Math.Round(linearFreqScale[i]); + splitLinearOctaveIndexBounds[z, 0] = z * linearReductionFactor; + splitLinearOctaveIndexBounds[z, 1] = (int)Math.Round(linearFreqScale[z * linearReductionFactor]); + z++; } - for (int i = topLinearIndex; i < finalBinCount; i++) + // fill in the octave part of the freq scale + for (int i = linearReducedBinCount + 1; i < finalBinCount; i++) { for (int j = 0; j < linearFreqScale.Length; j++) { - if (linearFreqScale[j] > bandBounds[i - topLinearIndex]) + if (linearFreqScale[j] > octaveBandsLowerBounds[i - linearReducedBinCount]) { splitLinearOctaveIndexBounds[i, 0] = j; splitLinearOctaveIndexBounds[i, 1] = (int)Math.Round(linearFreqScale[j]); @@ -397,29 +343,29 @@ public static double[] OctaveSpectrum(int[,] octaveBinBounds, double[] linearSpe } } - // make sure last index has values + // make sure last index extends to last bin of the linear spectrum. splitLinearOctaveIndexBounds[finalBinCount - 1, 0] = linearFreqScale.Length - 1; splitLinearOctaveIndexBounds[finalBinCount - 1, 1] = (int)Math.Round(linearFreqScale[linearFreqScale.Length - 1]); - // A HACK!!! Make sure second last index has values if they are zero - if (splitLinearOctaveIndexBounds[finalBinCount - 2, 0] == 0) - { - splitLinearOctaveIndexBounds[finalBinCount - 2, 0] = linearFreqScale.Length - 1; - splitLinearOctaveIndexBounds[finalBinCount - 2, 1] = (int)Math.Round(linearFreqScale[linearFreqScale.Length - 1]); - } - - return splitLinearOctaveIndexBounds; + scale.BinBounds = splitLinearOctaveIndexBounds; + return scale; } /// /// Returns the index bounds for a full octave scale - from lowest freq set by user to top freq. /// + /// Sample rate of the source recording. + /// Frame size of the source recording. + /// Final Bin Count. + /// Lower bound of the octave part of the final scale. + /// Upper bound of the octave scale, most likely the Nyquist. + /// Number of tones/divisions per octave. public static int[,] LinearToFullOctaveScale(int sr, int frameSize, int finalBinCount, int lowerFreqBound, int upperFreqBound, int octaveDivisions) { var bandBounds = GetFractionalOctaveBands(lowerFreqBound, upperFreqBound, octaveDivisions); int nyquist = sr / 2; int binCount = frameSize / 2; - var linearFreqScale = GetLinearFreqScale(nyquist, binCount); + var linearFreqScale = FrequencyScale.GetLinearFreqScale(nyquist, binCount); var octaveIndexBounds = new int[finalBinCount, 2]; @@ -441,46 +387,63 @@ public static double[] OctaveSpectrum(int[,] octaveBinBounds, double[] linearSpe public static double[] GetFractionalOctaveBands(double minFreq, double maxFreq, int octaveDivisions) { - double[] freqBandCentres = { 15.625, 31.25, 62.5, 125, 250, 500, 1000, 2000, 4000, 8000, 16000, 32000, 64000 }; + double[] octaveLowerBounds = { 15.625, 31.25, 62.5, 125, 250, 500, 1000, 2000, 4000, 8000, 16000, 32000, 64000 }; var list = new List(); - for (int i = 0; i < freqBandCentres.Length; i++) + for (int i = 0; i < octaveLowerBounds.Length; i++) { - if (freqBandCentres[i] < minFreq) + // ignore this octave floor if below that required. + if (octaveLowerBounds[i] < minFreq) { continue; } - if (freqBandCentres[i] > maxFreq) + // stop when octave floor is above that required. + if (octaveLowerBounds[i] > maxFreq) { break; } - double[] fractionalOctaveBands = GetFractionalOctaveBands(freqBandCentres[i], octaveDivisions); + // get the frequency tones in the given octave. + double[] tonesInOctave = GetFractionalOctaveBands(octaveLowerBounds[i], octaveDivisions); for (int j = 0; j < octaveDivisions; j++) { - double floor = fractionalOctaveBands[j]; // sqrt2; - if (floor < minFreq) + double toneFloor = tonesInOctave[j]; + if (toneFloor < minFreq) { continue; } - list.Add(floor); + if (toneFloor > maxFreq) + { + break; + } + + list.Add(toneFloor); } } return list.ToArray(); } + /// + /// Returns an array of tones in one octave. + /// The units are frequency in Hertz. + /// NOTE: The octave is divided geometrically. + /// + /// The lower frquency bound of the octave. + /// The number of tones or frequency bins in the octave. + /// The frequency of each tone in the octave. public static double[] GetFractionalOctaveBands(double lowerBound, int subbandCount) { double[] fractionalOctaveBands = new double[subbandCount]; fractionalOctaveBands[0] = lowerBound; double exponent = 1 / (double)subbandCount; - double factor = Math.Pow(2, exponent); + // calculate the frequency increment factor between each tone and the next. + double factor = Math.Pow(2, exponent); for (int i = 1; i < subbandCount; i++) { fractionalOctaveBands[i] = fractionalOctaveBands[i - 1] * factor; @@ -488,135 +451,5 @@ public static double[] GetFractionalOctaveBands(double lowerBound, int subbandCo return fractionalOctaveBands; } - - /// - /// THis method assumes that the frameSize will be power of 2 - /// FOR DEBUG PURPOSES, when sr = 22050 and frame size = 8192, the following Hz are located at index: - /// Hz Index - /// 15 6 - /// 31 12 - /// 62 23 - /// 125 46 - /// 250 93 - /// 500 186 - /// 1000 372. - /// - public static double[] GetLinearFreqScale(int nyquist, int binCount) - { - double freqStep = nyquist / (double)binCount; - double[] linearFreqScale = new double[binCount]; - - for (int i = 0; i < binCount; i++) - { - linearFreqScale[i] = freqStep * i; - } - - return linearFreqScale; - } - - public static double FilterbankIntegral(double[] spectrum, int lowIndex, int centreIndex, int highIndex) - { - // let k = index into spectral vector. - // for all k < lowIndex, filterBank[k] = 0; - // for all k > highIndex, filterBank[k] = 0; - - // for all k in range (lowIndex <= k < centreIndex), filterBank[k] = (k-lowIndex) /(centreIndex - lowIndex) - // for all k in range (centreIndex <= k <= highIndex), filterBank[k] = (highIndex-k)/(highIndex - centreIndex) - - double area = 0.0; - double integral = 0.0; - int delta = centreIndex - lowIndex; - if (delta > 0) - { - for (int k = lowIndex; k < centreIndex; k++) - { - double weight = (k - lowIndex) / (double)delta; - integral += weight * spectrum[k]; - area += weight; - } - } - - integral += spectrum[centreIndex]; - area += 1.0; - - delta = highIndex - centreIndex; - if (delta > 0) - { - for (int k = centreIndex + 1; k <= highIndex; k++) - { - if (delta == 0) - { - continue; - } - - double weight = (highIndex - k) / (double)delta; - integral += weight * spectrum[k]; - area += weight; - } - } - - // NormaliseMatrixValues to area of the triangular filter - integral /= area; - return integral; - } - - /// - /// Returns a simple spectrogram for test purposes. - /// Write code for simple test. Different spectra tried so far: - /// (1) Uniform spectrum = 1.0 - /// (2) Ramp spectrum - /// (3) SPike spectrum. - /// - public static double[] GetSimpleTestSpectrum(int sr, int frameSize) - { - // int nyquist = sr / 2; - int binCount = frameSize / 2; - double[] spectrum = new double[binCount]; - - // return a linear frequency scale - // double freqStep = nyquist / (double)binCount; - for (int i = 0; i < binCount; i++) - { - // ramp spectrum - //spectrum[i] = freqStep * i; - - //Uniform spectrum - spectrum[i] = 1.0; - } - - // Spike spectrum - //spectrum[500] = 1.0; - - return spectrum; - } - - public static void TestOctaveScale(FreqScaleType fst) - { - var freqScale = new FrequencyScale(fst); - var octaveBinBounds = freqScale.BinBounds; - - // now test the octave scale using a test spectrum - int sr = 22050; - int frameSize = 8192; // default for sr = 22050 - - if (fst == FreqScaleType.Octaves24Nyquist32000 || fst == FreqScaleType.Linear125Octaves7Tones28Nyquist32000) - { - sr = 64000; - frameSize = 16384; // default for sr = 64000 - } - - // Get a simple test spectrum - var linearSpectrum = GetSimpleTestSpectrum(sr, frameSize); - - //do the test - var octaveSpectrum = OctaveSpectrum(octaveBinBounds, linearSpectrum); - - // write output - int rowCount = octaveBinBounds.GetLength(0); - for (int i = 0; i < rowCount; i++) - { - Console.WriteLine(i + " bin-" + octaveBinBounds[i, 0] + " " + octaveBinBounds[i, 1] + "Hz " + octaveSpectrum[i]); - } - } } } \ No newline at end of file diff --git a/src/AudioAnalysisTools/DSP/SNR.cs b/src/AudioAnalysisTools/DSP/SNR.cs index a9286970b..132cb5aac 100644 --- a/src/AudioAnalysisTools/DSP/SNR.cs +++ b/src/AudioAnalysisTools/DSP/SNR.cs @@ -825,7 +825,6 @@ public static BackgroundNoise CalculateModalBackgroundNoiseInSignal(double[] arr } int[] histo = Histogram.Histo(array, binCount, out var binWidth, out var min, out var max); - ////Log.WriteLine("BindWidth = "+ binWidth); int smoothingwindow = 3; if (binCount > 250) @@ -834,8 +833,6 @@ public static BackgroundNoise CalculateModalBackgroundNoiseInSignal(double[] arr } double[] smoothHisto = DataTools.filterMovingAverage(histo, smoothingwindow); - ////DataTools.writeBarGraph(histo); - GetModeAndOneStandardDeviation(smoothHisto, out var indexOfMode, out var indexOfOneStdDev); // modal noise level gets symbol Q in Lamel et al. diff --git a/src/AudioAnalysisTools/EventStatistics/EventStatisticsCalculate.cs b/src/AudioAnalysisTools/EventStatistics/EventStatisticsCalculate.cs index 232cc4c13..d6248faa5 100644 --- a/src/AudioAnalysisTools/EventStatistics/EventStatisticsCalculate.cs +++ b/src/AudioAnalysisTools/EventStatistics/EventStatisticsCalculate.cs @@ -127,7 +127,7 @@ public static EventStatistics AnalyzeAudioEvent( stats.SnrDecibels = max; // Now need to convert event matrix back to energy values before calculating other statistics - eventMatrix = MatrixTools.Decibels2Power(eventMatrix); + eventMatrix = MatrixTools.SpectrogramDecibels2Power(eventMatrix); var columnAverages = MatrixTools.GetColumnAverages(eventMatrix); var rowAverages = MatrixTools.GetRowAverages(eventMatrix); diff --git a/src/AudioAnalysisTools/Indices/IndexCalculate.cs b/src/AudioAnalysisTools/Indices/IndexCalculate.cs index 5e0fcdd88..6b586778b 100644 --- a/src/AudioAnalysisTools/Indices/IndexCalculate.cs +++ b/src/AudioAnalysisTools/Indices/IndexCalculate.cs @@ -156,26 +156,23 @@ public static IndexCalculateResult Analysis( //dspOutput1.NyquistBin = dspOutput1.AmplitudeSpectrogram.GetLength(1) - 1; //dspOutput1.FreqBinWidth = sampleRate / (double)dspOutput1.AmplitudeSpectrogram.GetLength(1) / 2; - // Linear or Octave or Mel frequency scale? Set Linear as default. + // Linear or Octave or Mel frequency scale? + // Set Linear as default. var freqScale = new FrequencyScale(nyquist: nyquist, frameSize: frameSize, hertzGridInterval: 1000); var freqScaleType = config.FrequencyScale; - bool octaveScale = freqScaleType == FreqScaleType.Linear125Octaves7Tones28Nyquist32000; + bool octaveScale = freqScaleType == FreqScaleType.OctaveDataReduction; bool melScale = freqScaleType == FreqScaleType.Mel; if (octaveScale) { - // only allow one octave scale at the moment - for Jasco marine recordings. - // ASSUME fixed Occtave scale - USEFUL ONLY FOR JASCO 64000sr MARINE RECORDINGS - // If you wish to use other octave scale types then need to put in the config file and and set up recovery here. - freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000); - - // Recalculate the spectrogram according to octave scale. This option works only when have high SR recordings. - dspOutput1.AmplitudeSpectrogram = OctaveFreqScale.AmplitudeSpectra( - dspOutput1.AmplitudeSpectrogram, - dspOutput1.WindowPower, - sampleRate, - epsilon, - freqScale); - dspOutput1.NyquistBin = dspOutput1.AmplitudeSpectrogram.GetLength(1) - 1; // ASSUMPTION!!! Nyquist is in top Octave bin - not necessarily true!! + // Only allow one octave scale at the moment, OctaveDataReduction. + // This is used to produced a reduced set of indices for content recognition. + // TODO: Much work required to generalise this for other Octave scales. + // Need to put scale type in the config file and and set up recovery here. + freqScale = new FrequencyScale(FreqScaleType.OctaveDataReduction); + + // Calculate the octave scaled spectrogram. + dspOutput1.AmplitudeSpectrogram = OctaveFreqScale.ConvertLinearSpectrogramToOctaveFreqScale(dspOutput1.AmplitudeSpectrogram, freqScale); + dspOutput1.NyquistBin = dspOutput1.AmplitudeSpectrogram.GetLength(1) - 1; // ASSUME Nyquist is in top Octave bin. } else if (melScale) { @@ -189,10 +186,6 @@ public static IndexCalculateResult Analysis( maxFreq); dspOutput1.NyquistBin = dspOutput1.AmplitudeSpectrogram.GetLength(1) - 1; - - // TODO: This doesn't make any sense, since the frequency width changes for each bin. Probably need to set this to NaN. - // TODO: Whatever uses this value below, should probably be changed to not be depending on it. - dspOutput1.FreqBinWidth = sampleRate / (double)dspOutput1.AmplitudeSpectrogram.GetLength(1) / 2; } // NOW EXTRACT SIGNAL FOR BACKGROUND NOISE CALCULATION @@ -214,15 +207,9 @@ public static IndexCalculateResult Analysis( // If necessary, recalculate the spectrogram according to octave scale. This option works only when have high SR recordings. if (octaveScale) { - // ASSUME fixed Occtave scale - USEFUL ONLY FOR JASCO 64000sr MARINE RECORDINGS - // If you wish to use other octave scale types then need to put in the config file and and set up recovery here. - dspOutput2.AmplitudeSpectrogram = OctaveFreqScale.AmplitudeSpectra( - dspOutput2.AmplitudeSpectrogram, - dspOutput2.WindowPower, - sampleRate, - epsilon, - freqScale); - dspOutput2.NyquistBin = dspOutput2.AmplitudeSpectrogram.GetLength(1) - 1; // ASSUMPTION!!! Nyquist is in top Octave bin - not necessarily true!! + // Calculate the octave scaled spectrogram. + dspOutput2.AmplitudeSpectrogram = OctaveFreqScale.ConvertLinearSpectrogramToOctaveFreqScale(dspOutput2.AmplitudeSpectrogram, freqScale); + dspOutput2.NyquistBin = dspOutput2.AmplitudeSpectrogram.GetLength(1) - 1; // ASSUME Nyquist is in top Octave bin. } } @@ -313,13 +300,8 @@ public static IndexCalculateResult Analysis( if (octaveScale) { // the above frequency bin bounds do not apply with octave scale. Need to recalculate them suitable for Octave scale recording. - lowFreqBound = freqScale.LinearBound; lowerBinBound = freqScale.GetBinIdForHerzValue(lowFreqBound); - - midFreqBound = 8000; // This value appears suitable for Jasco Marine recordings. Not much happens above 8kHz. - - //middleBinBound = freqScale.GetBinIdForHerzValue(midFreqBound); - middleBinBound = freqScale.GetBinIdInReducedSpectrogramForHerzValue(midFreqBound); + middleBinBound = freqScale.GetBinIdForHerzValue(midFreqBound); midBandBinCount = middleBinBound - lowerBinBound + 1; } diff --git a/src/AudioAnalysisTools/Indices/IndexCalculateConfig.cs b/src/AudioAnalysisTools/Indices/IndexCalculateConfig.cs index 457893402..fbba3dc2a 100644 --- a/src/AudioAnalysisTools/Indices/IndexCalculateConfig.cs +++ b/src/AudioAnalysisTools/Indices/IndexCalculateConfig.cs @@ -150,7 +150,13 @@ public FreqScaleType FrequencyScale switch (value) { case FreqScaleType.Linear: - case FreqScaleType.Octave: + //case FreqScaleType.OctaveStandard: + // this.frequencyScale = value; + // break; + //case FreqScaleType.OctaveCustom: + // this.frequencyScale = value; + // break; + case FreqScaleType.OctaveDataReduction: this.frequencyScale = value; break; default: diff --git a/src/AudioAnalysisTools/Indices/IndexMatrices.cs b/src/AudioAnalysisTools/Indices/IndexMatrices.cs index 725f67836..73fa8c156 100644 --- a/src/AudioAnalysisTools/Indices/IndexMatrices.cs +++ b/src/AudioAnalysisTools/Indices/IndexMatrices.cs @@ -395,7 +395,8 @@ public static Dictionary AddDerivedIndices(Dictionary, string>[] DrawSpectrogramsFromSpectralIndices( //NEXT produce SPECTROGRAM RIBBONS // These are useful for viewing multiple consecutive days of recording. // Get matrix for each of the three indices. - string[] keys1 = colorMap1.Split('-'); - double[,] indices1 = cs1.GetNormalisedSpectrogramMatrix(keys1[0]); - double[,] indices2 = cs1.GetNormalisedSpectrogramMatrix(keys1[1]); - double[,] indices3 = cs1.GetNormalisedSpectrogramMatrix(keys1[2]); - - var ribbon = LdSpectrogramRibbons.GetSpectrogramRibbon(indices1, indices2, indices3); - ribbon.Save(FilenameHelpers.AnalysisResultPath(outputDirectory, fileStem, colorMap1 + LdSpectrogramRibbons.SpectralRibbonTag, "png")); - - string[] keys2 = colorMap2.Split('-'); - indices1 = cs1.GetNormalisedSpectrogramMatrix(keys2[0]); - indices2 = cs1.GetNormalisedSpectrogramMatrix(keys2[1]); - indices3 = cs1.GetNormalisedSpectrogramMatrix(keys2[2]); - ribbon = LdSpectrogramRibbons.GetSpectrogramRibbon(indices1, indices2, indices3); - ribbon.Save(FilenameHelpers.AnalysisResultPath(outputDirectory, fileStem, colorMap2 + LdSpectrogramRibbons.SpectralRibbonTag, "png")); + // DO NOT draw ribbons if spectrograms are already reduced for data reduction. + if (image1.Height > 128) + { + string[] keys1 = colorMap1.Split('-'); + double[,] indices1 = cs1.GetNormalisedSpectrogramMatrix(keys1[0]); + double[,] indices2 = cs1.GetNormalisedSpectrogramMatrix(keys1[1]); + double[,] indices3 = cs1.GetNormalisedSpectrogramMatrix(keys1[2]); + + var ribbon = LdSpectrogramRibbons.GetSpectrogramRibbon(indices1, indices2, indices3); + ribbon.Save(FilenameHelpers.AnalysisResultPath(outputDirectory, fileStem, colorMap1 + LdSpectrogramRibbons.SpectralRibbonTag, "png")); + + string[] keys2 = colorMap2.Split('-'); + indices1 = cs1.GetNormalisedSpectrogramMatrix(keys2[0]); + indices2 = cs1.GetNormalisedSpectrogramMatrix(keys2[1]); + indices3 = cs1.GetNormalisedSpectrogramMatrix(keys2[2]); + ribbon = LdSpectrogramRibbons.GetSpectrogramRibbon(indices1, indices2, indices3); + ribbon.Save(FilenameHelpers.AnalysisResultPath(outputDirectory, fileStem, colorMap2 + LdSpectrogramRibbons.SpectralRibbonTag, "png")); + } // only return images if chromeless return imageChrome == ImageChrome.Without diff --git a/src/AudioAnalysisTools/SpectralPeakTracks.cs b/src/AudioAnalysisTools/SpectralPeakTracks.cs index 879ddf580..63bc167ce 100644 --- a/src/AudioAnalysisTools/SpectralPeakTracks.cs +++ b/src/AudioAnalysisTools/SpectralPeakTracks.cs @@ -6,6 +6,7 @@ namespace AudioAnalysisTools { using System.Collections.Generic; using System.Linq; + using System.Runtime; using AudioAnalysisTools.DSP; using AudioAnalysisTools.StandardSpectrograms; using AudioAnalysisTools.WavTools; @@ -388,8 +389,8 @@ public static SpectralPeakTracks CalculateSpectralPeakTracks(AudioRecording reco double[,] decibelSpectrogram; if (octaveScale) { - var freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000); - decibelSpectrogram = OctaveFreqScale.DecibelSpectra(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon, freqScale); + var freqScale = new FrequencyScale(FreqScaleType.OctaveStandard); + decibelSpectrogram = OctaveFreqScale.ConvertAmplitudeSpectrogramToFreqScaledDecibels(dspOutput.AmplitudeSpectrogram, dspOutput.WindowPower, sampleRate, epsilon, freqScale); } else { diff --git a/src/AudioAnalysisTools/StandardSpectrograms/AmplitudeSonogram.cs b/src/AudioAnalysisTools/StandardSpectrograms/AmplitudeSonogram.cs index 7fe7ae03e..bd3cc7f1e 100644 --- a/src/AudioAnalysisTools/StandardSpectrograms/AmplitudeSonogram.cs +++ b/src/AudioAnalysisTools/StandardSpectrograms/AmplitudeSonogram.cs @@ -13,7 +13,7 @@ namespace AudioAnalysisTools.StandardSpectrograms public class AmplitudeSonogram : BaseSonogram { public AmplitudeSonogram(SonogramConfig config, WavReader wav) - : base(config, wav, false) + : base(config, wav) { } diff --git a/src/AudioAnalysisTools/StandardSpectrograms/BaseSonogram.cs b/src/AudioAnalysisTools/StandardSpectrograms/BaseSonogram.cs index b6b245f9a..02387942d 100644 --- a/src/AudioAnalysisTools/StandardSpectrograms/BaseSonogram.cs +++ b/src/AudioAnalysisTools/StandardSpectrograms/BaseSonogram.cs @@ -18,45 +18,23 @@ namespace AudioAnalysisTools.StandardSpectrograms using SixLabors.ImageSharp.Processing; using TowseyLibrary; - /* /// - /// Sonogram type. + /// Base Sonogram. /// - public enum SonogramType + public abstract partial class BaseSonogram { /// - /// Ampltude Sonogram. - /// - Amplitude, - - /// - /// Spectral Sonogram. - /// - Spectral, - - /// - /// Cepstral Sonogram. - /// - Cepstral, - - /// - /// Acoustic Vectors Sonogram. + /// Gets or sets the config information. + /// The Configuration object should contain all the parameters required to construct an amplitude spectrogram given a recording. /// - AcousticVectors, + public SonogramConfig Configuration { get; set; } /// - /// Sobel Edge Sonogram. + /// Gets or sets the frequency scale information. + /// The FreqScale object should contain all the parameters required to convert the linear frquency scale of the amplitude spectrogram + /// into any reduced or non-linear frequency scale required. /// - SobelEdge, - } - */ - - /// - /// Base Sonogram. - /// - public abstract partial class BaseSonogram - { - public SonogramConfig Configuration { get; set; } + public FrequencyScale FreqScale { get; set; } public double MaxAmplitude { get; set; } @@ -124,21 +102,71 @@ public BaseSonogram(SonogramConfig config) /// /// Initializes a new instance of the class. - /// BASE CONSTRUCTOR - /// This constructor contains all steps required to prepare the amplitude spectrogram. - /// The third boolean parameter is simply a place-filler to ensure a different Constructor signature. - /// from the principle Constructor which follows. + /// BASE CONSTRUCTOR. + /// + /// config file to use. + /// wav. + public BaseSonogram(SonogramConfig config, WavReader wav) + : this(config) + { + this.InitialiseSpectrogram(wav); + + // this Make() call makes the desired spectrogram. + this.Make(this.Data); + } + + /// + /// Initializes a new instance of the class. + /// BASE CONSTRUCTOR. /// /// config file to use. /// wav. - /// filler boolean. Calculate in method. - public BaseSonogram(SonogramConfig config, WavReader wav, bool dummy) + public BaseSonogram(SonogramConfig config, FrequencyScale freqScale, WavReader wav) : this(config) { - // As of 28 March 2017 drop capability to get sub-band of spectrogram because was not being used. - // can be recovered later if desired. - //bool doExtractSubband = this.SubBandMinHz > 0 || this.SubBandMaxHz < this.NyquistFrequency; + // check that the frameWidths are consistent. + if (config.WindowSize != freqScale.WindowSize) + { + throw new Exception("BaseSonogram: CONSTRUCTOR ERROR: Inconsistency in Frequency Scale conversion data."); + } + + this.FreqScale = freqScale; + this.InitialiseSpectrogram(wav); + + if (this.FreqScale.ScaleType == FreqScaleType.Linear && this.FreqScale.WindowSize != this.FreqScale.FinalBinCount) + { + // convert the spectrogram frequency scale + this.Data = RescaleLinearFrequencyScale(this.Data, this.FreqScale); + } + + this.Make(this.Data); + } + + /// + /// Initializes a new instance of the class. + /// Use this BASE CONSTRUCTOR when already have the amplitude spectrogram in matrix. + /// Init normalised signal energy array but do nothing with it. This has to be done from outside. + /// + /// the spectrogram config. + /// data of an amplitude Spectrogram. + public BaseSonogram(SonogramConfig config, double[,] amplitudeSpectrogramData) + { + this.Configuration = config; + this.FrameCount = amplitudeSpectrogramData.GetLength(0); + this.SampleRate = this.Configuration.SampleRate; + + //init normalised signal energy array but do nothing with it. This has to be done from outside + this.DecibelsNormalised = new double[this.FrameCount]; + this.Data = amplitudeSpectrogramData; + } + + public abstract void Make(double[,] amplitudeM); + /// + /// This method creates the amplitude spectrogram. + /// + private void InitialiseSpectrogram(WavReader wav) + { this.Duration = wav.Time; double minDuration = 0.2; if (this.Duration.TotalSeconds < minDuration) @@ -156,8 +184,8 @@ public BaseSonogram(SonogramConfig config, WavReader wav, bool dummy) var recording = new AudioRecording(wav); var fftData = DSP_Frames.ExtractEnvelopeAndFfts( recording, - config.WindowSize, - config.WindowOverlap, + this.Configuration.WindowSize, + this.Configuration.WindowOverlap, this.Configuration.WindowFunction); // now recover required data @@ -169,98 +197,73 @@ public BaseSonogram(SonogramConfig config, WavReader wav, bool dummy) //init normalised signal energy array but do nothing with it. This has to be done from outside this.DecibelsNormalised = new double[this.FrameCount]; + this.Data = fftData.AmplitudeSpectrogram; - // ENERGY PER FRAME and NORMALISED dB PER FRAME AND SNR // currently DoSnr = true by default - if (config.DoSnr) + if (this.Configuration.DoSnr) { - // If the FractionOfHighEnergyFrames PRIOR to noise removal exceeds SNR.FractionalBoundForMode, - // then Lamel's noise removal algorithm may not work well. - if (fftData.FractionOfHighEnergyFrames > SNR.FractionalBoundForMode) - { - Log.WriteIfVerbose("\nWARNING ##############"); - Log.WriteIfVerbose( - "\t############### BaseSonogram(): This is a high energy recording. Percent of high energy frames = {0:f0} > {1:f0}%", - fftData.FractionOfHighEnergyFrames * 100, - SNR.FractionalBoundForMode * 100); - Log.WriteIfVerbose("\t############### Noise reduction algorithm may not work well in this instance!\n"); - } - - //AUDIO SEGMENTATION/END POINT DETECTION - based on Lamel et al - // Setting segmentation/endpoint detection parameters is broken as of September 2014. - // The next line is a hack replacement - EndpointDetectionConfiguration.SetDefaultSegmentationConfig(); - this.SigState = EndpointDetectionConfiguration.DetermineVocalisationEndpoints(this.DecibelsPerFrame, this.FrameStep); + this.CalculateSnrData(fftData.FractionOfHighEnergyFrames); } + } - /* AS OF 30 MARCH 2017, NO LONGER IMPLEMENT SUB-BAND THINGS, because not being used for years. - // EXTRACT REQUIRED FREQUENCY BAND - if (doExtractSubband) + public static double[,] RescaleLinearFrequencyScale(double[,] inputSpgram, FrequencyScale freqScale) + { + if (freqScale == null) { - this.Data = SpectrogramTools.ExtractFreqSubband( - this.Data, - this.subBandMinHz, - this.subBandMaxHz, - this.Configuration.DoMelScale, - this.Configuration.FreqBinCount, - this.FBinWidth); - this.CalculateSubbandSNR(this.Data); + throw new ArgumentNullException(nameof(freqScale)); } - */ - } - /// - /// Initializes a new instance of the class. - /// This BASE CONSTRUCTOR is the one most used - it automatically makes the Amplitude spectrum and - /// then, using a call to Make(), it converts the Amplitude matrix to a Spectrogram whose values are decibels. - /// - /// All parameters required to make spectrogram. - /// The recording whose spectrogram is to be made. - public BaseSonogram(SonogramConfig config, WavReader wav) - : this(config, wav, false) - { - this.Make(this.Data); - } + if (freqScale.ScaleType != FreqScaleType.Linear) + { + LoggedConsole.WriteLine("Require a Linear frequency scale for this method."); + throw new ArgumentNullException(nameof(freqScale)); + } - /// - /// Initializes a new instance of the class. - /// Use this BASE CONSTRUCTOR when already have the amplitude spectrogram in matrix. - /// Init normalised signal energy array but do nothing with it. This has to be done from outside. - /// - /// the spectrogram config. - /// data of an amplitude Spectrogram. - public BaseSonogram(SonogramConfig config, double[,] amplitudeSpectrogramData) - { - this.Configuration = config; - this.FrameCount = amplitudeSpectrogramData.GetLength(0); - this.SampleRate = this.Configuration.SampleRate; + // get the bin bounds for this scale type + var binBounds = freqScale.BinBounds; + int newBinCount = binBounds.GetLength(0); - //init normalised signal energy array but do nothing with it. This has to be done from outside - this.DecibelsNormalised = new double[this.FrameCount]; - this.Data = amplitudeSpectrogramData; - } + // set up the new spectrogram + int frameCount = inputSpgram.GetLength(0); - public abstract void Make(double[,] amplitudeM); + double[,] opM = new double[frameCount, newBinCount]; - /* AS OF 30 MARCH 2017, NO LONGER IMPLEMENT SUB-BAND THINGS, because not being used for years. - public void CalculateSubbandSNR(double[,] subband) - { - this.SnrSubband = new SNR(subband); //subband is the amplitude values + for (int row = 0; row < frameCount; row++) + { + //get each frame or spectrum in turn and rescale. + var linearSpectrum = MatrixTools.GetRow(inputSpgram, row); + var rescaledSpectrum = SpectrogramTools.RescaleSpectrumUsingFilterbank(binBounds, linearSpectrum); - //RECALCULATE DecibelsNormalised and dB REFERENCE LEVEL - need for MFCCs - this.DecibelsInSubband = SnrSubband.Decibels; - this.DecibelReference = SnrSubband.MaxReferenceDecibelsWrtNoise; - this.DecibelsNormalised = SnrSubband.NormaliseDecibelArray_ZeroOne(this.DecibelReference); + //return the spectrum to output spectrogram. + MatrixTools.SetRow(opM, row, rescaledSpectrum); + } - //RECALCULATE ENDPOINTS OF VOCALISATIONS - SigState = EndpointDetectionConfiguration.DetermineVocalisationEndpoints(this.DecibelsInSubband, this.FrameStep); + return opM; } - */ - public void SetTimeScale(TimeSpan duration) + /// + /// Calculates SNR, ENERGY PER FRAME and NORMALISED dB PER FRAME. + /// + private void CalculateSnrData(double highEnergyFraction) { - this.Duration = duration; + // If the FractionOfHighEnergyFrames PRIOR to noise removal exceeds SNR.FractionalBoundForMode, + // then Lamel's noise removal algorithm may not work well. + if (highEnergyFraction > SNR.FractionalBoundForMode) + { + Log.WriteIfVerbose("\nWARNING ##############"); + Log.WriteIfVerbose( + "\t############### BaseSonogram(): This is a high energy recording. Percent of high energy frames = {0:f0} > {1:f0}%", + highEnergyFraction * 100, + SNR.FractionalBoundForMode * 100); + Log.WriteIfVerbose("\t############### Noise reduction algorithm may not work well in this instance!\n"); + } + + //AUDIO SEGMENTATION/END POINT DETECTION - based on Lamel et al + // Setting segmentation/endpoint detection parameters is broken as of September 2014. + // The next line sets default parameters. + EndpointDetectionConfiguration.SetDefaultSegmentationConfig(); + this.SigState = EndpointDetectionConfiguration.DetermineVocalisationEndpoints(this.DecibelsPerFrame, this.FrameStep); } /// @@ -279,6 +282,17 @@ public Image GetImageFullyAnnotated(string title, Color? tag = null) return image; } + /// + /// This method fully annotates a short-time scale spectrogram. + /// The grid-lines are drawn according to indices in gridLineLocations. + /// Therefore the method will accept spectrograms with octave or any frequency scale. + /// The time scale is calculated from recording duration and width of image. + /// + /// The raw spectrogram image. + /// To go on the title bar. + /// A matrix of values. + /// Used to identify images??. + /// The annotated spectrogram. public Image GetImageFullyAnnotated(Image image, string title, int[,] gridLineLocations, Color? tag = null) { if (image == null) @@ -286,8 +300,12 @@ public Image GetImageFullyAnnotated(Image image, string title, int throw new ArgumentNullException(nameof(image)); } - FrequencyScale.DrawFrequencyLinesOnImage(image, gridLineLocations, includeLabels: true); + if (gridLineLocations != null) + { + FrequencyScale.DrawFrequencyLinesOnImage(image, gridLineLocations, includeLabels: true); + } + // collect all the images and combine. var titleBar = DrawTitleBarOfGrayScaleSpectrogram(title, image.Width, tag); var timeBmp = ImageTrack.DrawTimeTrack(this.Duration, image.Width); var list = new List> { titleBar, timeBmp, image, timeBmp }; @@ -321,8 +339,6 @@ public Image GetImage() public Image GetImage(bool doHighlightSubband, bool add1KHzLines, bool doMelScale) { - // doHighlightSubband function still working but have removed min/max bounds from user control. - // doHighlightSubband = true; int subBandMinHz = 1000; int subBandMaxHz = 9000; @@ -338,7 +354,7 @@ public Image GetImage(bool doHighlightSubband, bool add1KHzLines, bool do if (doMelScale) { - gridLineLocations = FrequencyScale.GetMelGridLineLocations(kHzInterval, this.NyquistFrequency, image.Height); + gridLineLocations = SpectrogramMelScale.GetMelGridLineLocations(kHzInterval, this.NyquistFrequency, image.Height); } else { @@ -351,103 +367,6 @@ public Image GetImage(bool doHighlightSubband, bool add1KHzLines, bool do return image; } - public Image GetImage_ReducedSonogramWithWidth(int width, bool drawGridLines) - { - var data = this.Data; //sonogram intensity values - int frameCount = data.GetLength(0); // Number of spectra in sonogram - - int factor = frameCount / width; - - if (factor <= 1) - { - return this.GetImage(); - } - - return this.GetImage_ReducedSonogram(factor, drawGridLines); - } - - public Image GetImage_ReducedSonogram(int factor, bool drawGridLines) - { - // double[] logEnergy = this.LogEnPerFrame; - var data = this.Data; //sonogram intensity values - int frameCount = data.GetLength(0); // Number of spectra in sonogram - int imageHeight = data.GetLength(1); // image ht = sonogram ht. Later include grid and score scales - int imageWidth = frameCount / factor; - int subSample = frameCount / imageWidth; - - //set up min, max, range for normalising of dB values - DataTools.MinMax(data, out double min, out double max); - double range = max - min; - - var grayScale = ImageTools.GrayScale(); - - //set up the 1000kHz scale - int herzInterval = 1000; - int[] vScale = FrequencyScale.CreateLinearYaxis(herzInterval, this.NyquistFrequency, imageHeight); //calculate location of 1000Hz grid lines - var bmp = new Image(imageWidth, imageHeight); - for (int w = 0; w < imageWidth; w++) - { - int start = w * subSample; - int end = ((w + 1) * subSample) - 1; - double maxE = -double.MaxValue; - int maxId = 0; - for (int x = start; x < end; x++) - { - // NOTE!@#$%^ This was changed from LogEnergy on 30th March 2009. - if (maxE < this.DecibelsPerFrame[x]) - { - maxE = this.DecibelsPerFrame[x]; - maxId = x; - } - } - - // have found the frame with max energy. Now draw its spectrum - // over all freq bins - for (int y = 0; y < data.GetLength(1); y++) - { - // NormaliseMatrixValues and bound the value - use min bound, max and 255 image intensity range - double value = (data[maxId, y] - min) / range; - int c = 255 - (int)Math.Floor(255.0 * value); //original version - if (c < 0) - { - c = 0; - } - else if (c >= 256) - { - c = 255; - } - - var col = grayScale[c]; - bmp[w, imageHeight - y - 1] = col; - } //end over all freq bins - - //set up grid color - - if (drawGridLines) - { - var gridCol = Color.Black; - if (w % 2 == 0) - { - gridCol = Color.Black; - } - - //over all Y-axis pixels - for (int p = 0; p < vScale.Length; p++) - { - if (vScale[p] == 0) - { - continue; - } - - int y = imageHeight - p; - bmp[w, y] = gridCol; - } - } - } - - return bmp; - } - private static bool IsInBand(int y, int? minFreq, int? maxFreq) { if (minFreq == null && maxFreq == null) @@ -791,49 +710,5 @@ public static Image DrawTitleBarOfGrayScaleSpectrogram(string title, int return bmp; } - - // mark of time scale according to scale. - public static Image DrawTimeTrack(TimeSpan offsetMinute, TimeSpan xAxisPixelDuration, TimeSpan xAxisTicInterval, TimeSpan labelInterval, int trackWidth, int trackHeight, string title) - { - var bmp = new Image(trackWidth, trackHeight); - bmp.Mutate(g => - { - g.Clear(Color.Black); - - double elapsedTime = offsetMinute.TotalSeconds; - double pixelDuration = xAxisPixelDuration.TotalSeconds; - int labelSecondsInterval = (int)labelInterval.TotalSeconds; - var whitePen = new Pen(Color.White, 1); - var stringFont = Drawing.Arial8; - - // for columns, draw in second lines - double xInterval = (int)(xAxisTicInterval.TotalMilliseconds / xAxisPixelDuration.TotalMilliseconds); - - // for pixels in the line - for (int x = 1; x < trackWidth; x++) - { - elapsedTime += pixelDuration; - if (x % xInterval <= pixelDuration) - { - g.DrawLine(whitePen, x, 0, x, trackHeight); - int totalSeconds = (int)Math.Round(elapsedTime); - if (totalSeconds % labelSecondsInterval == 0) - { - int minutes = totalSeconds / 60; - int seconds = totalSeconds % 60; - string time = $"{minutes}m{seconds}s"; - g.DrawTextSafe(time, stringFont, Color.White, new PointF(x + 1, 2)); //draw time - } - } - } - - g.DrawLine(whitePen, 0, 0, trackWidth, 0); //draw upper boundary - g.DrawLine(whitePen, 0, trackHeight - 1, trackWidth, trackHeight - 1); //draw lower boundary - g.DrawLine(whitePen, trackWidth, 0, trackWidth, trackHeight - 1); //draw right end boundary - g.DrawTextSafe(title, stringFont, Color.White, new PointF(4, 3)); - }); - - return bmp; - } } } \ No newline at end of file diff --git a/src/AudioAnalysisTools/StandardSpectrograms/BaseSonogramConfig.cs b/src/AudioAnalysisTools/StandardSpectrograms/BaseSonogramConfig.cs index 1e06c12c1..749482dbd 100644 --- a/src/AudioAnalysisTools/StandardSpectrograms/BaseSonogramConfig.cs +++ b/src/AudioAnalysisTools/StandardSpectrograms/BaseSonogramConfig.cs @@ -133,6 +133,7 @@ public static SonogramConfig Load(string configFile) /// /// Initializes a new instance of the class. /// Default Constructor - initialises a configuration with the default values. + /// This sets the default values for most spectrogram types, including Mel-scale and MFCC spectrograms. /// public SonogramConfig() { @@ -356,5 +357,10 @@ public double GetFrameOffset(int sampleRate) int step = DSP_Frames.FrameStep(this.WindowSize, this.WindowOverlap); return step / (double)sampleRate; } + + public static double CalculateFrameOverlap(int frameWidth, int frameStep) + { + return 1 - (frameStep / (double)frameWidth); + } } } \ No newline at end of file diff --git a/src/AudioAnalysisTools/StandardSpectrograms/EnergySpectrogram.cs b/src/AudioAnalysisTools/StandardSpectrograms/EnergySpectrogram.cs index e88c50647..e33305b12 100644 --- a/src/AudioAnalysisTools/StandardSpectrograms/EnergySpectrogram.cs +++ b/src/AudioAnalysisTools/StandardSpectrograms/EnergySpectrogram.cs @@ -41,6 +41,7 @@ public EnergySpectrogram(AmplitudeSpectrogram amplitudeSpectrogram) /// public double[,] Data { get; set; } + /* public void GetPsd(string path) { var psd = MatrixTools.GetColumnAverages(this.Data); @@ -55,7 +56,7 @@ public void GetPsd(string path) public void DrawLogPsd(string path) { var psd = MatrixTools.GetColumnAverages(this.Data); - var logPsd = DataTools.LogValues(psd); + var logPsd = DataTools.Log10Values(psd); FileTools.WriteArray2File(logPsd, path + ".csv"); GraphsAndCharts.DrawGraph(logPsd, "log PSD", new FileInfo(path)); @@ -66,8 +67,9 @@ public void DrawLogPsd(string path) public double[] GetLogPsd() { var psd = MatrixTools.GetColumnAverages(this.Data); - var logPsd = DataTools.LogValues(psd); + var logPsd = DataTools.Log10Values(psd); return logPsd; } + */ } } \ No newline at end of file diff --git a/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramCepstral.cs b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramCepstral.cs index cf51991ea..7b249d298 100644 --- a/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramCepstral.cs +++ b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramCepstral.cs @@ -7,7 +7,6 @@ namespace AudioAnalysisTools.StandardSpectrograms using System; using Acoustics.Tools.Wav; using AudioAnalysisTools.DSP; - using AudioAnalysisTools.WavTools; using TowseyLibrary; public class SpectrogramCepstral : BaseSonogram @@ -53,13 +52,6 @@ public SpectrogramCepstral(AmplitudeSonogram sg, int minHz, int maxHz) this.SampleRate = sg.SampleRate; this.SigState = sg.SigState; this.SnrData = sg.SnrData; - - // sub-band highlighting no longer available - //this.subBandMinHz = minHz; - //this.subBandMaxHz = maxHz; - //double[] noise_subband = BaseSonogram.ExtractModalNoiseSubband(this.SnrData.ModalNoiseProfile, minHz, maxHz, sg.doMelScale, - // sonogram.Configuration.FreqBinCount, sonogram.FBinWidth); - this.Data = SpectrogramTools.ExtractFreqSubband(sg.Data, minHz, maxHz, this.Configuration.DoMelScale, sg.Configuration.FreqBinCount, sg.FBinWidth); //converts amplitude matrix to cepstral sonogram @@ -133,49 +125,7 @@ protected static Tuple MakeCepstrogram(SonogramConfig confi // return matrix and full bandwidth modal noise profile return tuple2; } - - /// - /// Returns a Spectrogram and Cepstrogram from the passed recording. These are NOT noise reduced. - /// however, tuple also returns the modal noise and sub-band modal noise. - /// - public static Tuple GetAllSonograms(AudioRecording recording, SonogramConfig sonoConfig, int minHz, int maxHz) - { - int sr = recording.SampleRate; - bool doMelScale = sonoConfig.DoMelScale; - int ccCount = sonoConfig.mfccConfig.CcCount; - bool includeDelta = sonoConfig.mfccConfig.IncludeDelta; - bool includeDoubleDelta = sonoConfig.mfccConfig.IncludeDoubleDelta; - sonoConfig.SourceFName = recording.BaseName; - - var basegram = new AmplitudeSonogram(sonoConfig, recording.WavReader); - var sonogram = new SpectrogramStandard(basegram); //spectrogram has dim[N,257] - - Log.WriteLine("Signal: Duration={0}, Sample Rate={1}", sonogram.Duration, sr); - Log.WriteLine( - $"Frames: Size={0}, Count={1}, Duration={2:f1}ms, Overlap={5:f0}%, Offset={3:f1}ms, Frames/s={4:f1}", - sonogram.Configuration.WindowSize, - sonogram.FrameCount, - sonogram.FrameDuration * 1000, - sonogram.FrameStep * 1000, - sonogram.FramesPerSecond, - sonoConfig.WindowOverlap * 100); - - int binCount = (int)(maxHz / sonogram.FBinWidth) - (int)(minHz / sonogram.FBinWidth) + 1; - Log.WriteLine("Freqs : {0} Hz - {1} Hz. (Freq bin count = {2})", minHz, maxHz, binCount); - Log.WriteLine("MFCCs : doMelScale=" + doMelScale + "; ccCount=" + ccCount + "; includeDelta=" + includeDelta + "; includeDoubleDelta=" + includeDoubleDelta); - - //CALCULATE MODAL NOISE PROFILE - USER MAY REQUIRE IT FOR NOISE REDUCTION - double[] modalNoise = sonogram.SnrData.ModalNoiseProfile; - - //extract sub-band modal noise profile - double[] noiseSubband = SpectrogramTools.ExtractModalNoiseSubband(modalNoise, minHz, maxHz, doMelScale, sonogram.NyquistFrequency, sonogram.FBinWidth); - - // CALCULATE CEPSTRO-GRAM. //cepstrogram has dim[N,13] - var cepstrogram = new SpectrogramCepstral(basegram, minHz, maxHz); - var tuple = Tuple.Create(sonogram, cepstrogram, modalNoise, noiseSubband); - return tuple; - } - } // end class CepstralSonogram + } // class CepstralSonogram //################################################################################################################################## //################################################################################################################################## diff --git a/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramMelScale.cs b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramMelScale.cs new file mode 100644 index 000000000..505e5165e --- /dev/null +++ b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramMelScale.cs @@ -0,0 +1,171 @@ +// +// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group). +// + +namespace AudioAnalysisTools.StandardSpectrograms +{ + using System; + using Acoustics.Tools.Wav; + using AudioAnalysisTools.DSP; + using TowseyLibrary; + + public class SpectrogramMelScale : BaseSonogram + { + public SpectrogramMelScale(string configFile, WavReader wav) + : this(SonogramConfig.Load(configFile), wav) + { + } + + public SpectrogramMelScale(SonogramConfig config, WavReader wav) + : base(config, wav) + { + } + + public SpectrogramMelScale(AmplitudeSonogram sg) + : base(sg.Configuration) + { + this.Configuration = sg.Configuration; + this.DecibelsPerFrame = sg.DecibelsPerFrame; + this.DecibelsNormalised = sg.DecibelsNormalised; + this.Duration = sg.Duration; + this.FrameCount = sg.FrameCount; + this.DecibelReference = sg.DecibelReference; + this.MaxAmplitude = sg.MaxAmplitude; + this.SampleRate = sg.SampleRate; + this.SigState = sg.SigState; + this.SnrData = sg.SnrData; + this.Data = sg.Data; + + //converts amplitude matrix to Mel-frequency scale spectrogram + this.Make(this.Data); + } + + public SpectrogramMelScale(AmplitudeSonogram sg, int minHz, int maxHz) + : this(sg) + { + this.DecibelsPerFrame = sg.DecibelsPerFrame; + this.DecibelsNormalised = sg.DecibelsNormalised; + this.Duration = sg.Duration; + this.FrameCount = sg.FrameCount; + this.DecibelReference = sg.DecibelReference; + this.MaxAmplitude = sg.MaxAmplitude; + this.SampleRate = sg.SampleRate; + this.SigState = sg.SigState; + this.SnrData = sg.SnrData; + + //converts amplitude matrix to mel-frequency scale spectrogram + this.Data = SpectrogramTools.ExtractFreqSubband(sg.Data, minHz, maxHz, this.Configuration.DoMelScale, sg.Configuration.FreqBinCount, sg.FBinWidth); + this.Make(this.Data); + } + + /// + /// Converts amplitude matrix to mel-frequency scale spectrogram. + /// + /// Matrix of amplitude values. + public override void Make(double[,] amplitudeM) + { + var m = MakeMelScaleSpectrogram(this.Configuration, amplitudeM, this.SampleRate); + + //(iii) NOISE REDUCTION + var nrt = this.Configuration.NoiseReductionType; + var nrp = this.Configuration.NoiseReductionParameter; + var tuple1 = SNR.NoiseReduce(m, nrt, nrp); + + //store the full bandwidth modal noise profile + this.ModalNoiseProfile = tuple1.Item2; + this.Data = DataTools.normalise(tuple1.Item1); + } + + //################################################################################################################################## + + /// + /// NOTE!!!! The decibel array has been normalised in 0 - 1. + /// + public static double[,] MakeMelScaleSpectrogram(SonogramConfig config, double[,] matrix, int sampleRate) + { + double[,] m = matrix; + int nyquist = sampleRate / 2; + double epsilon = config.epsilon; + + //(i) APPLY FILTER BANK + //number of Hz bands = 2^N +1. Subtract DC bin + int fftBinCount = config.FreqBinCount; + + // Mel band count is set to 64 by default in BaseSonogramConfig class at line 158. + int bandCount = config.mfccConfig.FilterbankCount; + Log.WriteIfVerbose("ApplyFilterBank(): Dim prior to filter bank =" + matrix.GetLength(1)); + + //error check that filterBankCount < Number of FFT bins + if (bandCount > fftBinCount) + { + throw new Exception( + "## FATAL ERROR in MakeMelScaleSpectrogram(): Filterbank Count > number of FFT bins. (" + + bandCount + " > " + fftBinCount + ")\n\n"); + } + + //this is the filter count for full bandwidth 0-Nyquist. This number is trimmed proportionately to fit the required bandwidth. + m = MFCCStuff.MelFilterBank(m, bandCount, nyquist, 0, nyquist); + + Log.WriteIfVerbose("\tDim after filter bank=" + m.GetLength(1) + " (Max filter bank=" + bandCount + ")"); + + //(ii) CONVERT AMPLITUDES TO DECIBELS + m = MFCCStuff.DecibelSpectra(m, config.WindowPower, sampleRate, epsilon); //from spectrogram + return m; + } + + /// + /// WARNING: This method assigns DEFAULT parameters for MEL FREQUENCY SCALE. + /// It works only for "standard" recordings, i.e. sr = 22050 and frame = 512. + /// The default MelScale has 64 frequency bins. + /// The Linear500-octave scale is almost similar and has 66 frequency bands. + /// Currently, the MEL scale is implemented directly in MakeMelScaleSpectrogram() method. + /// + public static FrequencyScale GetStandardMelScale(FrequencyScale scale) + { + scale.ScaleType = FreqScaleType.Mel; + int sr = 22050; + int frameSize = 512; + + scale.Nyquist = sr / 2; + scale.FinalBinCount = 64; + scale.WindowSize = frameSize; + scale.LinearBound = 1000; + scale.BinBounds = MFCCStuff.GetMelBinBounds(scale.Nyquist, scale.FinalBinCount); + scale.HertzGridInterval = 1000; + scale.GridLineLocations = SpectrogramMelScale.GetMelGridLineLocations(scale.HertzGridInterval, scale.Nyquist, scale.FinalBinCount); + return scale; + } + + /// + /// THIS METHOD NEEDS TO BE DEBUGGED. HAS NOT BEEN USED IN YEARS! + /// Use this method to generate grid lines for mel scale image + /// Currently this method is only called from BaseSonogram.GetImage() when bool doMelScale = true; + /// Frequencyscale.Draw1kHzLines(Image{Rgb24} bmp, bool doMelScale, int nyquist, double freqBinWidth). + /// + public static int[,] GetMelGridLineLocations(int gridIntervalInHertz, int nyquistFreq, int melBinCount) + { + double maxMel = (int)MFCCStuff.Mel(nyquistFreq); + double melPerBin = maxMel / melBinCount; + + // There is no point drawing gridlines above 8 kHz because they are too close together. + int maxGridValue = 4000; + int gridCount = maxGridValue / gridIntervalInHertz; + + var gridLines = new int[gridCount, 2]; + + for (int f = 1; f <= gridCount; f++) + { + int herz = f * 1000; + int melValue = (int)MFCCStuff.Mel(herz); + int melBinId = (int)(melValue / melPerBin); + if (melBinId < melBinCount) + { + gridLines[f - 1, 0] = melBinId; + gridLines[f - 1, 1] = herz; + } + } + + return gridLines; + } + } +} \ No newline at end of file diff --git a/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramOctaveScale.cs b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramOctaveScale.cs new file mode 100644 index 000000000..26ab3d197 --- /dev/null +++ b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramOctaveScale.cs @@ -0,0 +1,40 @@ +// +// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group). +// + +namespace AudioAnalysisTools.StandardSpectrograms +{ + using System; + using Acoustics.Tools.Wav; + using AudioAnalysisTools.DSP; + using AudioAnalysisTools.WavTools; + using TowseyLibrary; + + public class SpectrogramOctaveScale : BaseSonogram + { + public SpectrogramOctaveScale(SonogramConfig config, FrequencyScale scale, WavReader wav) + : base(config, scale, wav) + { + } + + /// + /// Converts amplitude matrix to octave frequency scale spectrogram. + /// IMPORTANT: DOES NOISE REDUCTION after conversion. + /// + /// Matrix of amplitude values. + public override void Make(double[,] amplitudeM) + { + double windowPower = this.Configuration.WindowPower; + int sampleRate = this.SampleRate; + double epsilon = this.Configuration.epsilon; + double[,] m = OctaveFreqScale.ConvertAmplitudeSpectrogramToFreqScaledDecibels(amplitudeM, windowPower, sampleRate, epsilon, this.FreqScale); + + // Do noise reduction + var tuple = SNR.NoiseReduce(m, this.Configuration.NoiseReductionType, this.Configuration.NoiseReductionParameter); + this.Data = DataTools.normalise(tuple.Item1); + + //store the full bandwidth modal noise profile + this.ModalNoiseProfile = tuple.Item2; + } + } +} \ No newline at end of file diff --git a/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramStandard.cs b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramStandard.cs index 721adb390..53f0472e7 100644 --- a/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramStandard.cs +++ b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramStandard.cs @@ -7,11 +7,10 @@ namespace AudioAnalysisTools.StandardSpectrograms using System; using Acoustics.Tools.Wav; using AudioAnalysisTools.DSP; - using TowseyLibrary; public class SpectrogramStandard : BaseSonogram { - //There are five CONSTRUCTORS + //There are six CONSTRUCTORS /// /// Initializes a new instance of the class. @@ -28,6 +27,18 @@ public SpectrogramStandard(SonogramConfig config, WavReader wav) { } + /// + /// Initializes a new instance of the class. + /// Use this constructor when want to increase or decrease the linear frquency scale. + /// + /// Other info to construct the spectrogram. + /// The required new frequency scale. + /// The recording. + public SpectrogramStandard(SonogramConfig config, FrequencyScale scale, WavReader wav) + : base(config, scale, wav) + { + } + /// /// Initializes a new instance of the class. /// Use this constructor when you want to init a new Spectrogram by extracting portion of an existing sonogram. @@ -82,15 +93,6 @@ public SpectrogramStandard(SpectrogramStandard sg, double startTime, double endT this.DecibelsPerFrame[i] = sg.DecibelsPerFrame[startFrame + i]; } - /* - // Subband functionality no longer available. Discontinued March 2017 because not being used - // this.subBandMinHz = sg.subBandMinHz; //min freq (Hz) of the required subband - // this.subBandMaxHz = sg.subBandMaxHz; //max freq (Hz) of the required subband - //sg.SnrSubband { get; private set; } - //this.DecibelsInSubband = new double[frameCount]; // Normalised decibels in extracted freq band - //for (int i = 0; i < frameCount; i++) this.DecibelsInSubband[i] = sg.DecibelsInSubband[startFrame + i]; - */ - this.DecibelReference = sg.DecibelReference; // Used to NormaliseMatrixValues the dB values for MFCCs this.DecibelsNormalised = new double[frameCount]; for (int i = 0; i < frameCount; i++) @@ -142,52 +144,5 @@ public override void Make(double[,] amplitudeM) this.SnrData.ModalNoiseProfile = tuple.Item2; // store the full bandwidth modal noise profile } } - - /// - /// Normalise the dynamic range of spectrogram between 0dB and value of DynamicRange. - /// Also must adjust the SNR.DecibelsInSubband and this.DecibelsNormalised. - /// - public void NormaliseDynamicRange(double dynamicRange) - { - int frameCount = this.Data.GetLength(0); - int featureCount = this.Data.GetLength(1); - DataTools.MinMax(this.Data, out var minIntensity, out var maxIntensity); - double[,] newMatrix = new double[frameCount, featureCount]; - - //each row of matrix is a frame - for (int i = 0; i < frameCount; i++) - { - //each col of matrix is a feature - for (int j = 0; j < featureCount; j++) - { - newMatrix[i, j] = this.Data[i, j]; - } - } - - this.Data = newMatrix; - } - - //################################################################################################################################## - //####### STATIC METHODS ########################################################################################################### - //################################################################################################################################## - - public static SpectrogramStandard GetSpectralSonogram(string recordingFileName, int frameSize, double windowOverlap, int bitsPerSample, double windowPower, int sr, - TimeSpan duration, NoiseReductionType nrt, double[,] amplitudeSpectrogram) - { - SonogramConfig sonoConfig = new SonogramConfig - { - SourceFName = recordingFileName, - WindowSize = frameSize, - WindowOverlap = windowOverlap, - NoiseReductionType = nrt, - epsilon = Math.Pow(0.5, bitsPerSample - 1), - WindowPower = windowPower, - SampleRate = sr, - Duration = duration, - }; //default values config - var sonogram = new SpectrogramStandard(sonoConfig, amplitudeSpectrogram); - sonogram.SetTimeScale(duration); - return sonogram; - } - } //end of class SpectralSonogram : BaseSonogram + } } \ No newline at end of file diff --git a/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramTools.cs b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramTools.cs index fdb808e0e..0f0994642 100644 --- a/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramTools.cs +++ b/src/AudioAnalysisTools/StandardSpectrograms/SpectrogramTools.cs @@ -41,6 +41,101 @@ public static class SpectrogramTools return m; } + /// + /// Converts a single linear frequency scale spectrum to a reduced linear or non-linear frequency scale spectrum. + /// The scale conversion is defined in the transformMatrix variable. + /// The transformMatrix defines a filter bank. + /// Strictly speaking the input spectrum can be any vector of values but typically it should be linear spectrum. + /// + public static double[] RescaleSpectrumUsingFilterbank(int[,] transformMatrix, double[] linearSpectrum) + { + int length = transformMatrix.GetLength(0); + var rescaledSpectrum = new double[length]; + + // Fill in the first value of the rescaled spectrum + int lowIndex1 = transformMatrix[0, 0]; + int centreIndex1 = transformMatrix[0, 0]; + int highIndex1 = transformMatrix[1, 0]; + rescaledSpectrum[0] = FilterbankIntegral(linearSpectrum, lowIndex1, centreIndex1, highIndex1); + + // fill in remainder except last + for (int i = 1; i < length - 1; i++) + { + int lowIndex = transformMatrix[i - 1, 0]; + int centreIndex = transformMatrix[i, 0]; + int highIndex = transformMatrix[i + 1, 0]; + if (highIndex >= linearSpectrum.Length) + { + highIndex = linearSpectrum.Length - 1; + } + + rescaledSpectrum[i] = FilterbankIntegral(linearSpectrum, lowIndex, centreIndex, highIndex); + } + + // now fill in the last value of the rescaled spectrum + int lowIndex2 = transformMatrix[length - 2, 0]; + int centreIndex2 = transformMatrix[length - 1, 0]; + int highIndex2 = transformMatrix[length - 1, 0]; + rescaledSpectrum[length - 1] = FilterbankIntegral(linearSpectrum, lowIndex2, centreIndex2, highIndex2); + + return rescaledSpectrum; + } + + /// + /// Implements the integral for a single filter in a filterbank. + /// + /// THe input (linear) spectrum. + /// The lower bound of the filter. + /// Centre index of the filter. + /// Upper bound of the filter. + /// The integral of the filter. + public static double FilterbankIntegral(double[] spectrum, int lowIndex, int centreIndex, int highIndex) + { + // Below is the implicit calculation of the integral. + // let k = index into spectral vector. + // for all k < lowIndex, filterBank[k] = 0; + // for all k > highIndex, filterBank[k] = 0; + + // for all k in range (lowIndex <= k < centreIndex), filterBank[k] = (k-lowIndex) /(centreIndex - lowIndex) + // for all k in range (centreIndex <= k <= highIndex), filterBank[k] = (highIndex-k)/(highIndex - centreIndex) + + double area = 0.0; + double integral = 0.0; + int delta = centreIndex - lowIndex; + if (delta > 0) + { + for (int k = lowIndex; k < centreIndex; k++) + { + double weight = (k - lowIndex) / (double)delta; + integral += weight * spectrum[k]; + area += weight; + } + } + + integral += spectrum[centreIndex]; + area += 1.0; + + delta = highIndex - centreIndex; + if (delta > 0) + { + for (int k = centreIndex + 1; k <= highIndex; k++) + { + if (delta == 0) + { + continue; + } + + double weight = (highIndex - k) / (double)delta; + integral += weight * spectrum[k]; + area += weight; + } + } + + // Normalise to area of the triangular filter + integral /= area; + return integral; + } + /// /// This method draws a spectrogram with other useful information attached. /// @@ -650,6 +745,12 @@ public static double[] ExtractModalNoiseSubband(double[] modalNoise, int minHz, /// freq Scale. public static void DrawGridLinesOnImage(Image bmp, TimeSpan startOffset, TimeSpan fullDuration, TimeSpan xAxisTicInterval, FrequencyScale freqScale) { + if (freqScale.ScaleType == FreqScaleType.OctaveDataReduction) + { + //do not want grid lines in this case. + return; + } + FrequencyScale.DrawFrequencyLinesOnImage(bmp, freqScale, includeLabels: true); // We have stopped drawing temporal gridlines on these spectrograms. Create unnecessary clutter. diff --git a/src/TowseyLibrary/DataTools.cs b/src/TowseyLibrary/DataTools.cs index 34bed1be3..ff24c2063 100644 --- a/src/TowseyLibrary/DataTools.cs +++ b/src/TowseyLibrary/DataTools.cs @@ -1278,7 +1278,7 @@ public static int[] Peaks_CropToFirstAndLast(double[] data, double severity) } /// - /// Finds the local peaks in a vector. + /// Finds the local maxima in a vector. /// public static bool[] GetPeaks(double[] data) { @@ -2961,38 +2961,6 @@ public static double DotProduct(double[,] m1, double[,] m2) return sum; } - /// - /// convert values to Decibels. - /// Assume that all values are positive. - /// - /// matrix of positive power values. - public static double[,] DeciBels(double[,] m, out double min, out double max) - { - min = double.MaxValue; - max = -double.MaxValue; - - int rows = m.GetLength(0); - int cols = m.GetLength(1); - double[,] ret = new double[rows, cols]; - - for (int i = 0; i < rows; i++) - { - for (int j = 0; j < cols; j++) - { - double dBels = 10 * Math.Log10(m[i, j]); // convert power to decibels - - // NOTE: the decibels calculation should be a ratio. - // Here the ratio is implied ie relative to the power in the original normalised signal - // if (dBels <= min) min = dBels; - // else - // if (dBels >= max) max = dBels; - ret[i, j] = dBels; - } - } - - return ret; - } - public static double[] Order(double[] array, double[] order) { int length = array.Length; @@ -3254,8 +3222,10 @@ public static double[] NormaliseValues(int[] val, int normMin, int normMax) } /// - /// normalizes the passed array between 0,1. + /// Normalizes the passed array between 0,1. /// Ensures all values are positive. + /// Minimum array value set = 0.0. + /// Maximum array value set = 1.0. /// public static double[] normalise(double[] v) { @@ -3676,30 +3646,13 @@ public static double[] SquareRootOfValues(double[] data) return sqrtArray; } - public static double[] LogTransform(double[] data) - { - if (data == null) - { - return null; - } - - var logArray = new double[data.Length]; - for (int i = 0; i < data.Length; i++) - { - if (data[i] <= 0.0) - { - logArray[i] = 0.0; - } - else - { - logArray[i] = Math.Log10(1 + data[i]); - } - } - - return logArray; - } - - public static double[] LogValues(double[] data) + /// + /// Gets the log10 of an array of values. + /// Assume that all values are non-zero and positive. + /// + /// The input data array. + /// The smallest accepted non-zero value. + public static double[] Log10Values(double[] data, double epsilon) { if (data == null) { @@ -3709,9 +3662,9 @@ public static double[] LogValues(double[] data) var logArray = new double[data.Length]; for (int i = 0; i < data.Length; i++) { - if (data[i] <= 0.0) + if (data[i] <= epsilon) { - logArray[i] = -1000000.0; + logArray[i] = Math.Log10(epsilon); } else { diff --git a/src/TowseyLibrary/FFT.cs b/src/TowseyLibrary/FFT.cs index 9862c1b81..579c489a6 100644 --- a/src/TowseyLibrary/FFT.cs +++ b/src/TowseyLibrary/FFT.cs @@ -38,8 +38,8 @@ public FFT(int windowSize) /// /// Initializes a new instance of the class. - /// wrapper for FFT. - /// Window Power equals sum of squared window values. Default window is Hamming. + /// It is a wrapper for calling method that does the FFT. + /// Window Power equals sum of squared window values. /// public FFT(int windowSize, WindowFunc w) { @@ -76,7 +76,7 @@ public FFT(int windowSize, WindowFunc w) /// /// Invokes an FFT on the given data array. /// cdata contains the real and imaginary terms of the coefficients representing cos and sin components respectively. - /// cdata is symmetrical about terms 512 & 513. Can ignore all coefficients 512 and above . + /// cdata is symmetrical about terms 512 & 513. Can ignore all coefficients 512 and above. /// /// a single frame of signal values. public double[] Invoke(double[] data) @@ -100,7 +100,8 @@ public double[] Invoke(double[] data) } //do the FFT - four1(cdata); //array contains real and imaginary values + //The cdata array contains real and imaginary values + Four1(cdata); double[] f = new double[this.CoeffCount]; //array to contain amplitude data @@ -136,26 +137,26 @@ public double[] Invoke(double[] data, int offset) } //do the FFT - four1(cdata); + Four1(cdata); double[] f = new double[this.CoeffCount]; //array to contain amplitude data // calculate amplitude for (int i = 0; i < this.CoeffCount; i++) { - f[i] = hypot(cdata[2 * i], cdata[(2 * i) + 1]); + f[i] = Hypotenuse(cdata[2 * i], cdata[(2 * i) + 1]); } return f; } - private static double hypot(double x, double y) + private static double Hypotenuse(double x, double y) { return Math.Sqrt((x * x) + (y * y)); } // from http://www.nrbook.com/a/bookcpdf/c12-2.pdf - private static void four1(double[] data) + private static void Four1(double[] data) { int nn = data.Length / 2; int n = nn << 1; @@ -274,10 +275,8 @@ public double[] InvokeDotNetFFT(double[] data) return amplitude; } - // from http://en.wikipedia.org/wiki/Window_function - /// - /// The Hamming window reduces the immediate adjacent sidelobes (conmpared to the Hanning) but at the expense of increased + /// The Hamming window reduces the immediate adjacent sidelobes (compared to the Hanning) but at the expense of increased /// distal side-lobes. . /// public static readonly WindowFunc Hamming = (n, N) => @@ -288,6 +287,11 @@ public double[] InvokeDotNetFFT(double[] data) return 0.54 - (0.46 * Math.Cos(x)); //MATLAB code uses these value and says it is better! }; + /// + /// See comment for Hanning that compares Hamming with Hanning. + /// Our experience with analysis of environmental recordings, where we often up or down sample in order to calculate indices, + /// is that the HANNING window is to be preferred and it was made the default in July 2020. + /// public static readonly WindowFunc Hanning = (n, N) => { double x = 2.0 * Math.PI * n / (N - 1); diff --git a/src/TowseyLibrary/MatrixTools.cs b/src/TowseyLibrary/MatrixTools.cs index abc6f737a..6a4095282 100644 --- a/src/TowseyLibrary/MatrixTools.cs +++ b/src/TowseyLibrary/MatrixTools.cs @@ -519,53 +519,61 @@ public static double[] Matrix2Array(double[,] matrix) } /// - /// Convert the power values in a matrix of spectrogram values to Decibels. + /// Convert the power values in a matrix of spectrogram values to Decibels using: dB = 10*log10(power). /// Assume that all matrix values are positive i.e. due to prior noise removal. /// NOTE: This method also returns the min and max decibel values in the passed matrix. + /// NOTE: A decibel value should be a ratio. + /// Here the ratio is implied ie it is relative to the value of maximum power in the original normalised signal. /// /// matrix of positive power values. /// min value to be return by out. /// max value to be return by out. - public static double[,] Power2DeciBels(double[,] m, out double min, out double max) + public static double[,] SpectrogramPower2DeciBels(double[,] m, double powerEpsilon, out double min, out double max) { + //convert epsilon power to decibels + double minDecibels = 10 * Math.Log10(powerEpsilon); + min = double.MaxValue; max = -double.MaxValue; int rows = m.GetLength(0); int cols = m.GetLength(1); - double[,] ret = new double[rows, cols]; + double[,] returnM = new double[rows, cols]; for (int i = 0; i < rows; i++) { for (int j = 0; j < cols; j++) { - double dBels = 10 * Math.Log10(m[i, j]); //convert power to decibels - - //NOTE: the decibel calculation should be a ratio. - // Here the ratio is implied ie relative to the power in the original normalised signal - if (dBels <= min) + if (m[i, j] <= powerEpsilon) { - min = dBels; + returnM[i, j] = minDecibels; } else - if (dBels >= max) { - max = dBels; + returnM[i, j] = 10 * Math.Log10(m[i, j]); } - ret[i, j] = dBels; + if (returnM[i, j] < min) + { + min = returnM[i, j]; + } + else + if (returnM[i, j] > max) + { + max = returnM[i, j]; + } } } - return ret; + return returnM; } /// - /// Convert the power values in a matrix of spectrogram values to Decibels. + /// Convert the Decibels values in a matrix of spectrogram values to power values. /// Assume that all matrix values are positive due to prior noise removal. /// /// matrix of positive Decibel values. - public static double[,] Decibels2Power(double[,] m) + public static double[,] SpectrogramDecibels2Power(double[,] m) { int rows = m.GetLength(0); int cols = m.GetLength(1); @@ -2026,8 +2034,10 @@ public static double DotProduct(double[,] m1, double[,] m2) } /// - /// Normalises a matrix so that all values below the passed MIN are truncated to 0 and all values greater than the - /// passed MAX are truncated to 1. + /// Normalises a matrix so that --- + /// all values LT passed MIN are truncated to 0 + /// and + /// all values GT passed MAX are truncated to 1. /// public static double[,] NormaliseInZeroOne(double[,] m, double truncateMin, double truncateMax) { @@ -2056,36 +2066,9 @@ public static double DotProduct(double[,] m1, double[,] m2) /// /// Normalises a matrix so that all values lie between 0 and 1. - /// - public static double[,] NormaliseInZeroOne(double[,] m) - { - int rows = m.GetLength(0); - int cols = m.GetLength(1); - DataTools.MinMax(m, out var min, out var max); - double range = max - min; - double[,] m2Return = new double[rows, cols]; - for (int r = 0; r < rows; r++) - { - for (int c = 0; c < cols; c++) - { - m2Return[r, c] = (m[r, c] - min) / range; - if (m2Return[r, c] > 1.0) - { - m2Return[r, c] = 1.0; - } - else if (m2Return[r, c] < 0.0) - { - m2Return[r, c] = 0.0; - } - } - } - - // DataTools.MinMax(m2Return, out min, out max); - return m2Return; - } - - /// - /// Normalises a matrix so that all values lie between 0 and 1. + /// Min value in matrix set to 0.0. + /// Max value in matrix is set to 1.0. + /// Rerturns the min and the max. /// public static double[,] NormaliseInZeroOne(double[,] m, out double min, out double max) { @@ -2110,7 +2093,6 @@ public static double DotProduct(double[,] m1, double[,] m2) } } - // DataTools.MinMax(m2Return, out min, out max); return m2Return; } diff --git a/tests/Acoustics.Test/AnalysisPrograms/AnalyzeLongRecordings/TestAnalyzeLongRecording.cs b/tests/Acoustics.Test/AnalysisPrograms/AnalyzeLongRecordings/TestAnalyzeLongRecording.cs index 68baf5228..3136dc0ca 100644 --- a/tests/Acoustics.Test/AnalysisPrograms/AnalyzeLongRecordings/TestAnalyzeLongRecording.cs +++ b/tests/Acoustics.Test/AnalysisPrograms/AnalyzeLongRecordings/TestAnalyzeLongRecording.cs @@ -142,12 +142,13 @@ public void TestAnalyzeSr22050Recording() /// /// Tests the analysis of an artificial seven minute long recording consisting of five harmonics. - /// NOTE: The Acoustic indices are calculated from an Octave frequency scale spectrogram. + /// NOTE: The Acoustic indices are calculated from an Octave frequency scale used for data reduction. + /// Each vector of indices has only 19 elements. /// [TestMethod] - public void TestAnalyzeSr64000Recording() + public void TestAnalyzeSr22050RecordingDataReduction() { - int sampleRate = 64000; + int sampleRate = 22050; double duration = 420; // signal duration in seconds = 7 minutes int[] harmonics = { 500, 1000, 2000, 4000, 8000 }; var recording = DspFilters.GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Cosine); @@ -155,26 +156,15 @@ public void TestAnalyzeSr64000Recording() var recordingPath = this.outputDirectory.CombineFile(recordingName + ".wav"); WavWriter.WriteWavFileViaFfmpeg(recordingPath, recording.WavReader); - var fst = FreqScaleType.Linear125Octaves7Tones28Nyquist32000; + //var fst = FreqScaleType.Linear125OctaveTones28Nyquist32000; + //NOTE: As of 2020 August, the only debugged octave scale is for data reduction + var fst = FreqScaleType.OctaveDataReduction; + //int nyquist = sampleRate / 2; var freqScale = new FrequencyScale(fst); - /* - // draw the signal as spectrogram just for debugging purposes - // but can only draw a two minute spectrogram when sr=64000 - change duration above. - duration = 120; // if drawing sonogram, then set signal duration = 2 minutes - var sonogram = OctaveFreqScale.ConvertRecordingToOctaveScaleSonogram(recording, fst); - var sonogramImage = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM", freqScale.GridLineLocations); - var outputImagePath = this.outputDirectory.CombineFile("SignalSpectrogram_OctaveFreqScale.png"); - sonogramImage.Save(outputImagePath.FullName); - */ - // Now need to rewrite the config file with new parameter settings var configPath = PathHelper.ResolveConfigFile("Towsey.Acoustic.yml"); - // Convert the Config config to IndexCalculateConfig class and merge in the unnecesary parameters. - //Config configuration = Yaml.Deserialise(configPath); - //IndexCalculateConfig config = IndexCalculateConfig.GetConfig(configuration, false); - // because of difficulties in dealing with Config config files, just edit the text file!!!!! var configLines = File.ReadAllLines(configPath.FullName); configLines[configLines.IndexOf(x => x.StartsWith("IndexCalculationDuration: "))] = "IndexCalculationDuration: 15.0"; @@ -184,7 +174,7 @@ public void TestAnalyzeSr64000Recording() // the is the only octave scale currently functioning for IndexCalculate class configLines[configLines.IndexOf(x => x.StartsWith("FrameLength"))] = $"FrameLength: {freqScale.WindowSize}"; - configLines[configLines.IndexOf(x => x.StartsWith("ResampleRate: "))] = "ResampleRate: 64000"; + configLines[configLines.IndexOf(x => x.StartsWith("ResampleRate: "))] = "ResampleRate: 22050"; // write the edited Config file to temporary output directory var newConfigPath = this.outputDirectory.CombineFile("Towsey.Acoustic.yml"); @@ -232,7 +222,7 @@ public void TestAnalyzeSr64000Recording() var array = MatrixTools.GetRow(actualBgn, 0); Assert.AreEqual(28, actualBgn.RowLength()); - Assert.AreEqual(256, array.Length); + Assert.AreEqual(19, array.Length); // draw array just to check peaks are in correct places - just for debugging purposes var ldsBgnSpectrumFile = this.outputDirectory.CombineFile("Spectrum2.png"); @@ -265,17 +255,17 @@ public void TestAnalyzeSr64000Recording() analysisType: analysisType, indexSpectrograms: dictionaryOfSpectra); - // test number of images - should now be 23 + // test number of images - should now be 20 because not producing ribbons. listOfFiles = resultsDirectory.EnumerateFiles().ToArray(); pngCount = listOfFiles.Count(f => f.Name.EndsWith(".png")); - Assert.AreEqual(22, pngCount); + Assert.AreEqual(20, pngCount); var twoMapsImagePath = resultsDirectory.CombineFile(recordingName + "__2Maps.png"); var twoMapsImage = Image.Load(twoMapsImagePath.FullName); // image is (7*4) * 652 Assert.AreEqual(28, twoMapsImage.Width); - Assert.AreEqual(652, twoMapsImage.Height); + Assert.AreEqual(178, twoMapsImage.Height); } [TestMethod] diff --git a/tests/Acoustics.Test/AnalysisPrograms/SpectrogramGenerator/SpectrogramGeneratorTests.cs b/tests/Acoustics.Test/AnalysisPrograms/SpectrogramGenerator/SpectrogramGeneratorTests.cs index 2ee81be20..02dc720ac 100644 --- a/tests/Acoustics.Test/AnalysisPrograms/SpectrogramGenerator/SpectrogramGeneratorTests.cs +++ b/tests/Acoustics.Test/AnalysisPrograms/SpectrogramGenerator/SpectrogramGeneratorTests.cs @@ -20,13 +20,17 @@ namespace Acoustics.Test.AnalysisPrograms.SpectrogramGenerator [TestClass] public class SpectrogramGeneratorTests : GeneratedImageTest { + // There are currently eight spectrogram types plus the waveform. private const int Width = 1096; private const int Waveform = 154; private const int Spectrogram = 310; private const int SpectrogramNoiseRemoved = 310; private const int SpectrogramExperimental = 310; private const int SpectrogramDifference = 310; + private const int SpectrogramMel = 118; private const int Cepstral = 67; + private const int SpectrogramOctave = 157; + private const int RibbonSpectrogram = 110; private const int SpectrogramAmplitude = 310; private static readonly Dictionary All = new Dictionary() @@ -36,11 +40,14 @@ public class SpectrogramGeneratorTests : GeneratedImageTest { SpectrogramImageType.DecibelSpectrogramNoiseReduced, SpectrogramNoiseRemoved }, { SpectrogramImageType.Experimental, SpectrogramExperimental }, { SpectrogramImageType.DifferenceSpectrogram, SpectrogramDifference }, + { SpectrogramImageType.MelScaleSpectrogram, SpectrogramMel }, { SpectrogramImageType.CepstralSpectrogram, Cepstral }, + { SpectrogramImageType.OctaveScaleSpectrogram, SpectrogramOctave }, + { SpectrogramImageType.RibbonSpectrogram, RibbonSpectrogram }, { SpectrogramImageType.AmplitudeSpectrogramLocalContrastNormalization, SpectrogramAmplitude }, }; - private static readonly Func Name = x => x.Select(x => (int)x).Join("_"); + private static readonly Func Name = x => x.Select(imageType => (int)imageType).Join("_"); public SpectrogramGeneratorTests() @@ -91,7 +98,7 @@ public void TestAudio2Sonogram() [DynamicData(nameof(AllCombinations), DynamicDataDisplayName = nameof(AllCombinationsTestName))] public void TestAudio2SonogramCombinations(SpectrogramImageType[] images) { - const int OneSecondWidth = 24; + const int oneSecondWidth = 24; var testFile = PathHelper.ResolveAsset("1s_silence.wav"); var config = new SpectrogramGeneratorConfig() @@ -101,17 +108,12 @@ public void TestAudio2SonogramCombinations(SpectrogramImageType[] images) var result = GenerateSpectrogramImages(testFile, config, null); - // save image for debugging purposes - //var flag = images.Aggregate("", (seed, x) => $"{seed}_{(int)x}"); - //var path = this.TestOutputDirectory.CombineFile($"audio2sonogram_{flag}.png"); - //result.CompositeImage.Save(path); - this.ActualImage = result.CompositeImage; this.ExtraName = Name(images); // get expected height var expectedHeight = images.Select(imageType => All[imageType]).Sum(); - Assert.That.ImageIsSize(OneSecondWidth, expectedHeight, result.CompositeImage); + Assert.That.ImageIsSize(oneSecondWidth, expectedHeight, this.ActualImage); // ensure images are in correct order int y = 0; diff --git a/tests/Acoustics.Test/AudioAnalysisTools/DSP/FrequencyScaleTests.cs b/tests/Acoustics.Test/AudioAnalysisTools/DSP/FrequencyScaleTests.cs index 07cbc6869..e30b8298c 100644 --- a/tests/Acoustics.Test/AudioAnalysisTools/DSP/FrequencyScaleTests.cs +++ b/tests/Acoustics.Test/AudioAnalysisTools/DSP/FrequencyScaleTests.cs @@ -6,8 +6,8 @@ namespace Acoustics.Test.AudioAnalysisTools.DSP { using System; using System.Collections.Generic; + using System.Diagnostics; using System.IO; - using Acoustics.Shared; using Acoustics.Test.TestHelpers; using global::AudioAnalysisTools.DSP; using global::AudioAnalysisTools.StandardSpectrograms; @@ -15,6 +15,7 @@ namespace Acoustics.Test.AudioAnalysisTools.DSP using global::TowseyLibrary; using Microsoft.VisualStudio.TestTools.UnitTesting; using SixLabors.ImageSharp; + //using static global::TowseyLibrary.FFT; using Path = System.IO.Path; /// @@ -180,6 +181,401 @@ public void LinearFrequencyScale() Assert.AreEqual(1621, image.Width); } + /// + /// Test of the default Mel FREQ SCALE + /// Check it on pure tone spectrum. + /// By default, the split between linear and log is at 1000 Hz. + /// NOTE: This mel frequency scale class is not actually used to produce Mel scale spectrograms. + /// Currently, the creation of mel scale spectrograms bypasses use of this FrequencyScale class. + /// + [TestMethod] + public void TestMelFrequencyScale() + { + var fst = FreqScaleType.Mel; + var freqScale = new FrequencyScale(fst); + + // test contents of the bin bounds matrix. + Assert.AreEqual(1000, freqScale.LinearBound); + + // test contents of the octave bin bounds matrix. + int[,] melBinBounds = freqScale.BinBounds; + Assert.AreEqual(64, melBinBounds.GetLength(0)); + Assert.AreEqual(1922, melBinBounds[30, 1]); + Assert.AreEqual(2164, melBinBounds[32, 1]); + Assert.AreEqual(10033, melBinBounds[62, 1]); + Assert.AreEqual(10516, melBinBounds[63, 1]); + + // Check that freqScale.GridLineLocations are correct + var expected = new[,] + { + { 20, 1000 }, + { 30, 2000 }, + { 37, 3000 }, + { 43, 4000 }, + }; + + Assert.That.MatricesAreEqual(expected, freqScale.GridLineLocations); + } + + /// + /// Test making a me-frequency spectrogram using an artificial amplitude spectrogram as input. + /// NOTE: This method bypasses the use of the FrequencyScale class. + /// By default, the Mel scale used here is linear to 1000 Hz. + /// + [TestMethod] + public void TestMakeMelScaleSpectrogram() + { + int sampleRate = 22050; + int windowSize = 512; + int defaultMelBinCount = 64; + var recordingBitsPerSample = 16; + var epsilon = Math.Pow(0.5, recordingBitsPerSample - 1); + + // make fft class using rectangular window - just to get a value for Window power. + var fft = new FFT(windowSize); + + var config = new SonogramConfig + { + WindowSize = windowSize, + WindowOverlap = 0.0, + SourceFName = "Dummy", + NoiseReductionType = NoiseReductionType.None, + NoiseReductionParameter = 0.0, + epsilon = epsilon, + WindowPower = fft.WindowPower, + }; + + // make a dummy spectrogram + int frameCount = 100; + int binCount = windowSize / 2; + double[,] matrix = new double[frameCount, binCount]; + for (int i = 0; i < 100; i++) + { + matrix[i, 0] = 1.0; + matrix[i, 128] = 1.0; + matrix[i, 255] = 1.0; + } + + var spectrogramMatrix = SpectrogramMelScale.MakeMelScaleSpectrogram(config, matrix, sampleRate); + + //Assert.That.MatricesAreEqual(expected, actual); + Assert.AreEqual(frameCount, spectrogramMatrix.GetLength(0)); + Assert.AreEqual(defaultMelBinCount, spectrogramMatrix.GetLength(1)); + + Assert.AreEqual(-73.784157442630288, spectrogramMatrix[0, 0], 0.0001); + Assert.AreEqual(-157.82548429035143, spectrogramMatrix[0, 1], 0.0001); + Assert.AreEqual(-157.82548429035143, spectrogramMatrix[0, 32], 0.0001); + Assert.AreEqual(-157.82548429035143, spectrogramMatrix[0, 62], 0.0001); + Assert.AreEqual(-98.078506874942121, spectrogramMatrix[0, 63], 0.0001); + } + + /// + /// Test static method which returns bin index for a given frequency. + /// + [TestMethod] + public void TestAssignmentOfGridLinesForOctaveFrequencyScale() + { + int nyquist = 11025; + int linearBound = 500; + + // a contrived set of bin bounds. + int[,] octaveBinBounds = new[,] + { + { 2, 100 }, + { 4, 200 }, + { 6, 300 }, + { 8, 400 }, + { 12, 500 }, + { 14, 600 }, + { 16, 700 }, + { 18, 800 }, + { 20, 900 }, + { 23, 1000 }, + { 25, 1200 }, + { 30, 1400 }, + { 40, 1700 }, + { 46, 2001 }, + { 50, 2500 }, + { 55, 3000 }, + { 60, 3500 }, + { 69, 4002 }, + { 80, 5000 }, + { 84, 6000 }, + { 88, 7000 }, + { 92, 8004 }, + { 98, 10000 }, + { 105, 11000 }, + }; + + var gridLineLocations = OctaveFreqScale.GetGridLineLocations(nyquist, linearBound, octaveBinBounds); + + var expected = new[,] + { + { 4, 500 }, + { 9, 1000 }, + { 13, 2000 }, + { 17, 4000 }, + { 21, 8000 }, + }; + + Assert.That.MatricesAreEqual(expected, gridLineLocations); + } + + /// + /// Test static method which returns bin index for a given frequency. + /// + [TestMethod] + public void TestReturnOfBinIndex() + { + var freqScale = new FrequencyScale(FreqScaleType.OctaveDataReduction); + + // test contents of the octave bin bounds matrix. + int[,] octaveBinBounds = freqScale.BinBounds; + + Assert.AreEqual(19, octaveBinBounds.GetLength(0)); + + int hertzValue = 500; + var binId = freqScale.GetBinIdForHerzValue(hertzValue); + Assert.AreEqual(2, binId); + + hertzValue = 1000; + binId = freqScale.GetBinIdForHerzValue(hertzValue); + Assert.AreEqual(3, binId); + + hertzValue = 2000; + binId = freqScale.GetBinIdForHerzValue(hertzValue); + Assert.AreEqual(6, binId); + + hertzValue = 4000; + binId = freqScale.GetBinIdForHerzValue(hertzValue); + Assert.AreEqual(11, binId); + + hertzValue = 8000; + binId = freqScale.GetBinIdForHerzValue(hertzValue); + Assert.AreEqual(16, binId); + } + + /// + /// Test of the default standard split LINEAR-Octave FREQ SCALE + /// Check it on pure tone spectrum. + /// By default, the split between linear and octave is at 1000 Hz. + /// + [TestMethod] + public void TestSplitLinearOctaveFrequencyScale() + { + // Test default octave scale where default linear portion is 0-1000Hz. + //var fst = FreqScaleType.Linear125Octaves6Tones30Nyquist11025; + var fst = FreqScaleType.OctaveStandard; + int nyquist = 11025; + int frameSize = 512; + int linearBound = 1000; + int octaveToneCount = 32; + int gridInterval = 1000; + var freqScale = new FrequencyScale(fst, nyquist, frameSize, linearBound, octaveToneCount, gridInterval); + + Assert.AreEqual(freqScale.ScaleType, FreqScaleType.OctaveStandard); + + // test contents of the octave bin bounds matrix. + int[,] octaveBinBounds = freqScale.BinBounds; + Assert.AreEqual(103, octaveBinBounds.GetLength(0)); + Assert.AreEqual(991, octaveBinBounds[23, 1]); + Assert.AreEqual(1034, octaveBinBounds[24, 1]); + Assert.AreEqual(255, octaveBinBounds[102, 0]); + Assert.AreEqual(10982, octaveBinBounds[102, 1]); + + // Check that freqScale.GridLineLocations are correct + var expected = new[,] + { + { 23, 1000 }, + { 46, 2000 }, + { 69, 4000 }, + { 92, 8000 }, + }; + + Assert.That.MatricesAreEqual(expected, freqScale.GridLineLocations); + + // generate pure tone spectrum. + double[] linearSpectrum = new double[256]; + linearSpectrum[0] = 1.0; + linearSpectrum[128] = 1.0; + linearSpectrum[255] = 1.0; + + double[] octaveSpectrum = SpectrogramTools.RescaleSpectrumUsingFilterbank(octaveBinBounds, linearSpectrum); + + Assert.AreEqual(103, octaveSpectrum.Length); + Assert.AreEqual(1.0, octaveSpectrum[0]); + Assert.AreEqual(0.0, octaveSpectrum[1]); + Assert.AreEqual(0.0, octaveSpectrum[78]); + Assert.AreEqual(0.125, octaveSpectrum[79]); + Assert.AreEqual(0.125, octaveSpectrum[80]); + Assert.AreEqual(0.0, octaveSpectrum[81]); + Assert.AreEqual(0.0, octaveSpectrum[101]); + Assert.AreEqual(0.1666666666, octaveSpectrum[102], 0.000001); + } + + /// + /// METHOD TO CHECK IF Conversion of linear amplitude spectrum to Octave FREQ SCALE IS WORKING + /// Check it on spectrum derived from an assumed signal, SR=22050. + /// + [TestMethod] + public void TestConversionOfAmplitudeSpectrogramToOctaveScaled() + { + int sr = 22050; + int windowSize = 512; + + // set up the frequency scale. + var fst = FreqScaleType.OctaveStandard; + int nyquist = sr / 2; + int linearBound = 1000; + int octaveToneCount = 25; //not used + int gridInterval = 1000; + var freqScale = new FrequencyScale(fst, nyquist, windowSize, linearBound, octaveToneCount, gridInterval); + + // make a dummy spectrum + int binCount = windowSize / 2; + var linearSpectrum = new double[binCount]; + linearSpectrum[0] = 1.0; + linearSpectrum[64] = 1.0; + linearSpectrum[128] = 1.0; + linearSpectrum[192] = 1.0; + linearSpectrum[255] = 1.0; + + var octaveSpectrum = SpectrogramTools.RescaleSpectrumUsingFilterbank(freqScale.BinBounds, linearSpectrum); + Assert.AreEqual(1.000, octaveSpectrum[0], 0.0001); + Assert.AreEqual(0.000, octaveSpectrum[1], 0.0001); + Assert.AreEqual(0.000, octaveSpectrum[55], 0.0001); + Assert.AreEqual(0.250, octaveSpectrum[56], 0.0001); + Assert.AreEqual(0.250, octaveSpectrum[57], 0.0001); + Assert.AreEqual(0.000, octaveSpectrum[78], 0.0001); + Assert.AreEqual(0.125, octaveSpectrum[79], 0.0001); + Assert.AreEqual(0.125, octaveSpectrum[80], 0.0001); + Assert.AreEqual(0.000, octaveSpectrum[81], 0.0001); + Assert.AreEqual(0.000, octaveSpectrum[92], 0.0001); + Assert.AreEqual(0.166, octaveSpectrum[93], 0.001); + Assert.AreEqual(0.000, octaveSpectrum[94], 0.0001); + Assert.AreEqual(0.000, octaveSpectrum[101], 0.0001); + Assert.AreEqual(0.166, octaveSpectrum[102], 0.001); + + // Now test conversion of amplitude to power values. + // make fft class using rectangular window - just to get a value for Window power. + var fft = new FFT(windowSize); + double windowPower = fft.WindowPower; + var recordingBitsPerSample = 16; + var epsilon = Math.Pow(0.5, recordingBitsPerSample - 1); + + // set up matrix derived from previous spectrum to test conversion to power. + var octaveScaleM = new double[2, octaveSpectrum.Length]; + MatrixTools.SetRow(octaveScaleM, 0, octaveSpectrum); + MatrixTools.SetRow(octaveScaleM, 1, octaveSpectrum); + var powerSpectrogram = OctaveFreqScale.ConvertAmplitudeToPowerSpectrogram(octaveScaleM, windowPower, sr); + + Assert.AreEqual(1.771541E-07, powerSpectrogram[0, 0], 0.000001); + Assert.AreEqual(0.000, powerSpectrogram[1, 1], 0.0001); + Assert.AreEqual(0.000, powerSpectrogram[0, 55], 0.0001); + Assert.AreEqual(1.107213E-08, powerSpectrogram[1, 56], 0.000001); + Assert.AreEqual(1.107213E-08, powerSpectrogram[0, 57], 0.000001); + Assert.AreEqual(0.000, powerSpectrogram[1, 78], 0.0001); + Assert.AreEqual(2.768034E-09, powerSpectrogram[0, 79], 0.0001); + Assert.AreEqual(2.768034E-09, powerSpectrogram[1, 80], 0.0001); + Assert.AreEqual(0.000, powerSpectrogram[0, 81], 0.0001); + Assert.AreEqual(0.000, powerSpectrogram[1, 92], 0.0001); + Assert.AreEqual(4.920949E-09, powerSpectrogram[0, 93], 0.0001); + Assert.AreEqual(0.000, powerSpectrogram[1, 94], 0.0001); + Assert.AreEqual(0.000, powerSpectrogram[0, 101], 0.0001); + Assert.AreEqual(1.771541E-07, powerSpectrogram[1, 102], 0.001); + + // now test conversion from power to decibels. + // Convert the power values to log using: dB = 10*log(power) + var powerEpsilon = epsilon * epsilon / windowPower / sr; + var dbSpectrogram = MatrixTools.SpectrogramPower2DeciBels(powerSpectrogram, powerEpsilon, out var min, out var max); + Assert.AreEqual(-67.516485, dbSpectrogram[0, 0], 0.000001); + Assert.AreEqual(-160.83578, dbSpectrogram[1, 1], 0.0001); + Assert.AreEqual(-160.83578, dbSpectrogram[0, 55], 0.0001); + Assert.AreEqual(-79.557685, dbSpectrogram[1, 56], 0.000001); + Assert.AreEqual(-79.557685, dbSpectrogram[0, 57], 0.000001); + Assert.AreEqual(-160.83578, dbSpectrogram[1, 78], 0.0001); + Assert.AreEqual(-85.578285, dbSpectrogram[0, 79], 0.0001); + Assert.AreEqual(-85.578285, dbSpectrogram[1, 80], 0.0001); + Assert.AreEqual(-160.83578, dbSpectrogram[0, 81], 0.0001); + Assert.AreEqual(-160.83578, dbSpectrogram[1, 92], 0.0001); + Assert.AreEqual(-83.079510, dbSpectrogram[0, 93], 0.0001); + Assert.AreEqual(-160.83578, dbSpectrogram[1, 94], 0.0001); + Assert.AreEqual(-160.83578, dbSpectrogram[0, 101], 0.0001); + Assert.AreEqual(-83.079510, dbSpectrogram[1, 102], 0.0001); + } + + /// + /// Tests octave freq scale using an artificial recording containing five sine waves. + /// + [TestMethod] + public void TestFreqScaleOnArtificialSignal2() + { + int sr = 64000; + double duration = 30; // signal duration in seconds + int[] harmonics = { 500, 1000, 2000, 4000, 8000 }; + + //var fst = FreqScaleType.Linear125OctaveTones28Nyquist32000; + var fst = FreqScaleType.OctaveCustom; + int nyquist = sr / 2; + int frameSize = 16384; + int linearBound = 125; + int octaveToneCount = 28; + int gridInterval = 1000; + var freqScale = new FrequencyScale(fst, nyquist, frameSize, linearBound, octaveToneCount, gridInterval); + var outputImagePath = Path.Combine(this.outputDirectory.FullName, "Signal2_OctaveFreqScale.png"); + var recording = DspFilters.GenerateTestRecording(sr, duration, harmonics, WaveType.Cosine); + + // init the default sonogram config + var sonoConfig = new SonogramConfig + { + WindowSize = freqScale.WindowSize, + WindowOverlap = 0.2, + SourceFName = "Signal2", + NoiseReductionType = NoiseReductionType.None, + NoiseReductionParameter = 0.0, + }; + var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); + var windowPower = sonogram.Configuration.WindowPower; + var epsilon = sonogram.Configuration.epsilon; + sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToFreqScaledDecibels(sonogram.Data, windowPower, sr, epsilon, freqScale); + + // pick a row, any row - they should all be the same. + var oneSpectrum = MatrixTools.GetRow(sonogram.Data, 40); + var peaks = DataTools.GetPeaks(oneSpectrum); + + var peakIds = new List(); + for (int i = 5; i < peaks.Length - 5; i++) + { + if (peaks[i]) + { + int peakId = freqScale.BinBounds[i, 0]; + peakIds.Add(peakId); + LoggedConsole.WriteLine($"Spectral peak located in bin {peakId}, Herz={freqScale.BinBounds[i, 1]}"); + } + } + + foreach (int h in harmonics) + { + LoggedConsole.WriteLine($"Harmonic {h}Herz should be in bin {freqScale.GetBinIdForHerzValue(h)}"); + } + + Assert.AreEqual(5, peakIds.Count); + Assert.AreEqual(129, peakIds[0]); + Assert.AreEqual(257, peakIds[1]); + Assert.AreEqual(513, peakIds[2]); + Assert.AreEqual(1025, peakIds[3]); + Assert.AreEqual(2049, peakIds[4]); + + var image = sonogram.GetImage(); + string title = $"Spectrogram of Harmonics: {DataTools.Array2String(harmonics)} SR={sr} Window={freqScale.WindowSize}"; + image = sonogram.GetImageFullyAnnotated(image, title, freqScale.GridLineLocations); + image.Save(outputImagePath); + + // Check that image dimensions are correct + Assert.AreEqual(146, image.Width); + Assert.AreEqual(311, image.Height); + } + /// /// METHOD TO CHECK IF Octave FREQ SCALE IS WORKING /// Check it on standard one minute recording, SR=22050. @@ -187,16 +583,19 @@ public void LinearFrequencyScale() [TestMethod] public void OctaveFrequencyScale1() { + //var opFileStem = "BAC2_20071008"; var recordingPath = PathHelper.ResolveAsset("Recordings", "BAC2_20071008-085040.wav"); - var opFileStem = "BAC2_20071008"; var outputDir = this.outputDirectory; var outputImagePath = Path.Combine(outputDir.FullName, "Octave1ScaleSonogram.png"); - var recording = new AudioRecording(recordingPath); - // default octave scale - var fst = FreqScaleType.Linear125Octaves6Tones30Nyquist11025; - var freqScale = new FrequencyScale(fst); + var fst = FreqScaleType.OctaveCustom; + int nyquist = recording.SampleRate / 2; + int frameSize = 16384; + int linearBound = 125; + int octaveToneCount = 25; + int gridInterval = 1000; + var freqScale = new FrequencyScale(fst, nyquist, frameSize, linearBound, octaveToneCount, gridInterval); var sonoConfig = new SonogramConfig { @@ -207,61 +606,65 @@ public void OctaveFrequencyScale1() NoiseReductionParameter = 0.0, }; - // Generate amplitude sonogram and then conver to octave scale - var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); + // Generate amplitude sonogram and then convert to octave scale + var amplitudeSpectrogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); - // THIS IS THE CRITICAL LINE. COULD DO WITH SEPARATE UNIT TEST - sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale); + // THIS IS THE CRITICAL LINE. It has separate UNIT TEST above. + double windowPower = amplitudeSpectrogram.Configuration.WindowPower; + int sampleRate = amplitudeSpectrogram.SampleRate; + double epsilon = amplitudeSpectrogram.Configuration.epsilon; + amplitudeSpectrogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToFreqScaledDecibels(amplitudeSpectrogram.Data, windowPower, sampleRate, epsilon, freqScale); // DO NOISE REDUCTION - var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data); - sonogram.Data = dataMatrix; - sonogram.Configuration.WindowSize = freqScale.WindowSize; + var dataMatrix = SNR.NoiseReduce_Standard(amplitudeSpectrogram.Data); + amplitudeSpectrogram.Data = dataMatrix; + amplitudeSpectrogram.Configuration.WindowSize = freqScale.WindowSize; - var image = sonogram.GetImageFullyAnnotated(sonogram.GetImage(), "SPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); + var image = amplitudeSpectrogram.GetImageFullyAnnotated(amplitudeSpectrogram.GetImage(), "SPECTROGRAM: " + fst.ToString(), freqScale.GridLineLocations); image.Save(outputImagePath); + //string binLiteral = freqScale.BinBounds.PrintAsLiteral(); #pragma warning disable SA1500 // Braces for multi-line statements should not share line var expectedBinBounds = new[,] { - { 0, 0 }, { 1, 3 }, { 2, 5 }, { 3, 8 }, { 4, 11 }, { 5, 13 }, { 6, 16 }, { 7, 19 }, { 8, 22 }, - { 9, 24 }, { 10, 27 }, { 11, 30 }, { 12, 32 }, { 13, 35 }, { 14, 38 }, { 15, 40 }, { 16, 43 }, - { 17, 46 }, { 18, 48 }, { 19, 51 }, { 20, 54 }, { 21, 57 }, { 22, 59 }, { 23, 62 }, { 24, 65 }, - { 25, 67 }, { 26, 70 }, { 27, 73 }, { 28, 75 }, { 29, 78 }, { 30, 81 }, { 31, 83 }, { 32, 86 }, - { 33, 89 }, { 34, 92 }, { 35, 94 }, { 36, 97 }, { 37, 100 }, { 38, 102 }, { 39, 105 }, { 40, 108 }, - { 41, 110 }, { 42, 113 }, { 43, 116 }, { 44, 118 }, { 45, 121 }, { 46, 124 }, { 47, 127 }, { 48, 129 }, - { 49, 132 }, { 50, 135 }, { 51, 137 }, { 52, 140 }, { 53, 143 }, { 55, 148 }, { 56, 151 }, { 57, 153 }, - { 58, 156 }, { 59, 159 }, { 61, 164 }, { 62, 167 }, { 63, 170 }, { 65, 175 }, { 66, 178 }, { 68, 183 }, - { 69, 186 }, { 71, 191 }, { 72, 194 }, { 74, 199 }, { 75, 202 }, { 77, 207 }, { 79, 213 }, { 80, 215 }, - { 82, 221 }, { 84, 226 }, { 86, 231 }, { 88, 237 }, { 89, 240 }, { 91, 245 }, { 93, 250 }, { 95, 256 }, - { 97, 261 }, { 100, 269 }, { 102, 275 }, { 104, 280 }, { 106, 285 }, { 109, 293 }, { 111, 299 }, - { 113, 304 }, { 116, 312 }, { 118, 318 }, { 121, 326 }, { 124, 334 }, { 126, 339 }, { 129, 347 }, - { 132, 355 }, { 135, 363 }, { 138, 371 }, { 141, 380 }, { 144, 388 }, { 147, 396 }, { 150, 404 }, - { 153, 412 }, { 157, 423 }, { 160, 431 }, { 164, 441 }, { 167, 450 }, { 171, 460 }, { 175, 471 }, - { 178, 479 }, { 182, 490 }, { 186, 501 }, { 190, 511 }, { 194, 522 }, { 199, 536 }, { 203, 546 }, - { 208, 560 }, { 212, 571 }, { 217, 584 }, { 221, 595 }, { 226, 608 }, { 231, 622 }, { 236, 635 }, - { 241, 649 }, { 247, 665 }, { 252, 678 }, { 258, 694 }, { 263, 708 }, { 269, 724 }, { 275, 740 }, - { 281, 756 }, { 287, 773 }, { 293, 789 }, { 300, 807 }, { 306, 824 }, { 313, 842 }, { 320, 861 }, - { 327, 880 }, { 334, 899 }, { 341, 918 }, { 349, 939 }, { 356, 958 }, { 364, 980 }, { 372, 1001 }, - { 380, 1023 }, { 388, 1044 }, { 397, 1069 }, { 406, 1093 }, { 415, 1117 }, { 424, 1141 }, { 433, 1165 }, - { 442, 1190 }, { 452, 1217 }, { 462, 1244 }, { 472, 1270 }, { 482, 1297 }, { 493, 1327 }, { 504, 1357 }, - { 515, 1386 }, { 526, 1416 }, { 537, 1445 }, { 549, 1478 }, { 561, 1510 }, { 573, 1542 }, { 586, 1577 }, - { 599, 1612 }, { 612, 1647 }, { 625, 1682 }, { 639, 1720 }, { 653, 1758 }, { 667, 1795 }, { 682, 1836 }, - { 697, 1876 }, { 712, 1916 }, { 728, 1960 }, { 744, 2003 }, { 760, 2046 }, { 776, 2089 }, { 793, 2134 }, - { 811, 2183 }, { 829, 2231 }, { 847, 2280 }, { 865, 2328 }, { 884, 2379 }, { 903, 2431 }, { 923, 2484 }, - { 943, 2538 }, { 964, 2595 }, { 985, 2651 }, { 1007, 2710 }, { 1029, 2770 }, { 1051, 2829 }, - { 1074, 2891 }, { 1098, 2955 }, { 1122, 3020 }, { 1146, 3085 }, { 1172, 3155 }, { 1197, 3222 }, - { 1223, 3292 }, { 1250, 3365 }, { 1278, 3440 }, { 1305, 3513 }, { 1334, 3591 }, { 1363, 3669 }, - { 1393, 3749 }, { 1424, 3833 }, { 1455, 3916 }, { 1487, 4002 }, { 1519, 4089 }, { 1552, 4177 }, - { 1586, 4269 }, { 1621, 4363 }, { 1657, 4460 }, { 1693, 4557 }, { 1730, 4657 }, { 1768, 4759 }, - { 1806, 4861 }, { 1846, 4969 }, { 1886, 5076 }, { 1928, 5190 }, { 1970, 5303 }, { 2013, 5418 }, - { 2057, 5537 }, { 2102, 5658 }, { 2148, 5782 }, { 2195, 5908 }, { 2243, 6037 }, { 2292, 6169 }, - { 2343, 6307 }, { 2394, 6444 }, { 2446, 6584 }, { 2500, 6729 }, { 2555, 6877 }, { 2610, 7025 }, - { 2668, 7181 }, { 2726, 7337 }, { 2786, 7499 }, { 2847, 7663 }, { 2909, 7830 }, { 2973, 8002 }, - { 3038, 8177 }, { 3104, 8355 }, { 3172, 8538 }, { 3242, 8726 }, { 3313, 8917 }, { 3385, 9111 }, - { 3459, 9310 }, { 3535, 9515 }, { 3612, 9722 }, { 3691, 9935 }, { 3772, 10153 }, { 3855, 10376 }, - { 3939, 10602 }, { 4026, 10837 }, { 4095, 11022 }, - { 4095, 11022 }, + { 0, 0 }, { 1, 1 }, { 2, 3 }, { 3, 4 }, { 4, 5 }, { 5, 7 }, { 6, 8 }, { 7, 9 }, { 8, 11 }, + { 9, 12 }, { 10, 13 }, { 11, 15 }, { 12, 16 }, { 13, 17 }, { 14, 19 }, { 15, 20 }, { 16, 22 }, + { 17, 23 }, { 18, 24 }, { 19, 26 }, { 20, 27 }, { 21, 28 }, { 22, 30 }, { 23, 31 }, { 24, 32 }, + { 25, 34 }, { 26, 35 }, { 27, 36 }, { 28, 38 }, { 29, 39 }, { 30, 40 }, { 31, 42 }, { 32, 43 }, + { 33, 44 }, { 34, 46 }, { 35, 47 }, { 36, 48 }, { 37, 50 }, { 38, 51 }, { 39, 52 }, { 40, 54 }, + { 41, 55 }, { 42, 57 }, { 43, 58 }, { 44, 59 }, { 45, 61 }, { 46, 62 }, { 47, 63 }, { 48, 65 }, + { 49, 66 }, { 50, 67 }, { 51, 69 }, { 52, 70 }, { 53, 71 }, { 54, 73 }, { 55, 74 }, { 56, 75 }, + { 57, 77 }, { 58, 78 }, { 59, 79 }, { 60, 81 }, { 61, 82 }, { 62, 83 }, { 63, 85 }, { 64, 86 }, + { 65, 87 }, { 66, 89 }, { 67, 90 }, { 68, 92 }, { 69, 93 }, { 70, 94 }, { 71, 96 }, { 72, 97 }, + { 73, 98 }, { 74, 100 }, { 75, 101 }, { 76, 102 }, { 77, 104 }, { 78, 105 }, { 79, 106 }, { 80, 108 }, + { 81, 109 }, { 82, 110 }, { 83, 112 }, { 84, 113 }, { 85, 114 }, { 86, 116 }, { 87, 117 }, { 88, 118 }, + { 89, 120 }, { 90, 121 }, { 91, 122 }, { 92, 124 }, { 93, 125 }, { 96, 129 }, { 99, 133 }, + { 101, 136 }, { 104, 140 }, { 107, 144 }, { 110, 148 }, { 113, 152 }, { 116, 156 }, { 120, 161 }, + { 123, 166 }, { 127, 171 }, { 130, 175 }, { 134, 180 }, { 137, 184 }, { 141, 190 }, { 145, 195 }, + { 149, 201 }, { 153, 206 }, { 158, 213 }, { 162, 218 }, { 167, 225 }, { 171, 230 }, { 176, 237 }, + { 181, 244 }, { 186, 250 }, { 191, 257 }, { 197, 265 }, { 202, 272 }, { 208, 280 }, { 214, 288 }, + { 220, 296 }, { 226, 304 }, { 232, 312 }, { 239, 322 }, { 246, 331 }, { 253, 340 }, { 260, 350 }, + { 267, 359 }, { 274, 369 }, { 282, 380 }, { 290, 390 }, { 298, 401 }, { 306, 412 }, { 315, 424 }, + { 324, 436 }, { 333, 448 }, { 342, 460 }, { 352, 474 }, { 362, 487 }, { 372, 501 }, { 382, 514 }, + { 393, 529 }, { 404, 544 }, { 416, 560 }, { 427, 575 }, { 439, 591 }, { 452, 608 }, { 464, 624 }, + { 477, 642 }, { 491, 661 }, { 505, 680 }, { 519, 698 }, { 533, 717 }, { 548, 738 }, { 564, 759 }, + { 579, 779 }, { 596, 802 }, { 612, 824 }, { 630, 848 }, { 647, 871 }, { 666, 896 }, { 684, 921 }, + { 703, 946 }, { 723, 973 }, { 744, 1001 }, { 764, 1028 }, { 786, 1058 }, { 808, 1087 }, { 831, 1118 }, + { 854, 1149 }, { 878, 1182 }, { 903, 1215 }, { 928, 1249 }, { 954, 1284 }, { 981, 1320 }, + { 1009, 1358 }, + { 1037, 1396 }, { 1066, 1435 }, { 1096, 1475 }, { 1127, 1517 }, { 1158, 1558 }, { 1191, 1603 }, + { 1224, 1647 }, { 1259, 1694 }, { 1294, 1741 }, { 1331, 1791 }, { 1368, 1841 }, { 1406, 1892 }, + { 1446, 1946 }, { 1487, 2001 }, { 1528, 2056 }, { 1571, 2114 }, { 1615, 2174 }, { 1661, 2235 }, + { 1708, 2299 }, { 1756, 2363 }, { 1805, 2429 }, { 1856, 2498 }, { 1908, 2568 }, { 1961, 2639 }, + { 2017, 2715 }, { 2073, 2790 }, { 2131, 2868 }, { 2191, 2949 }, { 2253, 3032 }, { 2316, 3117 }, + { 2381, 3204 }, { 2448, 3295 }, { 2517, 3387 }, { 2588, 3483 }, { 2661, 3581 }, { 2735, 3681 }, + { 2812, 3784 }, { 2891, 3891 }, { 2973, 4001 }, { 3056, 4113 }, { 3142, 4229 }, { 3230, 4347 }, + { 3321, 4469 }, { 3415, 4596 }, { 3511, 4725 }, { 3609, 4857 }, { 3711, 4994 }, { 3815, 5134 }, + { 3922, 5278 }, { 4033, 5428 }, { 4146, 5580 }, { 4262, 5736 }, { 4382, 5897 }, { 4505, 6063 }, + { 4632, 6234 }, { 4762, 6409 }, { 4896, 6589 }, { 5034, 6775 }, { 5175, 6965 }, { 5321, 7161 }, + { 5470, 7362 }, { 5624, 7569 }, { 5782, 7782 }, { 5945, 8001 }, { 6112, 8226 }, { 6284, 8457 }, + { 6460, 8694 }, { 6642, 8939 }, { 6829, 9191 }, { 7021, 9449 }, { 7218, 9714 }, { 7421, 9987 }, + { 7630, 10269 }, { 7844, 10557 }, { 8191, 11024 }, }; #pragma warning restore SA1500 // Braces for multi-line statements should not share line @@ -270,20 +673,20 @@ public void OctaveFrequencyScale1() // Check that freqScale.GridLineLocations are correct var expected = new[,] { - { 46, 125 }, - { 79, 250 }, - { 111, 500 }, - { 143, 1000 }, - { 175, 2000 }, - { 207, 4000 }, - { 239, 8000 }, + { 93, 125 }, + { 118, 250 }, + { 143, 500 }, + { 168, 1000 }, + { 193, 2000 }, + { 218, 4000 }, + { 243, 8000 }, }; Assert.That.MatricesAreEqual(expected, freqScale.GridLineLocations); // Check that image dimensions are correct - Assert.AreEqual(645, image.Width); - Assert.AreEqual(310, image.Height); + Assert.AreEqual(321, image.Width); + Assert.AreEqual(309, image.Height); } /// @@ -296,14 +699,21 @@ public void OctaveFrequencyScale1() [TestMethod] public void OctaveFrequencyScale2() { + //var opFileStem = "JascoMarineGBR1"; var recordingPath = PathHelper.ResolveAsset("Recordings", "MarineJasco_AMAR119-00000139.00000139.Chan_1-24bps.1375012796.2013-07-28-11-59-56-16bit-60sec.wav"); - var opFileStem = "JascoMarineGBR1"; var outputDir = this.outputDirectory; var outputImagePath = Path.Combine(this.outputDirectory.FullName, "Octave2ScaleSonogram.png"); var recording = new AudioRecording(recordingPath); - var fst = FreqScaleType.Linear125Octaves7Tones28Nyquist32000; - var freqScale = new FrequencyScale(fst); + int sr = recording.SampleRate; + + var fst = FreqScaleType.OctaveCustom; + int nyquist = sr / 2; + int frameSize = 16384; + int linearBound = 125; + int octaveToneCount = 28; + int gridInterval = 1000; + var freqScale = new FrequencyScale(fst, nyquist, frameSize, linearBound, octaveToneCount, gridInterval); var sonoConfig = new SonogramConfig { @@ -315,7 +725,9 @@ public void OctaveFrequencyScale2() }; var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); - sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale); + var windowPower = sonogram.Configuration.WindowPower; + var epsilon = sonogram.Configuration.epsilon; + sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToFreqScaledDecibels(sonogram.Data, windowPower, sr, epsilon, freqScale); // DO NOISE REDUCTION var dataMatrix = SNR.NoiseReduce_Standard(sonogram.Data); @@ -327,31 +739,88 @@ public void OctaveFrequencyScale2() // DO FILE EQUALITY TESTS // Check that freqScale.OctaveBinBounds are correct - - var expectedBinBoundsFile = PathHelper.ResolveAsset("FrequencyScale", opFileStem + "_Octave2ScaleBinBounds.EXPECTED.json"); - var expectedBinBounds = Json.Deserialize(expectedBinBoundsFile); - - Assert.That.MatricesAreEqual(expectedBinBounds, freqScale.BinBounds); + // WARNING: TODO FIX UP THIS TEST. + //var expectedBinBoundsFile = PathHelper.ResolveAsset("FrequencyScale", opFileStem + "_Octave2ScaleBinBounds.EXPECTED.json"); + //var expectedBinBounds = Json.Deserialize(expectedBinBoundsFile); + //Assert.That.MatricesAreEqual(expectedBinBounds, freqScale.BinBounds); + + // INSTEAD DO THIS TEST + Assert.AreEqual(255, freqScale.BinBounds.GetLength(0)); + Assert.AreEqual(23, freqScale.BinBounds[23, 0]); + Assert.AreEqual(62, freqScale.BinBounds[23, 1]); + Assert.AreEqual(54, freqScale.BinBounds[52, 0]); + Assert.AreEqual(145, freqScale.BinBounds[52, 1]); + Assert.AreEqual(177, freqScale.BinBounds[100, 0]); + Assert.AreEqual(476, freqScale.BinBounds[100, 1]); + Assert.AreEqual(354, freqScale.BinBounds[128, 0]); + Assert.AreEqual(953, freqScale.BinBounds[128, 1]); + Assert.AreEqual(8191, freqScale.BinBounds[254, 0]); + Assert.AreEqual(22047, freqScale.BinBounds[254, 1]); // Check that freqScale.GridLineLocations are correct var expected = new[,] { - { 34, 125 }, - { 62, 250 }, - { 89, 500 }, - { 117, 1000 }, - { 145, 2000 }, - { 173, 4000 }, - { 201, 8000 }, - { 229, 16000 }, - { 256, 32000 }, + { 47, 125 }, + { 74, 250 }, + { 102, 500 }, + { 130, 1000 }, + { 158, 2000 }, + { 186, 4000 }, + { 214, 8000 }, + { 242, 16000 }, }; Assert.That.MatricesAreEqual(expected, freqScale.GridLineLocations); // Check that image dimensions are correct Assert.AreEqual(201, image.Width); - Assert.AreEqual(310, image.Height); + Assert.AreEqual(309, image.Height); + } + + /// + /// Test of frequency scale used for spectral data reduction. + /// Reduces a 256 spectrum to 20 value vector. + /// Check it on artificial spectrum with three tones. + /// By default, the split between linear and octave is at 1000 Hz. + /// + [TestMethod] + public void TestSpectralReductionScale() + { + var fst = FreqScaleType.OctaveDataReduction; + var freqScale = new FrequencyScale(fst); + Assert.AreEqual(freqScale.ScaleType, FreqScaleType.OctaveDataReduction); + + // test contents of the octave bin bounds matrix. + Assert.AreEqual(19, freqScale.BinBounds.GetLength(0)); + + var expectedBinBounds = new[,] + { + { 0, 0 }, { 8, 345 }, { 16, 689 }, // linear up to 1000 Hz + { 24, 1034 }, { 32, 1378 }, { 40, 1723 }, // linear 1000-2000 Hz + { 48, 2067 }, { 54, 2326 }, { 62, 2670 }, { 71, 3058 }, { 81, 3488 }, // first octave 2-4 kHz + { 93, 4005 }, { 107, 4608 }, { 123, 5297 }, { 141, 6072 }, { 162, 6977 }, // second octave 4-8 kHz + { 186, 8010 }, { 214, 9216 }, { 255, 10982 }, // residual octave 8-11 kHz + }; + + Assert.That.MatricesAreEqual(expectedBinBounds, freqScale.BinBounds); + + // generate pure tone spectrum. + double[] linearSpectrum = new double[256]; + linearSpectrum[0] = 1.0; + linearSpectrum[128] = 1.0; + linearSpectrum[255] = 1.0; + + double[] octaveSpectrum = SpectrogramTools.RescaleSpectrumUsingFilterbank(freqScale.BinBounds, linearSpectrum); + + Assert.AreEqual(19, octaveSpectrum.Length); + Assert.AreEqual(0.222222, octaveSpectrum[0], 0.00001); + Assert.AreEqual(0.0, octaveSpectrum[1], 0.00001); + Assert.AreEqual(0.0, octaveSpectrum[12], 0.00001); + Assert.AreEqual(0.042483, octaveSpectrum[13], 0.00001); + Assert.AreEqual(0.014245, octaveSpectrum[14], 0.00001); + Assert.AreEqual(0.0, octaveSpectrum[15]); + Assert.AreEqual(0.0, octaveSpectrum[17]); + Assert.AreEqual(0.047619, octaveSpectrum[18], 0.00001); } /// @@ -412,68 +881,5 @@ public void TestFreqScaleOnArtificialSignal1() Assert.IsTrue(peaks[92]); Assert.IsTrue(peaks[185]); } - - /// - /// Tests octave freq scale using an artificial recording containing five sine waves. - /// - [TestMethod] - public void TestFreqScaleOnArtificialSignal2() - { - int sampleRate = 64000; - double duration = 30; // signal duration in seconds - int[] harmonics = { 500, 1000, 2000, 4000, 8000 }; - var freqScale = new FrequencyScale(FreqScaleType.Linear125Octaves7Tones28Nyquist32000); - var outputImagePath = Path.Combine(this.outputDirectory.FullName, "Signal2_OctaveFreqScale.png"); - var recording = DspFilters.GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Cosine); - - // init the default sonogram config - var sonoConfig = new SonogramConfig - { - WindowSize = freqScale.WindowSize, - WindowOverlap = 0.2, - SourceFName = "Signal2", - NoiseReductionType = NoiseReductionType.None, - NoiseReductionParameter = 0.0, - }; - var sonogram = new AmplitudeSonogram(sonoConfig, recording.WavReader); - sonogram.Data = OctaveFreqScale.ConvertAmplitudeSpectrogramToDecibelOctaveScale(sonogram.Data, freqScale); - - // pick a row, any row - var oneSpectrum = MatrixTools.GetRow(sonogram.Data, 40); - oneSpectrum = DataTools.filterMovingAverage(oneSpectrum, 5); - var peaks = DataTools.GetPeaks(oneSpectrum); - - var peakIds = new List(); - for (int i = 5; i < peaks.Length - 5; i++) - { - if (peaks[i]) - { - int peakId = freqScale.BinBounds[i, 0]; - peakIds.Add(peakId); - LoggedConsole.WriteLine($"Spectral peak located in bin {peakId}, Herz={freqScale.BinBounds[i, 1]}"); - } - } - - foreach (int h in harmonics) - { - LoggedConsole.WriteLine($"Harmonic {h}Herz should be in bin {freqScale.GetBinIdForHerzValue(h)}"); - } - - Assert.AreEqual(5, peakIds.Count); - Assert.AreEqual(129, peakIds[0]); - Assert.AreEqual(257, peakIds[1]); - Assert.AreEqual(513, peakIds[2]); - Assert.AreEqual(1025, peakIds[3]); - Assert.AreEqual(2049, peakIds[4]); - - var image = sonogram.GetImage(); - string title = $"Spectrogram of Harmonics: {DataTools.Array2String(harmonics)} SR={sampleRate} Window={freqScale.WindowSize}"; - image = sonogram.GetImageFullyAnnotated(image, title, freqScale.GridLineLocations); - image.Save(outputImagePath); - - // Check that image dimensions are correct - Assert.AreEqual(146, image.Width); - Assert.AreEqual(310, image.Height); - } } } \ No newline at end of file diff --git a/tests/Acoustics.Test/AudioAnalysisTools/Indices/IndexCalculateTest.cs b/tests/Acoustics.Test/AudioAnalysisTools/Indices/IndexCalculateTest.cs index 0c9d733fe..81d0054ec 100644 --- a/tests/Acoustics.Test/AudioAnalysisTools/Indices/IndexCalculateTest.cs +++ b/tests/Acoustics.Test/AudioAnalysisTools/Indices/IndexCalculateTest.cs @@ -279,14 +279,14 @@ public void TestOfSpectralIndices_ICD20() } /// - /// Test index calculation when the Hertz FreqScaleType = Octave. - /// Only test the BGN spectral index as reasonable to assume that the rest will work if ACI works. + /// Test index calculation when the Hertz FreqScaleType = OctaveDataReduction. + /// Only test the BGN and CVR spectral index as reasonable to assume that the rest will work if these work. /// [TestMethod] - public void TestOfSpectralIndices_Octave() + public void TestOfSpectralIndices_OctaveDataReduction() { // create a two-minute artificial recording containing five harmonics. - int sampleRate = 64000; + int sampleRate = 22050; double duration = 120; // signal duration in seconds int[] harmonics = { 500, 1000, 2000, 4000, 8000 }; var recording = DspFilters.GenerateTestRecording(sampleRate, duration, harmonics, WaveType.Sine); @@ -309,10 +309,10 @@ public void TestOfSpectralIndices_Octave() // CHANGE CONFIG PARAMETERS HERE IF REQUIRED var indexCalculateConfig = ConfigFile.Deserialize(configFile); - indexCalculateConfig.FrequencyScale = FreqScaleType.Octave; + indexCalculateConfig.FrameLength = 512; - var freqScale = new FrequencyScale(indexCalculateConfig.FrequencyScale); - indexCalculateConfig.FrameLength = freqScale.WindowSize; + //WARNING: Currently only one octave scale is available. + indexCalculateConfig.FrequencyScale = FreqScaleType.OctaveDataReduction; var results = IndexCalculate.Analysis( subsegmentRecording, @@ -331,7 +331,7 @@ public void TestOfSpectralIndices_Octave() image.Save(outputImagePath1); // TEST the BGN SPECTRAL INDEX - Assert.AreEqual(256, spectralIndices.BGN.Length); + Assert.AreEqual(19, spectralIndices.BGN.Length); var expectedSpectrumFile1 = PathHelper.ResolveAsset("Indices", "BGN_OctaveScale.bin"); diff --git a/tests/Acoustics.Test/TestHelpers/Random.cs b/tests/Acoustics.Test/TestHelpers/Random.cs index d977c0cfe..8ade2a819 100644 --- a/tests/Acoustics.Test/TestHelpers/Random.cs +++ b/tests/Acoustics.Test/TestHelpers/Random.cs @@ -13,7 +13,7 @@ internal static class Random { public static System.Random GetRandom(int? seed = null) { - seed = seed ?? Environment.TickCount; + seed ??= Environment.TickCount; Trace.WriteLine("\n\nRandom seed used: " + seed.Value); LoggedConsole.WriteWarnLine($"Random seed: {seed}"); diff --git a/tests/Fixtures/Indices/BGN_OctaveScale.bin b/tests/Fixtures/Indices/BGN_OctaveScale.bin index 7f5ddb82a..523435d6b 100644 --- a/tests/Fixtures/Indices/BGN_OctaveScale.bin +++ b/tests/Fixtures/Indices/BGN_OctaveScale.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:873fc6aa38bdac712e218eb0867cad930bfc2e8f9600690598bff8b11bc40ecd -size 2076 +oid sha256:4f5588b8de1e21c5c36728ed2bccb599fd2846fdae9637a9926128407aeedf09 +size 180 diff --git a/tests/Fixtures/Indices/CVR_OctaveScale.bin b/tests/Fixtures/Indices/CVR_OctaveScale.bin index defb981f1..c7e03078c 100644 --- a/tests/Fixtures/Indices/CVR_OctaveScale.bin +++ b/tests/Fixtures/Indices/CVR_OctaveScale.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f1b637e5bce179219068eaae31f5e3db6af24dbd0e2c91d6a43e9c41acd3629 -size 2076 +oid sha256:619a72998dccaa8054c25f1e646c1ea85333577493247880a9a720f9b48c275c +size 180 diff --git a/tests/Fixtures/LongDuration/BgnMatrix.OctaveScale.csv b/tests/Fixtures/LongDuration/BgnMatrix.OctaveScale.csv index d1255dbf6..ae333084b 100644 --- a/tests/Fixtures/LongDuration/BgnMatrix.OctaveScale.csv +++ b/tests/Fixtures/LongDuration/BgnMatrix.OctaveScale.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f28fa71dd3621ac47c76f5b01eb88c6acac4b324a863c6e967ca17c37033e8eb -size 145265 +oid sha256:ed57ff90b532d105deb290b48e68b07c31a501053abd2ed165ed12fefe3a97b0 +size 10537