Skip to content

Commit a4a9e80

Browse files
authored
Fixed the audio encoder opus encoding. Made audio encoder disposable. (#1427)
1 parent 1582e3d commit a4a9e80

File tree

1 file changed

+58
-18
lines changed

1 file changed

+58
-18
lines changed

src/app/Media/Codecs/AudioEncoder.cs

Lines changed: 58 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,21 +13,38 @@
1313
// BSD 3-Clause "New" or "Revised" License, see included LICENSE.md file.
1414
//-----------------------------------------------------------------------------
1515

16-
using Concentus;
1716
using System;
1817
using System.Collections.Generic;
1918
using System.Linq;
20-
using SIPSorceryMedia.Abstractions;
19+
using Concentus;
2120
using Concentus.Enums;
21+
using Microsoft.Extensions.Logging;
22+
using SIPSorcery.Sys;
23+
using SIPSorceryMedia.Abstractions;
2224

2325
namespace SIPSorcery.Media
2426
{
25-
public class AudioEncoder : IAudioEncoder
27+
public class AudioEncoder : IAudioEncoder, IDisposable
2628
{
2729
private const int G722_BIT_RATE = 64000; // G722 sampling rate is 16KHz with bits per sample of 16.
2830
private const int OPUS_SAMPLE_RATE = 48000; // Opus codec sampling rate, 48KHz.
2931
private const int OPUS_CHANNELS = 2; // Opus codec number of channels.
30-
private const int OPUS_MAXIMUM_FRAME_SIZE = 5760;
32+
33+
/// <summary>
34+
/// The max frame size that the OPUS encoder will accept is 2880 samples per channel (see IOpusEncoder.Encode).
35+
/// At 48Khz, 2880 samples per channel corresponds to a frame duration of 60ms. Therefore
36+
/// the max sample period supported by OPUS is 60ms.
37+
/// </summary>
38+
private const int OPUS_MAXIMUM_INPUT_SAMPLES_PER_CHANNEL = 2880;
39+
40+
/// <summary>
41+
/// Maximum size, in bytes, of the encoded output buffer passed to the OPUS encoder (see IOpusEncoder.Encode).
42+
/// </summary>
43+
private const int OPUS_MAXIMUM_ENCODED_FRAME_SIZE = 1275;
44+
45+
private static ILogger logger = Log.Logger;
46+
47+
private bool _disposedValue = false;
3148

3249
private G722Codec _g722Codec;
3350
private G722CodecState _g722CodecState;
@@ -56,7 +73,7 @@ public class AudioEncoder : IAudioEncoder
5673
new AudioFormat(SDPWellKnownMediaFormatsEnum.G722),
5774
new AudioFormat(SDPWellKnownMediaFormatsEnum.G729),
5875

59-
// Need more testing befoer adding OPUS by default. 24 Dec 2024 AC.
76+
// Need more testing before adding OPUS by default. 24 Dec 2024 AC.
6077
//new AudioFormat(111, AudioCodecsEnum.OPUS.ToString(), OPUS_SAMPLE_RATE, OPUS_CHANNELS, "useinbandfec=1")
6178
};
6279

@@ -139,23 +156,23 @@ public byte[] EncodeAudio(short[] pcm, AudioFormat format)
139156
}
140157
else if (format.Codec == AudioCodecsEnum.OPUS)
141158
{
142-
var channelCount = format.ChannelCount > 0 ? format.ChannelCount : OPUS_CHANNELS;
143-
144159
if (_opusEncoder == null)
145160
{
161+
var channelCount = format.ChannelCount > 0 ? format.ChannelCount : OPUS_CHANNELS;
146162
_opusEncoder = OpusCodecFactory.CreateEncoder(format.ClockRate, channelCount, OpusApplication.OPUS_APPLICATION_VOIP);
147163
}
148164

149-
// Opus expects PCM data in float format [-1.0, 1.0].
150-
float[] pcmFloat = new float[pcm.Length];
151-
for (int i = 0; i < pcm.Length; i++)
165+
if (pcm.Length > _opusEncoder.NumChannels * OPUS_MAXIMUM_INPUT_SAMPLES_PER_CHANNEL)
152166
{
153-
pcmFloat[i] = pcm[i] / 32768f; // Convert to float range [-1.0, 1.0]
167+
logger.LogWarning("{audioEncoder} input sample of length {inputSize} supplied to OPUS encoder exceeded maximum limit of {maxLimit}. Reduce sampling period.", nameof(AudioEncoder), pcm.Length, _opusEncoder.NumChannels * OPUS_MAXIMUM_INPUT_SAMPLES_PER_CHANNEL);
168+
return [];
169+
}
170+
else
171+
{
172+
Span<byte> encodedSample = stackalloc byte[OPUS_MAXIMUM_ENCODED_FRAME_SIZE];
173+
int encodedLength = _opusEncoder.Encode(pcm, pcm.Length / _opusEncoder.NumChannels, encodedSample, encodedSample.Length);
174+
return encodedSample.Slice(0, encodedLength).ToArray();
154175
}
155-
156-
byte[] encodedSample = new byte[OPUS_MAXIMUM_FRAME_SIZE * format.ChannelCount];
157-
int encodedLength = _opusEncoder.Encode(pcmFloat, pcmFloat.Length / channelCount, encodedSample, encodedSample.Length);
158-
return encodedSample.Take(encodedLength).ToArray();
159176
}
160177
else
161178
{
@@ -218,10 +235,11 @@ public short[] DecodeAudio(byte[] encodedSample, AudioFormat format)
218235
{
219236
if (_opusDecoder == null)
220237
{
221-
_opusDecoder = OpusCodecFactory.CreateDecoder(format.ClockRate, format.ChannelCount);
238+
var channelCount = format.ChannelCount > 0 ? format.ChannelCount : OPUS_CHANNELS;
239+
_opusDecoder = OpusCodecFactory.CreateDecoder(format.ClockRate, channelCount);
222240
}
223241

224-
int maxSamples = OPUS_MAXIMUM_FRAME_SIZE * format.ChannelCount;
242+
int maxSamples = OPUS_MAXIMUM_INPUT_SAMPLES_PER_CHANNEL * _opusDecoder.NumChannels;
225243
float[] floatBuf = new float[maxSamples];
226244

227245
// Decode returns the number of samples per channel.
@@ -231,7 +249,7 @@ public short[] DecodeAudio(byte[] encodedSample, AudioFormat format)
231249
floatBuf.Length,
232250
false);
233251

234-
int totalFloats = samplesPerChannel * format.ChannelCount;
252+
int totalFloats = samplesPerChannel * _opusDecoder.NumChannels;
235253

236254
// Convert to 16-bit interleaved PCM.
237255
short[] pcm16 = new short[totalFloats];
@@ -261,5 +279,27 @@ private float ClampToFloat(float value, float min, float max)
261279
if (value > max) { return max; }
262280
return value;
263281
}
282+
283+
protected virtual void Dispose(bool disposing)
284+
{
285+
if (!_disposedValue)
286+
{
287+
if (disposing)
288+
{
289+
(_opusEncoder as IDisposable)?.Dispose();
290+
(_opusDecoder as IDisposable)?.Dispose();
291+
(_g729Encoder as IDisposable)?.Dispose();
292+
(_g729Decoder as IDisposable)?.Dispose();
293+
}
294+
295+
_disposedValue = true;
296+
}
297+
}
298+
299+
public void Dispose()
300+
{
301+
Dispose(disposing: true);
302+
GC.SuppressFinalize(this);
303+
}
264304
}
265305
}

0 commit comments

Comments
 (0)