diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 09f56310..a05b04b7 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -16,7 +16,7 @@ jobs:
       name:
         - windows_x86_64
     name: Build sora-cpp-sdk for ${{ matrix.name }}
-    runs-on: windows-2019
+    runs-on: windows-2022
     env:
       TEST_SIGNALING_URL: ${{ secrets.TEST_SIGNALING_URL }}
       TEST_CHANNEL_ID_PREFIX: ${{ secrets.TEST_CHANNEL_ID_PREFIX }}
diff --git a/CHANGES.md b/CHANGES.md
index 13ee17db..0684a5d5 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -15,16 +15,21 @@
   - @melpon
 - [CHANGE] Rename `--webrtc-build-dir` and `--webrtc-build-args` to `--local-webrtc-build-dir` and `--local-webrtc-build-args`
   - @melpon
-- [CHANGE] Change the function used to initialize SoraVideoDecoderFactory from CreateVideoDecoder to Create
+- [CHANGE] Rename SoraVideoDecoderFactory's CreateVideoDecoder function to Create
   - Follows a change to the base class defined in libwebrtc
   - @enm10k
+- [CHANGE] Rename SoraVideoEncoderFactory's CreateVideoEncoder function to Create
+  - Follows a change to the base class defined in libwebrtc
+  - @enm10k
+- [CHANGE] Change the type of the first argument of `CreateOpenH264VideoEncoder()` from `const cricket::VideoCodec&` to `const webrtc::SdpVideoFormat&`
+  - @melpon
 - [UPDATE] Update Boost to 1.85.0
   - @enm10k
 - [UPDATE] Update SDL2 to 2.30.3
   - @enm10k
 - [UPDATE] Update CLI11 to 2.4.2
   - @enm10k
-- [UPDATE] Update libwebrtc to m124.6367.3.1
+- [UPDATE] Update libwebrtc to m125.6422.2.1
   - Because the Android test app could no longer link, switch the linker from the Android NDK's to libwebrtc's
     - The errors were caused by a compatibility problem between the differing linker versions
   - @melpon @enm10k
diff --git a/VERSION b/VERSION
index 2c8b8429..cd02e470 100644
--- a/VERSION
+++ b/VERSION
@@ -1,5 +1,5 @@
 SORA_CPP_SDK_VERSION=2024.7.0-canary.1
-WEBRTC_BUILD_VERSION=m124.6367.3.1
+WEBRTC_BUILD_VERSION=m125.6422.2.1
 BOOST_VERSION=1.85.0
 CMAKE_VERSION=3.28.1
 CUDA_VERSION=11.8.0-1
diff --git a/buildbase.py b/buildbase.py
index 35f1a542..16173925 100644
--- a/buildbase.py
+++ b/buildbase.py
@@ -940,8 +940,6 @@ def install_sdl2(
         ]
         if platform == "windows":
             cmake_args += [
-                "-G",
-                "Visual Studio 16 2019",
                 "-DSDL_FORCE_STATIC_VCRT=ON",
                 "-DHAVE_LIBC=ON",
             ]
diff --git a/examples/VERSION b/examples/VERSION
index 1449c2e9..7cb7e61d 100644
--- a/examples/VERSION
+++ b/examples/VERSION
@@ -1,6 +1,6 @@
 SORA_CPP_SDK_VERSION=2024.7.0-canary.1
-WEBRTC_BUILD_VERSION=m124.6367.3.1
-BOOST_VERSION=1.84.0
+WEBRTC_BUILD_VERSION=m125.6422.2.1
+BOOST_VERSION=1.85.0
 CMAKE_VERSION=3.28.1
 SDL2_VERSION=2.30.3
 CLI11_VERSION=v2.4.2
diff --git a/include/sora/hwenc_jetson/jetson_video_encoder.h b/include/sora/hwenc_jetson/jetson_video_encoder.h
index 4e577ebc..a8e2f263 100644
--- a/include/sora/hwenc_jetson/jetson_video_encoder.h
+++ b/include/sora/hwenc_jetson/jetson_video_encoder.h
@@ -22,6 +22,7 @@
 // WebRTC
 #include
 #include
+#include
 #include
 #include
 #include
@@ -40,7 +41,7 @@ namespace sora {
 class JetsonVideoEncoder : public webrtc::VideoEncoder {
  public:
-  explicit JetsonVideoEncoder(const cricket::VideoCodec& codec);
+  explicit JetsonVideoEncoder(const cricket::Codec& codec);
   ~JetsonVideoEncoder() override;
 
   static bool IsSupportedVP8();
diff --git a/include/sora/open_h264_video_encoder.h b/include/sora/open_h264_video_encoder.h
index 8b1b3621..c30b8b4a 100644
--- a/include/sora/open_h264_video_encoder.h
+++ b/include/sora/open_h264_video_encoder.h
@@ -11,7 +11,7 @@
 namespace sora {
 
 std::unique_ptr<webrtc::VideoEncoder> CreateOpenH264VideoEncoder(
-    const cricket::VideoCodec& codec,
+    const webrtc::SdpVideoFormat& format,
     std::string openh264);
 
 }
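Note: with this change, `CreateOpenH264VideoEncoder()` takes the SDP format directly instead of a `cricket::VideoCodec`. A minimal caller-side sketch of the new signature (the fmtp values and the OpenH264 library path are illustrative, not required):

```cpp
#include <sora/open_h264_video_encoder.h>

std::unique_ptr<webrtc::VideoEncoder> MakeH264Encoder() {
  // Previously: CreateOpenH264VideoEncoder(cricket::CreateVideoCodec(format), ...)
  webrtc::SdpVideoFormat format("H264",
                                {{"level-asymmetry-allowed", "1"},
                                 {"packetization-mode", "1"},
                                 {"profile-level-id", "42e01f"}});
  return sora::CreateOpenH264VideoEncoder(format, "/path/to/libopenh264.so");
}
```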
diff --git a/include/sora/sora_video_encoder_factory.h b/include/sora/sora_video_encoder_factory.h
index 8b6c3391..c9acabd2 100644
--- a/include/sora/sora_video_encoder_factory.h
+++ b/include/sora/sora_video_encoder_factory.h
@@ -81,12 +81,14 @@ class SoraVideoEncoderFactory : public webrtc::VideoEncoderFactory {
   std::vector<webrtc::SdpVideoFormat> GetSupportedFormats() const override;
 
-  std::unique_ptr<webrtc::VideoEncoder> CreateVideoEncoder(
+  std::unique_ptr<webrtc::VideoEncoder> Create(
+      const webrtc::Environment& env,
       const webrtc::SdpVideoFormat& format) override;
 
  private:
   // Create the innermost encoder
   std::unique_ptr<webrtc::VideoEncoder> CreateInternalVideoEncoder(
+      const webrtc::Environment& env,
       const webrtc::SdpVideoFormat& format,
       int& alignment);
diff --git a/src/default_video_formats.cpp b/src/default_video_formats.cpp
index 2421e508..3da6a29a 100644
--- a/src/default_video_formats.cpp
+++ b/src/default_video_formats.cpp
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/src/hwenc_jetson/jetson_video_encoder.cpp b/src/hwenc_jetson/jetson_video_encoder.cpp
index 6a594941..65e6870e 100644
--- a/src/hwenc_jetson/jetson_video_encoder.cpp
+++ b/src/hwenc_jetson/jetson_video_encoder.cpp
@@ -16,14 +16,18 @@
 
 // WebRTC
 #include
+#include
+#include
 #include
 #include
 #include
 #include
 #include
-#include
-#include
-#include
+
+// libyuv
+#include
+#include
+#include
 
 // L4T Multimedia API
 #include
diff --git a/src/hwenc_nvcodec/nvcodec_h264_encoder.cpp b/src/hwenc_nvcodec/nvcodec_h264_encoder.cpp
index ec24d935..114b97df 100644
--- a/src/hwenc_nvcodec/nvcodec_h264_encoder.cpp
+++ b/src/hwenc_nvcodec/nvcodec_h264_encoder.cpp
@@ -16,10 +16,14 @@
 #include
 #include
 #include
-#include
 #include
+#include
+#include
 #include
 
+// libyuv
+#include
+
 // NvCodec
 #ifdef _WIN32
 #include
diff --git a/src/hwenc_vpl/vpl_video_encoder.cpp b/src/hwenc_vpl/vpl_video_encoder.cpp
index a531368e..57cafae7 100644
--- a/src/hwenc_vpl/vpl_video_encoder.cpp
+++ b/src/hwenc_vpl/vpl_video_encoder.cpp
@@ -9,6 +9,8 @@
 
 #include
 #include
 #include
+#include
+#include
 #include
 #include
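Note: the `CreateVideoEncoder` → `Create` rename in the factory header above tracks the m125 `webrtc::VideoEncoderFactory` interface, which now threads a `webrtc::Environment` through to each encoder. A sketch of what a downstream factory override looks like under that assumption:

```cpp
#include <api/environment/environment.h>
#include <api/video_codecs/sdp_video_format.h>
#include <api/video_codecs/video_encoder.h>
#include <api/video_codecs/video_encoder_factory.h>

class MyEncoderFactory : public webrtc::VideoEncoderFactory {
 public:
  std::vector<webrtc::SdpVideoFormat> GetSupportedFormats() const override;

  // m125: Create(env, format) replaces CreateVideoEncoder(format).
  std::unique_ptr<webrtc::VideoEncoder> Create(
      const webrtc::Environment& env,
      const webrtc::SdpVideoFormat& format) override {
    // Construct your encoder here; env carries field trials, the clock, and
    // task queue plumbing that encoders previously obtained elsewhere.
    return nullptr;
  }
};
```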
diff --git a/src/open_h264_video_encoder.cpp b/src/open_h264_video_encoder.cpp
index a13a0730..90a330f9 100644
--- a/src/open_h264_video_encoder.cpp
+++ b/src/open_h264_video_encoder.cpp
@@ -28,12 +28,22 @@
 #endif
 
 // WebRTC
+#include
 #include
+#include
+#include
+#include
 #include
+#include
+#include
 #include
 #include
 #include
 #include
+#include
+#include
+#include
+#include
 #include
 #include
 #include
@@ -50,16 +60,11 @@
 #include
 #include
 
-namespace webrtc {
+class ISVCEncoder;
 
-class OpenH264VideoEncoder : public H264Encoder {
- public:
-  static std::unique_ptr<OpenH264VideoEncoder> Create(const cricket::VideoCodec& codec,
-                                                      std::string openh264) {
-    return std::unique_ptr<OpenH264VideoEncoder>(
-        new OpenH264VideoEncoder(codec, std::move(openh264)));
-  }
+namespace webrtc {
 
+class OpenH264VideoEncoder : public VideoEncoder {
  public:
   struct LayerConfig {
     int simulcast_idx = 0;
@@ -77,13 +82,14 @@ class OpenH264VideoEncoder : public H264Encoder {
     void SetStreamState(bool send_stream);
   };
 
- public:
-  explicit OpenH264VideoEncoder(const cricket::VideoCodec& codec,
-                                std::string openh264);
+  OpenH264VideoEncoder(const Environment& env,
+                       H264EncoderSettings settings,
+                       std::string openh264);
+
   ~OpenH264VideoEncoder() override;
 
-  // |settings.max_payload_size| is ignored.
-  // The following members of |codec_settings| are used. The rest are ignored.
+  // `settings.max_payload_size` is ignored.
+  // The following members of `codec_settings` are used. The rest are ignored.
   //  - codecType (must be kVideoCodecH264)
   //  - targetBitrate
   //  - maxFramerate
@@ -122,11 +128,16 @@
   std::vector<rtc::scoped_refptr<I420Buffer>> downscaled_buffers_;
   std::vector<LayerConfig> configurations_;
   std::vector<EncodedImage> encoded_images_;
+  std::vector<std::unique_ptr<ScalableVideoController>> svc_controllers_;
+  absl::InlinedVector<absl::optional<ScalabilityMode>, kMaxSimulcastStreams>
+      scalability_modes_;
 
+  const Environment env_;
   VideoCodec codec_;
   H264PacketizationMode packetization_mode_;
   size_t max_payload_size_;
   int32_t number_of_cores_;
+  absl::optional<int> encoder_thread_limit_;
   EncodedImageCallback* encoded_image_callback_;
 
   bool has_reported_init_;
@@ -169,19 +180,30 @@ enum H264EncoderImplEvent {
   kH264EncoderEventMax = 16,
 };
 
-int NumberOfThreads(int width, int height, int number_of_cores) {
+int NumberOfThreads(absl::optional<int> encoder_thread_limit,
+                    int width,
+                    int height,
+                    int number_of_cores) {
   // TODO(hbos): In Chromium, multiple threads do not work with sandbox on Mac,
   // see crbug.com/583348. Until further investigated, only use one thread.
-  // if (width * height >= 1920 * 1080 && number_of_cores > 8) {
-  //   return 8;  // 8 threads for 1080p on high perf machines.
-  // } else if (width * height > 1280 * 960 && number_of_cores >= 6) {
-  //   return 3;  // 3 threads for 1080p.
-  // } else if (width * height > 640 * 480 && number_of_cores >= 3) {
-  //   return 2;  // 2 threads for qHD/HD.
-  // } else {
-  //   return 1;  // 1 thread for VGA or less.
-  // }
-  // TODO(sprang): Also check sSliceArgument.uiSliceNum om GetEncoderPrams(),
+  // While this limitation is gone, this changes the bitstream format (see
+  // bugs.webrtc.org/14368) so still guarded by field trial to allow for
+  // experimentation using the experimental
+  // WebRTC-VideoEncoderSettings/encoder_thread_limit trial.
+  if (encoder_thread_limit.has_value()) {
+    int limit = encoder_thread_limit.value();
+    RTC_DCHECK_GE(limit, 1);
+    if (width * height >= 1920 * 1080 && number_of_cores > 8) {
+      return std::min(limit, 8);  // 8 threads for 1080p on high perf machines.
+    } else if (width * height > 1280 * 960 && number_of_cores >= 6) {
+      return std::min(limit, 3);  // 3 threads for 1080p.
+    } else if (width * height > 640 * 480 && number_of_cores >= 3) {
+      return std::min(limit, 2);  // 2 threads for qHD/HD.
+    } else {
+      return 1;  // 1 thread for VGA or less.
+    }
+  }
+  // TODO(sprang): Also check sSliceArgument.uiSliceNum on GetEncoderParams(),
   // before enabling multithreading here.
   return 1;
 }
@@ -202,17 +224,34 @@ VideoFrameType ConvertToVideoFrameType(EVideoFrameType type) {
   return VideoFrameType::kEmptyFrame;
 }
 
+absl::optional<ScalabilityMode> ScalabilityModeFromTemporalLayers(
+    int num_temporal_layers) {
+  switch (num_temporal_layers) {
+    case 0:
+      break;
+    case 1:
+      return ScalabilityMode::kL1T1;
+    case 2:
+      return ScalabilityMode::kL1T2;
+    case 3:
+      return ScalabilityMode::kL1T3;
+    default:
+      RTC_DCHECK_NOTREACHED();
+  }
+  return absl::nullopt;
+}
+
 }  // namespace
 
 // Helper method used by OpenH264VideoEncoder::Encode.
-// Copies the encoded bytes from |info| to |encoded_image|. The
-// |encoded_image->_buffer| may be deleted and reallocated if a bigger buffer is
+// Copies the encoded bytes from `info` to `encoded_image`. The
+// `encoded_image->_buffer` may be deleted and reallocated if a bigger buffer is
 // required.
 //
-// After OpenH264 encoding, the encoded bytes are stored in |info| spread out
+// After OpenH264 encoding, the encoded bytes are stored in `info` spread out
 // over a number of layers and "NAL units". Each NAL unit is a fragment starting
 // with the four-byte start code {0,0,0,1}. All of this data (including the
-// start codes) is copied to the |encoded_image->_buffer|.
+// start codes) is copied to the `encoded_image->_buffer`.
 static void RtpFragmentize(EncodedImage* encoded_image, SFrameBSInfo* info) {
   // Calculate minimum buffer size required to hold encoded data.
   size_t required_capacity = 0;
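The new `ScalabilityModeFromTemporalLayers()` helper feeds `CreateScalabilityStructure()` during `InitEncode()` in a later hunk. An illustrative sketch of the mapping (unqualified names because the file's code lives in `namespace webrtc`):

```cpp
// 1, 2, 3 temporal layers map to kL1T1, kL1T2, kL1T3 respectively.
absl::optional<ScalabilityMode> mode = ScalabilityModeFromTemporalLayers(3);
RTC_DCHECK(mode.has_value() && *mode == ScalabilityMode::kL1T3);

// The controller drives the per-frame temporal-layer decisions in Encode().
std::unique_ptr<ScalableVideoController> controller =
    CreateScalabilityStructure(*mode);  // may return nullptr; InitEncode() checks
```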
@@ -221,18 +260,17 @@ static void RtpFragmentize(EncodedImage* encoded_image, SFrameBSInfo* info) {
     const SLayerBSInfo& layerInfo = info->sLayerInfo[layer];
     for (int nal = 0; nal < layerInfo.iNalCount; ++nal, ++fragments_count) {
       RTC_CHECK_GE(layerInfo.pNalLengthInByte[nal], 0);
-      // Ensure |required_capacity| will not overflow.
+      // Ensure `required_capacity` will not overflow.
       RTC_CHECK_LE(layerInfo.pNalLengthInByte[nal],
                    std::numeric_limits<size_t>::max() - required_capacity);
       required_capacity += layerInfo.pNalLengthInByte[nal];
     }
   }
-  // TODO(nisse): Use a cache or buffer pool to avoid allocation?
   auto buffer = EncodedImageBuffer::Create(required_capacity);
   encoded_image->SetEncodedData(buffer);
 
   // Iterate layers and NAL units, note each NAL unit as a fragment and copy
-  // the data to |encoded_image->_buffer|.
+  // the data to `encoded_image->_buffer`.
   const uint8_t start_code[4] = {0, 0, 0, 1};
   size_t frag = 0;
   encoded_image->set_size(0);
@@ -241,8 +279,8 @@ static void RtpFragmentize(EncodedImage* encoded_image, SFrameBSInfo* info) {
     // Iterate NAL units making up this layer, noting fragments.
     size_t layer_len = 0;
     for (int nal = 0; nal < layerInfo.iNalCount; ++nal, ++frag) {
-      // Because the sum of all layer lengths, |required_capacity|, fits in a
-      // |size_t|, we know that any indices in-between will not overflow.
+      // Because the sum of all layer lengths, `required_capacity`, fits in a
+      // `size_t`, we know that any indices in-between will not overflow.
       RTC_DCHECK_GE(layerInfo.pNalLengthInByte[nal], 4);
       RTC_DCHECK_EQ(layerInfo.pBsBuf[layer_len + 0], start_code[0]);
       RTC_DCHECK_EQ(layerInfo.pBsBuf[layer_len + 1], start_code[1]);
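A worked example of `RtpFragmentize`'s two passes (values arbitrary): a single layer with two NAL units of 100 and 42 bytes, start codes included, sizes the buffer once and then copies both fragments back-to-back:

```cpp
SFrameBSInfo info = {};
info.iLayerNum = 1;
int nal_lengths[] = {100, 42};  // each already starts with 00 00 00 01
info.sLayerInfo[0].iNalCount = 2;
info.sLayerInfo[0].pNalLengthInByte = nal_lengths;
// Pass 1: required_capacity = 100 + 42 = 142 bytes, one allocation.
// Pass 2: both fragments are copied contiguously into encoded_image,
// which is why RTC_DCHECK_GE(pNalLengthInByte[nal], 4) holds above.
```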
@@ -256,27 +294,23 @@ static void RtpFragmentize(EncodedImage* encoded_image, SFrameBSInfo* info) {
     }
   }
 }
 
-OpenH264VideoEncoder::OpenH264VideoEncoder(const cricket::VideoCodec& codec,
+OpenH264VideoEncoder::OpenH264VideoEncoder(const Environment& env,
+                                           H264EncoderSettings settings,
                                            std::string openh264)
-    : packetization_mode_(H264PacketizationMode::SingleNalUnit),
+    : env_(env),
+      packetization_mode_(settings.packetization_mode),
       max_payload_size_(0),
       number_of_cores_(0),
       encoded_image_callback_(nullptr),
       has_reported_init_(false),
       has_reported_error_(false),
       openh264_(std::move(openh264)) {
-  RTC_CHECK(absl::EqualsIgnoreCase(codec.name, cricket::kH264CodecName));
-  std::string packetization_mode_string;
-  if (codec.GetParam(cricket::kH264FmtpPacketizationMode,
-                     &packetization_mode_string) &&
-      packetization_mode_string == "1") {
-    packetization_mode_ = H264PacketizationMode::NonInterleaved;
-  }
   downscaled_buffers_.reserve(kMaxSimulcastStreams - 1);
   encoded_images_.reserve(kMaxSimulcastStreams);
   encoders_.reserve(kMaxSimulcastStreams);
   configurations_.reserve(kMaxSimulcastStreams);
   tl0sync_limit_.reserve(kMaxSimulcastStreams);
+  svc_controllers_.reserve(kMaxSimulcastStreams);
 }
 
 OpenH264VideoEncoder::~OpenH264VideoEncoder() {
@@ -377,11 +411,14 @@ int32_t OpenH264VideoEncoder::InitEncode(
   encoded_images_.resize(number_of_streams);
   encoders_.resize(number_of_streams);
   pictures_.resize(number_of_streams);
+  svc_controllers_.resize(number_of_streams);
+  scalability_modes_.resize(number_of_streams);
   configurations_.resize(number_of_streams);
   tl0sync_limit_.resize(number_of_streams);
 
-  number_of_cores_ = settings.number_of_cores;
   max_payload_size_ = settings.max_payload_size;
+  number_of_cores_ = settings.number_of_cores;
+  encoder_thread_limit_ = settings.encoder_thread_limit;
   codec_ = *inst;
 
   // Code expects simulcastStream resolutions to be correct, make sure they are
@@ -419,9 +456,11 @@ int32_t OpenH264VideoEncoder::InitEncode(
     configurations_[i].width = codec_.simulcastStream[idx].width;
     configurations_[i].height = codec_.simulcastStream[idx].height;
     configurations_[i].max_frame_rate = static_cast<float>(codec_.maxFramerate);
+    configurations_[i].frame_dropping_on = codec_.GetFrameDropEnabled();
     configurations_[i].key_frame_interval = codec_.H264()->keyFrameInterval;
     configurations_[i].num_temporal_layers =
-        codec_.simulcastStream[idx].numberOfTemporalLayers;
+        std::max(codec_.H264()->numberOfTemporalLayers,
+                 codec_.simulcastStream[idx].numberOfTemporalLayers);
 
     // Create downscaled image buffers.
     if (i > 0) {
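The `std::max()` above means a codec-level temporal-layer request is no longer lost when the per-stream field keeps its default. A minimal illustration (standalone, values assumed):

```cpp
#include <api/video_codecs/video_codec.h>

webrtc::VideoCodec codec;
codec.codecType = webrtc::kVideoCodecH264;
codec.H264()->numberOfTemporalLayers = 3;             // codec-level request
codec.simulcastStream[0].numberOfTemporalLayers = 1;  // untouched default
// New behavior: std::max(3, 1) == 3 temporal layers are configured;
// previously only the per-stream value (1) was honored.
```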
@@ -460,6 +499,17 @@ int32_t OpenH264VideoEncoder::InitEncode(
     encoded_images_[i].set_size(0);
 
     tl0sync_limit_[i] = configurations_[i].num_temporal_layers;
+    scalability_modes_[i] = ScalabilityModeFromTemporalLayers(
+        configurations_[i].num_temporal_layers);
+    if (scalability_modes_[i].has_value()) {
+      svc_controllers_[i] = CreateScalabilityStructure(*scalability_modes_[i]);
+      if (svc_controllers_[i] == nullptr) {
+        RTC_LOG(LS_ERROR) << "Failed to create scalability structure";
+        Release();
+        ReportError();
+        return WEBRTC_VIDEO_CODEC_ERROR;
+      }
+    }
   }
 
   SimulcastRateAllocator init_allocator(codec_);
@@ -484,6 +534,8 @@ int32_t OpenH264VideoEncoder::Release() {
   encoded_images_.clear();
   pictures_.clear();
   tl0sync_limit_.clear();
+  svc_controllers_.clear();
+  scalability_modes_.clear();
   return WEBRTC_VIDEO_CODEC_OK;
 }
@@ -553,29 +605,29 @@ int32_t OpenH264VideoEncoder::Encode(
     return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
   }
 
-  rtc::scoped_refptr<I420BufferInterface> frame_buffer =
+  rtc::scoped_refptr<const I420BufferInterface> frame_buffer =
       input_frame.video_frame_buffer()->ToI420();
+  if (!frame_buffer) {
+    RTC_LOG(LS_ERROR) << "Failed to convert "
+                      << VideoFrameBufferTypeToString(
+                             input_frame.video_frame_buffer()->type())
+                      << " image to I420. Can't encode frame.";
+    return WEBRTC_VIDEO_CODEC_ENCODER_FAILURE;
+  }
+  RTC_CHECK(frame_buffer->type() == VideoFrameBuffer::Type::kI420 ||
+            frame_buffer->type() == VideoFrameBuffer::Type::kI420A);
 
-  bool send_key_frame = false;
+  bool is_keyframe_needed = false;
   for (size_t i = 0; i < configurations_.size(); ++i) {
     if (configurations_[i].key_frame_request && configurations_[i].sending) {
-      send_key_frame = true;
+      // This is legacy behavior, generating a keyframe on all layers
+      // when generating one for a layer that became active for the first time
+      // or after being disabled.
+      is_keyframe_needed = true;
       break;
     }
   }
 
-  if (!send_key_frame && frame_types) {
-    for (size_t i = 0; i < configurations_.size(); ++i) {
-      const size_t simulcast_idx =
-          static_cast<size_t>(configurations_[i].simulcast_idx);
-      if (configurations_[i].sending && simulcast_idx < frame_types->size() &&
-          (*frame_types)[simulcast_idx] == VideoFrameType::kVideoFrameKey) {
-        send_key_frame = true;
-        break;
-      }
-    }
-  }
-
   RTC_DCHECK_EQ(configurations_[0].width, frame_buffer->width());
   RTC_DCHECK_EQ(configurations_[0].height, frame_buffer->height());
@@ -614,21 +666,29 @@ int32_t OpenH264VideoEncoder::Encode(
           pictures_[i].iStride[0], pictures_[i].pData[1],
           pictures_[i].iStride[1], pictures_[i].pData[2],
           pictures_[i].iStride[2], configurations_[i].width,
-          configurations_[i].height, libyuv::kFilterBilinear);
+          configurations_[i].height, libyuv::kFilterBox);
     }
 
     if (!configurations_[i].sending) {
       continue;
     }
-    if (frame_types != nullptr) {
+    if (frame_types != nullptr && i < frame_types->size()) {
       // Skip frame?
      if ((*frame_types)[i] == VideoFrameType::kEmptyFrame) {
         continue;
       }
     }
+    // Send a key frame either when this layer is configured to require one
+    // or we have explicitly been asked to.
+    const size_t simulcast_idx =
+        static_cast<size_t>(configurations_[i].simulcast_idx);
+    bool send_key_frame =
+        is_keyframe_needed ||
+        (frame_types && simulcast_idx < frame_types->size() &&
+         (*frame_types)[simulcast_idx] == VideoFrameType::kVideoFrameKey);
     if (send_key_frame) {
       // API doc says ForceIntraFrame(false) does nothing, but calling this
-      // function forces a key frame regardless of the |bIDR| argument's value.
+      // function forces a key frame regardless of the `bIDR` argument's value.
       // (If every frame is a key frame we get lag/delays.)
       encoders_[i]->ForceIntraFrame(true);
       configurations_[i].key_frame_request = false;
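Note on the `kFilterBilinear` → `kFilterBox` switch above: box filtering averages every source pixel, which generally holds up better at the large downscale factors simulcast layers use. A standalone sketch of the same libyuv call:

```cpp
#include <api/video/i420_buffer.h>
#include <libyuv/scale.h>

// Downscale src into dst (both I420); sizes come from the buffers themselves.
void DownscaleI420(const webrtc::I420BufferInterface& src,
                   webrtc::I420Buffer& dst) {
  libyuv::I420Scale(src.DataY(), src.StrideY(), src.DataU(), src.StrideU(),
                    src.DataV(), src.StrideV(), src.width(), src.height(),
                    dst.MutableDataY(), dst.StrideY(), dst.MutableDataU(),
                    dst.StrideU(), dst.MutableDataV(), dst.StrideV(),
                    dst.width(), dst.height(), libyuv::kFilterBox);
}
```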
@@ -637,6 +697,12 @@ int32_t OpenH264VideoEncoder::Encode(
     SFrameBSInfo info;
     memset(&info, 0, sizeof(SFrameBSInfo));
 
+    std::vector<ScalableVideoController::LayerFrameConfig> layer_frames;
+    if (svc_controllers_[i]) {
+      layer_frames = svc_controllers_[i]->NextFrameConfig(send_key_frame);
+      RTC_CHECK_EQ(layer_frames.size(), 1);
+    }
+
     // Encode!
     int enc_ret = encoders_[i]->EncodeFrame(&pictures_[i], &info);
     if (enc_ret != 0) {
@@ -649,16 +715,17 @@ int32_t OpenH264VideoEncoder::Encode(
     encoded_images_[i]._encodedWidth = configurations_[i].width;
     encoded_images_[i]._encodedHeight = configurations_[i].height;
-    encoded_images_[i].SetRtpTimestamp(input_frame.timestamp());
+    encoded_images_[i].SetRtpTimestamp(input_frame.rtp_timestamp());
+    encoded_images_[i].SetColorSpace(input_frame.color_space());
     encoded_images_[i]._frameType = ConvertToVideoFrameType(info.eFrameType);
-    encoded_images_[i].SetSpatialIndex(configurations_[i].simulcast_idx);
+    encoded_images_[i].SetSimulcastIndex(configurations_[i].simulcast_idx);
 
     // Split encoded image up into fragments. This also updates
-    // |encoded_image_|.
+    // `encoded_image_`.
     RtpFragmentize(&encoded_images_[i], &info);
 
     // Encoder can skip frames to save bandwidth in which case
-    // |encoded_images_[i]._length| == 0.
+    // `encoded_images_[i]._length` == 0.
     if (encoded_images_[i].size() > 0) {
       // Parse QP.
       h264_bitstream_parser_.ParseBitstream(encoded_images_[i]);
@@ -679,6 +746,25 @@ int32_t OpenH264VideoEncoder::Encode(
       codec_specific.codecSpecific.H264.temporal_idx = tid;
       codec_specific.codecSpecific.H264.base_layer_sync =
           tid > 0 && tid < tl0sync_limit_[i];
+      if (svc_controllers_[i]) {
+        if (encoded_images_[i]._frameType == VideoFrameType::kVideoFrameKey) {
+          // Reset the ScalableVideoController on key frame
+          // to reset the expected dependency structure.
+          layer_frames =
+              svc_controllers_[i]->NextFrameConfig(/* restart= */ true);
+          RTC_CHECK_EQ(layer_frames.size(), 1);
+          RTC_DCHECK_EQ(layer_frames[0].TemporalId(), 0);
+          RTC_DCHECK_EQ(layer_frames[0].IsKeyframe(), true);
+        }
+
+        if (layer_frames[0].TemporalId() != tid) {
+          RTC_LOG(LS_WARNING)
+              << "Encoder produced a frame with temporal id " << tid
+              << ", expected " << layer_frames[0].TemporalId() << ".";
+          continue;
+        }
+        encoded_images_[i].SetTemporalIndex(tid);
+      }
       if (codec_specific.codecSpecific.H264.base_layer_sync) {
         tl0sync_limit_[i] = tid;
       }
@@ -686,6 +772,15 @@ int32_t OpenH264VideoEncoder::Encode(
         tl0sync_limit_[i] = configurations_[i].num_temporal_layers;
       }
     }
+    if (svc_controllers_[i]) {
+      codec_specific.generic_frame_info =
+          svc_controllers_[i]->OnEncodeDone(layer_frames[0]);
+      if (send_key_frame && codec_specific.generic_frame_info.has_value()) {
+        codec_specific.template_structure =
+            svc_controllers_[i]->DependencyStructure();
+      }
+      codec_specific.scalability_mode = scalability_modes_[i];
+    }
     encoded_image_callback_->OnEncodedImage(encoded_images_[i],
                                             &codec_specific);
   }
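The controller handshake added to `Encode()` above follows the standard `ScalableVideoController` flow. In outline (a sketch assuming a single active layer, which the `RTC_CHECK_EQ(layer_frames.size(), 1)` above enforces):

```cpp
// 1. Before encoding: ask which layer configuration the next frame targets.
auto layer_frames = controller->NextFrameConfig(/*restart=*/false);

// 2. After encoding: report completion to obtain per-frame dependency info.
absl::optional<webrtc::GenericFrameInfo> info =
    controller->OnEncodeDone(layer_frames[0]);

// 3. On key frames, attach the full template structure once so receivers
//    can rebuild the dependency graph.
webrtc::FrameDependencyStructure structure = controller->DependencyStructure();
```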
@@ -721,8 +816,8 @@ SEncParamExt OpenH264VideoEncoder::CreateEncoderParams(size_t i) const {
   // The following parameters are extension parameters (they're in SEncParamExt,
   // not in SEncParamBase).
   encoder_params.bEnableFrameSkip = configurations_[i].frame_dropping_on;
-  // |uiIntraPeriod|    - multiple of GOP size
-  // |keyFrameInterval| - number of frames
+  // `uiIntraPeriod`    - multiple of GOP size
+  // `keyFrameInterval` - number of frames
   encoder_params.uiIntraPeriod = configurations_[i].key_frame_interval;
   // Reuse SPS id if possible. This helps to avoid reset of chromium HW decoder
   // on each key-frame.
@@ -735,8 +830,9 @@ SEncParamExt OpenH264VideoEncoder::CreateEncoderParams(size_t i) const {
   // 0: auto (dynamic imp. internal encoder)
   // 1: single thread (default value)
   // >1: number of threads
-  encoder_params.iMultipleThreadIdc = NumberOfThreads(
-      encoder_params.iPicWidth, encoder_params.iPicHeight, number_of_cores_);
+  encoder_params.iMultipleThreadIdc =
+      NumberOfThreads(encoder_thread_limit_, encoder_params.iPicWidth,
+                      encoder_params.iPicHeight, number_of_cores_);
   // The base spatial layer 0 is the only one we use.
   encoder_params.sSpatialLayers[0].iVideoWidth = encoder_params.iPicWidth;
   encoder_params.sSpatialLayers[0].iVideoHeight = encoder_params.iPicHeight;
@@ -747,7 +843,13 @@ SEncParamExt OpenH264VideoEncoder::CreateEncoderParams(size_t i) const {
       encoder_params.iMaxBitrate;
   encoder_params.iTemporalLayerNum = configurations_[i].num_temporal_layers;
   if (encoder_params.iTemporalLayerNum > 1) {
-    encoder_params.iNumRefFrame = 1;
+    // iNumRefFrame specifies total number of reference buffers to allocate.
+    // For N temporal layers we need at least (N - 1) buffers to store last
+    // encoded frames of all reference temporal layers.
+    // Note that there is no API in OpenH264 encoder to specify exact set of
+    // references to be used for prediction of a given frame. Encoder can
+    // theoretically use all available reference buffers.
+    encoder_params.iNumRefFrame = encoder_params.iTemporalLayerNum - 1;
   }
 
   RTC_LOG(LS_INFO) << "OpenH264 version is " << OPENH264_MAJOR << "."
                    << OPENH264_MINOR;
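Spelling out the (N - 1) rule from the comment above for the temporal configurations this encoder produces (informal):

```cpp
// L1T1: branch not taken; iNumRefFrame keeps the encoder default.
// L1T2 (2 layers): 1 buffer  - the last TL0 frame must stay referenceable.
// L1T3 (3 layers): 2 buffers - the last TL0 and TL1 frames.
// With fewer buffers OpenH264 could evict a frame the temporal structure
// still needs as a reference.
```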
@@ -816,9 +918,22 @@ void OpenH264VideoEncoder::LayerConfig::SetStreamState(bool send_stream) {
 
 namespace sora {
 
 std::unique_ptr<webrtc::VideoEncoder> CreateOpenH264VideoEncoder(
-    const cricket::VideoCodec& codec,
+    const webrtc::SdpVideoFormat& format,
     std::string openh264) {
-  return webrtc::OpenH264VideoEncoder::Create(codec, std::move(openh264));
+  webrtc::H264EncoderSettings settings;
+  if (auto it = format.parameters.find(cricket::kH264FmtpPacketizationMode);
+      it != format.parameters.end()) {
+    if (it->second == "0") {
+      settings.packetization_mode =
+          webrtc::H264PacketizationMode::SingleNalUnit;
+    } else if (it->second == "1") {
+      settings.packetization_mode =
+          webrtc::H264PacketizationMode::NonInterleaved;
+    }
+  }
+
+  return absl::make_unique<webrtc::OpenH264VideoEncoder>(
+      webrtc::CreateEnvironment(), settings, std::move(openh264));
 }
 
 }  // namespace sora
diff --git a/src/sora_signaling.cpp b/src/sora_signaling.cpp
index 73b9c171..1e17f49f 100644
--- a/src/sora_signaling.cpp
+++ b/src/sora_signaling.cpp
@@ -4,6 +4,8 @@
 #include
 #include
 #include
+#include
+#include
 
 #include "sora/data_channel.h"
 #include "sora/rtc_ssl_verifier.h"
@@ -518,11 +520,11 @@ SoraSignaling::CreatePeerConnection(boost::json::value jconfig) {
     dependencies.allocator->set_flags(rtc_config.port_allocator_config.flags);
 
     RTC_LOG(LS_INFO) << "Set Proxy: type="
-                     << rtc::ProxyToString(rtc::PROXY_HTTPS)
+                     << rtc::revive::ProxyToString(rtc::revive::PROXY_HTTPS)
                      << " url=" << config_.proxy_url
                      << " username=" << config_.proxy_username;
-    rtc::ProxyInfo pi;
-    pi.type = rtc::PROXY_HTTPS;
+    rtc::revive::ProxyInfo pi;
+    pi.type = rtc::revive::PROXY_HTTPS;
     URLParts parts;
     if (!URLParts::Parse(config_.proxy_url, parts)) {
       RTC_LOG(LS_ERROR) << "Failed to parse: proxy_url=" << config_.proxy_url;
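The fmtp lookup in the `CreateOpenH264VideoEncoder()` wrapper above maps SDP parameters to `H264EncoderSettings` as follows (illustrative values):

```cpp
// "packetization-mode" = "1" -> H264PacketizationMode::NonInterleaved
webrtc::SdpVideoFormat pm1("H264",
                           {{cricket::kH264FmtpPacketizationMode, "1"}});
// "packetization-mode" = "0" -> H264PacketizationMode::SingleNalUnit
webrtc::SdpVideoFormat pm0("H264",
                           {{cricket::kH264FmtpPacketizationMode, "0"}});
// Absent or unrecognized values keep the H264EncoderSettings default.

auto encoder = sora::CreateOpenH264VideoEncoder(pm1, "/path/to/libopenh264.so");
```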
diff --git a/src/sora_video_encoder_factory.cpp b/src/sora_video_encoder_factory.cpp
index a388a3f9..1bc783bb 100644
--- a/src/sora_video_encoder_factory.cpp
+++ b/src/sora_video_encoder_factory.cpp
@@ -3,6 +3,7 @@
 // WebRTC
 #include
 #include
+#include
 #include
 #include
 #include
@@ -81,6 +82,7 @@ SoraVideoEncoderFactory::GetSupportedFormats() const {
 
 std::unique_ptr<webrtc::VideoEncoder>
 SoraVideoEncoderFactory::CreateInternalVideoEncoder(
+    const webrtc::Environment& env,
     const webrtc::SdpVideoFormat& format,
     int& alignment) {
   if (formats_.empty()) {
@@ -92,43 +94,47 @@ SoraVideoEncoderFactory::CreateInternalVideoEncoder(
 
   int n = 0;
   for (auto& enc : config_.encoders) {
-    // The behavior when an unsupported format is passed to CreateVideoEncoder is undefined,
-    // so make sure only formats that are definitely supported are passed to CreateVideoEncoder.
+    // The behavior when an unsupported format is passed to Create is undefined,
+    // so make sure only formats that are definitely supported are passed to Create.
     std::function<std::unique_ptr<webrtc::VideoEncoder>(
-        const webrtc::SdpVideoFormat&)>
+        const webrtc::Environment&, const webrtc::SdpVideoFormat&)>
         create_video_encoder;
     std::vector<webrtc::SdpVideoFormat> supported_formats = formats_[n++];
 
     if (enc.factory != nullptr) {
-      create_video_encoder =
-          [factory = enc.factory.get()](const webrtc::SdpVideoFormat& format) {
-            return factory->CreateVideoEncoder(format);
-          };
+      create_video_encoder = [factory = enc.factory.get()](
+                                 const webrtc::Environment& env,
+                                 const webrtc::SdpVideoFormat& format) {
+        return factory->Create(env, format);
+      };
       // We can't tell whether encoders created via a factory need alignment, so align them all
       alignment = 16;
     } else if (enc.create_video_encoder != nullptr) {
-      create_video_encoder = enc.create_video_encoder;
+      create_video_encoder = [&enc](const webrtc::Environment& env,
+                                    const webrtc::SdpVideoFormat& format) {
+        return enc.create_video_encoder(format);
+      };
       alignment = enc.alignment;
     }
 
     for (const auto& f : supported_formats) {
       if (f.IsSameCodec(format)) {
-        return create_video_encoder(format);
+        return create_video_encoder(env, format);
       }
     }
   }
   return nullptr;
 }
 
-std::unique_ptr<webrtc::VideoEncoder>
-SoraVideoEncoderFactory::CreateVideoEncoder(
+std::unique_ptr<webrtc::VideoEncoder> SoraVideoEncoderFactory::Create(
+    const webrtc::Environment& env,
     const webrtc::SdpVideoFormat& format) {
   if (internal_encoder_factory_ != nullptr) {
     // For simulcast, wrap with the adapter and unconditionally align everything
     std::unique_ptr<webrtc::VideoEncoder> encoder =
         std::make_unique<webrtc::SimulcastEncoderAdapter>(
-            internal_encoder_factory_.get(), format);
+            env, internal_encoder_factory_.get(), nullptr, format);
 
     if (config_.force_i420_conversion_for_simulcast_adapter) {
       encoder = std::make_unique(std::move(encoder));
@@ -140,7 +146,7 @@ SoraVideoEncoderFactory::CreateVideoEncoder(
   }
 
   int alignment = 0;
-  auto encoder = CreateInternalVideoEncoder(format, alignment);
+  auto encoder = CreateInternalVideoEncoder(env, format, alignment);
   if (encoder == nullptr) {
     return nullptr;
   }
@@ -305,24 +311,24 @@ SoraVideoEncoderFactoryConfig GetDefaultVideoEncoderFactoryConfig(
 SoraVideoEncoderFactoryConfig GetSoftwareOnlyVideoEncoderFactoryConfig(
     std::optional<std::string> openh264) {
   SoraVideoEncoderFactoryConfig config;
-  config.encoders.push_back(VideoEncoderConfig(
-      webrtc::kVideoCodecVP8,
-      [](auto format) { return webrtc::VP8Encoder::Create(); }));
+  config.encoders.push_back(
+      VideoEncoderConfig(webrtc::kVideoCodecVP8, [](auto format) {
+        return webrtc::CreateVp8Encoder(webrtc::CreateEnvironment());
+      }));
   config.encoders.push_back(
       VideoEncoderConfig(webrtc::kVideoCodecVP9, [](auto format) {
-        return webrtc::VP9Encoder::Create(cricket::CreateVideoCodec(format));
+        return webrtc::CreateVp9Encoder(webrtc::CreateEnvironment());
      }));
   if (openh264) {
     config.encoders.push_back(VideoEncoderConfig(
         webrtc::kVideoCodecH264, [openh264 = *openh264](auto format) {
-          return CreateOpenH264VideoEncoder(cricket::CreateVideoCodec(format),
-                                            openh264);
+          return CreateOpenH264VideoEncoder(format, openh264);
        }));
   }
 #if !defined(__arm__) || defined(__aarch64__) || defined(__ARM_NEON__)
   config.encoders.push_back(VideoEncoderConfig(
       webrtc::kVideoCodecAV1,
-      [](auto format) { return webrtc::CreateLibaomAv1Encoder(); }));
+      [](auto format) { return webrtc::CreateLibaomAv1Encoder(webrtc::CreateEnvironment()); }));
 #endif
   return config;
 }
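Downstream usage of the updated helpers keeps the same shape; a sketch (the OpenH264 path is illustrative and optional):

```cpp
sora::SoraVideoEncoderFactoryConfig config =
    sora::GetSoftwareOnlyVideoEncoderFactoryConfig("/path/to/libopenh264.so");
auto factory = std::make_unique<sora::SoraVideoEncoderFactory>(config);
// factory->Create(env, format) now requires a webrtc::Environment,
// matching the m125 webrtc::VideoEncoderFactory interface above.
```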