#include "media/cast/encoding/audio_encoder.h"
#include <stdint.h>
#include <algorithm>
#include <limits>
#include <string>
#include <utility>
#include "base/compiler_specific.h"
#include "base/containers/heap_array.h"
#include "base/containers/span.h"
#include "base/functional/bind.h"
#include "base/functional/callback_helpers.h"
#include "base/location.h"
#include "base/logging.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/raw_span.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_refptr.h"
#include "base/numerics/byte_conversions.h"
#include "base/numerics/safe_conversions.h"
#include "base/time/tick_clock.h"
#include "base/time/time.h"
#include "base/trace_event/trace_event.h"
#include "build/build_config.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_codecs.h"
#include "media/base/audio_sample_types.h"
#include "media/cast/common/rtp_time.h"
#include "media/cast/common/sender_encoded_frame.h"
#include "media/cast/constants.h"
#include "third_party/openscreen/src/cast/streaming/public/encoded_frame.h"
#include "third_party/opus/src/include/opus.h"
#include "third_party/perfetto/include/perfetto/tracing/track.h"
#if BUILDFLAG(IS_APPLE)
#include <AudioToolbox/AudioToolbox.h>
#endif
namespace media {
namespace cast {
namespace {
// EncodeAudio() treats a gap as an underrun, and skips the RTP timestamp
// forward, only when the new recording timestamp is ahead of the encoder's
// timeline by more than this many whole frame durations.
constexpr int kUnderrunSkipThreshold = 3;

// Frame rate used to size Opus frames: each frame holds
// sampling_rate / kDefaultFramesPerSecond samples per channel.
constexpr int kDefaultFramesPerSecond = 100;
// Deleter for std::unique_ptr<OpusEncoder, ...>: releases the encoder through
// opus_encoder_destroy().
struct OpusEncoderDeleter {
  void operator()(OpusEncoder* encoder) {
    opus_encoder_destroy(encoder);
  }
};
}
class AudioEncoder::ImplBase
: public base::RefCountedThreadSafe<AudioEncoder::ImplBase> {
public:
REQUIRE_ADOPTION_FOR_REFCOUNTED_TYPE();
// Stores the stream configuration shared by all codec implementations and
// validates it.
//
// |samples_per_frame| is the number of samples (per channel) that make up one
// encoded frame; |frame_duration_| is derived from it and |sampling_rate|.
// When the configuration is unusable, |operational_status_| is set to
// STATUS_INVALID_CONFIGURATION. Otherwise it is left at STATUS_UNINITIALIZED
// so that the subclass constructor can finish initialization.
ImplBase(const scoped_refptr<CastEnvironment>& cast_environment,
         AudioCodec codec,
         int num_channels,
         int sampling_rate,
         int samples_per_frame,
         int bitrate,
         FrameEncodedCallback callback)
    : cast_environment_(cast_environment),
      codec_(codec),
      num_channels_(num_channels),
      samples_per_frame_(samples_per_frame),
      bitrate_(bitrate),
      callback_(std::move(callback)),
      operational_status_(STATUS_UNINITIALIZED),
      // A non-positive sampling rate produces a zero duration (rejected by the
      // validation below) instead of dividing by zero or yielding a negative
      // or infinite duration that would slip past the is_zero() check.
      frame_duration_(
          sampling_rate > 0
              ? base::Seconds(static_cast<double>(samples_per_frame_) /
                              sampling_rate)
              : base::TimeDelta()),
      buffer_fill_end_(0),
      frame_id_(FrameId::first()),
      samples_dropped_from_buffer_(0) {
  // Upper bound on samples-per-frame times channels.
  // NOTE(review): presumably 48 samples/ms (48 kHz) x 2 channels x 100 ms --
  // confirm.
  const int64_t kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100;
  // Widen before multiplying so that oversized inputs cannot overflow int.
  const int64_t samples_times_channels =
      int64_t{samples_per_frame_} * num_channels_;
  if (num_channels_ <= 0 || samples_per_frame_ <= 0 ||
      frame_duration_.is_zero() ||
      samples_times_channels > kMaxSamplesTimesChannelsPerFrame) {
    operational_status_ = STATUS_INVALID_CONFIGURATION;
  }
}
ImplBase(const ImplBase&) = delete;
ImplBase& operator=(const ImplBase&) = delete;
// Current value of |operational_status_|, as set by the constructors.
OperationalStatus InitializationResult() const {
  return operational_status_;
}

// Number of samples (per channel) that make up one encoded frame.
int samples_per_frame() const {
  return samples_per_frame_;
}

// Duration of audio covered by one encoded frame.
base::TimeDelta frame_duration() const {
  return frame_duration_;
}

// Bitrate reported by the codec. The base implementation reports 0; codec
// implementations may override.
virtual int GetBitrate() const {
  return 0;
}
// Feeds the samples in |audio_bus| into the codec's frame buffer and encodes
// every frame that becomes full. Each successfully encoded frame is handed to
// |callback_| via a task posted to the main thread. Samples that do not fill
// a whole frame stay buffered for the next call.
//
// |recorded_time| is the capture time of the first sample in |audio_bus|; it
// must not be null. Requires |operational_status_| == STATUS_INITIALIZED.
void EncodeAudio(std::unique_ptr<AudioBus> audio_bus,
const base::TimeTicks recorded_time) {
DCHECK_EQ(operational_status_, STATUS_INITIALIZED);
DCHECK(!recorded_time.is_null());
// Amount of audio time represented by the samples already buffered from
// earlier calls.
base::TimeDelta buffer_fill_duration =
buffer_fill_end_ * frame_duration_ / samples_per_frame_;
// Underrun detection (skipped on the first call, when no capture time has
// been recorded yet): compare |recorded_time| with where the timeline would
// be had audio arrived continuously.
if (!frame_capture_time_.is_null()) {
const base::TimeDelta amount_ahead_by =
recorded_time - (frame_capture_time_ + buffer_fill_duration);
const int64_t num_frames_missed = amount_ahead_by.IntDiv(frame_duration_);
if (num_frames_missed > kUnderrunSkipThreshold) {
// Discard the partially filled buffer and jump the RTP timestamp ahead by
// the number of whole frames that were missed.
samples_dropped_from_buffer_ += buffer_fill_end_;
buffer_fill_end_ = 0;
buffer_fill_duration = base::TimeDelta();
frame_rtp_timestamp_ +=
RtpTimeDelta::FromTicks(num_frames_missed * samples_per_frame_);
DVLOG(1) << "Skipping RTP timestamp ahead to account for "
<< num_frames_missed * samples_per_frame_
<< " samples' worth of underrun.";
TRACE_EVENT_INSTANT2("cast.stream", "Audio Skip",
TRACE_EVENT_SCOPE_THREAD, "frames missed",
num_frames_missed, "samples dropped",
samples_dropped_from_buffer_);
}
}
// Capture time of the frame currently being filled: back-date
// |recorded_time| by the audio that is already in the buffer.
frame_capture_time_ = recorded_time - buffer_fill_duration;
// Consume |audio_bus|, producing zero or more encoded frames.
int src_pos = 0;
while (src_pos < audio_bus->frames()) {
// Taken from base::TimeTicks::Now() rather than the CastEnvironment clock;
// it is only used for the encoder utilization computed below.
const base::TimeTicks start_time = base::TimeTicks::Now();
// Transfer as many samples as fit in the remainder of the frame buffer.
const int num_samples_to_xfer = std::min(
samples_per_frame_ - buffer_fill_end_, audio_bus->frames() - src_pos);
DCHECK_EQ(audio_bus->channels(), num_channels_);
TransferSamplesIntoBuffer(audio_bus.get(), src_pos, buffer_fill_end_,
num_samples_to_xfer);
src_pos += num_samples_to_xfer;
buffer_fill_end_ += num_samples_to_xfer;
// Not enough samples for a whole frame: keep them buffered until the next
// call.
if (buffer_fill_end_ < samples_per_frame_) {
break;
}
// Every audio frame is marked as a key frame that references itself.
auto audio_frame = std::make_unique<SenderEncodedFrame>();
audio_frame->is_key_frame = true;
audio_frame->frame_id = frame_id_;
audio_frame->referenced_frame_id = frame_id_;
audio_frame->rtp_timestamp = frame_rtp_timestamp_;
audio_frame->reference_time = frame_capture_time_;
audio_frame->capture_begin_time = frame_capture_time_;
// NOTE(review): capture_end_time is set to the same instant as
// capture_begin_time -- confirm whether frame_capture_time_ +
// frame_duration_ was intended.
audio_frame->capture_end_time = frame_capture_time_;
TRACE_EVENT_BEGIN("cast.stream", "Audio Encode",
perfetto::Track::FromPointer(audio_frame.get()),
"frame_id", frame_id_.lower_32_bits(), "rtp_timestamp",
frame_rtp_timestamp_.lower_32_bits());
audio_frame->data = EncodeFromFilledBuffer();
// An empty result means there is nothing to deliver for this frame. Note
// that the matching TRACE_EVENT_END is only emitted on the non-empty path.
if (!audio_frame->data.empty()) {
// Utilization = wall-clock time spent on this frame / frame duration.
audio_frame->encoder_utilization =
(base::TimeTicks::Now() - start_time) / frame_duration_;
TRACE_EVENT_END(
"cast.stream", perfetto::Track::FromPointer(audio_frame.get()),
"encoder_utilization", audio_frame->encoder_utilization);
audio_frame->encode_completion_time = cast_environment_->NowTicks();
cast_environment_->PostTask(
CastEnvironment::ThreadId::kMain, FROM_HERE,
base::BindOnce(callback_, std::move(audio_frame),
samples_dropped_from_buffer_));
// The dropped-sample count has been reported; start counting afresh.
samples_dropped_from_buffer_ = 0;
}
// Advance to the next frame regardless of whether this one was delivered.
buffer_fill_end_ = 0;
++frame_id_;
frame_rtp_timestamp_ += RtpTimeDelta::FromTicks(samples_per_frame_);
frame_capture_time_ += frame_duration_;
}
}
protected:
// RefCountedThreadSafe needs access to the non-public destructor.
friend class base::RefCountedThreadSafe<ImplBase>;
virtual ~ImplBase() = default;
// Copies |num_samples| samples, starting at |source_offset| in |audio_bus|,
// into the codec-specific frame buffer starting at |buffer_fill_offset|.
virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
int source_offset,
int buffer_fill_offset,
int num_samples) = 0;
// Encodes the full frame buffer and returns the encoded bytes. EncodeAudio()
// treats an empty result as "nothing to deliver for this frame".
virtual base::HeapArray<uint8_t> EncodeFromFilledBuffer() = 0;
// Provides task posting and the clock used for |encode_completion_time|.
const scoped_refptr<CastEnvironment> cast_environment_;
// Codec identifier supplied by the subclass constructor.
const AudioCodec codec_;
// Number of audio channels expected in every AudioBus passed to
// EncodeAudio().
const int num_channels_;
// Number of samples (per channel) in one encoded frame.
const int samples_per_frame_;
// Bitrate requested at construction.
const int bitrate_;
// Run on the main thread for each successfully encoded frame.
const FrameEncodedCallback callback_;
// Initialization state; subclasses update it from their constructors.
OperationalStatus operational_status_;
// Duration of audio covered by |samples_per_frame_| samples.
const base::TimeDelta frame_duration_;
private:
// Number of samples currently held in the frame buffer, in the range
// [0, samples_per_frame_).
int buffer_fill_end_;
// ID assigned to the next encoded frame.
FrameId frame_id_;
// RTP timestamp of the frame currently being filled.
RtpTimeTicks frame_rtp_timestamp_;
// Capture time of the frame currently being filled; null until the first
// call to EncodeAudio().
base::TimeTicks frame_capture_time_;
// Samples discarded due to underrun since the last frame was delivered to
// |callback_|.
int samples_dropped_from_buffer_;
};
class AudioEncoder::OpusImpl final : public AudioEncoder::ImplBase {
public:
OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment,
int num_channels,
int sampling_rate,
int bitrate,
FrameEncodedCallback callback)
: ImplBase(cast_environment,
AudioCodec::kOpus,
num_channels,
sampling_rate,
sampling_rate / kDefaultFramesPerSecond,
bitrate,
std::move(callback)),
opus_encoder_(opus_encoder_create(sampling_rate,
num_channels,
OPUS_APPLICATION_AUDIO,
nullptr)),
buffer_(
base::HeapArray<float>::Uninit(num_channels * samples_per_frame_)) {
if (ImplBase::operational_status_ != STATUS_UNINITIALIZED ||
sampling_rate % samples_per_frame_ != 0 ||
!IsValidFrameDuration(frame_duration_)) {
return;
}
if (!opus_encoder_) {
ImplBase::operational_status_ = STATUS_CODEC_INIT_FAILED;
return;
}
if (opus_encoder_init(opus_encoder_.get(), sampling_rate, num_channels,
OPUS_APPLICATION_AUDIO) != OPUS_OK) {
ImplBase::operational_status_ = STATUS_INVALID_CONFIGURATION;
return;
}
ImplBase::operational_status_ = STATUS_INITIALIZED;
if (bitrate <= 0) {
bitrate = OPUS_AUTO;
}
CHECK_EQ(opus_encoder_ctl(opus_encoder_.get(), OPUS_SET_BITRATE(bitrate)),
OPUS_OK);
}
OpusImpl(const OpusImpl&) = delete;
OpusImpl& operator=(const OpusImpl&) = delete;
// Returns the bitrate currently reported by the Opus encoder via
// OPUS_GET_BITRATE. CHECK-fails if the query does not return OPUS_OK.
int GetBitrate() const override {
int bitrate = 0;
CHECK_EQ(
opus_encoder_ctl(opus_encoder_.get(),
UNSAFE_BUFFERS(OPUS_GET_BITRATE(&bitrate))),
OPUS_OK);
return bitrate;
}
private:
~OpusImpl() final = default;
void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
int source_offset,
int buffer_fill_offset,
int num_samples) final {
DCHECK_EQ(audio_bus->channels(), num_channels_);
base::span<float> dest =
buffer_.subspan(buffer_fill_offset * num_channels_);
audio_bus->ToInterleavedPartial<Float32SampleTypeTraits>(
source_offset, num_samples, dest.data());
}
// Encodes the |samples_per_frame_| interleaved samples held in |buffer_|.
// Returns the encoded packet, or an empty array when the encoder reports an
// error (negative result) or produces a packet of 0 or 1 bytes.
base::HeapArray<uint8_t> EncodeFromFilledBuffer() final {
  auto payload = base::HeapArray<uint8_t>::Uninit(kOpusMaxPayloadSize);
  const opus_int32 encoded_size =
      opus_encode_float(opus_encoder_.get(), buffer_.data(),
                        samples_per_frame_, payload.data(),
                        kOpusMaxPayloadSize);

  // Only packets of two or more bytes are returned to the caller.
  if (encoded_size > 1) {
    return std::move(payload).take_first(encoded_size);
  }

  if (encoded_size < 0) {
    DLOG(ERROR) << "Error code from opus_encode_float(): " << encoded_size;
  }
  return {};
}
// Returns true if |duration| is exactly one of the frame durations this
// implementation accepts: 2.5, 5, 10, 20, 40 or 60 milliseconds.
static bool IsValidFrameDuration(base::TimeDelta duration) {
  const base::TimeDelta kAcceptedDurations[] = {
      base::Microseconds(2500), base::Milliseconds(5),
      base::Milliseconds(10),   base::Milliseconds(20),
      base::Milliseconds(40),   base::Milliseconds(60),
  };
  for (const base::TimeDelta accepted : kAcceptedDurations) {
    if (duration == accepted) {
      return true;
    }
  }
  return false;
}
// Encoder state created by opus_encoder_create(); null if creation failed.
std::unique_ptr<OpusEncoder, OpusEncoderDeleter> opus_encoder_;
// Channel-interleaved samples for the frame currently being filled.
base::HeapArray<float> buffer_;
// Size, in bytes, of the output buffer handed to opus_encode_float().
static const int kOpusMaxPayloadSize = 4000;
};
#if BUILDFLAG(IS_APPLE)
class AudioEncoder::AppleAacImpl final : public AudioEncoder::ImplBase {
// Number of samples (per channel) in one frame handed to the AAC encoder;
// passed to ImplBase as the samples-per-frame value.
static const int kAccessUnitSamples = 1024;
// Extra bytes reserved in the output buffer, on top of the encoded packet,
// for the ADTS header written by AudioFileWritePackets().
static const int kAdtsHeaderSize = 7;
public:
// Creates an AAC encoder producing frames of |kAccessUnitSamples| samples.
// If ImplBase rejected the configuration, its status is left untouched.
// Otherwise the status becomes STATUS_INITIALIZED when Initialize() succeeds
// and STATUS_INVALID_CONFIGURATION when it fails.
AppleAacImpl(const scoped_refptr<CastEnvironment>& cast_environment,
             int num_channels,
             int sampling_rate,
             int bitrate,
             FrameEncodedCallback callback)
    : ImplBase(cast_environment,
               AudioCodec::kAAC,
               num_channels,
               sampling_rate,
               kAccessUnitSamples,
               bitrate,
               std::move(callback)),
      input_buffer_(AudioBus::Create(num_channels, kAccessUnitSamples)),
      input_bus_(AudioBus::CreateWrapper(num_channels)) {
  if (ImplBase::operational_status_ == STATUS_UNINITIALIZED) {
    ImplBase::operational_status_ = Initialize(sampling_rate, bitrate)
                                        ? STATUS_INITIALIZED
                                        : STATUS_INVALID_CONFIGURATION;
  }
}
AppleAacImpl(const AppleAacImpl&) = delete;
AppleAacImpl& operator=(const AppleAacImpl&) = delete;
private:
~AppleAacImpl() override { Teardown(); }
void Teardown() {
if (converter_) {
AudioConverterDispose(converter_);
converter_ = nullptr;
}
if (file_) {
AudioFileClose(file_);
file_ = nullptr;
}
}
// Sets up the AudioToolbox converter (PCM float -> AAC) and the ADTS "file"
// used to packetize its output.
//
// |sampling_rate| is applied to both the input and output stream
// descriptions. |bitrate| is applied to the converter only when positive.
//
// Returns true on success. Returns false as soon as any AudioToolbox call
// fails; objects created up to that point are left in |converter_| / |file_|
// and are released by Teardown().
bool Initialize(int sampling_rate, int bitrate) {
// Release anything left over from a previous initialization.
Teardown();
// Input format: packed, non-interleaved, native-endian float linear PCM,
// with one frame per packet and sizeof(float) bytes per frame.
AudioStreamBasicDescription in_asbd;
in_asbd.mSampleRate = sampling_rate;
in_asbd.mFormatID = kAudioFormatLinearPCM;
in_asbd.mFormatFlags =
AudioFormatFlags{kAudioFormatFlagsNativeFloatPacked} |
kAudioFormatFlagIsNonInterleaved;
in_asbd.mChannelsPerFrame = num_channels_;
in_asbd.mBitsPerChannel = sizeof(float) * 8;
in_asbd.mFramesPerPacket = 1;
in_asbd.mBytesPerPacket = in_asbd.mBytesPerFrame = sizeof(float);
in_asbd.mReserved = 0;
// Output format: MPEG-4 AAC. Only the sample rate, format ID and channel
// count are specified here; the kAudioFormatProperty_FormatInfo query is
// expected to fill in the remaining fields.
AudioStreamBasicDescription out_asbd = {};
out_asbd.mSampleRate = sampling_rate;
out_asbd.mFormatID = kAudioFormatMPEG4AAC;
out_asbd.mChannelsPerFrame = num_channels_;
UInt32 prop_size = sizeof(out_asbd);
if (AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, 0, nullptr,
&prop_size, &out_asbd) != noErr) {
return false;
}
if (AudioConverterNew(&in_asbd, &out_asbd, &converter_) != noErr) {
return false;
}
// Re-read the output description from the converter that was just created.
prop_size = sizeof(out_asbd);
if (AudioConverterGetProperty(converter_,
kAudioConverterCurrentOutputStreamDescription,
&prop_size, &out_asbd) != noErr) {
return false;
}
// A non-positive bitrate leaves the converter's bitrate setting untouched.
if (bitrate > 0) {
prop_size = sizeof(int);
if (AudioConverterSetProperty(converter_, kAudioConverterEncodeBitRate,
prop_size, &bitrate) != noErr) {
return false;
}
}
// Determine the largest packet the converter can emit. When the stream
// description reports 0 bytes per packet, ask the converter for its maximum
// output packet size instead.
uint32_t max_access_unit_size = out_asbd.mBytesPerPacket;
if (max_access_unit_size == 0) {
prop_size = sizeof(max_access_unit_size);
if (AudioConverterGetProperty(
converter_, kAudioConverterPropertyMaximumOutputPacketSize,
&prop_size, &max_access_unit_size) != noErr) {
return false;
}
}
max_access_unit_size_ = max_access_unit_size;
// Allocate storage for one encoded packet and point the converter's output
// buffer list at it. mDataByteSize is not set here; it is set before every
// encode in EncodeFromFilledBuffer().
access_unit_buffer_ =
base::HeapArray<uint8_t>::Uninit(max_access_unit_size);
converter_abl_.mNumberBuffers = 1;
converter_abl_.mBuffers[0].mNumberChannels = num_channels_;
converter_abl_.mBuffers[0].mData = access_unit_buffer_.data();
// Fetch the converter's "magic cookie": query its size first, then read it.
// It is set on |file_| once the file has been created below.
UInt32 cookie_size;
if (AudioConverterGetPropertyInfo(converter_,
kAudioConverterCompressionMagicCookie,
&cookie_size, nullptr) != noErr) {
return false;
}
auto cookie_data = base::HeapArray<uint8_t>::Uninit(cookie_size);
if (AudioConverterGetProperty(converter_,
kAudioConverterCompressionMagicCookie,
&cookie_size, cookie_data.data()) != noErr) {
return false;
}
// Create an ADTS-typed audio file whose I/O goes through the static File*
// callbacks of this class, with |this| as the client data pointer.
if (AudioFileInitializeWithCallbacks(
this, &FileReadCallback, &FileWriteCallback, &FileGetSizeCallback,
&FileSetSizeCallback, kAudioFileAAC_ADTSType, &out_asbd, 0,
&file_) != noErr) {
return false;
}
if (AudioFileSetProperty(file_, kAudioFilePropertyMagicCookieData,
cookie_size, cookie_data.data()) != noErr) {
return false;
}
// Initially |input_bus_| wraps the channels of |input_buffer_|.
input_bus_->set_frames(kAccessUnitSamples);
input_bus_->SetAllChannels(input_buffer_->AllChannels());
return true;
}
// Makes |num_samples| samples of |audio_bus|, starting at |source_offset|,
// available to the encoder.
//
// When the transfer covers a whole access unit and the source offset keeps
// the channel data aligned to AudioBus::kChannelAlignment, no copy is made:
// |input_bus_| is re-pointed directly at the caller's samples. In every other
// case the samples are copied into |input_buffer_| at |buffer_fill_offset|.
void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                               int source_offset,
                               int buffer_fill_offset,
                               int num_samples) final {
  DCHECK_EQ(audio_bus->channels(), input_buffer_->channels());

  const bool is_whole_access_unit = num_samples == kAccessUnitSamples;
  const bool is_source_aligned =
      source_offset * sizeof(float) % AudioBus::kChannelAlignment == 0;
  if (!(is_whole_access_unit && is_source_aligned)) {
    // Copy path: |input_bus_| must currently be wrapping |input_buffer_|.
    DCHECK_EQ(input_bus_->channel_span(0).data(),
              input_buffer_->channel_span(0).data());
    audio_bus->CopyPartialFramesTo(source_offset, num_samples,
                                   buffer_fill_offset, input_buffer_.get());
    return;
  }

  // Zero-copy path: a whole frame arrives at once, so nothing is buffered.
  DCHECK_EQ(buffer_fill_offset, 0);
  input_bus_->SetAllChannels(audio_bus->AllChannelsSubspan(
      base::checked_cast<size_t>(source_offset),
      static_cast<size_t>(kAccessUnitSamples)));
}
// Encodes the samples currently referenced by |input_bus_| into one AAC
// packet and returns it as written by AudioFileWritePackets() through
// FileWriteCallback(). Returns an empty array if either AudioToolbox call
// fails or produces no packets.
base::HeapArray<uint8_t> EncodeFromFilledBuffer() final {
// Set the output buffer size to the full capacity of the access unit buffer;
// it is read back below as the size of the encoded data.
converter_abl_.mBuffers[0].mDataByteSize = max_access_unit_size_;
OSStatus oserr;
// Request exactly one output packet; the input samples are supplied through
// ConverterFillDataCallback().
UInt32 io_num_packets = 1;
AudioStreamPacketDescription packet_description;
oserr = AudioConverterFillComplexBuffer(
converter_, &ConverterFillDataCallback, this, &io_num_packets,
&converter_abl_, &packet_description);
if (oserr != noErr || io_num_packets == 0) {
return {};
}
// Reserve room for the encoded packet plus |kAdtsHeaderSize| extra bytes.
auto out = base::HeapArray<uint8_t>::Uninit(
packet_description.mDataByteSize + kAdtsHeaderSize);
// FileWriteCallback() copies into the front of |output_buffer_| and then
// shrinks it past the written bytes, so point it at |out| for the duration
// of the write.
output_buffer_ = out;
oserr = AudioFileWritePackets(
file_, false, converter_abl_.mBuffers[0].mDataByteSize,
&packet_description, num_access_units_, &io_num_packets,
converter_abl_.mBuffers[0].mData);
// Whatever remains in |output_buffer_| was not written; trim |out| to the
// bytes that were, then clear |output_buffer_| so it no longer refers to
// |out|.
out = std::move(out).take_first(out.size() - output_buffer_.size());
output_buffer_ = {};
if (oserr != noErr || io_num_packets == 0) {
return {};
}
// Running count of packets written so far; passed to AudioFileWritePackets()
// on the next call.
num_access_units_ += io_num_packets;
return out;
}
// Input callback passed to AudioConverterFillComplexBuffer(). Points each
// buffer in |io_data| at the corresponding channel of the encoder's
// |input_bus_|, then resets |input_bus_| to wrap |input_buffer_| again so
// that the next frame starts from the copy path's buffer.
// |in_encoder| is the AppleAacImpl that issued the encode call.
static OSStatus ConverterFillDataCallback(
    AudioConverterRef in_converter,
    UInt32* io_num_packets,
    AudioBufferList* io_data,
    AudioStreamPacketDescription** out_packet_desc,
    void* in_encoder) {
  CHECK(in_encoder);
  AppleAacImpl& encoder = *reinterpret_cast<AppleAacImpl*>(in_encoder);
  AudioBus& input_bus = *encoder.input_bus_;

  DCHECK_EQ(static_cast<int>(*io_num_packets), kAccessUnitSamples);
  DCHECK_EQ(io_data->mNumberBuffers,
            static_cast<unsigned>(input_bus.channels()));

  // One single-channel buffer per channel (the input is non-interleaved).
  const int buffer_count = io_data->mNumberBuffers;
  for (int channel = 0; channel < buffer_count; ++channel) {
    auto& channel_buffer = UNSAFE_BUFFERS(io_data->mBuffers[channel]);
    channel_buffer.mNumberChannels = 1;
    channel_buffer.mDataByteSize = sizeof(float) * *io_num_packets;
    channel_buffer.mData = input_bus.channel_span(channel).data();
  }

  input_bus.SetAllChannels(encoder.input_buffer_->AllChannels());
  return noErr;
}
// Read callback registered with AudioFileInitializeWithCallbacks(). It is
// never expected to be invoked, so reaching it is treated as a bug.
static OSStatus FileReadCallback(void* in_encoder,
SInt64 in_position,
UInt32 in_size,
void* in_buffer,
UInt32* out_size) {
NOTREACHED();
}
// Write callback registered with AudioFileInitializeWithCallbacks(). Appends
// the |in_size| bytes at |in_buffer| to the encoder's |output_buffer_| and
// advances |output_buffer_| past them, so that consecutive writes land one
// after another. |in_position| is ignored. CHECK-fails if the remaining
// output space is smaller than |in_size|.
static OSStatus FileWriteCallback(void* in_encoder,
                                  SInt64 in_position,
                                  UInt32 in_size,
                                  const void* in_buffer,
                                  UInt32* out_size) {
  CHECK(in_encoder);
  CHECK(in_buffer);
  auto& encoder = *(reinterpret_cast<AppleAacImpl*>(in_encoder));
  CHECK_GE(encoder.output_buffer_.size(), in_size);

  const auto incoming_bytes = UNSAFE_BUFFERS(
      base::span(reinterpret_cast<const uint8_t*>(in_buffer), in_size));
  base::raw_span<uint8_t>& remaining_output = encoder.output_buffer_;
  remaining_output.copy_prefix_from(incoming_bytes);
  remaining_output = remaining_output.subspan(in_size);

  // Report that everything was written.
  *out_size = in_size;
  return noErr;
}
// Get-size callback registered with AudioFileInitializeWithCallbacks(). It
// is never expected to be invoked, so reaching it is treated as a bug.
static SInt64 FileGetSizeCallback(void* in_encoder) {
NOTREACHED();
}
// Set-size callback registered with AudioFileInitializeWithCallbacks().
// Ignores the requested size and reports success.
static OSStatus FileSetSizeCallback(void* in_encoder, SInt64 in_size) {
return noErr;
}
// Owns storage for one frame of |kAccessUnitSamples| samples per channel;
// destination of the copy path in TransferSamplesIntoBuffer().
const std::unique_ptr<AudioBus> input_buffer_;
// Wrapper bus that the converter reads from. It wraps |input_buffer_| by
// default, and is temporarily re-pointed at the caller's samples by the
// zero-copy path in TransferSamplesIntoBuffer().
const std::unique_ptr<AudioBus> input_bus_;
// Storage for one encoded packet; sized by Initialize().
base::HeapArray<uint8_t> access_unit_buffer_;
// Capacity of |access_unit_buffer_| in bytes.
uint32_t max_access_unit_size_ = 0;
// Unwritten remainder of the output array during
// AudioFileWritePackets(); empty at all other times.
base::raw_span<uint8_t> output_buffer_;
// AudioToolbox objects created by Initialize() and released by Teardown().
AudioConverterRef converter_ = nullptr;
AudioFileID file_ = nullptr;
// Output buffer list handed to AudioConverterFillComplexBuffer(); its single
// buffer points at |access_unit_buffer_|.
AudioBufferList converter_abl_;
// Total number of packets written to |file_| so far.
uint64_t num_access_units_ = 0u;
};
#endif
// Creates the codec-specific implementation for |codec|. Opus is always
// available; AAC is available on Apple platforms only. Any other codec is
// treated as a programming error and leaves |impl_| null.
AudioEncoder::AudioEncoder(
    const scoped_refptr<CastEnvironment>& cast_environment,
    int num_channels,
    int sampling_rate,
    int bitrate,
    AudioCodec codec,
    FrameEncodedCallback frame_encoded_callback)
    : cast_environment_(cast_environment) {
  // The thread checker binds to whichever thread makes the first checked
  // call, rather than to the constructing thread.
  DETACH_FROM_THREAD(insert_thread_checker_);

  if (codec == AudioCodec::kOpus) {
    impl_ = base::MakeRefCounted<OpusImpl>(cast_environment, num_channels,
                                           sampling_rate, bitrate,
                                           std::move(frame_encoded_callback));
    return;
  }
#if BUILDFLAG(IS_APPLE)
  if (codec == AudioCodec::kAAC) {
    impl_ = base::MakeRefCounted<AppleAacImpl>(
        cast_environment, num_channels, sampling_rate, bitrate,
        std::move(frame_encoded_callback));
    return;
  }
#endif
  NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
}
AudioEncoder::~AudioEncoder() = default;
// Reports the implementation's initialization status, or
// STATUS_UNSUPPORTED_CODEC when no implementation was created.
OperationalStatus AudioEncoder::InitializationResult() const {
  DCHECK_CALLED_ON_VALID_THREAD(insert_thread_checker_);
  return impl_ ? impl_->InitializationResult() : STATUS_UNSUPPORTED_CODEC;
}
// Returns the number of samples (per channel) in one encoded frame.
// CHECK-fails unless the encoder initialized successfully.
int AudioEncoder::GetSamplesPerFrame() const {
DCHECK_CALLED_ON_VALID_THREAD(insert_thread_checker_);
CHECK_EQ(InitializationResult(), STATUS_INITIALIZED);
return impl_->samples_per_frame();
}
// Returns the duration of audio covered by one encoded frame.
// CHECK-fails unless the encoder initialized successfully.
base::TimeDelta AudioEncoder::GetFrameDuration() const {
DCHECK_CALLED_ON_VALID_THREAD(insert_thread_checker_);
CHECK_EQ(InitializationResult(), STATUS_INITIALIZED);
return impl_->frame_duration();
}
// Returns the bitrate reported by the implementation, or 0 when the encoder
// did not initialize successfully.
int AudioEncoder::GetBitrate() const {
  DCHECK_CALLED_ON_VALID_THREAD(insert_thread_checker_);
  const bool is_initialized = InitializationResult() == STATUS_INITIALIZED;
  return is_initialized ? impl_->GetBitrate() : 0;
}
// Queues |audio_bus| for encoding: posts a task to the audio thread that runs
// ImplBase::EncodeAudio() with |recorded_time|. The task holds a reference to
// |impl_|. CHECK-fails unless the encoder initialized successfully.
void AudioEncoder::InsertAudio(std::unique_ptr<AudioBus> audio_bus,
                               const base::TimeTicks recorded_time) {
  DCHECK_CALLED_ON_VALID_THREAD(insert_thread_checker_);
  DCHECK(audio_bus.get());
  CHECK_EQ(InitializationResult(), STATUS_INITIALIZED);

  auto encode_task =
      base::BindOnce(&AudioEncoder::ImplBase::EncodeAudio, impl_,
                     std::move(audio_bus), recorded_time);
  cast_environment_->PostTask(CastEnvironment::ThreadId::kAudio, FROM_HERE,
                              std::move(encode_task));
}
}
}