* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/clipping_predictor.h"
#include <algorithm>
#include <memory>
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc2/clipping_predictor_level_buffer.h"
#include "modules/audio_processing/agc2/gain_map_internal.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_minmax.h"
namespace webrtc {
namespace {
// Bound on the magnitude of the gain reduction used by the adaptive step
// estimation: the estimated gain change is clamped to
// [-kClippingPredictorMaxGainChange, 0] before it is handed to
// `ComputeVolumeUpdate()` as a gain error in dB.
constexpr int kClippingPredictorMaxGainChange = 15;
// Returns the input volume reached by stepping away from `input_volume`
// through `kGainMap` until the gain difference relative to `input_volume`
// covers `gain_error_db` or a volume bound is hit. A positive error moves the
// volume up (never beyond `max_input_volume`), a negative error moves it down
// (never below `min_input_volume`) and a zero error returns `input_volume`
// unchanged.
int ComputeVolumeUpdate(int gain_error_db,
                        int input_volume,
                        int min_input_volume,
                        int max_input_volume) {
  RTC_DCHECK_GE(input_volume, 0);
  RTC_DCHECK_LE(input_volume, max_input_volume);

  // Nothing to compensate for.
  if (gain_error_db == 0) {
    return input_volume;
  }

  // Gain applied at the current volume; all differences are relative to it.
  const auto base_gain = kGainMap[input_volume];
  int volume = input_volume;

  if (gain_error_db > 0) {
    // Raise the volume until the gain increase covers the error.
    while (volume < max_input_volume &&
           kGainMap[volume] - base_gain < gain_error_db) {
      ++volume;
    }
    return volume;
  }

  // Lower the volume until the gain decrease covers the (negative) error.
  while (volume > min_input_volume &&
         kGainMap[volume] - base_gain > gain_error_db) {
    --volume;
  }
  return volume;
}
// Returns the crest factor of `level`: `FloatS16ToDbfs()` of the peak
// (`level.max`) minus `FloatS16ToDbfs()` of the square root of
// `level.average`.
float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) {
  const float peak_dbfs = FloatS16ToDbfs(level.max);
  const float rms_dbfs = FloatS16ToDbfs(std::sqrt(level.average));
  return peak_dbfs - rms_dbfs;
}
class ClippingEventPredictor : public ClippingPredictor {
public:
ClippingEventPredictor(int num_channels,
int window_length,
int reference_window_length,
int reference_window_delay,
float clipping_threshold,
float crest_factor_margin)
: window_length_(window_length),
reference_window_length_(reference_window_length),
reference_window_delay_(reference_window_delay),
clipping_threshold_(clipping_threshold),
crest_factor_margin_(crest_factor_margin) {
RTC_DCHECK_GT(num_channels, 0);
RTC_DCHECK_GT(window_length, 0);
RTC_DCHECK_GT(reference_window_length, 0);
RTC_DCHECK_GE(reference_window_delay, 0);
RTC_DCHECK_GT(reference_window_length + reference_window_delay,
window_length);
const int buffer_length = GetMinFramesProcessed();
RTC_DCHECK_GT(buffer_length, 0);
for (int i = 0; i < num_channels; ++i) {
ch_buffers_.push_back(
std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
}
}
ClippingEventPredictor(const ClippingEventPredictor&) = delete;
ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete;
~ClippingEventPredictor() {}
void Reset() {
const int num_channels = ch_buffers_.size();
for (int i = 0; i < num_channels; ++i) {
ch_buffers_[i]->Reset();
}
}
void Analyze(const AudioFrameView<const float>& frame) {
const int num_channels = frame.num_channels();
RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
const int samples_per_channel = frame.samples_per_channel();
RTC_DCHECK_GT(samples_per_channel, 0);
for (int channel = 0; channel < num_channels; ++channel) {
float sum_squares = 0.0f;
float peak = 0.0f;
for (const auto& sample : frame.channel(channel)) {
sum_squares += sample * sample;
peak = std::max(std::fabs(sample), peak);
}
ch_buffers_[channel]->Push(
{sum_squares / static_cast<float>(samples_per_channel), peak});
}
}
absl::optional<int> EstimateClippedLevelStep(int channel,
int level,
int default_step,
int min_mic_level,
int max_mic_level) const {
RTC_CHECK_GE(channel, 0);
RTC_CHECK_LT(channel, ch_buffers_.size());
RTC_DCHECK_GE(level, 0);
RTC_DCHECK_LE(level, 255);
RTC_DCHECK_GT(default_step, 0);
RTC_DCHECK_LE(default_step, 255);
RTC_DCHECK_GE(min_mic_level, 0);
RTC_DCHECK_LE(min_mic_level, 255);
RTC_DCHECK_GE(max_mic_level, 0);
RTC_DCHECK_LE(max_mic_level, 255);
if (level <= min_mic_level) {
return absl::nullopt;
}
if (PredictClippingEvent(channel)) {
const int new_level =
rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level);
const int step = level - new_level;
if (step > 0) {
return step;
}
}
return absl::nullopt;
}
private:
int GetMinFramesProcessed() const {
return reference_window_delay_ + reference_window_length_;
}
bool PredictClippingEvent(int channel) const {
const auto metrics =
ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
if (!metrics.has_value() ||
!(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
return false;
}
const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
reference_window_delay_, reference_window_length_);
if (!reference_metrics.has_value()) {
return false;
}
const float crest_factor = ComputeCrestFactor(metrics.value());
const float reference_crest_factor =
ComputeCrestFactor(reference_metrics.value());
if (crest_factor < reference_crest_factor - crest_factor_margin_) {
return true;
}
return false;
}
std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
const int window_length_;
const int reference_window_length_;
const int reference_window_delay_;
const float clipping_threshold_;
const float crest_factor_margin_;
};
class ClippingPeakPredictor : public ClippingPredictor {
public:
explicit ClippingPeakPredictor(int num_channels,
int window_length,
int reference_window_length,
int reference_window_delay,
int clipping_threshold,
bool adaptive_step_estimation)
: window_length_(window_length),
reference_window_length_(reference_window_length),
reference_window_delay_(reference_window_delay),
clipping_threshold_(clipping_threshold),
adaptive_step_estimation_(adaptive_step_estimation) {
RTC_DCHECK_GT(num_channels, 0);
RTC_DCHECK_GT(window_length, 0);
RTC_DCHECK_GT(reference_window_length, 0);
RTC_DCHECK_GE(reference_window_delay, 0);
RTC_DCHECK_GT(reference_window_length + reference_window_delay,
window_length);
const int buffer_length = GetMinFramesProcessed();
RTC_DCHECK_GT(buffer_length, 0);
for (int i = 0; i < num_channels; ++i) {
ch_buffers_.push_back(
std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
}
}
ClippingPeakPredictor(const ClippingPeakPredictor&) = delete;
ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete;
~ClippingPeakPredictor() {}
void Reset() {
const int num_channels = ch_buffers_.size();
for (int i = 0; i < num_channels; ++i) {
ch_buffers_[i]->Reset();
}
}
void Analyze(const AudioFrameView<const float>& frame) {
const int num_channels = frame.num_channels();
RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
const int samples_per_channel = frame.samples_per_channel();
RTC_DCHECK_GT(samples_per_channel, 0);
for (int channel = 0; channel < num_channels; ++channel) {
float sum_squares = 0.0f;
float peak = 0.0f;
for (const auto& sample : frame.channel(channel)) {
sum_squares += sample * sample;
peak = std::max(std::fabs(sample), peak);
}
ch_buffers_[channel]->Push(
{sum_squares / static_cast<float>(samples_per_channel), peak});
}
}
absl::optional<int> EstimateClippedLevelStep(int channel,
int level,
int default_step,
int min_mic_level,
int max_mic_level) const {
RTC_DCHECK_GE(channel, 0);
RTC_DCHECK_LT(channel, ch_buffers_.size());
RTC_DCHECK_GE(level, 0);
RTC_DCHECK_LE(level, 255);
RTC_DCHECK_GT(default_step, 0);
RTC_DCHECK_LE(default_step, 255);
RTC_DCHECK_GE(min_mic_level, 0);
RTC_DCHECK_LE(min_mic_level, 255);
RTC_DCHECK_GE(max_mic_level, 0);
RTC_DCHECK_LE(max_mic_level, 255);
if (level <= min_mic_level) {
return absl::nullopt;
}
absl::optional<float> estimate_db = EstimatePeakValue(channel);
if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) {
int step = 0;
if (!adaptive_step_estimation_) {
step = default_step;
} else {
const int estimated_gain_change =
rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())),
-kClippingPredictorMaxGainChange, 0);
step =
std::max(level - ComputeVolumeUpdate(estimated_gain_change, level,
min_mic_level, max_mic_level),
default_step);
}
const int new_level =
rtc::SafeClamp(level - step, min_mic_level, max_mic_level);
if (level > new_level) {
return level - new_level;
}
}
return absl::nullopt;
}
private:
int GetMinFramesProcessed() {
return reference_window_delay_ + reference_window_length_;
}
absl::optional<float> EstimatePeakValue(int channel) const {
const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
reference_window_delay_, reference_window_length_);
if (!reference_metrics.has_value()) {
return absl::nullopt;
}
const auto metrics =
ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
if (!metrics.has_value() ||
!(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
return absl::nullopt;
}
const float reference_crest_factor =
ComputeCrestFactor(reference_metrics.value());
const float& mean_squares = metrics.value().average;
const float projected_peak =
reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares));
return projected_peak;
}
std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
const int window_length_;
const int reference_window_length_;
const int reference_window_delay_;
const int clipping_threshold_;
const bool adaptive_step_estimation_;
};
}
// Creates the clipping predictor selected by `config.mode` for `num_channels`
// channels. Returns nullptr when `config.enabled` is false, and also when
// `config.mode` holds a value that is not one of the handled enumerators.
std::unique_ptr<ClippingPredictor> CreateClippingPredictor(
    int num_channels,
    const AudioProcessing::Config::GainController1::AnalogGainController::
        ClippingPredictor& config) {
  if (!config.enabled) {
    RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction disabled.";
    return nullptr;
  }
  RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction enabled.";
  using ClippingPredictorMode = AudioProcessing::Config::GainController1::
      AnalogGainController::ClippingPredictor::Mode;
  switch (config.mode) {
    case ClippingPredictorMode::kClippingEventPrediction:
      return std::make_unique<ClippingEventPredictor>(
          num_channels, config.window_length, config.reference_window_length,
          config.reference_window_delay, config.clipping_threshold,
          config.crest_factor_margin);
    case ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction:
      return std::make_unique<ClippingPeakPredictor>(
          num_channels, config.window_length, config.reference_window_length,
          config.reference_window_delay, config.clipping_threshold,
          /*adaptive_step_estimation=*/true);
    case ClippingPredictorMode::kFixedStepClippingPeakPrediction:
      return std::make_unique<ClippingPeakPredictor>(
          num_channels, config.window_length, config.reference_window_length,
          config.reference_window_delay, config.clipping_threshold,
          /*adaptive_step_estimation=*/false);
  }
  RTC_DCHECK_NOTREACHED();
  // Explicit return so that an unexpected `config.mode` value cannot make
  // control flow off the end of this value-returning function (undefined
  // behavior) in builds where the check above does not terminate.
  return nullptr;
}
}