/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/input_volume_controller.h"
#include <algorithm>
#include <fstream>
#include <limits>
#include <string>
#include <vector>
#include "rtc_base/numerics/safe_minmax.h"
#include "rtc_base/strings/string_builder.h"
#include "system_wrappers/include/metrics.h"
#include "test/field_trial.h"
#include "test/gmock.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"
using ::testing::_;
using ::testing::AtLeast;
using ::testing::DoAll;
using ::testing::Return;
using ::testing::SetArgPointee;
namespace webrtc {
namespace {
// Sample rate (Hz) and channel count of the audio buffer owned by
// `InputVolumeControllerTestHelper`.
constexpr int kSampleRateHz = 32000;
constexpr int kNumChannels = 1;
// Applied input volume that many tests pass to the first analysis call.
constexpr int kInitialInputVolume = 128;
// Value assigned to `clipped_level_min` in the test configurations.
constexpr int kClippedMin = 165;
// Clipped-sample ratio passed to `CallAnalyzeInputAudio()`; it is larger than
// `kClippedRatioThreshold`.
constexpr float kAboveClippedThreshold = 0.2f;
// Value assigned to `min_input_volume` by `CreateInputVolumeController()`.
constexpr int kMinMicLevel = 20;
// Values assigned to the matching `clipped_*` fields of the test
// configurations.
constexpr int kClippedLevelStep = 15;
constexpr float kClippedRatioThreshold = 0.1f;
constexpr int kClippedWaitFrames = 300;
// Speech probabilities above and below `kSpeechProbabilityThreshold`.
constexpr float kHighSpeechProbability = 0.7f;
constexpr float kLowSpeechProbability = 0.1f;
// Speech level passed as `speech_level_dbfs`; it lies between the -30 and -18
// target range bounds set in the test configurations.
constexpr float kSpeechLevel = -25.0f;
// Values assigned to `speech_probability_threshold` and
// `speech_ratio_threshold` by `CreateInputVolumeController()`.
constexpr float kSpeechProbabilityThreshold = 0.5f;
constexpr float kSpeechRatioThreshold = 0.8f;
// Smallest and largest 16-bit sample values, stored as floats.
constexpr float kMinSample = std::numeric_limits<int16_t>::min();
constexpr float kMaxSample = std::numeric_limits<int16_t>::max();
// Shorthand aliases for the configuration types used in this file.
using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
AnalogGainController::ClippingPredictor;
using InputVolumeControllerConfig = InputVolumeController::Config;
// Value-initialized clipping predictor configuration; its `enabled` member is
// read by `GetInputVolumeControllerTestConfig()`.
constexpr ClippingPredictorConfig kDefaultClippingPredictorConfig{};
std::unique_ptr<InputVolumeController> CreateInputVolumeController(
int clipped_level_step = kClippedLevelStep,
float clipped_ratio_threshold = kClippedRatioThreshold,
int clipped_wait_frames = kClippedWaitFrames,
bool enable_clipping_predictor = false,
int update_input_volume_wait_frames = 0) {
InputVolumeControllerConfig config{
.min_input_volume = kMinMicLevel,
.clipped_level_min = kClippedMin,
.clipped_level_step = clipped_level_step,
.clipped_ratio_threshold = clipped_ratio_threshold,
.clipped_wait_frames = clipped_wait_frames,
.enable_clipping_predictor = enable_clipping_predictor,
.target_range_max_dbfs = -18,
.target_range_min_dbfs = -30,
.update_input_volume_wait_frames = update_input_volume_wait_frames,
.speech_probability_threshold = kSpeechProbabilityThreshold,
.speech_ratio_threshold = kSpeechRatioThreshold,
};
return std::make_unique<InputVolumeController>(1,
config);
}
// Fills every channel of `audio_buffer`: the first
// `clipped_ratio * num_frames` samples (truncated to an integer) are set to
// 32767 and all remaining samples are set to `samples_value`.
// `samples_value` must lie in [kMinSample, kMaxSample] and `clipped_ratio` in
// [0, 1]; both are checked with DCHECKs only.
void WriteAudioBufferSamples(float samples_value,
                             float clipped_ratio,
                             AudioBuffer& audio_buffer) {
  RTC_DCHECK_GE(samples_value, kMinSample);
  RTC_DCHECK_LE(samples_value, kMaxSample);
  RTC_DCHECK_GE(clipped_ratio, 0.0f);
  RTC_DCHECK_LE(clipped_ratio, 1.0f);

  const int channel_count = audio_buffer.num_channels();
  const int frame_count = audio_buffer.num_frames();
  // Float-to-int conversion truncates towards zero.
  const int clipped_count = clipped_ratio * frame_count;

  for (int ch = 0; ch < channel_count; ++ch) {
    int index = 0;
    // Leading part: full-scale samples.
    while (index < clipped_count) {
      audio_buffer.channels()[ch][index] = 32767.0f;
      ++index;
    }
    // Trailing part: the requested constant value.
    while (index < frame_count) {
      audio_buffer.channels()[ch][index] = samples_value;
      ++index;
    }
  }
}
// Fills every channel of `audio_buffer` with the alternating pattern
// `samples_value, 0, samples_value, 0, ...` (even indices hold
// `samples_value`, odd indices hold zero).
// `samples_value` must lie in [kMinSample, kMaxSample]; this is checked with
// DCHECKs only.
void WriteAlternatingAudioBufferSamples(float samples_value,
                                        AudioBuffer& audio_buffer) {
  RTC_DCHECK_GE(samples_value, kMinSample);
  RTC_DCHECK_LE(samples_value, kMaxSample);
  const int num_channels = audio_buffer.num_channels();
  const int num_frames = audio_buffer.num_frames();
  for (int ch = 0; ch < num_channels; ++ch) {
    // Visit each sample exactly once so that a buffer with an odd number of
    // frames is never written one element past its end.
    for (int i = 0; i < num_frames; ++i) {
      audio_buffer.channels()[ch][i] = (i % 2 == 0) ? samples_value : 0.0f;
    }
  }
}
// Reads 16-bit samples from the "audio_processing/agc/agc_audio.pcm" test
// resource and feeds them, one audio buffer at a time, to an
// `InputVolumeController`.
class SpeechSamplesReader {
 private:
  // Format used to size the audio buffer and the read buffer.
  // NOTE(review): presumably matches the format of the PCM resource; confirm.
  static constexpr int kPcmSampleRateHz = 16000;
  static constexpr int kPcmNumChannels = 1;
  static constexpr int kPcmBytesPerSamples = sizeof(int16_t);

 public:
  // Opens the PCM resource in binary mode and allocates a read buffer that
  // holds exactly one audio buffer worth of samples. Crashes (RTC_CHECK) if
  // the file cannot be opened.
  SpeechSamplesReader()
      : is_(test::ResourcePath("audio_processing/agc/agc_audio", "pcm"),
            std::ios::binary | std::ios::ate),
        audio_buffer_(kPcmSampleRateHz,
                      kPcmNumChannels,
                      kPcmSampleRateHz,
                      kPcmNumChannels,
                      kPcmSampleRateHz,
                      kPcmNumChannels),
        buffer_(audio_buffer_.num_frames()),
        buffer_num_bytes_(buffer_.size() * kPcmBytesPerSamples) {
    RTC_CHECK(is_);
  }

  // Feeds up to `num_frames` buffers read from the beginning of the PCM file
  // to `controller`. Each sample is scaled by `gain_db` (converted to a
  // linear gain) and clamped to [kMinSample, kMaxSample]. For every buffer,
  // `AnalyzeInputAudio()` is called with the current applied volume and
  // `RecommendInputVolume()` with `speech_probability` and
  // `speech_level_dbfs`; the recommendation becomes the applied volume for
  // the next buffer. Stops early when the file has no complete buffer left.
  // Returns the last applied input volume (`applied_input_volume` itself if
  // nothing was fed). Requires `controller.capture_output_used()`.
  int Feed(int num_frames,
           int applied_input_volume,
           int gain_db,
           float speech_probability,
           absl::optional<float> speech_level_dbfs,
           InputVolumeController& controller) {
    RTC_DCHECK(controller.capture_output_used());

    // From dB to linear gain.
    float gain = std::pow(10.0f, gain_db / 20.0f);
    // A previous call that reached the end of the file leaves failbit set,
    // which would make the seek below fail and every following read return
    // nothing. Reset the state before rewinding.
    is_.clear();
    is_.seekg(0, is_.beg);

    for (int i = 0; i < num_frames; ++i) {
      is_.read(reinterpret_cast<char*>(buffer_.data()), buffer_num_bytes_);
      if (is_.gcount() < buffer_num_bytes_) {
        // Fewer bytes than one full buffer are left; stop.
        break;
      }
      // Apply the gain and copy the samples into channel 0 of
      // `audio_buffer_`.
      std::transform(buffer_.begin(), buffer_.end(),
                     audio_buffer_.channels()[0], [gain](int16_t v) -> float {
                       return rtc::SafeClamp(static_cast<float>(v) * gain,
                                             kMinSample, kMaxSample);
                     });
      controller.AnalyzeInputAudio(applied_input_volume, audio_buffer_);
      const auto recommended_input_volume = controller.RecommendInputVolume(
          speech_probability, speech_level_dbfs);
      EXPECT_TRUE(recommended_input_volume.has_value());
      applied_input_volume = *recommended_input_volume;
    }
    return applied_input_volume;
  }

 private:
  std::ifstream is_;
  AudioBuffer audio_buffer_;
  // Raw samples of one buffer, as read from the file.
  std::vector<int16_t> buffer_;
  // Size in bytes of `buffer_`.
  const std::streamsize buffer_num_bytes_;
};
// Runs one update step on `mono_controller`: sets `applied_input_volume` as
// the stream analog level, expects the recommended level to equal it right
// after, processes `rms_error_dbfs` / `speech_probability`, and returns the
// recommended analog level that results.
float UpdateRecommendedInputVolume(MonoInputVolumeController& mono_controller,
                                   int applied_input_volume,
                                   float speech_probability,
                                   absl::optional<float> rms_error_dbfs) {
  mono_controller.set_stream_analog_level(applied_input_volume);
  const auto level_after_set = mono_controller.recommended_analog_level();
  EXPECT_EQ(level_after_set, applied_input_volume);

  mono_controller.Process(rms_error_dbfs, speech_probability);
  const auto level_after_process = mono_controller.recommended_analog_level();
  return level_after_process;
}
}
// Returns the configuration shared by the tests that need deterministic
// volume updates: the clipping fields come from the file-level constants, the
// target range is [-30, -18], no frames are awaited between updates, and the
// speech thresholds are 0.5 (probability) and 1.0 (ratio).
// `min_input_volume` is left at its default.
constexpr InputVolumeControllerConfig GetInputVolumeControllerTestConfig() {
  return InputVolumeControllerConfig{
      .clipped_level_min = kClippedMin,
      .clipped_level_step = kClippedLevelStep,
      .clipped_ratio_threshold = kClippedRatioThreshold,
      .clipped_wait_frames = kClippedWaitFrames,
      .enable_clipping_predictor = kDefaultClippingPredictorConfig.enabled,
      .target_range_max_dbfs = -18,
      .target_range_min_dbfs = -30,
      .update_input_volume_wait_frames = 0,
      .speech_probability_threshold = 0.5f,
      .speech_ratio_threshold = 1.0f,
  };
}
// Owns an `InputVolumeController` together with the audio buffer that is fed
// to it, and offers shortcuts for the call sequences used by the tests.
class InputVolumeControllerTestHelper {
 public:
  // Creates the controller from `config`, initializes it and fills
  // `audio_buffer` with zeros.
  InputVolumeControllerTestHelper(
      const InputVolumeController::Config& config =
          GetInputVolumeControllerTestConfig())
      : audio_buffer(kSampleRateHz,
                     kNumChannels,
                     kSampleRateHz,
                     kNumChannels,
                     kSampleRateHz,
                     kNumChannels),
        controller(1, config) {
    controller.Initialize();
    WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.0f,
                            audio_buffer);
  }

  // Calls `AnalyzeInputAudio()` followed by `RecommendInputVolume()`
  // `num_calls` times on the current content of `audio_buffer`. Each
  // recommendation is applied on the next iteration; when there is none,
  // `applied_input_volume` is applied instead. Returns the last
  // recommendation, which may be empty.
  absl::optional<int> CallAgcSequence(int applied_input_volume,
                                      float speech_probability,
                                      absl::optional<float> speech_level_dbfs,
                                      int num_calls = 1) {
    RTC_DCHECK_GE(num_calls, 1);
    absl::optional<int> volume = applied_input_volume;
    for (int calls_left = num_calls; calls_left > 0; --calls_left) {
      const int volume_to_apply =
          volume.has_value() ? *volume : applied_input_volume;
      controller.AnalyzeInputAudio(volume_to_apply, audio_buffer);
      volume = controller.RecommendInputVolume(speech_probability,
                                               speech_level_dbfs);
      if (!volume.has_value()) {
        continue;
      }
      // A returned recommendation must match the controller's getter.
      EXPECT_EQ(*volume, controller.recommended_input_volume());
    }
    return volume;
  }

  // Overwrites `audio_buffer` with an alternating pattern whose peak is 10%
  // of `kMaxSample`, then runs `num_calls` analyze / recommend rounds starting
  // from `initial_volume`, applying each recommendation on the next round.
  // Expects every round to produce a recommendation and returns the last one.
  // Requires `controller.capture_output_used()`.
  int CallRecommendInputVolume(int num_calls,
                               int initial_volume,
                               float speech_probability,
                               absl::optional<float> speech_level_dbfs) {
    RTC_DCHECK(controller.capture_output_used());
    WriteAlternatingAudioBufferSamples(0.1f * kMaxSample, audio_buffer);

    int current_volume = initial_volume;
    for (int calls_left = num_calls; calls_left > 0; --calls_left) {
      controller.AnalyzeInputAudio(current_volume, audio_buffer);
      const auto recommendation = controller.RecommendInputVolume(
          speech_probability, speech_level_dbfs);
      EXPECT_TRUE(recommendation.has_value());
      current_volume = *recommendation;
    }
    return current_volume;
  }

  // Overwrites `audio_buffer` so that the leading `clipped_ratio` fraction of
  // each channel is full scale and the rest is zero, then calls
  // `AnalyzeInputAudio()` `num_calls` times, each time applying the
  // controller's current recommended input volume.
  // Requires `controller.capture_output_used()`.
  void CallAnalyzeInputAudio(int num_calls, float clipped_ratio) {
    RTC_DCHECK(controller.capture_output_used());
    RTC_DCHECK_GE(clipped_ratio, 0.0f);
    RTC_DCHECK_LE(clipped_ratio, 1.0f);
    WriteAudioBufferSamples(/*samples_value=*/0.0f, clipped_ratio,
                            audio_buffer);
    for (int calls_left = num_calls; calls_left > 0; --calls_left) {
      const int volume_to_apply = controller.recommended_input_volume();
      controller.AnalyzeInputAudio(volume_to_apply, audio_buffer);
    }
  }

  // Declared before `controller`, hence constructed first.
  AudioBuffer audio_buffer;
  InputVolumeController controller;
};
// Fixture parametrized on a (number of channels, sample rate in Hz) pair.
class InputVolumeControllerChannelSampleRateTest
    : public ::testing::TestWithParam<std::tuple<int, int>> {
 protected:
  // First tuple element.
  int GetNumChannels() const {
    const auto& param = GetParam();
    return std::get<kNumChannelsIndex>(param);
  }
  // Second tuple element.
  int GetSampleRateHz() const {
    const auto& param = GetParam();
    return std::get<kSampleRateHzIndex>(param);
  }

 private:
  // Positions of the two values inside the parameter tuple.
  static constexpr int kNumChannelsIndex = 0;
  static constexpr int kSampleRateHzIndex = 1;
};
// Smoke test run for each channel count / sample rate combination: checks
// that the recommended input volume goes down on clipping audio, goes up
// again on a speech level below the target range, and goes down on a speech
// level above the target range.
TEST_P(InputVolumeControllerChannelSampleRateTest, CheckIsAlive) {
  const int num_channels = GetNumChannels();
  const int sample_rate_hz = GetSampleRateHz();

  constexpr InputVolumeController::Config kConfig{.enable_clipping_predictor =
                                                      true};
  InputVolumeController controller(num_channels, kConfig);
  controller.Initialize();
  AudioBuffer buffer(sample_rate_hz, num_channels, sample_rate_hz, num_channels,
                     sample_rate_hz, num_channels);

  constexpr int kStartupVolume = 100;
  int applied_initial_volume = kStartupVolume;

  // Trigger a downward adaptation with full-scale audio while reporting a
  // speech level in the middle of the target range.
  constexpr int kLevelWithinTargetDbfs =
      (kConfig.target_range_min_dbfs + kConfig.target_range_max_dbfs) / 2;
  WriteAlternatingAudioBufferSamples(kMaxSample, buffer);
  const int initial_volume1 = applied_initial_volume;
  for (int i = 0; i < 400; ++i) {
    controller.AnalyzeInputAudio(applied_initial_volume, buffer);
    auto recommended_input_volume = controller.RecommendInputVolume(
        kLowSpeechProbability,
        /*speech_level_dbfs=*/kLevelWithinTargetDbfs);
    ASSERT_TRUE(recommended_input_volume.has_value());
    applied_initial_volume = *recommended_input_volume;
  }
  ASSERT_LT(controller.recommended_input_volume(), initial_volume1);

  // Fill in audio far from full scale.
  WriteAlternatingAudioBufferSamples(1234.5f, buffer);

  // Trigger an upward adaptation with a speech level below the target range.
  // Speech-level driven updates are paced by
  // `update_input_volume_wait_frames` (not by the clipping wait period), the
  // same bound the downward loop below uses.
  const int initial_volume2 = controller.recommended_input_volume();
  for (int i = 0; i < kConfig.update_input_volume_wait_frames; ++i) {
    controller.AnalyzeInputAudio(applied_initial_volume, buffer);
    auto recommended_input_volume = controller.RecommendInputVolume(
        kHighSpeechProbability,
        /*speech_level_dbfs=*/kConfig.target_range_min_dbfs - 5);
    ASSERT_TRUE(recommended_input_volume.has_value());
    applied_initial_volume = *recommended_input_volume;
  }
  EXPECT_GT(controller.recommended_input_volume(), initial_volume2);

  // Trigger a downward adaptation with a speech level above the target range.
  const int initial_volume = controller.recommended_input_volume();
  for (int i = 0; i < kConfig.update_input_volume_wait_frames; ++i) {
    controller.AnalyzeInputAudio(applied_initial_volume, buffer);
    auto recommended_input_volume = controller.RecommendInputVolume(
        kHighSpeechProbability,
        /*speech_level_dbfs=*/kConfig.target_range_max_dbfs + 5);
    ASSERT_TRUE(recommended_input_volume.has_value());
    applied_initial_volume = *recommended_input_volume;
  }
  EXPECT_LT(controller.recommended_input_volume(), initial_volume);
}
// Instantiates `InputVolumeControllerChannelSampleRateTest` (with an empty
// instantiation name) for every combination of 1, 2, 3 or 6 channels with a
// sample rate of 8000, 16000, 32000 or 48000 Hz.
INSTANTIATE_TEST_SUITE_P(
,
InputVolumeControllerChannelSampleRateTest,
::testing::Combine(::testing::Values(1, 2, 3, 6),
::testing::Values(8000, 16000, 32000, 48000)));
// Fixture parametrized on a single integer; the tests below use the parameter
// as the `min_input_volume` of the controller configuration.
class InputVolumeControllerParametrizedTest
: public ::testing::TestWithParam<int> {};
// Expects an applied input volume of 128 to be recommended unchanged after a
// single call.
TEST_P(InputVolumeControllerParametrizedTest,
       StartupMinVolumeConfigurationRespectedWhenAppliedInputVolumeAboveMin) {
  constexpr int kAppliedInputVolume = 128;
  InputVolumeControllerTestHelper helper({.min_input_volume = GetParam()});

  const int recommended_volume =
      *helper.CallAgcSequence(kAppliedInputVolume,
                              /*speech_probability=*/0.9f,
                              /*speech_level_dbfs=*/-80);

  EXPECT_EQ(recommended_volume, kAppliedInputVolume);
}
// Expects the recommendation for an applied input volume of 10 to be at least
// 10 after a single call.
TEST_P(
    InputVolumeControllerParametrizedTest,
    StartupMinVolumeConfigurationRespectedWhenAppliedInputVolumeMaybeBelowMin) {
  constexpr int kAppliedInputVolume = 10;
  InputVolumeControllerTestHelper helper({.min_input_volume = GetParam()});

  const int recommended_volume =
      *helper.CallAgcSequence(kAppliedInputVolume,
                              /*speech_probability=*/0.9f,
                              /*speech_level_dbfs=*/-80);

  EXPECT_GE(recommended_volume, kAppliedInputVolume);
}
// Expects an applied input volume of 1 to be replaced by the configured
// minimum on the first call.
TEST_P(InputVolumeControllerParametrizedTest,
       StartupMinVolumeRespectedWhenAppliedVolumeNonZero) {
  const int kMinInputVolume = GetParam();
  const InputVolumeControllerConfig config{
      .min_input_volume = kMinInputVolume,
      .target_range_min_dbfs = -30,
      .update_input_volume_wait_frames = 1,
      .speech_probability_threshold = 0.5f,
      .speech_ratio_threshold = 0.5f};
  InputVolumeControllerTestHelper helper(config);

  const int recommended_volume =
      *helper.CallAgcSequence(/*applied_input_volume=*/1,
                              /*speech_probability=*/0.9f,
                              /*speech_level_dbfs=*/-80);

  EXPECT_EQ(recommended_volume, kMinInputVolume);
}
// Applies an input volume of 1 a hundred times in a row and expects every
// recommendation to be at least the configured minimum.
TEST_P(InputVolumeControllerParametrizedTest,
       MinVolumeRepeatedlyRespectedWhenAppliedVolumeNonZero) {
  constexpr int kNumRepetitions = 100;
  const int kMinInputVolume = GetParam();
  const InputVolumeControllerConfig config{
      .min_input_volume = kMinInputVolume,
      .target_range_min_dbfs = -30,
      .update_input_volume_wait_frames = 1,
      .speech_probability_threshold = 0.5f,
      .speech_ratio_threshold = 0.5f};
  InputVolumeControllerTestHelper helper(config);

  for (int repetition = 0; repetition < kNumRepetitions; ++repetition) {
    const int recommended_volume =
        *helper.CallAgcSequence(/*applied_input_volume=*/1,
                                /*speech_probability=*/0.9f,
                                /*speech_level_dbfs=*/-80);
    EXPECT_GE(recommended_volume, kMinInputVolume);
  }
}
// Applies an input volume of 0 twice: the first recommendation is expected to
// be the configured minimum, the second one to be 0.
TEST_P(InputVolumeControllerParametrizedTest,
       StartupMinVolumeRespectedOnceWhenAppliedVolumeZero) {
  const int kMinInputVolume = GetParam();
  const InputVolumeControllerConfig config{
      .min_input_volume = kMinInputVolume,
      .target_range_min_dbfs = -30,
      .update_input_volume_wait_frames = 1,
      .speech_probability_threshold = 0.5f,
      .speech_ratio_threshold = 0.5f};
  InputVolumeControllerTestHelper helper(config);

  const int first_recommendation =
      *helper.CallAgcSequence(/*applied_input_volume=*/0,
                              /*speech_probability=*/0.9f,
                              /*speech_level_dbfs=*/-80);
  EXPECT_EQ(first_recommendation, kMinInputVolume);

  const int second_recommendation =
      *helper.CallAgcSequence(/*applied_input_volume=*/0,
                              /*speech_probability=*/0.9f,
                              /*speech_level_dbfs=*/-80);
  EXPECT_EQ(second_recommendation, 0);
}
// Feeds a sequence of speech levels and checks the recommended volume against
// golden values. The configured target range is [-30, -18].
TEST_P(InputVolumeControllerParametrizedTest, MicVolumeResponseToRmsError) {
  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
  config.min_input_volume = GetParam();
  InputVolumeControllerTestHelper helper(config);
  int volume = *helper.CallAgcSequence(kInitialInputVolume,
                                       kHighSpeechProbability, kSpeechLevel);

  // Runs one recommendation round at `speech_level_dbfs`, applying and then
  // updating `volume`.
  const auto feed_speech_level = [&helper, &volume](float speech_level_dbfs) {
    volume = helper.CallRecommendInputVolume(
        /*num_calls=*/1, volume, kHighSpeechProbability, speech_level_dbfs);
  };

  // Levels down to -29: the volume is still the initial one.
  feed_speech_level(-23.0f);
  feed_speech_level(-28.0f);
  feed_speech_level(-29.0f);
  EXPECT_EQ(volume, 128);

  // Level below the target range: the volume is raised.
  feed_speech_level(-38.0f);
  EXPECT_EQ(volume, 156);

  // Levels at or above the upper bound of the target range.
  feed_speech_level(-23.0f);
  feed_speech_level(-18.0f);
  feed_speech_level(-17.0f);
  EXPECT_EQ(volume, 155);

  feed_speech_level(-17.0f);
  EXPECT_EQ(volume, 151);

  feed_speech_level(-9.0f);
  EXPECT_EQ(volume, 119);
}
// Checks, against golden values, that the size of a single volume change is
// bounded in both directions and that the volume stays within
// [min_input_volume, 255].
TEST_P(InputVolumeControllerParametrizedTest, MicVolumeIsLimited) {
  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
  const int min_input_volume = GetParam();
  config.min_input_volume = min_input_volume;
  InputVolumeControllerTestHelper helper(config);
  int volume = *helper.CallAgcSequence(kInitialInputVolume,
                                       kHighSpeechProbability, kSpeechLevel);

  // Runs one recommendation round at `speech_level_dbfs`, applying and then
  // updating `volume`.
  const auto feed_speech_level = [&helper, &volume](float speech_level_dbfs) {
    volume = helper.CallRecommendInputVolume(
        /*num_calls=*/1, volume, kHighSpeechProbability, speech_level_dbfs);
  };

  // Very low speech level: the volume climbs in bounded steps up to 255.
  for (const int expected_volume : {183, 243, 255}) {
    feed_speech_level(-48.0f);
    EXPECT_EQ(volume, expected_volume);
  }

  feed_speech_level(-17.0f);
  EXPECT_EQ(volume, 254);

  // Very high speech level: the volume drops in bounded steps.
  for (const int expected_volume : {194, 137, 88, 54, 33}) {
    feed_speech_level(22.0f);
    EXPECT_EQ(volume, expected_volume);
  }

  // The volume does not go below the configured minimum.
  feed_speech_level(22.0f);
  EXPECT_EQ(volume, std::max(18, min_input_volume));

  feed_speech_level(22.0f);
  EXPECT_EQ(volume, std::max(12, min_input_volume));
}
// Runs two identical controllers on full-scale audio after marking the
// capture output of one of them as unused: the one still in use is expected
// to lower the volume, the other one to leave it at 255.
TEST_P(InputVolumeControllerParametrizedTest, NoActionWhileMuted) {
  constexpr int kStartVolume = 255;
  InputVolumeControllerTestHelper active_helper(
      {.min_input_volume = GetParam()});
  InputVolumeControllerTestHelper muted_helper(
      {.min_input_volume = GetParam()});

  int active_volume = *active_helper.CallAgcSequence(
      kStartVolume, kHighSpeechProbability, kSpeechLevel, /*num_calls=*/1);
  int muted_volume = *muted_helper.CallAgcSequence(
      kStartVolume, kHighSpeechProbability, kSpeechLevel, /*num_calls=*/1);
  EXPECT_EQ(active_volume, 255);
  EXPECT_EQ(muted_volume, 255);

  muted_helper.controller.HandleCaptureOutputUsedChange(false);

  WriteAlternatingAudioBufferSamples(kMaxSample, active_helper.audio_buffer);
  WriteAlternatingAudioBufferSamples(kMaxSample, muted_helper.audio_buffer);

  active_volume = *active_helper.CallAgcSequence(
      active_volume, kHighSpeechProbability, kSpeechLevel, /*num_calls=*/1);
  muted_volume = *muted_helper.CallAgcSequence(
      muted_volume, kHighSpeechProbability, kSpeechLevel, /*num_calls=*/1);

  EXPECT_LT(active_volume, 255);
  EXPECT_EQ(muted_volume, 255);
}
// After the capture output is marked unused and then used again, an applied
// input volume of 127 is expected to be recommended unchanged.
TEST_P(InputVolumeControllerParametrizedTest,
       UnmutingChecksVolumeWithoutRaising) {
  constexpr int kInputVolume = 127;
  InputVolumeControllerTestHelper helper({.min_input_volume = GetParam()});
  helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability,
                         kSpeechLevel);

  helper.controller.HandleCaptureOutputUsedChange(false);
  helper.controller.HandleCaptureOutputUsedChange(true);

  const int recommended_volume = helper.CallRecommendInputVolume(
      /*num_calls=*/1, kInputVolume, kHighSpeechProbability, kSpeechLevel);
  EXPECT_EQ(recommended_volume, kInputVolume);
}
// After the capture output is marked unused and then used again, an applied
// input volume of 11 is expected to be raised to the configured minimum.
TEST_P(InputVolumeControllerParametrizedTest, UnmutingRaisesTooLowVolume) {
  constexpr int kInputVolume = 11;
  const int min_input_volume = GetParam();
  InputVolumeControllerTestHelper helper(
      {.min_input_volume = min_input_volume});
  helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability,
                         kSpeechLevel);

  helper.controller.HandleCaptureOutputUsedChange(false);
  helper.controller.HandleCaptureOutputUsedChange(true);

  const int recommended_volume = helper.CallRecommendInputVolume(
      /*num_calls=*/1, kInputVolume, kHighSpeechProbability, kSpeechLevel);
  EXPECT_EQ(recommended_volume, min_input_volume);
}
// Applies volumes that differ from the last recommendation (154, then 100)
// and expects them to be recommended unchanged; afterwards the controller is
// expected to resume adapting (100 -> 99).
TEST_P(InputVolumeControllerParametrizedTest,
       ManualLevelChangeResultsInNoSetMicCall) {
  constexpr int kRaisedVolume = 154;
  constexpr int kLoweredVolume = 100;
  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
  config.min_input_volume = GetParam();
  InputVolumeControllerTestHelper helper(config);
  int volume = *helper.CallAgcSequence(kInitialInputVolume,
                                       kHighSpeechProbability, kSpeechLevel);

  // Upward change of the applied volume.
  ASSERT_NE(volume, kRaisedVolume);
  volume = helper.CallRecommendInputVolume(
      /*num_calls=*/1, kRaisedVolume, kHighSpeechProbability, -29.0f);
  EXPECT_EQ(volume, 154);

  // Downward change of the applied volume.
  volume = helper.CallRecommendInputVolume(
      /*num_calls=*/1, kLoweredVolume, kHighSpeechProbability, -17.0f);
  EXPECT_EQ(volume, 100);

  // Applying the last recommendation again yields a regular adjustment.
  volume = helper.CallRecommendInputVolume(
      /*num_calls=*/1, volume, kHighSpeechProbability, -17.0f);
  EXPECT_EQ(volume, 99);
}
// Drives the volume up to 255, then applies a volume of 50 and checks against
// golden values that the controller keeps it and adapts from there.
TEST_P(InputVolumeControllerParametrizedTest,
       RecoveryAfterManualLevelChangeFromMax) {
  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
  config.min_input_volume = GetParam();
  InputVolumeControllerTestHelper helper(config);
  int volume = *helper.CallAgcSequence(kInitialInputVolume,
                                       kHighSpeechProbability, kSpeechLevel);

  // Very low speech level: the volume climbs up to 255.
  for (const int expected_volume : {183, 243, 255}) {
    volume = helper.CallRecommendInputVolume(
        /*num_calls=*/1, volume, kHighSpeechProbability, -48.0f);
    EXPECT_EQ(volume, expected_volume);
  }

  // Apply a volume that differs from the last recommendation.
  volume = helper.CallRecommendInputVolume(
      /*num_calls=*/1, /*initial_volume=*/50, kHighSpeechProbability, -17.0f);
  EXPECT_EQ(helper.controller.recommended_input_volume(), 50);

  // The next low speech level raises the volume starting from there.
  volume = helper.CallRecommendInputVolume(
      /*num_calls=*/1, volume, kHighSpeechProbability, -38.0f);
  EXPECT_EQ(volume, 65);
}
// Applies a volume of 1 and expects the recommendation to be the configured
// minimum, to stay there for speech levels of -29 and -30, and to exceed it
// after ten rounds at a speech level of -38.
TEST_P(InputVolumeControllerParametrizedTest,
       EnforceMinInputVolumeDuringUpwardsAdjustment) {
  const int min_input_volume = GetParam();
  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
  config.min_input_volume = min_input_volume;
  InputVolumeControllerTestHelper helper(config);
  int volume = *helper.CallAgcSequence(kInitialInputVolume,
                                       kHighSpeechProbability, kSpeechLevel);

  // Applied volume of 1.
  volume = helper.CallRecommendInputVolume(
      /*num_calls=*/1, /*initial_volume=*/1, kHighSpeechProbability, -17.0f);
  EXPECT_EQ(volume, min_input_volume);

  // Speech levels at the lower end of the target range.
  for (const float speech_level_dbfs : {-29.0f, -30.0f}) {
    volume = helper.CallRecommendInputVolume(
        /*num_calls=*/1, volume, kHighSpeechProbability, speech_level_dbfs);
    EXPECT_EQ(volume, min_input_volume);
  }

  // Repeated low speech level observations.
  volume = helper.CallRecommendInputVolume(
      /*num_calls=*/10, volume, kHighSpeechProbability, -38.0f);
  EXPECT_GT(volume, min_input_volume);
}
// Applies a volume of 1 after the first call and expects the recommendation
// to be the configured minimum.
TEST_P(InputVolumeControllerParametrizedTest,
       RecoveryAfterManualLevelChangeBelowMin) {
  const int min_input_volume = GetParam();
  InputVolumeControllerTestHelper helper(
      {.min_input_volume = min_input_volume});
  int volume = *helper.CallAgcSequence(kInitialInputVolume,
                                       kHighSpeechProbability, kSpeechLevel);

  volume = helper.CallRecommendInputVolume(
      /*num_calls=*/1, /*initial_volume=*/1, kHighSpeechProbability, -17.0f);

  EXPECT_EQ(volume, min_input_volume);
}
// Analyzes 100 buffers without any clipped sample and expects the recommended
// volume to remain 128.
TEST_P(InputVolumeControllerParametrizedTest, NoClippingHasNoImpact) {
  InputVolumeControllerTestHelper helper({.min_input_volume = GetParam()});
  helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability,
                         kSpeechLevel);

  helper.CallAnalyzeInputAudio(/*num_calls=*/100, /*clipped_ratio=*/0);

  const int recommended_volume = helper.controller.recommended_input_volume();
  EXPECT_EQ(recommended_volume, 128);
}
// Analyzes one buffer with a clipped ratio of 0.099 and expects the
// recommended volume to remain 128.
TEST_P(InputVolumeControllerParametrizedTest,
       ClippingUnderThresholdHasNoImpact) {
  InputVolumeControllerTestHelper helper({.min_input_volume = GetParam()});
  helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability,
                         kSpeechLevel);

  helper.CallAnalyzeInputAudio(/*num_calls=*/1, /*clipped_ratio=*/0.099);

  const int recommended_volume = helper.controller.recommended_input_volume();
  EXPECT_EQ(recommended_volume, 128);
}
// Analyzes one buffer with a clipped ratio of 0.2 at volume 255 and expects
// the recommended volume to drop to 240.
TEST_P(InputVolumeControllerParametrizedTest, ClippingLowersVolume) {
  InputVolumeControllerTestHelper helper({.min_input_volume = GetParam()});
  helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability,
                         kSpeechLevel);

  helper.CallAnalyzeInputAudio(/*num_calls=*/1, /*clipped_ratio=*/0.2);

  const int recommended_volume = helper.controller.recommended_input_volume();
  EXPECT_EQ(recommended_volume, 240);
}
// After a first clipping-triggered reduction (255 -> 240), 300 further
// clipping buffers are expected to leave the volume untouched and the buffer
// after those to trigger the next reduction (240 -> 225).
TEST_P(InputVolumeControllerParametrizedTest,
       WaitingPeriodBetweenClippingChecks) {
  InputVolumeControllerTestHelper helper({.min_input_volume = GetParam()});
  helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability,
                         kSpeechLevel);
  const auto& controller = helper.controller;

  helper.CallAnalyzeInputAudio(/*num_calls=*/1, kAboveClippedThreshold);
  EXPECT_EQ(controller.recommended_input_volume(), 240);

  helper.CallAnalyzeInputAudio(/*num_calls=*/300, kAboveClippedThreshold);
  EXPECT_EQ(controller.recommended_input_volume(), 240);

  helper.CallAnalyzeInputAudio(/*num_calls=*/1, kAboveClippedThreshold);
  EXPECT_EQ(controller.recommended_input_volume(), 225);
}
// Starting at volume 180, clipping is expected to lower the recommended
// volume to `kClippedMin` and never below it, even after 1000 more clipping
// buffers.
TEST_P(InputVolumeControllerParametrizedTest, ClippingLoweringIsLimited) {
  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
  config.min_input_volume = GetParam();
  InputVolumeControllerTestHelper helper(config);
  helper.CallAgcSequence(/*applied_input_volume=*/180, kHighSpeechProbability,
                         kSpeechLevel);
  const auto& controller = helper.controller;

  helper.CallAnalyzeInputAudio(/*num_calls=*/1, kAboveClippedThreshold);
  EXPECT_EQ(controller.recommended_input_volume(), kClippedMin);

  helper.CallAnalyzeInputAudio(/*num_calls=*/1000, kAboveClippedThreshold);
  EXPECT_EQ(controller.recommended_input_volume(), kClippedMin);
}
// After clipping has lowered the volume from 255 to 240, ten rounds at a very
// low speech level are expected to leave the recommended volume at 240.
TEST_P(InputVolumeControllerParametrizedTest,
       ClippingMaxIsRespectedWhenEqualToLevel) {
  InputVolumeControllerTestHelper helper({.min_input_volume = GetParam()});
  helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability,
                         kSpeechLevel);
  const auto& controller = helper.controller;

  helper.CallAnalyzeInputAudio(/*num_calls=*/1, kAboveClippedThreshold);
  EXPECT_EQ(controller.recommended_input_volume(), 240);

  helper.CallRecommendInputVolume(/*num_calls=*/10, /*initial_volume=*/240,
                                  kHighSpeechProbability, -48.0f);
  EXPECT_EQ(controller.recommended_input_volume(), 240);
}
// After clipping has lowered the volume from 200 to 185, a very low speech
// level is expected to raise it to 240 and no further.
TEST_P(InputVolumeControllerParametrizedTest,
       ClippingMaxIsRespectedWhenHigherThanLevel) {
  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
  config.min_input_volume = GetParam();
  InputVolumeControllerTestHelper helper(config);
  helper.CallAgcSequence(/*applied_input_volume=*/200, kHighSpeechProbability,
                         kSpeechLevel);

  helper.CallAnalyzeInputAudio(/*num_calls=*/1, kAboveClippedThreshold);
  int volume = helper.controller.recommended_input_volume();
  EXPECT_EQ(volume, 185);

  // One round, then ten more: the result is 240 both times.
  for (const int num_calls : {1, 10}) {
    volume = helper.CallRecommendInputVolume(num_calls, volume,
                                             kHighSpeechProbability, -58.0f);
    EXPECT_EQ(volume, 240);
  }
}
// After clipping has lowered the volume from 225 to 210, a volume of 250 is
// applied. The golden values check that the controller accepts it, can move
// down to 210 and back up to 250, and does not go above 250.
TEST_P(InputVolumeControllerParametrizedTest, UserCanRaiseVolumeAfterClipping) {
  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
  config.min_input_volume = GetParam();
  InputVolumeControllerTestHelper helper(config);
  helper.CallAgcSequence(/*applied_input_volume=*/225, kHighSpeechProbability,
                         kSpeechLevel);

  helper.CallAnalyzeInputAudio(/*num_calls=*/1, kAboveClippedThreshold);
  EXPECT_EQ(helper.controller.recommended_input_volume(), 210);

  // Apply a volume above the last recommendation.
  int volume = helper.CallRecommendInputVolume(
      /*num_calls=*/1, /*initial_volume=*/250, kHighSpeechProbability, -32.0f);
  EXPECT_EQ(volume, 250);

  // High speech level: move down.
  volume = helper.CallRecommendInputVolume(
      /*num_calls=*/1, volume, kHighSpeechProbability, -8.0f);
  EXPECT_EQ(volume, 210);

  // Low speech level: move back up to 250.
  volume = helper.CallRecommendInputVolume(
      /*num_calls=*/1, volume, kHighSpeechProbability, -58.0f);
  EXPECT_EQ(volume, 250);

  // Still low speech level: no move beyond 250.
  volume = helper.CallRecommendInputVolume(
      /*num_calls=*/1, volume, kHighSpeechProbability, -48.0f);
  EXPECT_EQ(volume, 250);
}
// With an applied volume of 80, a clipping buffer is expected to leave the
// recommended volume unchanged.
TEST_P(InputVolumeControllerParametrizedTest,
       ClippingDoesNotPullLowVolumeBackUp) {
  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
  config.min_input_volume = GetParam();
  InputVolumeControllerTestHelper helper(config);
  helper.CallAgcSequence(/*applied_input_volume=*/80, kHighSpeechProbability,
                         kSpeechLevel);

  const int volume_before_clipping =
      helper.controller.recommended_input_volume();
  helper.CallAnalyzeInputAudio(/*num_calls=*/1, kAboveClippedThreshold);

  EXPECT_EQ(helper.controller.recommended_input_volume(),
            volume_before_clipping);
}
// With an applied volume of 0, ten rounds at a very low speech level are
// expected to keep the recommendation at 0.
TEST_P(InputVolumeControllerParametrizedTest, TakesNoActionOnZeroMicVolume) {
  InputVolumeControllerTestHelper helper({.min_input_volume = GetParam()});
  helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability,
                         kSpeechLevel);

  const int recommended_volume = helper.CallRecommendInputVolume(
      /*num_calls=*/10, /*initial_volume=*/0, kHighSpeechProbability, -48.0f);

  EXPECT_EQ(recommended_volume, 0);
}
// Audio peaking at 99% of full scale is expected to leave the volume at 255,
// whereas audio peaking at full scale is expected to lower it to 240.
TEST_P(InputVolumeControllerParametrizedTest, ClippingDetectionLowersVolume) {
  constexpr int kNumCalls = 100;
  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
  config.min_input_volume = GetParam();
  InputVolumeControllerTestHelper helper(config);

  int volume = *helper.CallAgcSequence(/*applied_input_volume=*/255,
                                       kHighSpeechProbability, kSpeechLevel,
                                       /*num_calls=*/1);
  EXPECT_EQ(volume, 255);

  // Just below full scale.
  WriteAlternatingAudioBufferSamples(0.99f * kMaxSample, helper.audio_buffer);
  volume = *helper.CallAgcSequence(volume, kHighSpeechProbability, kSpeechLevel,
                                   kNumCalls);
  EXPECT_EQ(volume, 255);

  // Full scale.
  WriteAlternatingAudioBufferSamples(kMaxSample, helper.audio_buffer);
  volume = *helper.CallAgcSequence(volume, kHighSpeechProbability, kSpeechLevel,
                                   kNumCalls);
  EXPECT_EQ(volume, 240);
}
// Checks that the clipping parameters given to `CreateInputVolumeController()`
// end up in the controller's `clipped_*_` members, both for the file-level
// constants and for custom values.
TEST_P(InputVolumeControllerParametrizedTest, ClippingParametersVerified) {
  // Controller built from the file-level constants.
  const auto controller = CreateInputVolumeController(
      kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames);
  controller->Initialize();
  EXPECT_EQ(controller->clipped_level_step_, kClippedLevelStep);
  EXPECT_EQ(controller->clipped_ratio_threshold_, kClippedRatioThreshold);
  EXPECT_EQ(controller->clipped_wait_frames_, kClippedWaitFrames);

  // Controller built from custom values.
  const auto controller_custom =
      CreateInputVolumeController(/*clipped_level_step=*/10,
                                  /*clipped_ratio_threshold=*/0.2f,
                                  /*clipped_wait_frames=*/50);
  controller_custom->Initialize();
  EXPECT_EQ(controller_custom->clipped_level_step_, 10);
  EXPECT_EQ(controller_custom->clipped_ratio_threshold_, 0.2f);
  EXPECT_EQ(controller_custom->clipped_wait_frames_, 50);
}
// A controller created with `enable_clipping_predictor` set to false is
// expected to report the predictor and its step as disabled.
TEST_P(InputVolumeControllerParametrizedTest,
       DisableClippingPredictorDisablesClippingPredictor) {
  const auto controller = CreateInputVolumeController(
      kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames,
      /*enable_clipping_predictor=*/false);
  controller->Initialize();

  EXPECT_FALSE(controller->clipping_predictor_enabled());
  EXPECT_FALSE(controller->use_clipping_predictor_step());
}
// A controller created with `enable_clipping_predictor` set to true is
// expected to report the predictor and its step as enabled.
TEST_P(InputVolumeControllerParametrizedTest,
       EnableClippingPredictorEnablesClippingPredictor) {
  const auto controller = CreateInputVolumeController(
      kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames,
      /*enable_clipping_predictor=*/true);
  controller->Initialize();

  EXPECT_TRUE(controller->clipping_predictor_enabled());
  EXPECT_TRUE(controller->use_clipping_predictor_step());
}
// With the clipping predictor disabled, audio at 99% of full scale
// (alternating pattern, then constant value) is expected to leave the volume
// at 255 over 31 rounds.
TEST_P(InputVolumeControllerParametrizedTest,
       DisableClippingPredictorDoesNotLowerVolume) {
  constexpr int kNumRounds = 31;
  constexpr int kCallsPerPattern = 5;
  int volume = 255;

  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
  config.enable_clipping_predictor = false;
  auto helper = InputVolumeControllerTestHelper(config);
  helper.controller.Initialize();
  EXPECT_FALSE(helper.controller.clipping_predictor_enabled());
  EXPECT_FALSE(helper.controller.use_clipping_predictor_step());

  for (int round = 0; round < kNumRounds; ++round) {
    // Alternating samples at 99% of full scale.
    WriteAlternatingAudioBufferSamples(0.99f * kMaxSample, helper.audio_buffer);
    volume = *helper.CallAgcSequence(volume, kLowSpeechProbability,
                                     kSpeechLevel, kCallsPerPattern);

    // Constant samples at 99% of full scale.
    WriteAudioBufferSamples(0.99f * kMaxSample, /*clipped_ratio=*/0.0f,
                            helper.audio_buffer);
    volume = *helper.CallAgcSequence(volume, kLowSpeechProbability,
                                     kSpeechLevel, kCallsPerPattern);

    EXPECT_EQ(volume, 255);
  }
}
// Drives two controllers in lockstep through identical audio: one with the
// clipping predictor enabled ("predictor") and one with it disabled
// ("baseline"). The expectations pin how far each volume has dropped below
// the initial level, in multiples of kClippedLevelStep, after every phase.
TEST_P(InputVolumeControllerParametrizedTest,
       UsedClippingPredictionsProduceLowerAnalogLevels) {
  constexpr int kInitialLevel = 255;
  constexpr float kCloseToClippingPeakRatio = 0.99f;
  constexpr float kNearClippingPeak = kCloseToClippingPeakRatio * kMaxSample;
  constexpr int kCallsPerSequence = 5;

  InputVolumeControllerConfig predictor_config =
      GetInputVolumeControllerTestConfig();
  predictor_config.enable_clipping_predictor = true;
  InputVolumeControllerConfig baseline_config =
      GetInputVolumeControllerTestConfig();
  baseline_config.enable_clipping_predictor = false;

  InputVolumeControllerTestHelper predictor(predictor_config);
  InputVolumeControllerTestHelper baseline(baseline_config);
  predictor.controller.Initialize();
  baseline.controller.Initialize();
  EXPECT_TRUE(predictor.controller.clipping_predictor_enabled());
  EXPECT_FALSE(baseline.controller.clipping_predictor_enabled());
  EXPECT_TRUE(predictor.controller.use_clipping_predictor_step());

  int predictor_volume = kInitialLevel;
  int baseline_volume = kInitialLevel;

  // Fills both audio buffers with the alternating pattern at `peak`.
  const auto write_alternating = [&](float peak) {
    WriteAlternatingAudioBufferSamples(peak, predictor.audio_buffer);
    WriteAlternatingAudioBufferSamples(peak, baseline.audio_buffer);
  };
  // Fills both audio buffers uniformly with `value` and `clipped_ratio`.
  const auto write_uniform = [&](float value, float clipped_ratio) {
    WriteAudioBufferSamples(value, clipped_ratio, predictor.audio_buffer);
    WriteAudioBufferSamples(value, clipped_ratio, baseline.audio_buffer);
  };
  // Runs `num_calls` AGC calls on each controller and records the new volumes.
  const auto process = [&](int num_calls) {
    predictor_volume = *predictor.CallAgcSequence(
        predictor_volume, kLowSpeechProbability, kSpeechLevel, num_calls);
    baseline_volume = *baseline.CallAgcSequence(
        baseline_volume, kLowSpeechProbability, kSpeechLevel, num_calls);
  };
  // One full round: alternating buffer, process, uniform buffer, process.
  const auto run_round = [&](float peak, float clipped_ratio) {
    write_alternating(peak);
    process(kCallsPerSequence);
    write_uniform(peak, clipped_ratio);
    process(kCallsPerSequence);
  };

  // Near-clipping audio: only the predictor-enabled controller steps down.
  run_round(kNearClippingPeak, 0.0f);
  EXPECT_EQ(predictor_volume, kInitialLevel - kClippedLevelStep);
  EXPECT_EQ(baseline_volume, kInitialLevel);

  // More of the same audio: no further change is expected in these rounds.
  for (int round = 0; round < kClippedWaitFrames / 10; ++round) {
    run_round(kNearClippingPeak, 0.0f);
    EXPECT_EQ(predictor_volume, kInitialLevel - kClippedLevelStep);
    EXPECT_EQ(baseline_volume, kInitialLevel);
  }

  // One more round: the predictor-enabled controller takes a second step.
  run_round(kNearClippingPeak, 0.0f);
  EXPECT_EQ(predictor_volume, kInitialLevel - 2 * kClippedLevelStep);
  EXPECT_EQ(baseline_volume, kInitialLevel);

  // All-zero audio: neither volume is expected to move.
  for (int round = 0; round < 2 * kClippedWaitFrames / 10; ++round) {
    run_round(0.0f, 0.0f);
  }
  EXPECT_EQ(predictor_volume, kInitialLevel - 2 * kClippedLevelStep);
  EXPECT_EQ(baseline_volume, kInitialLevel);

  // A single call at full-scale peaks: both controllers step down once.
  write_alternating(kMaxSample);
  process(1);
  EXPECT_EQ(predictor_volume, kInitialLevel - 3 * kClippedLevelStep);
  EXPECT_EQ(baseline_volume, kInitialLevel - kClippedLevelStep);

  // Continued full-scale audio: both volumes are expected to hold.
  for (int round = 0; round < kClippedWaitFrames / 10; ++round) {
    run_round(kMaxSample, 1.0f);
  }
  EXPECT_EQ(predictor_volume, kInitialLevel - 3 * kClippedLevelStep);
  EXPECT_EQ(baseline_volume, kInitialLevel - kClippedLevelStep);

  // One more full-scale call: both step down again.
  write_alternating(kMaxSample);
  process(1);
  EXPECT_EQ(predictor_volume, kInitialLevel - 4 * kClippedLevelStep);
  EXPECT_EQ(baseline_volume, kInitialLevel - 2 * kClippedLevelStep);
}
// Feeding frames while passing absl::nullopt in place of the level argument
// must leave the recommended volume at its initial value.
TEST_P(InputVolumeControllerParametrizedTest, EmptyRmsErrorHasNoEffect) {
  constexpr int kNumFrames = 125;
  constexpr int kGainDb = -20;

  InputVolumeController controller(kNumChannels,
                                   GetInputVolumeControllerTestConfig());
  controller.Initialize();

  SpeechSamplesReader reader;
  const int recommended_volume =
      reader.Feed(kNumFrames, kInitialInputVolume, kGainDb,
                  kLowSpeechProbability, absl::nullopt, controller);

  ASSERT_EQ(recommended_volume, kInitialInputVolume);
}
// Compares two controllers that differ only in the last factory argument
// (0 vs. 100). After 99 frames only the first is expected to have raised the
// volume; after one more frame both are.
TEST(InputVolumeControllerTest, UpdateInputVolumeWaitFramesIsEffective) {
  constexpr int kInputVolume = kInitialInputVolume;
  constexpr bool kUseClippingPredictor = false;
  constexpr int kGainDb = 0;
  constexpr float kFedSpeechLevel = -42.0f;

  const auto controller_wait_0 = CreateInputVolumeController(
      kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames,
      kUseClippingPredictor, 0);
  const auto controller_wait_100 = CreateInputVolumeController(
      kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames,
      kUseClippingPredictor, 100);
  controller_wait_0->Initialize();
  controller_wait_100->Initialize();

  SpeechSamplesReader reader_wait_0;
  SpeechSamplesReader reader_wait_100;

  // First 99 frames.
  int volume_wait_0 =
      reader_wait_0.Feed(99, kInputVolume, kGainDb, kHighSpeechProbability,
                         kFedSpeechLevel, *controller_wait_0);
  int volume_wait_100 =
      reader_wait_100.Feed(99, kInputVolume, kGainDb, kHighSpeechProbability,
                           kFedSpeechLevel, *controller_wait_100);
  ASSERT_GT(volume_wait_0, kInputVolume);
  ASSERT_EQ(volume_wait_100, kInputVolume);

  // Frame number 100.
  volume_wait_0 =
      reader_wait_0.Feed(1, volume_wait_0, kGainDb, kHighSpeechProbability,
                         kFedSpeechLevel, *controller_wait_0);
  volume_wait_100 =
      reader_wait_100.Feed(1, volume_wait_100, kGainDb, kHighSpeechProbability,
                           kFedSpeechLevel, *controller_wait_100);
  ASSERT_GT(volume_wait_0, kInputVolume);
  ASSERT_GT(volume_wait_100, kInputVolume);
}
// Instantiates every TEST_P of InputVolumeControllerParametrizedTest once per
// listed value (12 and 20). The instantiation-name prefix (first argument) is
// left empty.
// NOTE(review): what the integer parameter configures is decided by the
// fixture, which is defined outside this excerpt - confirm there.
INSTANTIATE_TEST_SUITE_P(,
InputVolumeControllerParametrizedTest,
::testing::Values(12, 20));
// With min_input_volume (80) above clipped_level_min (70), sustained clipping
// input must bring the recommended volume down to 80 and no further.
TEST(InputVolumeControllerTest,
     MinInputVolumeEnforcedWithClippingWhenAboveClippedLevelMin) {
  constexpr int kConfiguredMinInputVolume = 80;
  constexpr int kConfiguredClippedLevelMin = 70;
  constexpr int kAppliedVolume = 100;
  constexpr int kNumCalls = 800;

  // Every other config field keeps its default value.
  InputVolumeControllerConfig config{};
  config.min_input_volume = kConfiguredMinInputVolume;
  config.clipped_level_min = kConfiguredClippedLevelMin;
  InputVolumeControllerTestHelper helper(config);

  // Uniform samples of value 4000 with a clipped ratio of 0.8.
  WriteAudioBufferSamples(4000.0f, 0.8f, helper.audio_buffer);
  helper.CallAgcSequence(kAppliedVolume, kLowSpeechProbability, -18.0f,
                         kNumCalls);

  EXPECT_EQ(helper.controller.recommended_input_volume(),
            kConfiguredMinInputVolume);
}
// With clipped_level_min (80) above min_input_volume (70), sustained clipping
// input must bring the recommended volume down to 80 and no further.
TEST(InputVolumeControllerTest,
     ClippedlevelMinEnforcedWithClippingWhenAboveMinInputVolume) {
  constexpr int kConfiguredMinInputVolume = 70;
  constexpr int kConfiguredClippedLevelMin = 80;
  constexpr int kAppliedVolume = 100;
  constexpr int kNumCalls = 800;

  // Every other config field keeps its default value.
  InputVolumeControllerConfig config{};
  config.min_input_volume = kConfiguredMinInputVolume;
  config.clipped_level_min = kConfiguredClippedLevelMin;
  InputVolumeControllerTestHelper helper(config);

  // Uniform samples of value 4000 with a clipped ratio of 0.8.
  WriteAudioBufferSamples(4000.0f, 0.8f, helper.audio_buffer);
  helper.CallAgcSequence(kAppliedVolume, kLowSpeechProbability, -18.0f,
                         kNumCalls);

  EXPECT_EQ(helper.controller.recommended_input_volume(),
            kConfiguredClippedLevelMin);
}
// Two identically built controllers each receive ten frames. The first sees
// eight frames at speech probability 0.7 and two at 0.4 and is expected to
// raise the volume; the second sees seven at 0.7 and three at 0.4 and is
// expected to keep it.
TEST(InputVolumeControllerTest, SpeechRatioThresholdIsEffective) {
  constexpr int kInputVolume = kInitialInputVolume;
  constexpr bool kUseClippingPredictor = false;
  constexpr int kGainDb = 0;
  constexpr float kFedSpeechLevel = -42.0f;
  constexpr float kHigherProbability = 0.7f;
  constexpr float kLowerProbability = 0.4f;

  const auto controller_1 = CreateInputVolumeController(
      kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames,
      kUseClippingPredictor, 10);
  const auto controller_2 = CreateInputVolumeController(
      kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames,
      kUseClippingPredictor, 10);
  controller_1->Initialize();
  controller_2->Initialize();

  SpeechSamplesReader reader_1;
  SpeechSamplesReader reader_2;

  // Frame 1: the only frame on which the two inputs differ.
  int volume_1 = reader_1.Feed(1, kInputVolume, kGainDb, kHigherProbability,
                               kFedSpeechLevel, *controller_1);
  int volume_2 = reader_2.Feed(1, kInputVolume, kGainDb, kLowerProbability,
                               kFedSpeechLevel, *controller_2);
  ASSERT_EQ(volume_1, kInputVolume);
  ASSERT_EQ(volume_2, kInputVolume);

  // Frames 2-3: lower probability for both.
  volume_1 = reader_1.Feed(2, volume_1, kGainDb, kLowerProbability,
                           kFedSpeechLevel, *controller_1);
  volume_2 = reader_2.Feed(2, volume_2, kGainDb, kLowerProbability,
                           kFedSpeechLevel, *controller_2);
  ASSERT_EQ(volume_1, kInputVolume);
  ASSERT_EQ(volume_2, kInputVolume);

  // Frames 4-10: higher probability for both.
  volume_1 = reader_1.Feed(7, volume_1, kGainDb, kHigherProbability,
                           kFedSpeechLevel, *controller_1);
  volume_2 = reader_2.Feed(7, volume_2, kGainDb, kHigherProbability,
                           kFedSpeechLevel, *controller_2);
  ASSERT_GT(volume_1, kInputVolume);
  ASSERT_EQ(volume_2, kInputVolume);
}
// Two identically built controllers each receive ten frames. The first sees
// eight frames at speech probability 0.5 and two at 0.49 and is expected to
// raise the volume; the second sees seven at 0.5 and three at 0.49 and is
// expected to keep it. The test therefore relies on 0.5 counting as speech
// while 0.49 does not.
TEST(InputVolumeControllerTest, SpeechProbabilityThresholdIsEffective) {
  constexpr int kInputVolume = kInitialInputVolume;
  // The last factory argument (10) is the same for both controllers.
  // NOTE(review): presumably the number of frames to wait before a volume
  // update, as varied by UpdateInputVolumeWaitFramesIsEffective - confirm
  // against CreateInputVolumeController.
  std::unique_ptr<InputVolumeController> controller_1 =
      CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
                                  kClippedWaitFrames, false, 10);
  std::unique_ptr<InputVolumeController> controller_2 =
      CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
                                  kClippedWaitFrames, false, 10);
  controller_1->Initialize();
  controller_2->Initialize();

  SpeechSamplesReader reader_1;
  SpeechSamplesReader reader_2;

  // Frame 1: the only frame on which the two inputs differ (0.5 vs. 0.49).
  int volume_1 =
      reader_1.Feed(1, kInputVolume, 0, 0.5f, -42.0f, *controller_1);
  int volume_2 =
      reader_2.Feed(1, kInputVolume, 0, 0.49f, -42.0f, *controller_2);
  ASSERT_EQ(volume_1, kInputVolume);
  ASSERT_EQ(volume_2, kInputVolume);

  // Frames 2-3: probability 0.49 for both. The returned volumes are stored so
  // that the assertions below check the outcome of these calls rather than
  // re-checking the values from frame 1.
  volume_1 = reader_1.Feed(2, volume_1, 0, 0.49f, -42.0f, *controller_1);
  volume_2 = reader_2.Feed(2, volume_2, 0, 0.49f, -42.0f, *controller_2);
  ASSERT_EQ(volume_1, kInputVolume);
  ASSERT_EQ(volume_2, kInputVolume);

  // Frames 4-10: probability 0.5 for both.
  volume_1 = reader_1.Feed(7, volume_1, 0, 0.5f, -42.0f, *controller_1);
  volume_2 = reader_2.Feed(7, volume_2, 0, 0.5f, -42.0f, *controller_2);
  ASSERT_GT(volume_1, kInputVolume);
  ASSERT_EQ(volume_2, kInputVolume);
}
// Starting from volume 255 and feeding 14 frames with a gain argument of 50,
// the volume must drop while the OnChangeToMatchTarget histogram stays empty.
TEST(InputVolumeControllerTest,
     DoNotLogRecommendedInputVolumeOnChangeToMatchTarget) {
  constexpr int kStartupVolume = 255;
  constexpr int kNumFrames = 14;
  constexpr int kGainDb = 50;
  constexpr float kFedSpeechLevel = -20.0f;

  // Clear previously recorded samples before the controller runs.
  metrics::Reset();

  SpeechSamplesReader reader;
  const auto controller = CreateInputVolumeController();
  controller->Initialize();

  const int volume =
      reader.Feed(kNumFrames, kStartupVolume, kGainDb, kHighSpeechProbability,
                  kFedSpeechLevel, *controller);
  ASSERT_LT(volume, kStartupVolume);

  EXPECT_METRIC_THAT(
      metrics::Samples(
          "WebRTC.Audio.Apm.RecommendedInputVolume.OnChangeToMatchTarget"),
      ::testing::IsEmpty());
}
// Starting from volume 100 and feeding 14 frames with a level argument of -50,
// the volume must rise and the OnChangeToMatchTarget histogram must receive
// at least one sample.
TEST(InputVolumeControllerTest,
     LogRecommendedInputVolumeOnUpwardChangeToMatchTarget) {
  constexpr int kStartupVolume = 100;
  constexpr int kNumFrames = 14;
  constexpr int kGainDb = -6;
  constexpr float kFedSpeechLevel = -50.0f;

  // Clear previously recorded samples before the controller runs.
  metrics::Reset();

  SpeechSamplesReader reader;
  const auto controller = CreateInputVolumeController();
  controller->Initialize();

  const int volume =
      reader.Feed(kNumFrames, kStartupVolume, kGainDb, kHighSpeechProbability,
                  kFedSpeechLevel, *controller);
  ASSERT_GT(volume, kStartupVolume);

  EXPECT_METRIC_THAT(
      metrics::Samples(
          "WebRTC.Audio.Apm.RecommendedInputVolume.OnChangeToMatchTarget"),
      ::testing::Not(::testing::IsEmpty()));
}
// Starting from volume 100 and feeding 14 frames with a level argument of -5,
// the volume must drop and the OnChangeToMatchTarget histogram must receive
// at least one sample.
TEST(InputVolumeControllerTest,
     LogRecommendedInputVolumeOnDownwardChangeToMatchTarget) {
  constexpr int kStartupVolume = 100;
  constexpr int kNumFrames = 14;
  constexpr int kGainDb = -6;
  constexpr float kFedSpeechLevel = -5.0f;

  // Clear previously recorded samples before the controller runs.
  metrics::Reset();

  SpeechSamplesReader reader;
  const auto controller = CreateInputVolumeController();
  controller->Initialize();

  const int volume =
      reader.Feed(kNumFrames, kStartupVolume, kGainDb, kHighSpeechProbability,
                  kFedSpeechLevel, *controller);
  ASSERT_LT(volume, kStartupVolume);

  EXPECT_METRIC_THAT(
      metrics::Samples(
          "WebRTC.Audio.Apm.RecommendedInputVolume.OnChangeToMatchTarget"),
      ::testing::Not(::testing::IsEmpty()));
}
// HandleClipping(step) must lower the recommended level by exactly `step`
// when the result (100 - 29) stays above the first constructor argument (70).
TEST(MonoInputVolumeControllerTest, CheckHandleClippingLowersVolume) {
  constexpr int kStartVolume = 100;
  constexpr int kClippingStep = 29;
  // First three constructor arguments; sibling tests in this file label them
  // clipped level min, min mic level and update wait frames respectively.
  constexpr int kLevelFloorOnClipping = 70;
  constexpr int kMicLevelFloor = 32;
  constexpr int kWaitFrames = 3;

  MonoInputVolumeController mono_controller(
      kLevelFloorOnClipping, kMicLevelFloor, kWaitFrames,
      kHighSpeechProbability, kSpeechRatioThreshold);
  mono_controller.Initialize();

  // Hand the starting volume to the controller before clipping is reported.
  UpdateRecommendedInputVolume(mono_controller, kStartVolume,
                               kLowSpeechProbability, -10.0f);
  mono_controller.HandleClipping(kClippingStep);

  EXPECT_EQ(mono_controller.recommended_analog_level(),
            kStartVolume - kClippingStep);
}
// Three consecutive updates with a -10 RMS error and high speech probability
// must end with a volume below the starting one.
TEST(MonoInputVolumeControllerTest,
     CheckProcessNegativeRmsErrorDecreasesInputVolume) {
  constexpr int kStartVolume = 100;
  constexpr int kNumUpdates = 3;
  constexpr float kRmsError = -10.0f;

  MonoInputVolumeController mono_controller(64, 32, 3, kHighSpeechProbability,
                                            kSpeechRatioThreshold);
  mono_controller.Initialize();

  // Each update is fed the volume returned by the previous one.
  int volume = kStartVolume;
  for (int update = 0; update < kNumUpdates; ++update) {
    volume = UpdateRecommendedInputVolume(mono_controller, volume,
                                          kHighSpeechProbability, kRmsError);
  }

  EXPECT_LT(volume, kStartVolume);
}
// Three consecutive updates with a +10 RMS error and high speech probability
// must end with a volume above the starting one.
TEST(MonoInputVolumeControllerTest,
     CheckProcessPositiveRmsErrorIncreasesInputVolume) {
  constexpr int kStartVolume = 100;
  constexpr int kNumUpdates = 3;
  constexpr float kRmsError = 10.0f;

  MonoInputVolumeController mono_controller(64, 32, 3, kHighSpeechProbability,
                                            kSpeechRatioThreshold);
  mono_controller.Initialize();

  // Each update is fed the volume returned by the previous one.
  int volume = kStartVolume;
  for (int update = 0; update < kNumUpdates; ++update) {
    volume = UpdateRecommendedInputVolume(mono_controller, volume,
                                          kHighSpeechProbability, kRmsError);
  }

  EXPECT_GT(volume, kStartVolume);
}
// Three identically configured controllers receive two updates each with RMS
// errors of -14, -15 and -30. The expectations require that -15 lowers the
// volume more than -14 does, and that -30 lowers it exactly as much as -15,
// i.e. the downward adjustment is limited.
TEST(MonoInputVolumeControllerTest,
     CheckProcessNegativeRmsErrorDecreasesInputVolumeWithLimit) {
  constexpr int kInitialInputVolume = 100;
  MonoInputVolumeController mono_controller_1(
      64,
      32,
      2, kHighSpeechProbability,
      kSpeechRatioThreshold);
  MonoInputVolumeController mono_controller_2(
      64,
      32,
      2, kHighSpeechProbability,
      kSpeechRatioThreshold);
  // Uses the same named float constants as the other two controllers (and as
  // the positive-error counterpart of this test) instead of the double
  // literals 0.7 and 0.8, so all three are visibly configured alike and no
  // implicit double-to-float narrowing takes place.
  MonoInputVolumeController mono_controller_3(
      64,
      32,
      2, kHighSpeechProbability,
      kSpeechRatioThreshold);
  mono_controller_1.Initialize();
  mono_controller_2.Initialize();
  mono_controller_3.Initialize();

  // Controller 1: two updates at -14.
  int volume_1 = UpdateRecommendedInputVolume(
      mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -14.0f);
  volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1,
                                          kHighSpeechProbability, -14.0f);

  // Controllers 2 and 3: two updates each at -15 and -30 respectively.
  int volume_2 = UpdateRecommendedInputVolume(
      mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -15.0f);
  int volume_3 = UpdateRecommendedInputVolume(
      mono_controller_3, kInitialInputVolume, kHighSpeechProbability, -30.0f);
  volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2,
                                          kHighSpeechProbability, -15.0f);
  volume_3 = UpdateRecommendedInputVolume(mono_controller_3, volume_3,
                                          kHighSpeechProbability, -30.0f);

  EXPECT_LT(volume_1, kInitialInputVolume);
  EXPECT_LT(volume_2, volume_1);
  EXPECT_EQ(volume_2, volume_3);
}
// Three identically configured controllers receive two updates each with RMS
// errors of +14, +15 and +30. The expectations require that +15 raises the
// volume more than +14 does, and that +30 raises it exactly as much as +15,
// i.e. the upward adjustment is limited.
TEST(MonoInputVolumeControllerTest,
     CheckProcessPositiveRmsErrorIncreasesInputVolumeWithLimit) {
  constexpr int kStartVolume = 100;
  constexpr float kSmallError = 14.0f;
  constexpr float kMediumError = 15.0f;
  constexpr float kLargeError = 30.0f;

  MonoInputVolumeController small_error_controller(
      64, 32, 2, kHighSpeechProbability, kSpeechRatioThreshold);
  MonoInputVolumeController medium_error_controller(
      64, 32, 2, kHighSpeechProbability, kSpeechRatioThreshold);
  MonoInputVolumeController large_error_controller(
      64, 32, 2, kHighSpeechProbability, kSpeechRatioThreshold);
  small_error_controller.Initialize();
  medium_error_controller.Initialize();
  large_error_controller.Initialize();

  int small_error_volume = UpdateRecommendedInputVolume(
      small_error_controller, kStartVolume, kHighSpeechProbability,
      kSmallError);
  small_error_volume = UpdateRecommendedInputVolume(
      small_error_controller, small_error_volume, kHighSpeechProbability,
      kSmallError);

  int medium_error_volume = UpdateRecommendedInputVolume(
      medium_error_controller, kStartVolume, kHighSpeechProbability,
      kMediumError);
  int large_error_volume = UpdateRecommendedInputVolume(
      large_error_controller, kStartVolume, kHighSpeechProbability,
      kLargeError);
  medium_error_volume = UpdateRecommendedInputVolume(
      medium_error_controller, medium_error_volume, kHighSpeechProbability,
      kMediumError);
  large_error_volume = UpdateRecommendedInputVolume(
      large_error_controller, large_error_volume, kHighSpeechProbability,
      kLargeError);

  EXPECT_GT(small_error_volume, kStartVolume);
  EXPECT_GT(medium_error_volume, small_error_volume);
  EXPECT_EQ(medium_error_volume, large_error_volume);
}
// Two pairs of updates with a -10 RMS error: the volume must drop after the
// first pair and drop again after the second.
TEST(MonoInputVolumeControllerTest,
     CheckProcessRmsErrorDecreasesInputVolumeRepeatedly) {
  constexpr int kStartVolume = 100;

  MonoInputVolumeController mono_controller(64, 32, 2, kHighSpeechProbability,
                                            kSpeechRatioThreshold);
  mono_controller.Initialize();

  // Applies two consecutive updates, chaining the returned volume.
  const auto update_twice = [&mono_controller](int volume) {
    for (int update = 0; update < 2; ++update) {
      volume = UpdateRecommendedInputVolume(mono_controller, volume,
                                            kHighSpeechProbability, -10.0f);
    }
    return volume;
  };

  const int volume_before = update_twice(kStartVolume);
  EXPECT_LT(volume_before, kStartVolume);

  const int volume_after = update_twice(volume_before);
  EXPECT_LT(volume_after, volume_before);
}
// Two pairs of updates with a +10 RMS error: the volume must rise after the
// first pair and rise again after the second.
TEST(MonoInputVolumeControllerTest,
     CheckProcessPositiveRmsErrorIncreasesInputVolumeRepeatedly) {
  constexpr int kStartVolume = 100;

  MonoInputVolumeController mono_controller(64, 32, 2, kHighSpeechProbability,
                                            kSpeechRatioThreshold);
  mono_controller.Initialize();

  // Applies two consecutive updates, chaining the returned volume.
  const auto update_twice = [&mono_controller](int volume) {
    for (int update = 0; update < 2; ++update) {
      volume = UpdateRecommendedInputVolume(mono_controller, volume,
                                            kHighSpeechProbability, 10.0f);
    }
    return volume;
  };

  const int volume_before = update_twice(kStartVolume);
  EXPECT_GT(volume_before, kStartVolume);

  const int volume_after = update_twice(volume_before);
  EXPECT_GT(volume_after, volume_before);
}
// Starting from 100 with a clipped-level minimum of 70, a clipping step of 29
// stays above the minimum, whereas a step of 31 must be clamped to it.
TEST(MonoInputVolumeControllerTest, CheckClippedLevelMinIsEffective) {
  constexpr int kStartVolume = 100;
  constexpr int kClippedLevelMin = 70;
  constexpr int kStepStayingAboveMin = 29;
  constexpr int kStepCrossingMin = 31;

  MonoInputVolumeController unclamped_controller(
      kClippedLevelMin, 84, 2, kHighSpeechProbability, kSpeechRatioThreshold);
  MonoInputVolumeController clamped_controller(
      kClippedLevelMin, 84, 2, kHighSpeechProbability, kSpeechRatioThreshold);
  unclamped_controller.Initialize();
  clamped_controller.Initialize();

  // A first update with low speech probability must not change the volume.
  EXPECT_EQ(UpdateRecommendedInputVolume(unclamped_controller, kStartVolume,
                                         kLowSpeechProbability, -10.0f),
            kStartVolume);
  EXPECT_EQ(UpdateRecommendedInputVolume(clamped_controller, kStartVolume,
                                         kLowSpeechProbability, -10.0f),
            kStartVolume);

  unclamped_controller.HandleClipping(kStepStayingAboveMin);
  clamped_controller.HandleClipping(kStepCrossingMin);

  EXPECT_EQ(clamped_controller.recommended_analog_level(), kClippedLevelMin);
  EXPECT_LT(clamped_controller.recommended_analog_level(),
            unclamped_controller.recommended_analog_level());
}
// With a minimum mic level of 64, a second update at RMS error -10 lowers the
// volume, while one at -30 must lower it further but stop exactly at 64.
TEST(MonoInputVolumeControllerTest, CheckMinMicLevelIsEffective) {
  constexpr int kStartVolume = 100;
  constexpr int kMicLevelFloor = 64;

  MonoInputVolumeController moderate_controller(
      64, kMicLevelFloor, 2, kHighSpeechProbability, kSpeechRatioThreshold);
  MonoInputVolumeController extreme_controller(
      64, kMicLevelFloor, 2, kHighSpeechProbability, kSpeechRatioThreshold);
  moderate_controller.Initialize();
  extreme_controller.Initialize();

  // The first update is not expected to change either volume.
  int moderate_volume = UpdateRecommendedInputVolume(
      moderate_controller, kStartVolume, kHighSpeechProbability, -10.0f);
  int extreme_volume = UpdateRecommendedInputVolume(
      extreme_controller, kStartVolume, kHighSpeechProbability, -10.0f);
  EXPECT_EQ(moderate_volume, kStartVolume);
  EXPECT_EQ(extreme_volume, kStartVolume);

  // The second update differs only in the RMS error: -10 vs. -30.
  moderate_volume = UpdateRecommendedInputVolume(
      moderate_controller, moderate_volume, kHighSpeechProbability, -10.0f);
  extreme_volume = UpdateRecommendedInputVolume(
      extreme_controller, extreme_volume, kHighSpeechProbability, -30.0f);
  EXPECT_LT(moderate_volume, kStartVolume);
  EXPECT_LT(extreme_volume, moderate_volume);
  EXPECT_EQ(extreme_volume, kMicLevelFloor);
}
// Two controllers differ only in the third constructor argument (1 vs. 3).
// The first must lower the volume on its second update, the second only on
// its third.
TEST(MonoInputVolumeControllerTest,
     CheckUpdateInputVolumeWaitFramesIsEffective) {
  constexpr int kStartVolume = 100;
  constexpr float kRmsError = -10.0f;

  MonoInputVolumeController short_wait_controller(
      64, 84, 1, kHighSpeechProbability, kSpeechRatioThreshold);
  MonoInputVolumeController long_wait_controller(
      64, 84, 3, kHighSpeechProbability, kSpeechRatioThreshold);
  short_wait_controller.Initialize();
  long_wait_controller.Initialize();

  // Update 1: no change for either controller.
  int short_wait_volume = UpdateRecommendedInputVolume(
      short_wait_controller, kStartVolume, kHighSpeechProbability, kRmsError);
  int long_wait_volume = UpdateRecommendedInputVolume(
      long_wait_controller, kStartVolume, kHighSpeechProbability, kRmsError);
  EXPECT_EQ(short_wait_volume, kStartVolume);
  EXPECT_EQ(long_wait_volume, kStartVolume);

  // Update 2: only the short-wait controller reacts.
  short_wait_volume = UpdateRecommendedInputVolume(
      short_wait_controller, short_wait_volume, kHighSpeechProbability,
      kRmsError);
  long_wait_volume = UpdateRecommendedInputVolume(
      long_wait_controller, long_wait_volume, kHighSpeechProbability,
      kRmsError);
  EXPECT_LT(short_wait_volume, kStartVolume);
  EXPECT_EQ(long_wait_volume, kStartVolume);

  // Update 3: the long-wait controller reacts as well.
  long_wait_volume = UpdateRecommendedInputVolume(
      long_wait_controller, long_wait_volume, kHighSpeechProbability,
      kRmsError);
  EXPECT_LT(long_wait_volume, kStartVolume);
}
// Both controllers use a speech probability threshold of 0.8. On the second
// update one is fed a probability 0.1 below the threshold and must keep its
// volume, while the other is fed the threshold itself and must lower it.
TEST(MonoInputVolumeControllerTest,
     CheckSpeechProbabilityThresholdIsEffective) {
  constexpr int kStartVolume = 100;
  constexpr float kThreshold = 0.8f;
  constexpr float kRmsError = -10.0f;

  MonoInputVolumeController below_threshold_controller(
      64, 84, 2, kThreshold, kSpeechRatioThreshold);
  MonoInputVolumeController at_threshold_controller(
      64, 84, 2, kThreshold, kSpeechRatioThreshold);
  below_threshold_controller.Initialize();
  at_threshold_controller.Initialize();

  // Update 1: both are fed exactly the threshold; no change expected yet.
  int below_threshold_volume = UpdateRecommendedInputVolume(
      below_threshold_controller, kStartVolume, kThreshold, kRmsError);
  int at_threshold_volume = UpdateRecommendedInputVolume(
      at_threshold_controller, kStartVolume, kThreshold, kRmsError);
  EXPECT_EQ(below_threshold_volume, kStartVolume);
  EXPECT_EQ(at_threshold_volume, kStartVolume);

  // Update 2: the probabilities diverge.
  below_threshold_volume = UpdateRecommendedInputVolume(
      below_threshold_controller, below_threshold_volume, kThreshold - 0.1f,
      kRmsError);
  at_threshold_volume = UpdateRecommendedInputVolume(
      at_threshold_controller, at_threshold_volume, kThreshold, kRmsError);
  EXPECT_EQ(below_threshold_volume, kStartVolume);
  EXPECT_LT(at_threshold_volume, below_threshold_volume);
}
// Both controllers use a speech ratio threshold of 0.75 and receive four
// updates. One gets two high- and two low-probability updates and must keep
// its volume; the other gets three high and one low and must lower it.
TEST(MonoInputVolumeControllerTest, CheckSpeechRatioThresholdIsEffective) {
  constexpr int kStartVolume = 100;
  constexpr float kRatioThreshold = 0.75f;
  constexpr float kRmsError = -10.0f;

  MonoInputVolumeController half_speech_controller(
      64, 84, 4, kHighSpeechProbability, kRatioThreshold);
  MonoInputVolumeController mostly_speech_controller(
      64, 84, 4, kHighSpeechProbability, kRatioThreshold);
  half_speech_controller.Initialize();
  mostly_speech_controller.Initialize();

  // Updates 1-2: high speech probability for both.
  int half_speech_volume = UpdateRecommendedInputVolume(
      half_speech_controller, kStartVolume, kHighSpeechProbability, kRmsError);
  int mostly_speech_volume = UpdateRecommendedInputVolume(
      mostly_speech_controller, kStartVolume, kHighSpeechProbability,
      kRmsError);
  half_speech_volume = UpdateRecommendedInputVolume(
      half_speech_controller, half_speech_volume, kHighSpeechProbability,
      kRmsError);
  mostly_speech_volume = UpdateRecommendedInputVolume(
      mostly_speech_controller, mostly_speech_volume, kHighSpeechProbability,
      kRmsError);

  // Update 3: low speech probability for both.
  half_speech_volume = UpdateRecommendedInputVolume(
      half_speech_controller, half_speech_volume, kLowSpeechProbability,
      kRmsError);
  mostly_speech_volume = UpdateRecommendedInputVolume(
      mostly_speech_controller, mostly_speech_volume, kLowSpeechProbability,
      kRmsError);
  EXPECT_EQ(half_speech_volume, kStartVolume);
  EXPECT_EQ(mostly_speech_volume, kStartVolume);

  // Update 4: low for one controller, high for the other.
  half_speech_volume = UpdateRecommendedInputVolume(
      half_speech_controller, half_speech_volume, kLowSpeechProbability,
      kRmsError);
  mostly_speech_volume = UpdateRecommendedInputVolume(
      mostly_speech_controller, mostly_speech_volume, kHighSpeechProbability,
      kRmsError);
  EXPECT_EQ(half_speech_volume, kStartVolume);
  EXPECT_LT(mostly_speech_volume, half_speech_volume);
}
// On the second update one controller is given absl::nullopt instead of an
// RMS error and must keep its volume, while the other is given -10 and must
// lower it.
TEST(MonoInputVolumeControllerTest,
     CheckProcessEmptyRmsErrorDoesNotLowerVolume) {
  constexpr int kStartVolume = 100;
  constexpr float kRmsError = -10.0f;

  MonoInputVolumeController empty_error_controller(
      64, 84, 2, kHighSpeechProbability, kSpeechRatioThreshold);
  MonoInputVolumeController valid_error_controller(
      64, 84, 2, kHighSpeechProbability, kSpeechRatioThreshold);
  empty_error_controller.Initialize();
  valid_error_controller.Initialize();

  // Update 1: identical input; no change expected yet.
  int empty_error_volume = UpdateRecommendedInputVolume(
      empty_error_controller, kStartVolume, kHighSpeechProbability, kRmsError);
  int valid_error_volume = UpdateRecommendedInputVolume(
      valid_error_controller, kStartVolume, kHighSpeechProbability, kRmsError);
  EXPECT_EQ(empty_error_volume, kStartVolume);
  EXPECT_EQ(valid_error_volume, kStartVolume);

  // Update 2: missing RMS error vs. a valid negative one.
  empty_error_volume = UpdateRecommendedInputVolume(
      empty_error_controller, empty_error_volume, kHighSpeechProbability,
      absl::nullopt);
  valid_error_volume = UpdateRecommendedInputVolume(
      valid_error_controller, valid_error_volume, kHighSpeechProbability,
      kRmsError);
  EXPECT_EQ(empty_error_volume, kStartVolume);
  EXPECT_LT(valid_error_volume, empty_error_volume);
}
}