#include "chrome/services/speech/speech_timestamp_estimator.h"

#include <stdint.h>

#include <tuple>
#include <utility>

#include "base/time/time.h"
#include "base/types/zip.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace speech {
namespace {
// Shorthand for the types nested in SpeechTimestampEstimator (and for
// media::MediaTimestampRange) that are used throughout this file.
using SpeechTimestamp = SpeechTimestampEstimator::SpeechTimestamp;
using PlaybackDuration = SpeechTimestampEstimator::PlaybackDuration;
using MediaTimestamp = SpeechTimestampEstimator::MediaTimestamp;
using MediaTimestampRange = media::MediaTimestampRange;
using MediaRanges = SpeechTimestampEstimator::MediaRanges;
// A {start, end} pair of speech timestamps, as consumed by the fixture's
// TakeRange() and PeekRange() helpers.
using SpeechTimestampRange = std::pair<SpeechTimestamp, SpeechTimestamp>;
// Returns the {start, end} pair of SpeechTimestamps located `start` and `end`
// whole seconds into the speech timeline.
//
// The parameters are 64-bit so that callers can pass the result of
// base::TimeDelta::InSeconds() (an int64_t) directly, without an implicit
// narrowing conversion to int. Plain int arguments still work unchanged.
SpeechTimestampRange SpeechSecondsRange(int64_t start, int64_t end) {
  return {SpeechTimestamp(base::Seconds(start)),
          SpeechTimestamp(base::Seconds(end))};
}
// Builds a MediaTimestampRange whose `start` and `end` fields are the given
// whole-second offsets.
MediaTimestampRange MediaSecondsRange(int start, int end) {
  const auto range_start = base::Seconds(start);
  const auto range_end = base::Seconds(end);
  return MediaTimestampRange{.start = range_start, .end = range_end};
}
// Compares `actual_ranges` against `expected_ranges`: both must hold the same
// number of ranges, and each pair visited together must have identical `start`
// and `end` values. Every actual range must also satisfy start < end.
//
// All checks are non-fatal EXPECT_* assertions, so a failure does not stop the
// remaining comparisons.
void VerifyRanges(const MediaRanges& actual_ranges,
const MediaRanges& expected_ranges) {
EXPECT_EQ(actual_ranges.size(), expected_ranges.size());
// Walk both containers in lockstep.
for (auto [actual, expected] : base::zip(actual_ranges, expected_ranges)) {
EXPECT_EQ(actual.start, expected.start);
EXPECT_EQ(actual.end, expected.end);
// Sanity check on the estimator's output itself: no empty/inverted ranges.
EXPECT_LT(actual.start, actual.end);
}
}
// Fixture that owns the SpeechTimestampEstimator under test. Its helpers wrap
// plain base::TimeDelta values in the estimator's parameter types so that test
// bodies read as a short sequence of events.
class SpeechTimestampEstimatorTest : public testing::Test {
 public:
  SpeechTimestampEstimatorTest() = default;
  ~SpeechTimestampEstimatorTest() override = default;

  // Forwards the two ends of `range` to TakeTimestampsInRange().
  MediaRanges TakeRange(SpeechTimestampRange range) {
    auto& [range_start, range_end] = range;
    return estimator_.TakeTimestampsInRange(range_start, range_end);
  }

  // Takes the speech range from 0s up to one day, which is far longer than
  // the audio appended by any test in this file.
  MediaRanges TakeAllRanges() {
    const SpeechTimestampRange whole_day =
        SpeechSecondsRange(0, base::Days(1).InSeconds());
    return TakeRange(whole_day);
  }

  // Forwards the two ends of `range` to PeekTimestampsInRange().
  MediaRanges PeekRange(SpeechTimestampRange range) {
    auto& [range_start, range_end] = range;
    return estimator_.PeekTimestampsInRange(range_start, range_end);
  }

  // Reports `duration` of audio to the estimator via AppendDuration().
  void AppendDuration(base::TimeDelta duration) {
    estimator_.AppendDuration(PlaybackDuration(duration));
  }

  // Reports `duration` of dropped silent media via OnSilentMediaDropped().
  void SkipSilence(base::TimeDelta duration) {
    estimator_.OnSilentMediaDropped(PlaybackDuration(duration));
  }

  // Registers a new playback beginning at media time `start`.
  void AddNewPlayback(base::TimeDelta start) {
    estimator_.AddPlaybackStart(MediaTimestamp(start));
  }

 private:
  SpeechTimestampEstimator estimator_;
};
// With no playback registered and no audio appended, taking every range is
// expected to yield nothing.
TEST_F(SpeechTimestampEstimatorTest, NoAudio) {
auto results = TakeAllRanges();
EXPECT_TRUE(results.empty());
}
// Audio appended without any AddPlaybackStart() call is expected to map to no
// media ranges.
TEST_F(SpeechTimestampEstimatorTest, NoPlayback) {
AppendDuration(base::Seconds(10));
auto results = TakeAllRanges();
EXPECT_TRUE(results.empty());
}
// A single playback of 10s is expected to map to one media range that begins
// at the playback's start time, whatever that start time is (zero, large,
// negative or small positive).
TEST_F(SpeechTimestampEstimatorTest, SinglePlayback_Basic) {
  constexpr int kDurationSeconds = 10;

  struct Scenario {
    const char* trace;
    int start_seconds;
  };
  constexpr Scenario kScenarios[] = {
      {"[0s,10s)", 0},
      {"[100s,110s)", 100},
      {"[-1s,9s)", -1},
      {"[1s,11s)", 1},
  };

  // Each scenario drains the estimator with TakeAllRanges(), so the next one
  // only sees its own playback.
  for (const Scenario& scenario : kScenarios) {
    SCOPED_TRACE(scenario.trace);
    AddNewPlayback(base::Seconds(scenario.start_seconds));
    AppendDuration(base::Seconds(kDurationSeconds));
    VerifyRanges(TakeAllRanges(),
                 {MediaSecondsRange(scenario.start_seconds,
                                    scenario.start_seconds + kDurationSeconds)});
  }
}
// Audio delivered in several consecutive chunks for the same playback is
// expected to come back as a single [0s,10s) media range.
TEST_F(SpeechTimestampEstimatorTest, SinglePlayback_MultiplePlaybackChunks) {
  struct Scenario {
    const char* trace;
    int chunk_count;
    int chunk_seconds;
  };
  constexpr Scenario kScenarios[] = {
      {"[0s,10s) - two chunks", 2, 5},
      {"[0s,10s) - ten chunks", 10, 1},
  };

  for (const Scenario& scenario : kScenarios) {
    SCOPED_TRACE(scenario.trace);
    AddNewPlayback(base::Seconds(0));
    for (int chunk = 0; chunk < scenario.chunk_count; ++chunk) {
      AppendDuration(base::Seconds(scenario.chunk_seconds));
    }
    VerifyRanges(TakeAllRanges(), {MediaSecondsRange(0, 10)});
  }
}
// Five seconds of audio arrive before any playback is registered. Only the
// part of the requested speech range that follows the playback start is
// expected to map to media time.
TEST_F(SpeechTimestampEstimatorTest, SinglePlayback_OffsetStart) {
  // Audio with no associated playback.
  AppendDuration(base::Seconds(5));

  // Playback starting at media time 100s, with 10s of audio.
  AddNewPlayback(base::Seconds(100));
  AppendDuration(base::Seconds(10));

  const MediaRanges expected = {MediaSecondsRange(100, 105)};
  const MediaRanges taken = TakeRange(SpeechSecondsRange(0, 10));
  VerifyRanges(taken, expected);
}
// Exercises speech ranges that fall outside of, or only partly overlap, the
// 10s of audio appended for a playback starting at media time 100s.
TEST_F(SpeechTimestampEstimatorTest, SinglePlayback_RangeLimits) {
constexpr int kPlaybackSeconds = 10;
AddNewPlayback(base::Seconds(100));
AppendDuration(base::Seconds(kPlaybackSeconds));
// A range entirely before speech time zero is expected to be empty.
EXPECT_TRUE(TakeRange(SpeechSecondsRange(-100, -50)).empty());
// A range far past the appended audio is expected to be empty.
EXPECT_TRUE(TakeRange(SpeechSecondsRange(1000, 2000)).empty());
// A range that starts exactly where the appended audio ends is expected to be
// empty.
EXPECT_TRUE(
TakeRange(SpeechSecondsRange(kPlaybackSeconds, 2 * kPlaybackSeconds))
.empty());
// A range wider than the appended audio on both sides is expected to return
// just the audio that exists.
VerifyRanges(
TakeRange(SpeechSecondsRange(-kPlaybackSeconds, 10 * kPlaybackSeconds)),
{MediaSecondsRange(100, 110)});
}
// After everything has been taken, audio appended later (with no new playback
// start) is expected to continue from where the same playback left off.
TEST_F(SpeechTimestampEstimatorTest, SinglePlayback_AppendAfterDrainingFifo) {
  constexpr base::TimeDelta kChunk = base::Seconds(10);

  AddNewPlayback(base::Seconds(100));
  AppendDuration(kChunk);

  // Drain the estimator with a range far larger than the appended audio; the
  // returned ranges are not of interest here.
  const SpeechTimestampRange drain_range =
      SpeechSecondsRange(0, base::Days(25).InSeconds());
  std::ignore = TakeRange(drain_range);

  AppendDuration(kChunk);

  const MediaRanges expected = {MediaSecondsRange(110, 120)};
  VerifyRanges(TakeRange(SpeechSecondsRange(10, 20)), expected);
}
// Takes a 10s playback in two halves, checking that a half which was already
// taken cannot be taken again.
TEST_F(SpeechTimestampEstimatorTest, SinglePlayback_PartialRanges) {
AddNewPlayback(base::Seconds(100));
AppendDuration(base::Seconds(10));
// First half of the speech maps to the first half of the media.
VerifyRanges(TakeRange(SpeechSecondsRange(0, 5)),
{MediaSecondsRange(100, 105)});
// Taking consumed the data: the same speech range is now expected to be
// empty.
EXPECT_TRUE(TakeRange(SpeechSecondsRange(0, 5)).empty());
// The second half is still available.
VerifyRanges(TakeRange(SpeechSecondsRange(5, 10)),
{MediaSecondsRange(105, 110)});
}
// Taking a late speech range is expected to discard everything before it, so
// earlier ranges cannot be taken afterwards.
TEST_F(SpeechTimestampEstimatorTest,
SinglePlayback_LaterRangesDiscardPreviousRanges) {
AddNewPlayback(base::Seconds(100));
AppendDuration(base::Seconds(10));
AddNewPlayback(base::Seconds(200));
AppendDuration(base::Seconds(10));
// Speech [15s,20s) is the second half of the second playback.
VerifyRanges(TakeRange(SpeechSecondsRange(15, 20)),
{MediaSecondsRange(205, 210)});
// Both the first playback and the first half of the second one are expected
// to be gone, even though they were never explicitly taken.
EXPECT_TRUE(TakeRange(SpeechSecondsRange(0, 10)).empty());
EXPECT_TRUE(TakeRange(SpeechSecondsRange(10, 15)).empty());
}
// Several back-to-back playbacks, including ones that jump backwards in media
// time, are each expected to yield their own range, in the order played.
TEST_F(SpeechTimestampEstimatorTest, MultiplePlaybacks_Simple) {
  struct Playback {
    int media_start_seconds;
    int duration_seconds;
  };
  constexpr Playback kPlaybacks[] = {
      {100, 10},
      {120, 20},
      {95, 10},
      {105, 10},
  };

  for (const Playback& playback : kPlaybacks) {
    AddNewPlayback(base::Seconds(playback.media_start_seconds));
    AppendDuration(base::Seconds(playback.duration_seconds));
  }

  const MediaRanges expected = {
      MediaSecondsRange(100, 110), MediaSecondsRange(120, 140),
      MediaSecondsRange(95, 105), MediaSecondsRange(105, 115)};
  VerifyRanges(TakeAllRanges(), expected);
}
// Playbacks that are superseded by another AddPlaybackStart() before any audio
// is appended are expected to contribute no ranges.
TEST_F(SpeechTimestampEstimatorTest, MultiplePlaybacks_EmptyPlaybacks) {
// Empty playback at 60s, immediately replaced by the one at 100s.
AddNewPlayback(base::Seconds(60));
AddNewPlayback(base::Seconds(100));
AppendDuration(base::Seconds(10));
// Three empty playbacks in a row (including a repeated start time), followed
// by one at 200s that receives audio.
AddNewPlayback(base::Seconds(110));
AddNewPlayback(base::Seconds(50));
AddNewPlayback(base::Seconds(50));
AddNewPlayback(base::Seconds(200));
AppendDuration(base::Seconds(10));
// The playback at 300s has no audio yet when the first take happens.
AddNewPlayback(base::Seconds(300));
VerifyRanges(TakeAllRanges(),
{MediaSecondsRange(100, 110), MediaSecondsRange(200, 210)});
// Audio appended after the take is expected to be attributed to the playback
// at 300s.
AppendDuration(base::Seconds(10));
VerifyRanges(TakeAllRanges(), {MediaSecondsRange(300, 310)});
}
// A speech range that cuts into the first and last of three playbacks is
// expected to return trimmed ranges at both ends and the middle one whole.
TEST_F(SpeechTimestampEstimatorTest, MultiplePlaybacks_PartialRanges) {
  // 25s of speech in total: 10s at media 100s, 5s at 140s, 10s at 100s again.
  AddNewPlayback(base::Seconds(100));
  AppendDuration(base::Seconds(10));
  AddNewPlayback(base::Seconds(140));
  AppendDuration(base::Seconds(5));
  AddNewPlayback(base::Seconds(100));
  AppendDuration(base::Seconds(10));

  const MediaRanges expected_middle = {MediaSecondsRange(101, 110),
                                       MediaSecondsRange(140, 145),
                                       MediaSecondsRange(100, 106)};
  const MediaRanges taken_middle = TakeRange(SpeechSecondsRange(1, 21));
  VerifyRanges(taken_middle, expected_middle);

  // The last second of speech is the last second of the third playback.
  const MediaRanges expected_tail = {MediaSecondsRange(109, 110)};
  const MediaRanges taken_tail = TakeRange(SpeechSecondsRange(24, 25));
  VerifyRanges(taken_tail, expected_tail);
}
// Silence dropped in the middle of a playback is expected to leave a gap in
// media time between the ranges on either side of it.
TEST_F(SpeechTimestampEstimatorTest, Silences_Simple) {
  constexpr base::TimeDelta kTenSeconds = base::Seconds(10);

  AddNewPlayback(base::Seconds(100));
  AppendDuration(kTenSeconds);
  SkipSilence(kTenSeconds);
  AppendDuration(kTenSeconds);

  const MediaRanges expected = {MediaSecondsRange(100, 110),
                                MediaSecondsRange(120, 130)};
  VerifyRanges(TakeRange(SpeechSecondsRange(0, 20)), expected);
}
// Two consecutive 5s silences are expected to produce the same media gap as a
// single 10s silence.
TEST_F(SpeechTimestampEstimatorTest, Silences_MultipleChunks) {
  constexpr base::TimeDelta kAudioChunk = base::Seconds(10);
  constexpr base::TimeDelta kSilenceChunk = base::Seconds(5);

  AddNewPlayback(base::Seconds(100));
  AppendDuration(kAudioChunk);
  SkipSilence(kSilenceChunk);
  SkipSilence(kSilenceChunk);
  AppendDuration(kAudioChunk);

  const MediaRanges expected = {MediaSecondsRange(100, 110),
                                MediaSecondsRange(120, 130)};
  VerifyRanges(TakeRange(SpeechSecondsRange(0, 20)), expected);
}
// Silence dropped after the last appended audio is expected not to add or
// alter any range.
TEST_F(SpeechTimestampEstimatorTest, Silences_EndSilence) {
  constexpr base::TimeDelta kTenSeconds = base::Seconds(10);

  AddNewPlayback(base::Seconds(100));
  AppendDuration(kTenSeconds);
  SkipSilence(kTenSeconds);

  const MediaRanges expected = {MediaSecondsRange(100, 110)};
  VerifyRanges(TakeAllRanges(), expected);
}
// Silence dropped before any playback is registered is expected not to shift
// the playback that follows.
TEST_F(SpeechTimestampEstimatorTest, Silences_StartSilence) {
  constexpr base::TimeDelta kTenSeconds = base::Seconds(10);

  SkipSilence(kTenSeconds);
  AddNewPlayback(base::Seconds(100));
  AppendDuration(kTenSeconds);

  const MediaRanges expected = {MediaSecondsRange(100, 110)};
  VerifyRanges(TakeAllRanges(), expected);
}
// Silence and audio that arrive before the first playback start are expected
// to yield no media ranges; the playback that follows starts at its own media
// time.
TEST_F(SpeechTimestampEstimatorTest, Silences_SilenceAndDurationBeforeStart) {
  constexpr base::TimeDelta kTenSeconds = base::Seconds(10);

  // Everything here happens before any playback exists.
  SkipSilence(kTenSeconds);
  AppendDuration(kTenSeconds);
  SkipSilence(kTenSeconds);

  AddNewPlayback(base::Seconds(100));
  AppendDuration(kTenSeconds);

  // The first 10s of speech belong to no playback.
  const MediaRanges nothing = {};
  VerifyRanges(TakeRange(SpeechSecondsRange(0, 10)), nothing);

  // The next 10s of speech are the playback at media time 100s.
  const MediaRanges expected = {MediaSecondsRange(100, 110)};
  VerifyRanges(TakeRange(SpeechSecondsRange(10, 20)), expected);
}
// Taking ranges part-way through a run of silence is expected not to lose
// track of the silence: audio appended afterwards lands past the whole 20s
// gap.
TEST_F(SpeechTimestampEstimatorTest, Silences_TakeDuringSilence) {
  constexpr base::TimeDelta kTenSeconds = base::Seconds(10);

  AddNewPlayback(base::Seconds(100));
  AppendDuration(kTenSeconds);
  SkipSilence(kTenSeconds);

  // Take while only the first half of the silence has been reported.
  const MediaRanges expected_before = {MediaSecondsRange(100, 110)};
  VerifyRanges(TakeRange(SpeechSecondsRange(0, 20)), expected_before);

  SkipSilence(kTenSeconds);
  AppendDuration(kTenSeconds);

  const MediaRanges expected_after = {MediaSecondsRange(130, 140)};
  VerifyRanges(TakeRange(SpeechSecondsRange(10, 20)), expected_after);
}
// Silence reported before a new playback start is expected not to carry over:
// the new playback's range begins exactly at its own start time.
TEST_F(SpeechTimestampEstimatorTest, Silences_PlaybackResetsSilence_Simple) {
  constexpr base::TimeDelta kTenSeconds = base::Seconds(10);

  AddNewPlayback(base::Seconds(100));
  AppendDuration(kTenSeconds);
  SkipSilence(kTenSeconds);

  AddNewPlayback(base::Seconds(200));
  AppendDuration(kTenSeconds);

  const MediaRanges expected = {MediaSecondsRange(100, 110),
                                MediaSecondsRange(200, 210)};
  VerifyRanges(TakeRange(SpeechSecondsRange(0, 30)), expected);
}
// Across a run of playback starts interleaved with silences, only the silence
// reported after the most recent start is expected to offset the audio that
// follows.
TEST_F(SpeechTimestampEstimatorTest,
       Silences_PlaybackResetsSilence_MultiplePlaybacks) {
  constexpr base::TimeDelta kTenSeconds = base::Seconds(10);

  AddNewPlayback(base::Seconds(100));
  AppendDuration(kTenSeconds);
  SkipSilence(kTenSeconds);

  // Playbacks that never receive audio, some of them with silence.
  AddNewPlayback(base::Seconds(200));
  SkipSilence(kTenSeconds);
  AddNewPlayback(base::Seconds(300));
  AddNewPlayback(base::Seconds(400));

  // The final playback: 10s of silence, then 10s of audio.
  AddNewPlayback(base::Seconds(500));
  SkipSilence(kTenSeconds);
  AppendDuration(kTenSeconds);

  const MediaRanges expected = {MediaSecondsRange(100, 110),
                                MediaSecondsRange(510, 520)};
  VerifyRanges(TakeRange(SpeechSecondsRange(0, 20)), expected);
}
// Peeking is expected to return the same ranges as taking, but without
// consuming them: repeated peeks agree, and only the take removes data.
TEST_F(SpeechTimestampEstimatorTest, PeekTimestampsInRange) {
  AddNewPlayback(base::Seconds(100));
  AppendDuration(base::Seconds(10));
  AddNewPlayback(base::Seconds(140));
  AppendDuration(base::Seconds(5));
  AddNewPlayback(base::Seconds(100));
  AppendDuration(base::Seconds(10));

  const MediaRanges expected_middle = {MediaSecondsRange(101, 110),
                                       MediaSecondsRange(140, 145),
                                       MediaSecondsRange(100, 106)};

  // Two peeks in a row must see identical data.
  for (int attempt = 0; attempt < 2; ++attempt) {
    VerifyRanges(PeekRange(SpeechSecondsRange(1, 21)), expected_middle);
  }

  // Taking returns the same ranges, and this time removes them.
  VerifyRanges(TakeRange(SpeechSecondsRange(1, 21)), expected_middle);

  // Only the tail of the last playback is left to peek at.
  const MediaRanges expected_remainder = {MediaSecondsRange(106, 110)};
  VerifyRanges(PeekRange(SpeechSecondsRange(0, 100)), expected_remainder);
}
}
}