#include "pdf/pdfium/pdfium_on_demand_searchifier.h"
#include <array>
#include <memory>
#include <string>
#include <utility>
#include "base/functional/bind.h"
#include "base/functional/callback.h"
#include "base/memory/weak_ptr.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "base/task/single_thread_task_runner.h"
#include "base/test/metrics/histogram_tester.h"
#include "base/test/test_future.h"
#include "base/time/time.h"
#include "pdf/accessibility_structs.h"
#include "pdf/pdfium/pdfium_page.h"
#include "pdf/pdfium/pdfium_print.h"
#include "pdf/pdfium/pdfium_range.h"
#include "pdf/pdfium/pdfium_test_base.h"
#include "pdf/test/test_client.h"
#include "pdf/test/test_helpers.h"
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
namespace chrome_pdf {
namespace {
// Value the mocked OCR service reports as its maximum image dimension
// (units are not visible in this file; presumably pixels).
constexpr uint32_t kMaxOcrImageDimension = 2048;
// Names of the histograms whose samples the metrics tests inspect.
const char kPageHasTextHistogram[] = "PDF.PageHasText";
const char kSearchifyAddedTextHistogram[] = "PDF.SearchifyAddedText";
// Shorthand for the mojom annotation type handed to OCR result callbacks.
using VisualAnnotationPtr = screen_ai::mojom::VisualAnnotationPtr;
// Length of one timing cycle used by the delayed-task wait helpers below.
constexpr base::TimeDelta kOcrDelay = base::Milliseconds(100);
// Returns the relative path of the print reference file `test_filename`,
// i.e. "pdfium_print/<test_filename>".
base::FilePath GetReferenceFilePathForPrint(std::string_view test_filename) {
  const base::FilePath print_reference_dir(FILE_PATH_LITERAL("pdfium_print"));
  return print_reference_dir.AppendASCII(test_filename);
}
// TestClient that can pretend to be a print preview client and that counts
// how often the searchify state flips to busy and to idle.
class SearchifierTestClient : public TestClient {
 public:
  SearchifierTestClient() = default;
  SearchifierTestClient(const SearchifierTestClient&) = delete;
  SearchifierTestClient& operator=(const SearchifierTestClient&) = delete;
  ~SearchifierTestClient() override = default;

  // TestClient:
  bool IsPrintPreview() const override { return is_print_preview_; }

  // Bumps the counter that matches the reported state.
  void OnSearchifyStateChange(bool busy) override {
    int& notification_count =
        busy ? busy_state_changed_count_ : idle_state_changed_count_;
    ++notification_count;
  }

  // Makes IsPrintPreview() return true from now on.
  void set_for_print_preview() { is_print_preview_ = true; }

  // Number of OnSearchifyStateChange(true) / (false) calls seen so far.
  int busy_state_changed_count() const { return busy_state_changed_count_; }
  int idle_state_changed_count() const { return idle_state_changed_count_; }

 private:
  bool is_print_preview_ = false;
  int busy_state_changed_count_ = 0;
  int idle_state_changed_count_ = 0;
};
// Schedules `callback` to run on the current default task runner after one
// timing cycle (`kOcrDelay`). Does not block.
void WaitForOneTimingCycle(base::OnceClosure callback) {
  const auto& current_runner =
      base::SingleThreadTaskRunner::GetCurrentDefault();
  current_runner->PostDelayedTask(FROM_HERE, std::move(callback), kOcrDelay);
}
// Builds a default-constructed annotation, i.e. an OCR result with no lines
// added to it.
VisualAnnotationPtr CreateEmptyAnnotation() {
  VisualAnnotationPtr empty_annotation =
      screen_ai::mojom::VisualAnnotation::New();
  return empty_annotation;
}
// Builds an OCR result holding a single line with a single word whose text is
// "OCR Text <call_number>". Line and word share the same 100x100 box at the
// origin with no rotation.
VisualAnnotationPtr CreateSampleAnnotation(int call_number) {
  const gfx::Rect sample_bounds(0, 0, 100, 100);

  // The one word on the line.
  auto sample_word = screen_ai::mojom::WordBox::New();
  sample_word->word = base::StringPrintf("OCR Text %i", call_number);
  sample_word->bounding_box = sample_bounds;
  sample_word->bounding_box_angle = 0;

  // The line that owns the word.
  auto sample_line = screen_ai::mojom::LineBox::New();
  sample_line->bounding_box = sample_bounds;
  sample_line->bounding_box_angle = 0;
  sample_line->words.push_back(std::move(sample_word));

  VisualAnnotationPtr result = CreateEmptyAnnotation();
  result->lines.push_back(std::move(sample_line));
  return result;
}
}
// Fixture for PDFiumOnDemandSearchifier tests. It owns the engine under test
// and replaces the OCR service with mocks that answer asynchronously, one
// `kOcrDelay` after being called.
class PDFiumOnDemandSearchifierTest : public PDFiumTestBase {
 public:
  // Initializes an engine for `test_filename` with `client_` and keeps
  // ownership of it. Callers are expected to check the returned pointer.
  [[nodiscard]] PDFiumEngine* CreateEngine(
      const base::FilePath::CharType* test_filename) {
    engine_ = InitializeEngine(&client_, test_filename);
    return engine_.get();
  }

  // Like CreateEngine(), but first switches `client_` to print preview mode.
  [[nodiscard]] PDFiumEngine* CreatePreviewEngine(
      const base::FilePath::CharType* test_filename) {
    client_.set_for_print_preview();
    return CreateEngine(test_filename);
  }

  // Releases the engine before the base fixture tears down.
  void TearDown() override {
    engine_.reset();
    PDFiumTestBase::TearDown();
  }

  // Starts searchify on the engine, wiring in the mock OCR callbacks. When
  // `empty_results` is true every OCR call replies with an empty annotation;
  // otherwise it replies with CreateSampleAnnotation().
  void StartSearchify(bool empty_results) {
    engine_->StartSearchify(
        base::BindOnce(
            &PDFiumOnDemandSearchifierTest::MockGetOcrMaxImageDimension,
            weak_factory_.GetWeakPtr()),
        base::BindRepeating(&PDFiumOnDemandSearchifierTest::MockPerformOcr,
                            weak_factory_.GetWeakPtr(), empty_results));
  }

  // Mock for the "get max image dimension" request: replies through
  // ReplyGetOcrMaxImageDimension() after `kOcrDelay`.
  void MockGetOcrMaxImageDimension(
      base::OnceCallback<void(uint32_t)> callback) {
    base::SingleThreadTaskRunner::GetCurrentDefault()->PostDelayedTask(
        FROM_HERE,
        base::BindOnce(
            &PDFiumOnDemandSearchifierTest::ReplyGetOcrMaxImageDimension,
            weak_factory_.GetWeakPtr(), std::move(callback)),
        kOcrDelay);
  }

  // Mock for the OCR request. The bitmap is ignored; the reply is delivered
  // to `callback` after `kOcrDelay`. Sample results are numbered with the
  // count of OCR calls made before this one.
  void MockPerformOcr(bool empty_results,
                      const SkBitmap& /*image*/,
                      base::OnceCallback<void(VisualAnnotationPtr)> callback) {
    VisualAnnotationPtr results =
        empty_results ? CreateEmptyAnnotation()
                      : CreateSampleAnnotation(performed_ocrs_);
    base::SingleThreadTaskRunner::GetCurrentDefault()->PostDelayedTask(
        FROM_HERE, base::BindOnce(std::move(callback), std::move(results)),
        kOcrDelay);
    performed_ocrs_++;
  }

  // Blocks until ReplyGetOcrMaxImageDimension() has run.
  void WaitUntilMaxImageDimensionReplied() {
    EXPECT_TRUE(max_image_dimension_replied_.Wait());
  }

  // Re-posts itself every `kOcrDelay` until performed_ocrs() equals
  // `expected_performed_ocrs`.
  // NOTE(review): this does not block the caller; it returns right after
  // posting the next check. Callers that need to block must pump the task
  // runner themselves (e.g. via WaitForOneTimingCycle() and a TestFuture).
  void WaitUntilPerformedOcrCount(int expected_performed_ocrs) {
    if (performed_ocrs() == expected_performed_ocrs) {
      return;
    }
    // Bound to a WeakPtr, like the other callbacks posted by this fixture, so
    // a check that is still pending can never run on a destroyed fixture.
    base::SingleThreadTaskRunner::GetCurrentDefault()->PostDelayedTask(
        FROM_HERE,
        base::BindOnce(
            &PDFiumOnDemandSearchifierTest::WaitUntilPerformedOcrCount,
            weak_factory_.GetWeakPtr(), expected_performed_ocrs),
        kOcrDelay);
  }

  // Runs `callback` once `searchifier` is in `expected_state`, checking now
  // and then again after every timing cycle.
  static void WaitForState(PDFiumOnDemandSearchifier* searchifier,
                           base::OnceClosure callback,
                           PDFiumOnDemandSearchifier::State expected_state) {
    if (searchifier->state_ == expected_state) {
      std::move(callback).Run();
      return;
    }
    WaitForOneTimingCycle(
        base::BindOnce(&PDFiumOnDemandSearchifierTest::WaitForState,
                       searchifier, std::move(callback), expected_state));
  }

  // The helpers below block until `searchifier` reaches the named state and
  // return the result of TestFuture::Wait().
  [[nodiscard]] bool WaitForIdleState(PDFiumOnDemandSearchifier* searchifier) {
    base::test::TestFuture<void> future;
    WaitForState(searchifier, future.GetCallback(),
                 PDFiumOnDemandSearchifier::State::kIdle);
    return future.Wait();
  }

  [[nodiscard]] bool WaitForFailedState(
      PDFiumOnDemandSearchifier* searchifier) {
    base::test::TestFuture<void> future;
    WaitForState(searchifier, future.GetCallback(),
                 PDFiumOnDemandSearchifier::State::kFailed);
    return future.Wait();
  }

  [[nodiscard]] bool WaitForWaitingForPageAvailabilityState(
      PDFiumOnDemandSearchifier* searchifier) {
    base::test::TestFuture<void> future;
    WaitForState(searchifier, future.GetCallback(),
                 PDFiumOnDemandSearchifier::State::kWaitingForPageAvailability);
    return future.Wait();
  }

  // Returns all text on `page`, converted to UTF-8.
  std::string GetPageText(PDFiumPage& page) {
    return base::UTF16ToUTF8(PDFiumRange::AllTextOnPage(&page).GetText());
  }

  // Answers a max-image-dimension request with `kMaxOcrImageDimension`, then
  // signals WaitUntilMaxImageDimensionReplied().
  void ReplyGetOcrMaxImageDimension(
      base::OnceCallback<void(uint32_t)> callback) {
    std::move(callback).Run(kMaxOcrImageDimension);
    // GetCallback() returns a temporary, so it can be run directly.
    max_image_dimension_replied_.GetCallback().Run();
  }

  // Number of MockPerformOcr() calls so far.
  int performed_ocrs() const { return performed_ocrs_; }

  // Busy/idle notification counts recorded by `client_`.
  int busy_state_changed_count() const {
    return client_.busy_state_changed_count();
  }
  int idle_state_changed_count() const {
    return client_.idle_state_changed_count();
  }

 private:
  std::unique_ptr<PDFiumEngine> engine_;
  SearchifierTestClient client_;
  int performed_ocrs_ = 0;
  base::test::TestFuture<void> max_image_dimension_replied_;
  base::WeakPtrFactory<PDFiumOnDemandSearchifierTest> weak_factory_{this};
};
// For blank.pdf, loading the page must not schedule searchify or create a
// searchifier; only a single "page has no text" sample is recorded.
TEST_P(PDFiumOnDemandSearchifierTest, Blank) {
  base::HistogramTester histograms;

  PDFiumEngine* pdf_engine = CreateEngine(FILE_PATH_LITERAL("blank.pdf"));
  ASSERT_TRUE(pdf_engine);

  // Load the page, then verify nothing was queued for OCR.
  PDFiumPage& first_page = GetPDFiumPage(*pdf_engine, 0);
  first_page.GetPage();
  ASSERT_FALSE(pdf_engine->IsPageScheduledForSearchify(0));
  EXPECT_FALSE(first_page.IsPageSearchified());
  ASSERT_FALSE(pdf_engine->GetSearchifierForTesting());

  histograms.ExpectUniqueSample(kPageHasTextHistogram, false, 1);
  histograms.ExpectTotalCount(kSearchifyAddedTextHistogram, 0);
}
// For hello_world2.pdf, loading the page must not schedule searchify or
// create a searchifier; a single "page has text" sample is recorded.
TEST_P(PDFiumOnDemandSearchifierTest, NoImage) {
  base::HistogramTester histograms;

  PDFiumEngine* pdf_engine =
      CreateEngine(FILE_PATH_LITERAL("hello_world2.pdf"));
  ASSERT_TRUE(pdf_engine);

  // Load the page, then verify nothing was queued for OCR.
  PDFiumPage& first_page = GetPDFiumPage(*pdf_engine, 0);
  first_page.GetPage();
  ASSERT_FALSE(pdf_engine->IsPageScheduledForSearchify(0));
  EXPECT_FALSE(first_page.IsPageSearchified());
  ASSERT_FALSE(pdf_engine->GetSearchifierForTesting());

  histograms.ExpectUniqueSample(kPageHasTextHistogram, true, 1);
  histograms.ExpectTotalCount(kSearchifyAddedTextHistogram, 0);
}
// Searchifying the single page of image_alt_text.pdf is expected to take two
// OCR calls and leave both results as page text.
TEST_P(PDFiumOnDemandSearchifierTest, OnePageWithImages) {
  PDFiumEngine* pdf_engine =
      CreateEngine(FILE_PATH_LITERAL("image_alt_text.pdf"));
  ASSERT_TRUE(pdf_engine);

  // Loading the page is expected to schedule it.
  PDFiumPage& image_page = GetPDFiumPage(*pdf_engine, 0);
  image_page.GetPage();
  ASSERT_TRUE(pdf_engine->IsPageScheduledForSearchify(0));

  PDFiumOnDemandSearchifier* ocr_searchifier =
      pdf_engine->GetSearchifierForTesting();
  ASSERT_TRUE(ocr_searchifier);
  ASSERT_TRUE(ocr_searchifier->IsPageScheduled(0));

  // Run OCR with sample results until the searchifier goes idle.
  StartSearchify(/*empty_results=*/false);
  ASSERT_TRUE(WaitForIdleState(ocr_searchifier));

  ASSERT_EQ(performed_ocrs(), 2);
  EXPECT_TRUE(image_page.IsPageSearchified());
  ASSERT_EQ(GetPageText(image_page), "OCR Text 0\r\nOCR Text 1");
}
// When OCR returns empty results the page still counts as searchified, has no
// text, and is not OCRed again after being unloaded and re-read.
TEST_P(PDFiumOnDemandSearchifierTest, PageWithImagesNoRecognizableText) {
  PDFiumEngine* pdf_engine =
      CreateEngine(FILE_PATH_LITERAL("image_alt_text.pdf"));
  ASSERT_TRUE(pdf_engine);

  PDFiumPage& image_page = GetPDFiumPage(*pdf_engine, 0);
  image_page.GetPage();
  ASSERT_TRUE(pdf_engine->IsPageScheduledForSearchify(0));

  PDFiumOnDemandSearchifier* ocr_searchifier =
      pdf_engine->GetSearchifierForTesting();
  ASSERT_TRUE(ocr_searchifier);
  ASSERT_TRUE(ocr_searchifier->IsPageScheduled(0));

  // First pass: OCR runs twice but contributes no text.
  StartSearchify(/*empty_results=*/true);
  ASSERT_TRUE(WaitForIdleState(ocr_searchifier));
  ASSERT_EQ(performed_ocrs(), 2);
  EXPECT_TRUE(image_page.IsPageSearchified());
  EXPECT_TRUE(GetPageText(image_page).empty());

  // After an unload, reading the text again must not trigger more OCR calls.
  image_page.Unload();
  EXPECT_EQ(GetPageText(image_page), "");
  ASSERT_TRUE(WaitForIdleState(ocr_searchifier));
  ASSERT_EQ(performed_ocrs(), 2);
  EXPECT_TRUE(image_page.IsPageSearchified());
}
// All four pages of multi_page_no_text.pdf get scheduled when loaded, and each
// ends up with the text of its own OCR call.
TEST_P(PDFiumOnDemandSearchifierTest, MultiplePagesWithImages) {
  constexpr int kPageCount = 4;
  static constexpr std::array<const char*, kPageCount> kExpectedPageText = {
      "OCR Text 0", "OCR Text 1", "OCR Text 2", "OCR Text 3"};

  PDFiumEngine* pdf_engine =
      CreateEngine(FILE_PATH_LITERAL("multi_page_no_text.pdf"));
  ASSERT_TRUE(pdf_engine);

  // Load every page; each load should schedule that page.
  for (int page_index = 0; page_index < kPageCount; ++page_index) {
    GetPDFiumPage(*pdf_engine, page_index).GetPage();
    ASSERT_TRUE(pdf_engine->IsPageScheduledForSearchify(page_index));
  }

  PDFiumOnDemandSearchifier* ocr_searchifier =
      pdf_engine->GetSearchifierForTesting();
  ASSERT_TRUE(ocr_searchifier);
  for (int page_index = 0; page_index < kPageCount; ++page_index) {
    ASSERT_TRUE(ocr_searchifier->IsPageScheduled(page_index)) << page_index;
  }

  StartSearchify(/*empty_results=*/false);
  ASSERT_TRUE(WaitForIdleState(ocr_searchifier));
  ASSERT_EQ(performed_ocrs(), kPageCount);

  for (int page_index = 0; page_index < kPageCount; ++page_index) {
    EXPECT_EQ(GetPageText(GetPDFiumPage(*pdf_engine, page_index)),
              kExpectedPageText[page_index])
        << page_index;
  }
}
// Text added by searchify must still be reported after the pages have been
// unloaded.
TEST_P(PDFiumOnDemandSearchifierTest, AddedTextPreservedAfterUnload) {
  constexpr int kPageCount = 4;
  static constexpr std::array<const char*, kPageCount> kExpectedPageText = {
      "OCR Text 0", "OCR Text 1", "OCR Text 2", "OCR Text 3"};

  PDFiumEngine* pdf_engine =
      CreateEngine(FILE_PATH_LITERAL("multi_page_no_text.pdf"));
  ASSERT_TRUE(pdf_engine);

  for (int page_index = 0; page_index < kPageCount; ++page_index) {
    GetPDFiumPage(*pdf_engine, page_index).GetPage();
  }

  PDFiumOnDemandSearchifier* ocr_searchifier =
      pdf_engine->GetSearchifierForTesting();
  ASSERT_TRUE(ocr_searchifier);

  StartSearchify(/*empty_results=*/false);
  ASSERT_TRUE(WaitForIdleState(ocr_searchifier));
  ASSERT_EQ(performed_ocrs(), kPageCount);

  // Checks that every page reports the text of its own OCR result.
  const auto expect_ocr_text_on_every_page = [&] {
    for (int page_index = 0; page_index < kPageCount; ++page_index) {
      EXPECT_EQ(GetPageText(GetPDFiumPage(*pdf_engine, page_index)),
                kExpectedPageText[page_index])
          << page_index;
    }
  };

  expect_ocr_text_on_every_page();

  // Unload everything and check again.
  for (int page_index = 0; page_index < kPageCount; ++page_index) {
    GetPDFiumPage(*pdf_engine, page_index).Unload();
  }
  expect_ocr_text_on_every_page();
}
// Unloads a page while its searchify is still in progress and checks that the
// page stays scheduled and ends up with the text of both OCR results.
TEST_P(PDFiumOnDemandSearchifierTest, MultipleImagesWithUnload) {
PDFiumEngine* engine = CreateEngine(FILE_PATH_LITERAL("image_alt_text.pdf"));
ASSERT_TRUE(engine);
PDFiumPage& page = GetPDFiumPage(*engine, 0);
page.GetPage();
ASSERT_TRUE(engine->IsPageScheduledForSearchify(0));
PDFiumOnDemandSearchifier* searchifier = engine->GetSearchifierForTesting();
ASSERT_TRUE(searchifier);
ASSERT_TRUE(searchifier->IsPageScheduled(0));
ASSERT_EQ(performed_ocrs(), 0);
StartSearchify(false);
// NOTE(review): WaitUntilPerformedOcrCount() only posts a delayed re-check
// and returns; it does not block here. Confirm that is intended.
WaitUntilPerformedOcrCount(1);
// Nothing has been committed to the page at this point.
EXPECT_FALSE(page.IsPageSearchified());
ASSERT_EQ(GetPageText(page), "");
{
// Let one timing cycle (`kOcrDelay`) of posted tasks run.
base::test::TestFuture<void> future;
WaitForOneTimingCycle(future.GetCallback());
ASSERT_TRUE(future.Wait());
}
// Still no text on the page after one cycle.
EXPECT_FALSE(page.IsPageSearchified());
ASSERT_EQ(GetPageText(page), "");
// Unloading mid-way must keep the page scheduled.
page.Unload();
ASSERT_TRUE(searchifier->IsPageScheduled(0));
// Once idle, two OCR calls have been made and both results are on the page.
ASSERT_TRUE(WaitForIdleState(searchifier));
ASSERT_EQ(performed_ocrs(), 2);
EXPECT_TRUE(page.IsPageSearchified());
ASSERT_EQ(GetPageText(page), "OCR Text 0\r\nOCR Text 1");
}
// A page unloaded before searchify starts is still searchified, and every
// page's first text run is flagged as searchified, also after a later unload.
TEST_P(PDFiumOnDemandSearchifierTest, MultiplePagesWithUnload) {
  constexpr int kPageCount = 4;

  PDFiumEngine* pdf_engine =
      CreateEngine(FILE_PATH_LITERAL("multi_page_no_text.pdf"));
  ASSERT_TRUE(pdf_engine);

  for (int page_index = 0; page_index < kPageCount; ++page_index) {
    ASSERT_TRUE(GetPDFiumPage(*pdf_engine, page_index).GetPage());
  }

  // Drop the first page before OCR starts.
  PDFiumPage& first_page = GetPDFiumPage(*pdf_engine, 0);
  first_page.Unload();

  PDFiumOnDemandSearchifier* ocr_searchifier =
      pdf_engine->GetSearchifierForTesting();
  ASSERT_TRUE(ocr_searchifier);
  for (int page_index = 0; page_index < kPageCount; ++page_index) {
    ASSERT_TRUE(ocr_searchifier->IsPageScheduled(page_index));
  }

  StartSearchify(/*empty_results=*/false);
  ASSERT_TRUE(WaitForIdleState(ocr_searchifier));
  ASSERT_EQ(performed_ocrs(), kPageCount);

  // The first page is still unloaded here, yet its text is available.
  EXPECT_EQ(first_page.page(), nullptr);
  EXPECT_EQ(GetPageText(first_page), "OCR Text 0");
  EXPECT_TRUE(first_page.IsPageSearchified());
  std::optional<AccessibilityTextRunInfo> first_run_info =
      first_page.GetTextRunInfoAt(0);
  ASSERT_TRUE(first_run_info.has_value());
  EXPECT_TRUE(first_run_info->is_searchified);

  PDFiumPage& second_page = GetPDFiumPage(*pdf_engine, 1);
  EXPECT_EQ(GetPageText(second_page), "OCR Text 1");
  EXPECT_TRUE(second_page.IsPageSearchified());
  std::optional<AccessibilityTextRunInfo> second_run_info =
      second_page.GetTextRunInfoAt(0);
  ASSERT_TRUE(second_run_info.has_value());
  EXPECT_TRUE(second_run_info->is_searchified);

  PDFiumPage& third_page = GetPDFiumPage(*pdf_engine, 2);
  EXPECT_EQ(GetPageText(third_page), "OCR Text 2");
  EXPECT_TRUE(third_page.IsPageSearchified());
  std::optional<AccessibilityTextRunInfo> third_run_info =
      third_page.GetTextRunInfoAt(0);
  ASSERT_TRUE(third_run_info.has_value());
  EXPECT_TRUE(third_run_info->is_searchified);

  PDFiumPage& fourth_page = GetPDFiumPage(*pdf_engine, 3);
  EXPECT_EQ(GetPageText(fourth_page), "OCR Text 3");
  EXPECT_TRUE(fourth_page.IsPageSearchified());
  std::optional<AccessibilityTextRunInfo> fourth_run_info =
      fourth_page.GetTextRunInfoAt(0);
  ASSERT_TRUE(fourth_run_info.has_value());
  EXPECT_TRUE(fourth_run_info->is_searchified);

  // Unloading after searchify must not lose the added text.
  fourth_page.Unload();
  EXPECT_EQ(GetPageText(fourth_page), "OCR Text 3");
  EXPECT_TRUE(fourth_page.IsPageSearchified());
  fourth_run_info = fourth_page.GetTextRunInfoAt(0);
  ASSERT_TRUE(fourth_run_info.has_value());
  EXPECT_TRUE(fourth_run_info->is_searchified);
}
// With a print preview client, loading a page with images must neither
// schedule searchify nor create a searchifier.
TEST_P(PDFiumOnDemandSearchifierTest, OnePageWithImagesInPrintPreview) {
  PDFiumEngine* preview_engine =
      CreatePreviewEngine(FILE_PATH_LITERAL("image_alt_text.pdf"));
  ASSERT_TRUE(preview_engine);

  PDFiumPage& image_page = GetPDFiumPage(*preview_engine, 0);
  image_page.GetPage();

  ASSERT_FALSE(preview_engine->IsPageScheduledForSearchify(0));
  ASSERT_FALSE(preview_engine->GetSearchifierForTesting());
}
// Running the OCR disconnect handler right after StartSearchify(), before the
// max image dimension reply arrives, must end in the failed state with no OCR
// calls made.
TEST_P(PDFiumOnDemandSearchifierTest,
       OcrDisconnectionBeforeGettingMaxImageDimension) {
  constexpr int kPageCount = 4;

  PDFiumEngine* pdf_engine =
      CreateEngine(FILE_PATH_LITERAL("multi_page_no_text.pdf"));
  ASSERT_TRUE(pdf_engine);

  for (int page_index = 0; page_index < kPageCount; ++page_index) {
    ASSERT_TRUE(GetPDFiumPage(*pdf_engine, page_index).GetPage());
  }

  StartSearchify(/*empty_results=*/false);

  // Disconnect immediately; the mocked replies are still pending.
  pdf_engine->GetOcrDisconnectHandler().Run();

  ASSERT_TRUE(WaitForFailedState(pdf_engine->GetSearchifierForTesting()));
  ASSERT_EQ(performed_ocrs(), 0);
}
// Running the OCR disconnect handler after the max image dimension reply must
// end in the failed state before all pages have been OCRed.
TEST_P(PDFiumOnDemandSearchifierTest,
       OcrDisconnectionAfterGettingMaxImageDimension) {
  constexpr int kPageCount = 4;

  PDFiumEngine* pdf_engine =
      CreateEngine(FILE_PATH_LITERAL("multi_page_no_text.pdf"));
  ASSERT_TRUE(pdf_engine);

  for (int page_index = 0; page_index < kPageCount; ++page_index) {
    ASSERT_TRUE(GetPDFiumPage(*pdf_engine, page_index).GetPage());
  }

  StartSearchify(/*empty_results=*/false);

  // Only disconnect once the max image dimension has been delivered.
  WaitUntilMaxImageDimensionReplied();
  pdf_engine->GetOcrDisconnectHandler().Run();

  ASSERT_TRUE(WaitForFailedState(pdf_engine->GetSearchifierForTesting()));
  ASSERT_LT(performed_ocrs(), kPageCount);
}
// Tracks the busy/idle notifications the client receives as pages are loaded,
// searchify completes, and the OCR service disconnects.
TEST_P(PDFiumOnDemandSearchifierTest, SearchifyStateChanges) {
PDFiumEngine* engine =
CreateEngine(FILE_PATH_LITERAL("multi_page_no_text.pdf"));
ASSERT_TRUE(engine);
// Loading the first page produces the first busy notification.
GetPDFiumPage(*engine, 0).GetPage();
EXPECT_EQ(busy_state_changed_count(), 1);
EXPECT_EQ(idle_state_changed_count(), 0);
// Starting searchify does not by itself produce another notification.
StartSearchify(false);
EXPECT_EQ(busy_state_changed_count(), 1);
EXPECT_EQ(idle_state_changed_count(), 0);
// Reaching the idle state produces the first idle notification.
ASSERT_TRUE(WaitForIdleState(engine->GetSearchifierForTesting()));
EXPECT_EQ(busy_state_changed_count(), 1);
EXPECT_EQ(idle_state_changed_count(), 1);
// Loading two more pages back-to-back yields a single busy notification.
GetPDFiumPage(*engine, 1).GetPage();
GetPDFiumPage(*engine, 2).GetPage();
EXPECT_EQ(busy_state_changed_count(), 2);
EXPECT_EQ(idle_state_changed_count(), 1);
ASSERT_TRUE(WaitForIdleState(engine->GetSearchifierForTesting()));
EXPECT_EQ(busy_state_changed_count(), 2);
EXPECT_EQ(idle_state_changed_count(), 2);
// Loading the last page makes the searchifier busy a third time.
GetPDFiumPage(*engine, 3).GetPage();
EXPECT_EQ(busy_state_changed_count(), 3);
EXPECT_EQ(idle_state_changed_count(), 2);
// Running the disconnect handler reports idle synchronously, without waiting.
engine->GetOcrDisconnectHandler().Run();
EXPECT_EQ(busy_state_changed_count(), 3);
EXPECT_EQ(idle_state_changed_count(), 3);
}
// A page without text records one PageHasText=false sample when loaded and
// one SearchifyAddedText=true sample once searchify has finished.
TEST_P(PDFiumOnDemandSearchifierTest, MetricsProcessedPageWithoutText) {
  base::HistogramTester histograms;

  PDFiumEngine* pdf_engine =
      CreateEngine(FILE_PATH_LITERAL("multi_page_no_text.pdf"));
  ASSERT_TRUE(pdf_engine);

  // Creating the engine alone records nothing.
  histograms.ExpectTotalCount(kPageHasTextHistogram, 0);
  histograms.ExpectTotalCount(kSearchifyAddedTextHistogram, 0);

  // Loading the page records that it has no text.
  GetPDFiumPage(*pdf_engine, 0).GetPage();
  histograms.ExpectTotalCount(kPageHasTextHistogram, 1);
  histograms.ExpectBucketCount(kPageHasTextHistogram, false, 1);
  histograms.ExpectTotalCount(kSearchifyAddedTextHistogram, 0);

  // Completed searchify records that text was added.
  StartSearchify(/*empty_results=*/false);
  ASSERT_TRUE(WaitForIdleState(pdf_engine->GetSearchifierForTesting()));
  histograms.ExpectTotalCount(kPageHasTextHistogram, 1);
  histograms.ExpectTotalCount(kSearchifyAddedTextHistogram, 1);
  histograms.ExpectBucketCount(kSearchifyAddedTextHistogram, true, 1);
}
// When the OCR service disconnects right after searchify starts, no
// SearchifyAddedText sample is recorded.
TEST_P(PDFiumOnDemandSearchifierTest, MetricsCanceledPageWithoutText) {
  base::HistogramTester histograms;

  PDFiumEngine* pdf_engine =
      CreateEngine(FILE_PATH_LITERAL("multi_page_no_text.pdf"));
  ASSERT_TRUE(pdf_engine);

  // Creating the engine alone records nothing.
  histograms.ExpectTotalCount(kPageHasTextHistogram, 0);
  histograms.ExpectTotalCount(kSearchifyAddedTextHistogram, 0);

  // Loading the page records that it has no text.
  GetPDFiumPage(*pdf_engine, 0).GetPage();
  histograms.ExpectTotalCount(kPageHasTextHistogram, 1);
  histograms.ExpectBucketCount(kPageHasTextHistogram, false, 1);
  histograms.ExpectTotalCount(kSearchifyAddedTextHistogram, 0);

  // Start, then disconnect straight away.
  StartSearchify(/*empty_results=*/false);
  pdf_engine->GetOcrDisconnectHandler().Run();
  ASSERT_TRUE(WaitForFailedState(pdf_engine->GetSearchifierForTesting()));

  histograms.ExpectTotalCount(kPageHasTextHistogram, 1);
  histograms.ExpectTotalCount(kSearchifyAddedTextHistogram, 0);
}
// Select-all yields no text before searchify and the OCR text afterwards; the
// expected line separator differs on Windows.
TEST_P(PDFiumOnDemandSearchifierTest, SelectPageBeforeSearchify) {
#if BUILDFLAG(IS_WIN)
  const char kExpectedSelection[] = "OCR Text 0\r\nOCR Text 1";
#else
  const char kExpectedSelection[] = "OCR Text 0\nOCR Text 1";
#endif

  PDFiumEngine* pdf_engine =
      CreateEngine(FILE_PATH_LITERAL("image_alt_text.pdf"));
  ASSERT_TRUE(pdf_engine);

  PDFiumPage& image_page = GetPDFiumPage(*pdf_engine, 0);
  image_page.GetPage();
  ASSERT_TRUE(pdf_engine->IsPageScheduledForSearchify(0));

  // Before OCR there is nothing to select.
  pdf_engine->SelectAll();
  ASSERT_TRUE(pdf_engine->GetSelectedText().empty());

  PDFiumOnDemandSearchifier* ocr_searchifier =
      pdf_engine->GetSearchifierForTesting();
  ASSERT_TRUE(ocr_searchifier);
  ASSERT_TRUE(ocr_searchifier->IsPageScheduled(0));

  StartSearchify(/*empty_results=*/false);
  ASSERT_TRUE(WaitForIdleState(ocr_searchifier));

  // After OCR the selection covers the added text.
  pdf_engine->SelectAll();
  ASSERT_EQ(pdf_engine->GetSelectedText(), kExpectedSelection);
}
// Only the loaded page is OCRed; the page that was unloaded up front still
// reports exactly one image object afterwards.
TEST_P(PDFiumOnDemandSearchifierTest, UpdateWithUnloadedPage) {
  PDFiumEngine* pdf_engine =
      CreateEngine(FILE_PATH_LITERAL("multi_page_no_text.pdf"));
  ASSERT_TRUE(pdf_engine);

  PDFiumPage& loaded_page = GetPDFiumPage(*pdf_engine, 0);
  PDFiumPage& unloaded_page = GetPDFiumPage(*pdf_engine, 1);
  EXPECT_TRUE(loaded_page.GetPage());
  unloaded_page.Unload();

  StartSearchify(/*empty_results=*/false);
  PDFiumOnDemandSearchifier* ocr_searchifier =
      pdf_engine->GetSearchifierForTesting();
  ASSERT_TRUE(WaitForIdleState(ocr_searchifier));

  EXPECT_EQ(performed_ocrs(), 1);
  EXPECT_EQ(unloaded_page.GetImageObjectIndices().size(), 1u);
}
// While a ScopedUnloadPreventer holds the page, the searchifier must park in
// the waiting-for-page-availability state after one OCR call without
// committing anything; once the preventer is gone the page gets searchified.
TEST_P(PDFiumOnDemandSearchifierTest, UpdateWithUnloadLockedPage) {
  PDFiumEngine* engine =
      CreateEngine(FILE_PATH_LITERAL("multi_page_no_text.pdf"));
  // Guard the dereferences below, as every other test in this file does.
  ASSERT_TRUE(engine);

  PDFiumPage& page0 = GetPDFiumPage(*engine, 0);
  EXPECT_TRUE(page0.GetPage());

  StartSearchify(/*empty_results=*/false);
  PDFiumOnDemandSearchifier* searchifier = engine->GetSearchifierForTesting();
  // The wait helpers dereference the searchifier, so fail cleanly if absent.
  ASSERT_TRUE(searchifier);

  {
    PDFiumPage::ScopedUnloadPreventer unload_preventer(&page0);
    ASSERT_TRUE(WaitForWaitingForPageAvailabilityState(searchifier));
    EXPECT_EQ(performed_ocrs(), 1);
    EXPECT_FALSE(page0.IsPageSearchified());

    // The OCR service disconnects while the result is still uncommitted.
    engine->GetOcrDisconnectHandler().Run();
  }

  ASSERT_TRUE(WaitForIdleState(searchifier));
  EXPECT_TRUE(page0.IsPageSearchified());
}
// Printing bug_405433817.pdf must fuzzy-match the same reference image before
// and after the page has been searchified with empty OCR results.
TEST_P(PDFiumOnDemandSearchifierTest, Bug405433817) {
  static constexpr std::array<int, 1> kPageIndices = {0};

  PDFiumEngine* pdf_engine =
      CreateEngine(FILE_PATH_LITERAL("bug_405433817.pdf"));
  ASSERT_TRUE(pdf_engine);

  PDFiumPage& first_page = GetPDFiumPage(*pdf_engine, 0);
  first_page.GetPage();
  ASSERT_TRUE(pdf_engine->IsPageScheduledForSearchify(0));

  PDFiumPrint printer(pdf_engine);
  const blink::WebPrintParams print_params = GetDefaultPrintParams();

  // Prints page 0 and compares the output against the reference rendering.
  const auto print_and_compare = [&] {
    std::vector<uint8_t> printed_pdf =
        printer.PrintPagesAsPdf(kPageIndices, print_params);
    CheckFuzzyPdfRendering(printed_pdf, 0, {200, 300},
                           GetReferenceFilePathForPrint("bug_405433817.png"));
  };

  print_and_compare();

  PDFiumOnDemandSearchifier* ocr_searchifier =
      pdf_engine->GetSearchifierForTesting();
  ASSERT_TRUE(ocr_searchifier);
  ASSERT_TRUE(ocr_searchifier->IsPageScheduled(0));

  StartSearchify(/*empty_results=*/true);
  ASSERT_TRUE(WaitForIdleState(ocr_searchifier));
  ASSERT_EQ(performed_ocrs(), 1);
  EXPECT_TRUE(first_page.IsPageSearchified());

  print_and_compare();
}
// Printing bug_406530484.pdf must match the same reference image before and
// after the page has been searchified with empty OCR results.
TEST_P(PDFiumOnDemandSearchifierTest, Bug406530484) {
  static constexpr std::array<int, 1> kPageIndices = {0};

  PDFiumEngine* pdf_engine =
      CreateEngine(FILE_PATH_LITERAL("bug_406530484.pdf"));
  ASSERT_TRUE(pdf_engine);

  PDFiumPage& first_page = GetPDFiumPage(*pdf_engine, 0);
  first_page.GetPage();
  ASSERT_TRUE(pdf_engine->IsPageScheduledForSearchify(0));

  PDFiumPrint printer(pdf_engine);
  const blink::WebPrintParams print_params = GetDefaultPrintParams();

  // Prints page 0 and compares the output against the reference rendering.
  const auto print_and_compare = [&] {
    std::vector<uint8_t> printed_pdf =
        printer.PrintPagesAsPdf(kPageIndices, print_params);
    CheckPdfRendering(printed_pdf, 0, {200, 300},
                      GetReferenceFilePathForPrint("bug_406530484.png"));
  };

  print_and_compare();

  PDFiumOnDemandSearchifier* ocr_searchifier =
      pdf_engine->GetSearchifierForTesting();
  ASSERT_TRUE(ocr_searchifier);
  ASSERT_TRUE(ocr_searchifier->IsPageScheduled(0));

  StartSearchify(/*empty_results=*/true);
  ASSERT_TRUE(WaitForIdleState(ocr_searchifier));
  ASSERT_EQ(performed_ocrs(), 1);
  EXPECT_TRUE(first_page.IsPageSearchified());

  print_and_compare();
}
// Runs every test above once per boolean parameter value. The meaning of the
// parameter is defined by PDFiumTestBase and is not visible in this file.
INSTANTIATE_TEST_SUITE_P(All, PDFiumOnDemandSearchifierTest, testing::Bool());
}