910e62b5创建于 1月15日历史提交
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "pdf/pdfium/pdfium_searchify.h"

#include <math.h>
#include <stdint.h>

#include <array>
#include <string>
#include <utility>
#include <vector>

#include "base/check.h"
#include "base/check_op.h"
#include "base/compiler_specific.h"
#include "base/containers/span.h"
#include "base/functional/callback.h"
#include "base/numerics/angle_conversions.h"
#include "base/strings/utf_string_conversions.h"
#include "pdf/pdfium/pdfium_api_wrappers.h"
#include "pdf/pdfium/pdfium_engine.h"
#include "pdf/pdfium/pdfium_mem_buffer_file_write.h"
#include "pdf/pdfium/pdfium_ocr.h"
#include "pdf/pdfium/pdfium_searchify_font.h"
#include "services/screen_ai/public/cpp/utilities.h"
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
#include "third_party/pdfium/public/cpp/fpdf_scopers.h"
#include "third_party/pdfium/public/fpdf_edit.h"
#include "third_party/pdfium/public/fpdf_save.h"
#include "third_party/pdfium/public/fpdfview.h"
#include "third_party/skia/include/core/SkBitmap.h"
#include "third_party/skia/include/core/SkImageInfo.h"
#include "third_party/skia/include/core/SkPixmap.h"
#include "ui/gfx/geometry/point_f.h"
#include "ui/gfx/geometry/rect.h"
#include "ui/gfx/geometry/size.h"
#include "ui/gfx/geometry/size_f.h"

namespace chrome_pdf {

namespace {

// Converts the UTF-8 encoded `string` into a list of charcodes. The text is
// first converted to UTF-16, and every UTF-16 code unit becomes one charcode.
std::vector<uint32_t> Utf8ToCharcodes(const std::string& string) {
  const std::u16string utf16_text = base::UTF8ToUTF16(string);
  // The range constructor widens each char16_t code unit to uint32_t.
  return std::vector<uint32_t>(utf16_text.begin(), utf16_text.end());
}

// OCR results are expressed in a coordinate system whose origin is the
// top-left corner, whereas PDF puts the origin at the bottom-left. Converts
// the origin of `rect`, which is rotated by `angle` degrees, into PDF
// coordinates for a coordinate system that is `coordinate_system_height` tall.
// The returned theta is in radians and has the opposite sign of `angle`.
SearchifyBoundingBoxOrigin ConvertToPdfOrigin(const gfx::Rect& rect,
                                              float angle,
                                              float coordinate_system_height) {
  const float radians = base::DegToRad(angle);
  const float pdf_x = rect.x() - (sinf(radians) * rect.height());
  // Distance of the converted point from the top edge, before flipping the
  // y axis.
  const float distance_from_top = rect.y() + cosf(radians) * rect.height();
  const float pdf_y = coordinate_system_height - distance_from_top;
  return {.point = {pdf_x, pdf_y}, .theta = -radians};
}

// Projects `origin_point` onto the baseline, i.e. the line that goes through
// `baseline_origin.point` at angle `baseline_origin.theta`. The returned
// origin lies on that line and carries the baseline's angle.
SearchifyBoundingBoxOrigin ProjectToBaseline(
    const gfx::PointF& origin_point,
    const SearchifyBoundingBoxOrigin& baseline_origin) {
  const gfx::PointF& baseline_point = baseline_origin.point;
  // Unit vector pointing along the baseline.
  const float direction_x = cosf(baseline_origin.theta);
  const float direction_y = sinf(baseline_origin.theta);
  // Signed distance, measured along the baseline, from the baseline's origin
  // to the projection of `origin_point`.
  const float distance_along_baseline =
      (origin_point.x() - baseline_point.x()) * direction_x +
      (origin_point.y() - baseline_point.y()) * direction_y;
  return {
      .point = {baseline_point.x() + distance_along_baseline * direction_x,
                baseline_point.y() + distance_along_baseline * direction_y},
      .theta = baseline_origin.theta};
}

// Returns the size of `image` as laid out on the page, computed from the
// lengths of two adjacent edges of its rotated bounds. Returns an empty size
// if the rotated bounds cannot be retrieved.
gfx::SizeF GetRenderedImageSize(FPDF_PAGEOBJECT image) {
  FS_QUADPOINTSF quad;
  if (FPDFPageObj_GetRotatedBounds(image, &quad)) {
    // Edge between points 1 and 2 gives the width; edge between points 2 and
    // 3 gives the height.
    const float width = hypotf(quad.x1 - quad.x2, quad.y1 - quad.y2);
    const float height = hypotf(quad.x2 - quad.x3, quad.y2 - quad.y3);
    return gfx::SizeF(width, height);
  }
  return gfx::SizeF();
}

// Reads the transformation matrix of `image` into `image_matrix` and divides
// out `rendered_size`: the `a` and `b` components are divided by the rendered
// width, the `c` and `d` components by the rendered height. The translation
// components are left as is.
//
// Returns false if the matrix of `image` cannot be retrieved, true otherwise.
bool CalculateImageWithoutScalingMatrix(FPDF_PAGEOBJECT image,
                                        const gfx::SizeF& rendered_size,
                                        FS_MATRIX& image_matrix) {
  const bool has_matrix = FPDFPageObj_GetMatrix(image, &image_matrix);
  if (has_matrix) {
    const float width = rendered_size.width();
    const float height = rendered_size.height();
    image_matrix.a /= width;
    image_matrix.b /= width;
    image_matrix.c /= height;
    image_matrix.d /= height;
  }
  return has_matrix;
}

// Builds the matrix that rotates a word by `word_origin.theta` and translates
// it to `word_origin.point`, i.e. to its position on the image.
//
// For right-to-left words, the `a` and `b` components are negated and the
// translation is advanced by `word_bounding_box_width` along the rotated
// x axis.
FS_MATRIX CalculateWordMoveMatrix(const SearchifyBoundingBoxOrigin& word_origin,
                                  int word_bounding_box_width,
                                  bool word_is_rtl) {
  const float rotation_sin = sinf(word_origin.theta);
  const float rotation_cos = cosf(word_origin.theta);
  FS_MATRIX result(rotation_cos, rotation_sin, -rotation_sin, rotation_cos,
                   word_origin.point.x(), word_origin.point.y());
  if (!word_is_rtl) {
    return result;
  }

  result.a = -result.a;
  result.b = -result.b;
  result.e += rotation_cos * word_bounding_box_width;
  result.f += rotation_sin * word_bounding_box_width;
  return result;
}

// Adds `word` to `page` as an invisible text object that uses `font`.
//
// The text object is created with a font size equal to the height of the
// word's bounding box. It is then scaled so that its rendered bounds match the
// word's bounding box, transformed by every matrix in `transform_matrices` in
// order, and finally inserted into `page`.
//
// Returns false, without inserting anything into `page`, if `word` contains no
// characters. Returns true once the text object has been inserted. Failures
// while creating, filling or scaling the text object are fatal (CHECK).
bool AddWordOnImage(FPDF_DOCUMENT document,
                    FPDF_PAGE page,
                    FPDF_FONT font,
                    const screen_ai::mojom::WordBox& word,
                    base::span<const FS_MATRIX> transform_matrices) {
  ScopedFPDFPageObject text(
      FPDFPageObj_CreateTextObj(document, font, word.bounding_box.height()));
  CHECK(text);

  std::vector<uint32_t> charcodes = Utf8ToCharcodes(word.word);
  if (charcodes.empty()) {
    DLOG(ERROR) << "Got empty word";
    return false;
  }
  bool result =
      FPDFText_SetCharcodes(text.get(), charcodes.data(), charcodes.size());
  CHECK(result);

  // Make text invisible
  result =
      FPDFTextObj_SetTextRenderMode(text.get(), FPDF_TEXTRENDERMODE_INVISIBLE);
  CHECK(result);

  // Measure the text object with the same rotated-bounds helper that is used
  // for images; GetRenderedImageSize() takes a generic FPDF_PAGEOBJECT.
  const gfx::SizeF text_object_size = GetRenderedImageSize(text.get());
  CHECK_GT(text_object_size.width(), 0);
  CHECK_GT(text_object_size.height(), 0);
  // Stretch the text so that it covers exactly the word's bounding box.
  const FS_MATRIX text_scale_matrix(
      word.bounding_box.width() / text_object_size.width(), 0, 0,
      word.bounding_box.height() / text_object_size.height(), 0, 0);
  CHECK(FPDFPageObj_TransformF(text.get(), &text_scale_matrix));

  for (const auto& matrix : transform_matrices) {
    FPDFPageObj_TransformF(text.get(), &matrix);
  }

  // `page` takes over the text object, so release it from the scoper.
  FPDFPage_InsertObject(page, text.release());
  return true;
}

// Returns a copy of `words` in which every word that OCR reported a
// non-empty whitespace bounding box for is followed by an extra " " word
// representing that whitespace. The extra word takes its bounding box and
// angle from the whitespace box of the word before it, and shares that word's
// language and direction.
std::vector<screen_ai::mojom::WordBox> GetWordsAndSpaces(
    base::span<const screen_ai::mojom::WordBoxPtr> words) {
  std::vector<screen_ai::mojom::WordBox> result;
  if (!words.empty()) {
    result.reserve(words.size() * 2 - 1);
  }

  // The inserted space words carry no trailing whitespace of their own.
  gfx::Rect no_whitespace;
  for (const screen_ai::mojom::WordBoxPtr& word : words) {
    result.push_back(*word);

    const gfx::Rect& gap = word->whitespace_bounding_box;
    if (!gap.width() || !gap.height()) {
      // No whitespace was recognized after this word.
      continue;
    }
    result.emplace_back(
        /*word=*/" ", word->language, gap, word->whitespace_bounding_box_angle,
        word->direction,
        /*whitespace_bounding_box=*/no_whitespace,
        /*whitespace_bounding_box_angle=*/0,
        /*confidence=*/1);
  }

  return result;
}

}  // namespace

// Loads the PDF stored in `pdf_buffer`, runs `perform_ocr_callback` on every
// page object for which an OCR bitmap can be obtained, adds the recognized
// text on top of the corresponding image, and returns the saved result.
//
// Returns an empty vector if the document cannot be loaded, has no pages, OCR
// returns no annotation for an image, page content cannot be regenerated, or
// the document cannot be saved. Pages that fail to load are skipped.
std::vector<uint8_t> PDFiumSearchify(
    base::span<const uint8_t> pdf_buffer,
    base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
        const SkBitmap& bitmap)> perform_ocr_callback) {
  ScopedFPDFDocument scoped_document = LoadPdfData(pdf_buffer);
  if (!scoped_document) {
    DLOG(ERROR) << "Failed to load document";
    return {};
  }
  FPDF_DOCUMENT document = scoped_document.get();

  const int num_pages = FPDF_GetPageCount(document);
  if (num_pages == 0) {
    DLOG(ERROR) << "Got zero page count";
    return {};
  }

  ScopedFPDFFont font = CreateFont(document);
  CHECK(font);

  for (int page_index = 0; page_index < num_pages; ++page_index) {
    ScopedFPDFPage page(FPDF_LoadPage(document, page_index));
    if (!page) {
      DLOG(ERROR) << "Failed to load page";
      continue;
    }

    // The object count is read once, before any text objects get inserted
    // into the page below.
    const int num_objects = FPDFPage_CountObjects(page.get());
    for (int i = 0; i < num_objects; ++i) {
      // A null `page_object` is handled by GetImageForOcr().
      FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page.get(), i);
      SkBitmap bitmap = GetImageForOcr(document, page.get(), page_object,
                                       screen_ai::GetMaxDimensionForOCR());
      if (bitmap.empty()) {
        // Either not an image, or no bitmap could be obtained from it.
        continue;
      }

      screen_ai::mojom::VisualAnnotationPtr annotation =
          perform_ocr_callback.Run(bitmap);
      if (!annotation) {
        DLOG(ERROR) << "Failed to get OCR annotation on the image";
        return {};
      }

      const gfx::Size bitmap_size(bitmap.width(), bitmap.height());
      AddTextOnImage(document, page.get(), font.get(), page_object,
                     std::move(annotation), bitmap_size);
    }

    if (!FPDFPage_GenerateContent(page.get())) {
      DLOG(ERROR) << "Failed to generate content";
      return {};
    }
  }

  PDFiumMemBufferFileWrite writer;
  if (!FPDF_SaveAsCopy(document, &writer, 0)) {
    DLOG(ERROR) << "Failed to save the document";
    return {};
  }
  return writer.TakeBuffer();
}

// Adds the OCR results in `annotation` to `page` as text objects positioned on
// top of `image`, using `font`. `image_pixel_size` is the size of the bitmap
// the OCR coordinates refer to; it is used to flip the y axis and to scale
// from bitmap pixels to the rendered size of `image`.
//
// Returns true if at least one word was added. Returns false if the rendered
// size or the matrix of `image` cannot be obtained, or if no word was added.
//
// NOTE(review): `image_pixel_size` is used as a divisor below, so this assumes
// both of its dimensions are non-zero — confirm that callers guarantee this.
bool AddTextOnImage(FPDF_DOCUMENT document,
                    FPDF_PAGE page,
                    FPDF_FONT font,
                    FPDF_PAGEOBJECT image,
                    screen_ai::mojom::VisualAnnotationPtr annotation,
                    const gfx::Size& image_pixel_size) {
  const gfx::SizeF image_rendered_size = GetRenderedImageSize(image);
  if (image_rendered_size.IsEmpty()) {
    DLOG(ERROR) << "Failed to get image rendered dimensions";
    return false;
  }

  // The transformation matrices are applied in the following order:
  std::array<FS_MATRIX, 3> transform_matrices;
  // Move text object to the corresponding text position on the full image.
  // This one is recomputed for every word in the loop below.
  FS_MATRIX& move_matrix = transform_matrices[0];
  // Scale from full image size to rendered image size on the PDF.
  FS_MATRIX& image_scale_matrix = transform_matrices[1];
  // Apply the image's transformation matrix on the PDF page without the
  // scaling matrix.
  FS_MATRIX& image_without_scaling_matrix = transform_matrices[2];

  image_scale_matrix = {
      image_rendered_size.width() / image_pixel_size.width(),   0, 0,
      image_rendered_size.height() / image_pixel_size.height(), 0, 0};
  if (!CalculateImageWithoutScalingMatrix(image, image_rendered_size,
                                          image_without_scaling_matrix)) {
    DLOG(ERROR) << "Failed to get image matrix";
    return false;
  }

  bool added_text = false;
  for (const auto& line : annotation->lines) {
    // The origin of the line's bounding box serves as the baseline that all
    // words of the line are projected onto.
    // TODO(crbug.com/398694513): Try to get baseline information from font
    // information.
    SearchifyBoundingBoxOrigin baseline_origin =
        ConvertToPdfOrigin(line->bounding_box, line->bounding_box_angle,
                           image_pixel_size.height());

    std::vector<screen_ai::mojom::WordBox> words_and_spaces =
        GetWordsAndSpaces(line->words);

    for (const auto& word : words_and_spaces) {
      // Words without a bounding box cannot be positioned; skip them.
      if (word.bounding_box.IsEmpty()) {
        continue;
      }

      SearchifyBoundingBoxOrigin origin =
          ConvertToPdfOrigin(word.bounding_box, word.bounding_box_angle,
                             image_pixel_size.height());
      // Writing through `move_matrix` updates `transform_matrices[0]`, which
      // is what AddWordOnImage() receives.
      move_matrix = CalculateWordMoveMatrix(
          ProjectToBaseline(origin.point, baseline_origin),
          word.bounding_box.width(),
          word.direction ==
              screen_ai::mojom::Direction::DIRECTION_RIGHT_TO_LEFT);
      added_text |=
          AddWordOnImage(document, page, font, word, transform_matrices);
    }
  }
  return added_text;
}

// Test-only entry point that forwards its arguments unchanged to the
// file-local ConvertToPdfOrigin() and returns its result.
SearchifyBoundingBoxOrigin ConvertToPdfOriginForTesting(
    const gfx::Rect& rect,
    float angle,
    float coordinate_system_height) {
  return ConvertToPdfOrigin(rect, angle, coordinate_system_height);
}

// Test-only entry point that forwards its arguments unchanged to the
// file-local CalculateWordMoveMatrix() and returns its result.
FS_MATRIX CalculateWordMoveMatrixForTesting(
    const SearchifyBoundingBoxOrigin& origin,
    int word_bounding_box_width,
    bool word_is_rtl) {
  return CalculateWordMoveMatrix(origin, word_bounding_box_width, word_is_rtl);
}

// Test-only entry point that forwards `words` unchanged to the file-local
// GetWordsAndSpaces() and returns its result.
std::vector<screen_ai::mojom::WordBox> GetWordsAndSpacesForTesting(  // IN-TEST
    base::span<const screen_ai::mojom::WordBoxPtr> words) {
  return GetWordsAndSpaces(words);
}

// Loads the searchify font into `document` as a CID Type2 font, together with
// its ToUnicode CMap and a freshly created CID-to-GID map, and returns it
// wrapped in a scoper. The result is whatever FPDFText_LoadCidType2Font()
// returned, so callers are expected to check it.
ScopedFPDFFont CreateFont(FPDF_DOCUMENT document) {
  std::vector<uint8_t> glyph_map(CreateCidToGidMap());
  FPDF_FONT loaded_font =
      FPDFText_LoadCidType2Font(document, kPdfTtf, kPdfTtfSize, kToUnicodeCMap,
                                glyph_map.data(), glyph_map.size());
  return ScopedFPDFFont(loaded_font);
}

}  // namespace chrome_pdf