910e62b5创建于 1月15日历史提交
// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_BIDI_PARAGRAPH_H_
#define THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_BIDI_PARAGRAPH_H_

#include <unicode/ubidi.h>

#include <optional>

#include "base/check_op.h"
#include "base/containers/span.h"
#include "third_party/blink/renderer/platform/text/text_direction.h"
#include "third_party/blink/renderer/platform/wtf/allocator/allocator.h"
#include "third_party/blink/renderer/platform/wtf/forward.h"
#include "third_party/blink/renderer/platform/wtf/text/string_view.h"
#include "third_party/blink/renderer/platform/wtf/vector.h"

namespace blink {

// BidiParagraph resolves bidirectional runs in a paragraph using ICU BiDi.
// http://userguide.icu-project.org/transforms/bidi
//
// Given a string of a paragraph, it runs Unicode Bidirectional Algorithm in
// UAX#9 and create logical runs.
// http://unicode.org/reports/tr9/
// It can also create visual runs once lines breaks are determined.
class PLATFORM_EXPORT BidiParagraph {
  STACK_ALLOCATED();

 public:
  BidiParagraph() = default;
  BidiParagraph(const String& text,
                std::optional<TextDirection> base_direction) {
    SetParagraph(text, base_direction);
  }

  // Splits the given paragraph to bidi runs and resolves the bidi embedding
  // level of each run.
  //
  // Returns false on failure. Nothing other than the destructor should be
  // called.
  bool SetParagraph(const String&, std::optional<TextDirection> base_direction);

  // @return the entire text is unidirectional.
  bool IsUnidirectional() const {
    return ubidi_getDirection(ubidi_.get()) != UBIDI_MIXED;
  }

  // The base direction (a.k.a. paragraph direction) of this block.
  // This is determined by the 'direction' property of the block, or by the
  // heuristic rules defined in UAX#9 if 'unicode-bidi: plaintext'.
  TextDirection BaseDirection() const { return base_direction_; }

  // Compute the base direction for a given string using the heuristic
  // rules defined in UAX#9. It determines the direction by the first strong
  // character, or returns `nullopt` if no strong characters are found before
  // the first segment break.
  // http://unicode.org/reports/tr9/#The_Paragraph_Level
  static std::optional<TextDirection> BaseDirectionForString(
      const StringView&,
      bool (*stop_at)(UChar) = nullptr);

  // Same as `BaseDirectionForString().value_or(kLtr)`, with an optimized code
  // path for when the default (no strong characters) is LTR.
  static TextDirection BaseDirectionForStringOrLtr(
      const StringView& text,
      bool (*stop_at)(UChar) = nullptr);

  // Create a string that enforces directional override by wrapping the given
  // string with a Unicode BiDi override character (LRO or ROL) and PDF.
  // https://unicode.org/reports/tr9/#Explicit_Directional_Overrides
  // https://unicode.org/reports/tr9/#Terminating_Explicit_Directional_Embeddings_and_Overrides
  static String StringWithDirectionalOverride(const StringView& text,
                                              TextDirection direction);

  struct Run {
    Run(unsigned start, unsigned end, UBiDiLevel level)
        : start(start), end(end), level(level) {
      DCHECK_GT(end, start);
    }

    unsigned Length() const { return end - start; }
    TextDirection Direction() const { return DirectionFromLevel(level); }

    bool operator==(const Run& other) const {
      return start == other.start && end == other.end && level == other.level;
    }

    unsigned start;
    unsigned end;
    UBiDiLevel level;
  };
  using Runs = Vector<Run, 32>;

  // Get a list of `Run` in the logical order (before bidi reorder.)
  // `text` must be the same one as `SetParagraph`.
  // This is higher-level API for `GetLogicalRun`.
  void GetLogicalRuns(const String& text, Runs* runs) const;

  // Returns the end offset of a logical run that starts from the |start|
  // offset.
  unsigned GetLogicalRun(unsigned start, UBiDiLevel*) const;

  // Get a list of `Run` in the visual order (after bidi reorder.)
  // `text` must be the same one as `SetParagraph`.
  // This is higher-level API for `GetLogicalRuns` and `IndicesInVisualOrder`.
  void GetVisualRuns(const String& text, Runs* runs) const;

  // Create a list of indices in the visual order.
  // A wrapper for ICU |ubidi_reorderVisual()|.
  static void IndicesInVisualOrder(
      const Vector<UBiDiLevel, 32>& levels,
      Vector<int32_t, 32>* indices_in_visual_order_out);

 private:
  template <typename TChar>
  static std::optional<TextDirection> BaseDirectionForString(
      base::span<const TChar>,
      bool (*stop_at)(UChar));

  struct UBiDiDeleter {
    void operator()(UBiDi* ubidi) const { ubidi_close(ubidi); }
  };
  using UBidiPtr = std::unique_ptr<UBiDi, UBiDiDeleter>;

  UBidiPtr ubidi_;
  TextDirection base_direction_ = TextDirection::kLtr;
};

// static
inline TextDirection BidiParagraph::BaseDirectionForStringOrLtr(
    const StringView& text,
    bool (*stop_at)(UChar)) {
  if (text.empty() || text.Is8Bit()) {
    // The result is LTR when 8 bits string and the distinction between LTR or
    // neutral is not needed, because U+0000-00FF are LTR or neutral.
    return TextDirection::kLtr;
  }
  return BaseDirectionForString(text, stop_at).value_or(TextDirection::kLtr);
}

}  // namespace blink

#endif  // THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_BIDI_PARAGRAPH_H_