#include "ui/accessibility/ax_language_detection.h"
#include <algorithm>
#include <functional>
#include <memory>
#include "base/command_line.h"
#include "base/i18n/unicodestring.h"
#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/utf_string_conversions.h"
#include "base/trace_event/trace_event.h"
#include "ui/accessibility/accessibility_features.h"
#include "ui/accessibility/accessibility_switches.h"
#include "ui/accessibility/ax_enums.mojom.h"
#include "ui/accessibility/ax_node.h"
#include "ui/accessibility/ax_tree.h"
namespace ui {
namespace {

// Number of highest-scoring page-level languages kept when the top-results
// list is generated; also the number of languages requested when annotating a
// string attribute.
constexpr int kMaxDetectedLanguagesPerPage = 3;

// Number of languages requested from the identifier for a single node's text.
// It doubles as the score awarded to the first language of each detection
// result when page statistics are accumulated.
constexpr int kMaxDetectedLanguagesPerSpan = 3;

// Lower and upper bounds handed to the short-text language identifier when it
// is constructed.
constexpr int kShortTextIdentifierMinByteLength = 1;
constexpr int kShortTextIdentifierMaxByteLength = 1000;

}  // namespace
// Shorthand for the per-language result type returned by the language
// identifier's FindTopNMostFreqLangs().
using Result = chrome_lang_id::NNetLanguageIdentifier::Result;
// Shorthand for the element type of a Result's |byte_ranges| list.
using SpanInfo = chrome_lang_id::NNetLanguageIdentifier::SpanInfo;
// AXLanguageInfo has no custom setup or teardown logic; both special members
// are defaulted here, out of line.
AXLanguageInfo::AXLanguageInfo() = default;
AXLanguageInfo::~AXLanguageInfo() = default;
AXLanguageInfoStats::AXLanguageInfoStats()
: top_results_valid_(false),
disable_metric_clearing_(false),
count_detection_attempted_(0),
count_detection_results_(0),
count_labelled_(0),
count_labelled_with_top_result_(0),
count_overridden_(0) {}
// No custom teardown is required; the destructor is defaulted out of line.
AXLanguageInfoStats::~AXLanguageInfoStats() = default;
// Folds one detection result into the page-wide statistics.
//
// The language at position 0 of |languages| earns
// kMaxDetectedLanguagesPerSpan points and each subsequent position earns one
// point fewer. The language at position 0 is additionally remembered in the
// set of top languages used for metrics. The cached top results are
// invalidated because the scores have changed.
void AXLanguageInfoStats::Add(const std::vector<std::string>& languages) {
  ++count_detection_results_;

  for (size_t rank = 0; rank < languages.size(); ++rank) {
    const std::string& language = languages[rank];
    lang_counts_[language] +=
        kMaxDetectedLanguagesPerSpan - static_cast<int>(rank);
    if (rank == 0)
      unique_top_lang_detected_.insert(language);
  }

  InvalidateTopResults();
}
int AXLanguageInfoStats::GetScore(const std::string& lang) const {
const auto& lang_count_it = lang_counts_.find(lang);
if (lang_count_it == lang_counts_.end()) {
return 0;
}
return lang_count_it->second;
}
// Marks the cached top-results list as stale so that the next call to
// CheckLanguageWithinTop() regenerates it.
void AXLanguageInfoStats::InvalidateTopResults() {
top_results_valid_ = false;
}
// Returns true when |lang| is one of the entries in the page's top-results
// list. The list is regenerated first if it has been invalidated.
bool AXLanguageInfoStats::CheckLanguageWithinTop(const std::string& lang) {
  if (!top_results_valid_)
    GenerateTopResults();

  // Each entry is a (score, language) pair; only the language is compared.
  return std::any_of(
      top_results_.begin(), top_results_.end(),
      [&lang](const auto& scored) { return lang == scored.second; });
}
// Rebuilds |top_results_| from |lang_counts_|: the list holds at most
// kMaxDetectedLanguagesPerPage (score, language) pairs, ordered from highest
// to lowest score, and is marked valid afterwards.
void AXLanguageInfoStats::GenerateTopResults() {
  top_results_.clear();
  top_results_.reserve(lang_counts_.size());

  // Entries are stored as (score, language) so that the default pair
  // comparison orders primarily by score.
  for (const auto& item : lang_counts_) {
    top_results_.emplace_back(item.second, item.first);
  }

  // Descending order: highest score first.
  std::sort(top_results_.begin(), top_results_.end(), std::greater<>());

  // Only ever shrink the list. An unconditional resize() would pad the list
  // with default-constructed (0, "") entries when fewer languages than the
  // limit are known, making the empty string look like a top language.
  if (top_results_.size() >
      static_cast<size_t>(kMaxDetectedLanguagesPerPage)) {
    top_results_.resize(kMaxDetectedLanguagesPerPage);
  }

  top_results_valid_ = true;
}
// Updates the labelling counters after a node has been assigned a language.
//
// |labelled_lang| is the language that was assigned, |author_lang| is the
// language it is compared against to decide whether the label counts as an
// override, and |labelled_with_first_result| says whether the assigned
// language was the first candidate considered for the node.
void AXLanguageInfoStats::RecordLabelStatistics(
    const std::string& labelled_lang,
    const std::string& author_lang,
    bool labelled_with_first_result) {
  ++count_labelled_;
  count_labelled_with_top_result_ += labelled_with_first_result ? 1 : 0;
  count_overridden_ += (author_lang != labelled_lang) ? 1 : 0;
}
// Counts one more attempt at detecting a node's language, regardless of
// whether the attempt produces any result.
void AXLanguageInfoStats::RecordDetectionAttempt() {
++count_detection_attempted_;
}
// Emits the accumulated language-detection histograms and then resets the
// counters via ClearMetrics(). Does nothing when no detection has been
// attempted since the counters were last cleared.
void AXLanguageInfoStats::ReportMetrics() {
  // Also protects the division by |count_detection_attempted_| below.
  if (count_detection_attempted_ == 0)
    return;

  base::UmaHistogramCustomCounts(
      "Accessibility.LanguageDetection.CountDetectionAttempted",
      count_detection_attempted_, 1, 1000, 50);

  // Share of attempts that yielded at least one detection result.
  const int detected_percent =
      count_detection_results_ * 100 / count_detection_attempted_;
  base::UmaHistogramPercentageObsoleteDoNotUse(
      "Accessibility.LanguageDetection.PercentageLanguageDetected",
      detected_percent);

  base::UmaHistogramCustomCounts(
      "Accessibility.LanguageDetection.CountLabelled", count_labelled_, 1, 1000,
      50);

  // The remaining percentages are relative to the number of labelled nodes,
  // so they are only meaningful (and only computable) when there are some.
  if (count_labelled_ != 0) {
    const int top_percent =
        count_labelled_with_top_result_ * 100 / count_labelled_;
    base::UmaHistogramPercentageObsoleteDoNotUse(
        "Accessibility.LanguageDetection.PercentageLabelledWithTop",
        top_percent);

    const int overridden_percent = count_overridden_ * 100 / count_labelled_;
    base::UmaHistogramPercentageObsoleteDoNotUse(
        "Accessibility.LanguageDetection.PercentageOverridden",
        overridden_percent);
  }

  base::UmaHistogramExactLinear("Accessibility.LanguageDetection.LangsPerPage",
                                unique_top_lang_detected_.size(), 15);

  ClearMetrics();
}
// Resets the metric counters and the set of top languages, unless metric
// clearing has been disabled. The per-language scores and the cached top
// results are left untouched.
void AXLanguageInfoStats::ClearMetrics() {
  if (!disable_metric_clearing_) {
    unique_top_lang_detected_.clear();
    count_detection_attempted_ = 0;
    count_detection_results_ = 0;
    count_labelled_ = 0;
    count_labelled_with_top_result_ = 0;
    count_overridden_ = 0;
  }
}
// Builds a manager for |tree|. The short-text identifier is constructed with
// the min/max byte-length constants defined at the top of this file, and the
// |tree| pointer is stored for later traversal.
AXLanguageDetectionManager::AXLanguageDetectionManager(AXTree* tree)
: short_text_language_identifier_(kShortTextIdentifierMinByteLength,
kShortTextIdentifierMaxByteLength),
tree_(tree) {}
// No custom teardown is required; the destructor is defaulted out of line.
AXLanguageDetectionManager::~AXLanguageDetectionManager() = default;
// Returns true when whole-tree detection and labelling (DetectLanguages() /
// LabelLanguages()) should run: either the language-detection feature is
// enabled or the experimental switch for it is set.
bool AXLanguageDetectionManager::IsStaticLanguageDetectionEnabled() {
  if (features::IsAccessibilityLanguageDetectionEnabled())
    return true;
  return ::switches::IsExperimentalAccessibilityLanguageDetectionEnabled();
}
// Returns true when detection on tree updates (via the registered observer)
// should run: either the language-detection feature is enabled or the
// experimental "dynamic" switch is set.
bool AXLanguageDetectionManager::IsDynamicLanguageDetectionEnabled() {
  if (features::IsAccessibilityLanguageDetectionEnabled())
    return true;
  return ::switches::
      IsExperimentalAccessibilityLanguageDetectionDynamicEnabled();
}
// Creates the observer that reacts to updates of |tree_|, but only when
// dynamic language detection is enabled; otherwise this is a no-op.
void AXLanguageDetectionManager::RegisterLanguageDetectionObserver() {
  if (IsDynamicLanguageDetectionEnabled()) {
    language_detection_observer_ =
        std::make_unique<AXLanguageDetectionObserver>(tree_);
  }
}
// Runs language detection over the whole tree, starting at its root. Does
// nothing beyond emitting the trace event when static detection is disabled.
void AXLanguageDetectionManager::DetectLanguages() {
  TRACE_EVENT0("accessibility", "AXLanguageInfo::DetectLanguages");

  if (IsStaticLanguageDetectionEnabled())
    DetectLanguagesForSubtree(tree_->root());
}
// Walks the subtree rooted at |subtree_root| and runs detection on every
// static-text node reached. The walk does not descend below a static-text
// node.
void AXLanguageDetectionManager::DetectLanguagesForSubtree(
    AXNode* subtree_root) {
  if (subtree_root->GetRole() == ax::mojom::Role::kStaticText) {
    DetectLanguagesForNode(subtree_root);
    return;
  }

  for (AXNode* descendant : subtree_root->children())
    DetectLanguagesForSubtree(descendant);
}
// Detects the languages of |node|'s name text and stores the reliable ones on
// the node.
//
// Every call is counted as a detection attempt. When at least one reliable
// language is found, the node's language info is created (or reset, if it
// already exists), the reliable languages are stored in
// |detected_languages|, and the page statistics are updated. When none is
// found the node is left unchanged.
void AXLanguageDetectionManager::DetectLanguagesForNode(AXNode* node) {
  lang_info_stats_.RecordDetectionAttempt();

  const auto& name_text =
      node->GetStringAttribute(ax::mojom::StringAttribute::kName);
  const std::vector<Result> candidates =
      language_identifier_.FindTopNMostFreqLangs(name_text,
                                                 kMaxDetectedLanguagesPerSpan);

  // Keep only the candidates the identifier flagged as reliable, preserving
  // the order in which they were returned.
  std::vector<std::string> reliable_languages;
  for (const Result& candidate : candidates) {
    if (candidate.is_reliable)
      reliable_languages.push_back(candidate.language);
  }

  if (reliable_languages.empty())
    return;

  AXLanguageInfo* info = node->GetLanguageInfo();
  if (!info) {
    node->SetLanguageInfo(std::make_unique<AXLanguageInfo>());
    info = node->GetLanguageInfo();
  } else {
    // Re-detection: drop both the previous candidates and any label that was
    // assigned from them.
    info->detected_languages.clear();
    info->language.clear();
  }

  info->detected_languages = std::move(reliable_languages);
  lang_info_stats_.Add(info->detected_languages);
}
// Assigns a language to every node of the tree that carries detection
// results, then reports the accumulated metrics. Does nothing beyond emitting
// the trace event when static detection is disabled.
void AXLanguageDetectionManager::LabelLanguages() {
  TRACE_EVENT0("accessibility", "AXLanguageInfo::LabelLanguages");

  if (IsStaticLanguageDetectionEnabled()) {
    LabelLanguagesForSubtree(tree_->root());
    lang_info_stats_.ReportMetrics();
  }
}
// Labels |subtree_root| and then, recursively, each of its children
// (pre-order traversal).
void AXLanguageDetectionManager::LabelLanguagesForSubtree(
    AXNode* subtree_root) {
  LabelLanguagesForNode(subtree_root);

  for (AXNode* descendant : subtree_root->children())
    LabelLanguagesForSubtree(descendant);
}
// Chooses the final language for |node| from its detected candidates.
//
// Nodes without language info, or that already have a language assigned, are
// skipped. Otherwise the candidates are tried in order and the first one that
// is among the page's top languages becomes the node's language; label
// statistics are recorded and the candidate list is emptied. If no candidate
// qualifies, the node's language info is removed entirely.
void AXLanguageDetectionManager::LabelLanguagesForNode(AXNode* node) {
  AXLanguageInfo* info = node->GetLanguageInfo();
  if (!info || !info->language.empty())
    return;

  const auto& candidates = info->detected_languages;
  for (size_t rank = 0; rank < candidates.size(); ++rank) {
    const std::string& candidate = candidates[rank];
    if (!lang_info_stats_.CheckLanguageWithinTop(candidate))
      continue;

    info->language = candidate;

    const std::string& author_lang = node->GetInheritedStringAttribute(
        ax::mojom::StringAttribute::kLanguage);
    // rank == 0 means the very first candidate was accepted.
    lang_info_stats_.RecordLabelStatistics(candidate, author_lang, rank == 0);

    // |candidate| refers into this list, so clear it only once the candidate
    // is no longer needed.
    info->detected_languages.clear();
    return;
  }

  node->ClearLanguageInfo();
}
std::vector<AXLanguageSpan>
AXLanguageDetectionManager::GetLanguageAnnotationForStringAttribute(
const AXNode& node,
ax::mojom::StringAttribute attr) {
std::vector<AXLanguageSpan> language_annotation;
if (!node.HasStringAttribute(attr))
return language_annotation;
const std::string& attr_value = node.GetStringAttribute(attr);
if (node.HasStringAttribute(ax::mojom::StringAttribute::kLanguage)) {
language_annotation.push_back(AXLanguageSpan{
0 ,
static_cast<int>(attr_value.length()) ,
node.GetStringAttribute(
ax::mojom::StringAttribute::kLanguage) ,
1 });
return language_annotation;
}
std::vector<Result> top_languages =
short_text_language_identifier_.FindTopNMostFreqLangs(
attr_value, kMaxDetectedLanguagesPerPage);
for (const auto& result : top_languages) {
const std::vector<SpanInfo>& ranges = result.byte_ranges;
for (const auto& span_info : ranges) {
language_annotation.push_back(
AXLanguageSpan{span_info.start_index, span_info.end_index,
result.language, span_info.probability});
}
}
std::sort(
language_annotation.begin(), language_annotation.end(),
[](const AXLanguageSpan& left, const AXLanguageSpan& right) -> bool {
return left.start_index < right.start_index;
});
for (size_t i = 0; i < language_annotation.size(); ++i) {
if (i > 0) {
DCHECK(language_annotation[i].start_index <=
language_annotation[i - 1].end_index);
}
}
return language_annotation;
}
// Starts observing |tree|. The observer is only expected to be created when
// dynamic language detection is enabled, which is asserted in debug builds.
AXLanguageDetectionObserver::AXLanguageDetectionObserver(AXTree* tree) {
DCHECK(AXLanguageDetectionManager::IsDynamicLanguageDetectionEnabled());
observation_.Observe(tree);
}
// No custom teardown is required; the destructor is defaulted out of line.
AXLanguageDetectionObserver::~AXLanguageDetectionObserver() = default;
// Re-runs detection and labelling for the static-text nodes listed in
// |changes|, then reports metrics. |root_changed| is not used.
//
// The work is done in two passes: detection for all changed nodes runs first,
// because it updates the page-level statistics that labelling consults.
void AXLanguageDetectionObserver::OnAtomicUpdateFinished(
    AXTree* tree,
    bool root_changed,
    const std::vector<Change>& changes) {
  auto& manager = tree->language_detection_manager;
  DCHECK(manager);

  const auto is_static_text = [](const Change& change) {
    return change.node->GetRole() == ax::mojom::Role::kStaticText;
  };

  // Pass 1: detect.
  for (const auto& change : changes) {
    if (is_static_text(change))
      manager->DetectLanguagesForNode(change.node);
  }

  // Pass 2: label, using the statistics gathered in pass 1.
  for (const auto& change : changes) {
    if (is_static_text(change))
      manager->LabelLanguagesForNode(change.node);
  }

  manager->lang_info_stats_.ReportMetrics();
}
}