#include "chrome/browser/ui/lens/lens_url_matcher.h"

#include <utility>

#include "base/json/json_reader.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "third_party/farmhash/src/src/farmhash.h"
namespace lens {
namespace {
std::vector<std::string> JSONArrayToVector(const std::string& json_array) {
std::optional<base::Value> json_value =
base::JSONReader::Read(json_array, base::JSON_PARSE_CHROMIUM_EXTENSIONS);
if (!json_value) {
return {};
}
base::Value::List* entries = json_value->GetIfList();
if (!entries) {
return {};
}
std::vector<std::string> result;
result.reserve(entries->size());
for (const base::Value& entry : *entries) {
const std::string* filter = entry.GetIfString();
if (filter) {
result.emplace_back(*filter);
}
}
return result;
}
}
// Builds all matchers from their serialized configuration.
//
// `url_allow_filters`, `url_block_filters`, `path_match_allow_filters`,
// `path_match_block_filters` and `url_forced_allowed_match_patterns` are each
// parsed as a JSON array of strings by the Initialize* helpers.
// `hashed_domain_block_filters_list` is a comma-separated list of unsigned
// integers.
LensUrlMatcher::LensUrlMatcher(std::string url_allow_filters,
                               std::string url_block_filters,
                               std::string path_match_allow_filters,
                               std::string path_match_block_filters,
                               std::string url_forced_allowed_match_patterns,
                               std::string hashed_domain_block_filters_list) {
  // One counter is threaded through every helper that registers patterns, so
  // the regex helpers (which pre-increment it per pattern) never reuse an ID.
  base::MatcherStringPattern::ID id(0);

  // Each argument is consumed exactly once and the helpers take their strings
  // by value, so move instead of copying a second time.
  InitializeUrlMatcher(std::move(url_allow_filters),
                       std::move(url_block_filters), &id);
  InitializeForceAllowUrlPatterns(std::move(url_forced_allowed_match_patterns),
                                  &id);
  InitializePathAllowMatcher(std::move(path_match_allow_filters), &id);
  InitializePathBlockMatcher(std::move(path_match_block_filters), &id);
  InitializeHashedDomainBlockFilters(
      std::move(hashed_domain_block_filters_list));
}
// Compiler-generated destructor, defined out of line; no manual cleanup is
// performed here.
LensUrlMatcher::~LensUrlMatcher() = default;
void LensUrlMatcher::InitializeUrlMatcher(std::string url_allow_filters,
std::string url_block_filters,
base::MatcherStringPattern::ID* id) {
url_matcher_ = std::make_unique<url_matcher::URLMatcher>();
url_matcher::util::AddFiltersWithLimit(url_matcher_.get(), true, id,
JSONArrayToVector(url_allow_filters),
&url_filters_);
url_matcher::util::AddFiltersWithLimit(url_matcher_.get(), false, id,
JSONArrayToVector(url_block_filters),
&url_filters_);
}
void LensUrlMatcher::InitializeForceAllowUrlPatterns(
std::string url_path_forced_allowed_match_patterns,
base::MatcherStringPattern::ID* id) {
auto force_allow_url_strings =
JSONArrayToVector(url_path_forced_allowed_match_patterns);
std::vector<base::MatcherStringPattern> force_allow_url_patterns;
std::vector<const base::MatcherStringPattern*> force_allow_url_pointers;
force_allow_url_patterns.reserve(force_allow_url_strings.size());
force_allow_url_pointers.reserve(force_allow_url_strings.size());
for (const std::string& entry : force_allow_url_strings) {
(*id)++;
force_allow_url_patterns.emplace_back(entry, *id);
force_allow_url_pointers.push_back(&force_allow_url_patterns.back());
}
url_forced_allow_matcher = std::make_unique<url_matcher::RegexSetMatcher>();
url_forced_allow_matcher->AddPatterns(force_allow_url_pointers);
}
void LensUrlMatcher::InitializePathAllowMatcher(
std::string path_match_allow_filters,
base::MatcherStringPattern::ID* id) {
const auto allow_strings = JSONArrayToVector(path_match_allow_filters);
std::vector<base::MatcherStringPattern> allow_patterns;
std::vector<const base::MatcherStringPattern*> allow_pointers;
allow_patterns.reserve(allow_strings.size());
allow_pointers.reserve(allow_strings.size());
for (const std::string& entry : allow_strings) {
(*id)++;
allow_patterns.emplace_back(entry, *id);
allow_pointers.push_back(&allow_patterns.back());
}
path_allow_matcher_ = std::make_unique<url_matcher::RegexSetMatcher>();
path_allow_matcher_->AddPatterns(allow_pointers);
}
void LensUrlMatcher::InitializePathBlockMatcher(
std::string path_match_block_filters,
base::MatcherStringPattern::ID* id) {
const auto block_strings = JSONArrayToVector(path_match_block_filters);
std::vector<base::MatcherStringPattern> block_patterns;
std::vector<const base::MatcherStringPattern*> block_pointers;
block_patterns.reserve(block_strings.size());
block_pointers.reserve(block_strings.size());
for (const std::string& entry : block_strings) {
(*id)++;
block_patterns.emplace_back(entry, *id);
block_pointers.push_back(&block_patterns.back());
}
path_block_matcher_ = std::make_unique<url_matcher::RegexSetMatcher>();
path_block_matcher_->AddPatterns(block_pointers);
}
// Fills `hashed_domain_block_filters_` from a comma-separated list of unsigned
// integers. Tokens are whitespace-trimmed, empty tokens are dropped, and any
// token that StringToUint() rejects is ignored.
void LensUrlMatcher::InitializeHashedDomainBlockFilters(
    std::string hashed_domain_block_filters_list) {
  const auto tokens = base::SplitStringPiece(
      hashed_domain_block_filters_list, ",",
      base::WhitespaceHandling::TRIM_WHITESPACE,
      base::SplitResult::SPLIT_WANT_NONEMPTY);

  for (const std::string_view token : tokens) {
    uint32_t parsed_hash = 0;
    if (!base::StringToUint(token, &parsed_hash)) {
      continue;
    }
    hashed_domain_block_filters_.insert(parsed_hash);
  }
}
// Returns whether `url` passes every configured filter. Checks run in this
// order and the first decisive one wins:
//   1. `url_matcher_` must report at least one matching filter, and none of
//      the matching filters may be a non-allow entry.
//   2. The host, or any parent domain of it, must not hash to a blocked value.
//   3. The path must not match a path-block pattern.
//   4. A full-URL match against the forced-allow patterns accepts the URL
//      without consulting the path-allow patterns.
//   5. If path-allow patterns exist, the path must match one of them.
// A matcher that is null or holds no patterns is skipped.
bool LensUrlMatcher::IsMatch(const GURL& url) {
  auto matched_ids = url_matcher_->MatchURL(url);
  if (matched_ids.empty()) {
    return false;
  }
  for (const auto matched_id : matched_ids) {
    if (!url_filters_[matched_id].allow) {
      return false;
    }
  }

  if (SubdomainsMatchHash(url.GetHost())) {
    return false;
  }

  // The regex matchers append to the ID set passed to Match(); only their
  // boolean result is used from here on.
  const bool has_path_block_rules =
      path_block_matcher_ && !path_block_matcher_->IsEmpty();
  if (has_path_block_rules &&
      path_block_matcher_->Match(url.GetPath(), &matched_ids)) {
    return false;
  }

  const bool has_forced_allow_rules =
      url_forced_allow_matcher && !url_forced_allow_matcher->IsEmpty();
  if (has_forced_allow_rules &&
      url_forced_allow_matcher->Match(url.spec(), &matched_ids)) {
    return true;
  }

  const bool has_path_allow_rules =
      path_allow_matcher_ && !path_allow_matcher_->IsEmpty();
  if (!has_path_allow_rules) {
    return true;
  }
  return path_allow_matcher_->Match(url.GetPath(), &matched_ids);
}
// Returns true if `str`, with leading and trailing dots removed, or any suffix
// of it that starts right after a '.', satisfies MatchesHash(). For
// "a.b.example.com" the candidates tried are "a.b.example.com",
// "b.example.com", "example.com" and "com". A string made only of dots (or an
// empty one) never matches.
bool LensUrlMatcher::SubdomainsMatchHash(std::string_view str) {
  const size_t first = str.find_first_not_of('.');
  if (first == std::string_view::npos) {
    return false;
  }
  const size_t last = str.find_last_not_of('.');

  std::string_view candidate = str.substr(first, last - first + 1);
  for (;;) {
    if (MatchesHash(candidate)) {
      return true;
    }
    const size_t dot = candidate.find('.');
    if (dot == std::string_view::npos) {
      return false;
    }
    // Drop the leftmost label together with its separating dot.
    candidate.remove_prefix(dot + 1);
  }
}
// Returns true if the 32-bit fingerprint of `str` is present in
// `hashed_domain_block_filters_`.
bool LensUrlMatcher::MatchesHash(std::string_view str) {
  return hashed_domain_block_filters_.contains(util::Fingerprint32(str));
}
}