910e62b5创建于 1月15日历史提交
// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "extensions/browser/api/declarative_net_request/ruleset_matcher_base.h"

#include <algorithm>
#include <string_view>
#include <tuple>

#include "base/strings/strcat.h"
#include "base/strings/string_util.h"
#include "components/url_pattern_index/flat/url_pattern_index_generated.h"
#include "content/public/browser/navigation_handle.h"
#include "content/public/browser/render_frame_host.h"
#include "content/public/browser/render_process_host.h"
#include "extensions/browser/api/declarative_net_request/constants.h"
#include "extensions/browser/api/declarative_net_request/request_action.h"
#include "extensions/browser/api/declarative_net_request/request_params.h"
#include "extensions/browser/api/declarative_net_request/utils.h"
#include "extensions/common/api/declarative_net_request.h"
#include "net/base/url_util.h"
#include "url/gurl.h"

namespace extensions::declarative_net_request {
namespace flat_rule = url_pattern_index::flat;
namespace dnr_api = api::declarative_net_request;

namespace {

bool ShouldCollapseResourceType(flat_rule::ElementType type) {
  // TODO(crbug.com/40578984): Add support for other element types like
  // OBJECT.
  return type == flat_rule::ElementType_IMAGE ||
         type == flat_rule::ElementType_SUBDOCUMENT;
}

bool IsUpgradeableUrl(const GURL& url) {
  return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kFtpScheme);
}

// Upgrades the url's scheme to HTTPS.
GURL GetUpgradedUrl(const GURL& url) {
  DCHECK(IsUpgradeableUrl(url));
  GURL::Replacements replacements;
  replacements.SetSchemeStr(url::kHttpsScheme);
  return url.ReplaceComponents(replacements);
}

// Returns true if the given |vec| is nullptr or empty.
template <typename T>
bool IsEmpty(const flatbuffers::Vector<T>* vec) {
  return !vec || vec->size() == 0;
}

// Performs any required query transformations on the |url|. Returns true if the
// query should be modified and populates |modified_query|.
bool GetModifiedQuery(const GURL& url,
                      const flat::UrlTransform& transform,
                      std::string* modified_query) {
  DCHECK(modified_query);

  // |remove_query_params| should always be sorted.
  DCHECK(
      IsEmpty(transform.remove_query_params()) ||
      std::is_sorted(transform.remove_query_params()->begin(),
                     transform.remove_query_params()->end(),
                     [](const flatbuffers::String* x1,
                        const flatbuffers::String* x2) { return *x1 < *x2; }));

  // Return early if there's nothing to modify.
  if (IsEmpty(transform.remove_query_params()) &&
      IsEmpty(transform.add_or_replace_query_params())) {
    return false;
  }

  std::vector<std::string_view> remove_query_params;
  if (!IsEmpty(transform.remove_query_params())) {
    remove_query_params.reserve(transform.remove_query_params()->size());
    for (const ::flatbuffers::String* str : *transform.remove_query_params()) {
      remove_query_params.push_back(str->string_view());
    }
  }

  // We don't use a map from keys to vector of values to ensure the relative
  // order of different params specified by the extension is respected. We use a
  // std::list to support fast removal from middle of the list. Note that the
  // key value pairs should already be escaped.
  struct QueryReplace {
    std::string_view key;
    std::string_view value;
    bool replace_only = false;
  };
  std::list<QueryReplace> add_or_replace_query_params;

  if (!IsEmpty(transform.add_or_replace_query_params())) {
    for (const flat::QueryKeyValue* query_pair :
         *transform.add_or_replace_query_params()) {
      DCHECK(query_pair->key());
      DCHECK(query_pair->value());
      add_or_replace_query_params.emplace_back(QueryReplace{
          query_pair->key()->string_view(), query_pair->value()->string_view(),
          query_pair->replace_only()});
    }
  }

  std::vector<std::string> query_parts;

  auto create_query_part = [](std::string_view key,
                              std::string_view value) -> std::string {
    return base::StrCat({key, "=", value});
  };

  bool query_changed = false;
  for (net::QueryIterator it(url); !it.IsAtEnd(); it.Advance()) {
    const std::string_view key = it.GetKey();
    // Remove query param.
    if (std::binary_search(remove_query_params.begin(),
                           remove_query_params.end(), key)) {
      query_changed = true;
      continue;
    }

    auto replace_iterator =
        std::ranges::find(add_or_replace_query_params, key, &QueryReplace::key);

    // Nothing to do.
    if (replace_iterator == add_or_replace_query_params.end()) {
      query_parts.push_back(create_query_part(key, it.GetValue()));
      continue;
    }

    // Replace query param.
    query_changed = true;
    query_parts.push_back(create_query_part(key, replace_iterator->value));
    add_or_replace_query_params.erase(replace_iterator);
  }

  // Append any remaining query params.
  for (const auto& params : add_or_replace_query_params) {
    if (!params.replace_only) {
      query_parts.push_back(create_query_part(params.key, params.value));
      query_changed = true;
    }
  }

  if (!query_changed) {
    return false;
  }

  *modified_query = base::JoinString(query_parts, "&");
  return true;
}

GURL GetTransformedURL(const RequestParams& params,
                       const flat::UrlTransform& transform) {
  GURL::Replacements replacements;

  if (transform.scheme()) {
    replacements.SetSchemeStr(transform.scheme()->string_view());
  }

  if (transform.host()) {
    replacements.SetHostStr(transform.host()->string_view());
  }

  DCHECK(!(transform.clear_port() && transform.port()));
  if (transform.clear_port()) {
    replacements.ClearPort();
  } else if (transform.port()) {
    replacements.SetPortStr(transform.port()->string_view());
  }

  DCHECK(!(transform.clear_path() && transform.path()));
  if (transform.clear_path()) {
    replacements.ClearPath();
  } else if (transform.path()) {
    replacements.SetPathStr(transform.path()->string_view());
  }

  // |query| is defined outside the if conditions since url::Replacements does
  // not own the strings it uses.
  std::string query;
  if (transform.clear_query()) {
    replacements.ClearQuery();
  } else if (transform.query()) {
    replacements.SetQueryStr(transform.query()->string_view());
  } else if (GetModifiedQuery(*params.url, transform, &query)) {
    if (query.empty()) {
      replacements.ClearQuery();
    } else {
      replacements.SetQueryStr(query);
    }
  }

  DCHECK(!(transform.clear_fragment() && transform.fragment()));
  if (transform.clear_fragment()) {
    replacements.ClearRef();
  } else if (transform.fragment()) {
    replacements.SetRefStr(transform.fragment()->string_view());
  }

  if (transform.password()) {
    replacements.SetPasswordStr(transform.password()->string_view());
  }

  if (transform.username()) {
    replacements.SetUsernameStr(transform.username()->string_view());
  }

  return params.url->ReplaceComponents(replacements);
}

}  // namespace

RulesetMatcherBase::RulesetMatcherBase(const ExtensionId& extension_id,
                                       RulesetID ruleset_id)
    : extension_id_(extension_id), ruleset_id_(ruleset_id) {}
RulesetMatcherBase::~RulesetMatcherBase() = default;

std::optional<RequestAction> RulesetMatcherBase::GetAction(
    const RequestParams& params,
    RulesetMatchingStage stage) const {
  std::optional<RequestAction> action =
      GetActionIgnoringAncestors(params, stage);
  std::optional<RequestAction> parent_action =
      GetAllowlistedFrameAction(params.parent_routing_id);

  return GetMaxPriorityAction(std::move(action), std::move(parent_action));
}

void RulesetMatcherBase::OnRenderFrameCreated(content::RenderFrameHost* host) {
  DCHECK(host);
  content::RenderFrameHost* parent = host->GetParentOrOuterDocument();
  if (!parent) {
    return;
  }

  // Some frames like srcdoc frames inherit URLLoaderFactories from their
  // parents and can make network requests before a corresponding navigation
  // commit for the frame is received in the browser (via DidFinishNavigation).
  // Hence if the parent frame is allowlisted, we allow list the current frame
  // as well in OnRenderFrameCreated.
  std::optional<RequestAction> parent_action =
      GetAllowlistedFrameAction(parent->GetGlobalId());
  if (!parent_action) {
    return;
  }

  bool inserted = false;
  std::tie(std::ignore, inserted) = allowlisted_frames_.insert(
      std::make_pair(host->GetGlobalId(), std::move(*parent_action)));
  DCHECK(inserted);
}

void RulesetMatcherBase::OnRenderFrameDeleted(content::RenderFrameHost* host) {
  DCHECK(host);
  allowlisted_frames_.erase(host->GetGlobalId());
}

void RulesetMatcherBase::OnDidFinishNavigation(
    content::NavigationHandle* navigation_handle) {
  content::RenderFrameHost* host = navigation_handle->GetRenderFrameHost();

  // Note: we only start tracking frames on navigation, since a document only
  // issues network requests after the corresponding navigation is committed.
  // Hence we need not listen to OnRenderFrameCreated.
  DCHECK(host);

  RequestParams params(host, navigation_handle->IsPost(),
                       /*response_headers=*/nullptr);

  // Find the highest priority allowAllRequests action corresponding to this
  // frame for rules that match in the onBeforeRequest request stage.
  std::optional<RequestAction> frame_action =
      GetAllowAllRequestsAction(params, RulesetMatchingStage::kOnBeforeRequest);

  // The only navigation requests that match DNR rules in the OnHeadersReceived
  // request phase are HTTP/HTTPS and will have response headers. So in this
  // method, if a navigation request:
  //  - has response headers, then match it against rules for both the
  //    `kOnBeforeRequest` and `kOnHeadersReceived` stages.
  //  - has no response headers, then only match against rule for the
  //    `kOnBeforeRequest` stage.
  // TODO(crbug.com/331846139): Add filtering logic to limit which requests can
  // be matched here, similar to what's done in the webrequest event router for
  // OnBeforeRequest and OnHeadersReceived.
  if (navigation_handle->GetResponseHeaders()) {
    // The allow rule cache from `params` does not need to be copied into
    // `params_with_headers` since it won't have an effect on the final value of
    // `frame_action`.
    RequestParams params_with_headers(host, navigation_handle->IsPost(),
                                      navigation_handle->GetResponseHeaders());
    // Take the matching allowAllRequests action with the highest priority
    // between all ruleset matching stages that this navigation request can be
    // matched against.
    frame_action = GetMaxPriorityAction(
        std::move(frame_action),
        GetAllowAllRequestsAction(params_with_headers,
                                  RulesetMatchingStage::kOnHeadersReceived));
  }

  std::optional<RequestAction> action =
      GetMaxPriorityAction(GetAllowlistedFrameAction(params.parent_routing_id),
                           std::move(frame_action));

  content::GlobalRenderFrameHostId frame_id = host->GetGlobalId();
  allowlisted_frames_.erase(frame_id);

  if (action) {
    allowlisted_frames_.insert(std::make_pair(frame_id, std::move(*action)));
  }
}

std::optional<RequestAction>
RulesetMatcherBase::GetAllowlistedFrameActionForTesting(
    content::RenderFrameHost* host) const {
  DCHECK(host);
  return GetAllowlistedFrameAction(host->GetGlobalId());
}

RequestAction RulesetMatcherBase::CreateBlockOrCollapseRequestAction(
    const RequestParams& params,
    const flat_rule::UrlRule& rule) const {
  return CreateRequestAction(ShouldCollapseResourceType(params.element_type)
                                 ? RequestAction::Type::COLLAPSE
                                 : RequestAction::Type::BLOCK,
                             rule);
}

RequestAction RulesetMatcherBase::CreateAllowAction(
    const RequestParams& params,
    const flat_rule::UrlRule& rule) const {
  return CreateRequestAction(RequestAction::Type::ALLOW, rule);
}

RequestAction RulesetMatcherBase::CreateAllowAllRequestsAction(
    const RequestParams& params,
    const url_pattern_index::flat::UrlRule& rule) const {
  return CreateRequestAction(RequestAction::Type::ALLOW_ALL_REQUESTS, rule);
}

std::optional<RequestAction> RulesetMatcherBase::CreateUpgradeAction(
    const RequestParams& params,
    const url_pattern_index::flat::UrlRule& rule) const {
  if (!IsUpgradeableUrl(*params.url)) {
    // TODO(crbug.com/40111509): this results in counterintuitive behavior.
    return std::nullopt;
  }
  RequestAction upgrade_action =
      CreateRequestAction(RequestAction::Type::UPGRADE, rule);
  upgrade_action.redirect_url = GetUpgradedUrl(*params.url);
  return upgrade_action;
}

std::optional<RequestAction>
RulesetMatcherBase::CreateRedirectActionFromMetadata(
    const RequestParams& params,
    const url_pattern_index::flat::UrlRule& rule,
    const ExtensionMetadataList& metadata_list) const {
  // Find the UrlRuleMetadata corresponding to |rule|. Since |metadata_list| is
  // sorted by rule id, use LookupByKey which binary searches for fast lookup.
  const flat::UrlRuleMetadata* metadata = metadata_list.LookupByKey(rule.id());

  // There must be a UrlRuleMetadata object corresponding to the |rule|.
  DCHECK(metadata);
  DCHECK_EQ(metadata->id(), rule.id());
  DCHECK(metadata->redirect_url() || metadata->transform());

  GURL redirect_url;
  if (metadata->redirect_url()) {
    redirect_url = GURL(metadata->redirect_url()->string_view());
  } else {
    redirect_url = GetTransformedURL(params, *metadata->transform());
  }

  // Sanity check that we don't redirect to a javascript url. Specifying
  // redirect to a javascript url and specifying javascript as a transform
  // scheme is prohibited. In addition extensions can't intercept requests to
  // javascript urls. Hence we should never end up with a javascript url here.
  DCHECK(!redirect_url.SchemeIs(url::kJavaScriptScheme));

  return CreateRedirectAction(params, rule, std::move(redirect_url));
}

std::optional<RequestAction> RulesetMatcherBase::CreateRedirectAction(
    const RequestParams& params,
    const url_pattern_index::flat::UrlRule& rule,
    GURL redirect_url) const {
  // Redirecting WebSocket handshake request is prohibited.
  // TODO(crbug.com/40111509): this results in counterintuitive behavior.
  if (params.element_type == flat_rule::ElementType_WEBSOCKET) {
    return std::nullopt;
  }

  // Prevent a redirect loop where a URL continuously redirects to itself.
  if (!redirect_url.is_valid() || *params.url == redirect_url) {
    return std::nullopt;
  }

  RequestAction redirect_action =
      CreateRequestAction(RequestAction::Type::REDIRECT, rule);
  redirect_action.redirect_url = std::move(redirect_url);
  return redirect_action;
}

std::vector<RequestAction>
RulesetMatcherBase::GetModifyHeadersActionsFromMetadata(
    const RequestParams& params,
    const std::vector<const url_pattern_index::flat::UrlRule*>& rules,
    const ExtensionMetadataList& metadata_list) const {
  using FlatHeaderList = flatbuffers::Vector<flatbuffers::Offset<
      extensions::declarative_net_request::flat::ModifyHeaderInfo>>;

  // Helper method to convert a list of headers from a rule's metadata to a list
  // of RequestAction::HeaderInfo.
  auto get_headers_for_action = [](const FlatHeaderList& headers_for_rule) {
    std::vector<RequestAction::HeaderInfo> headers_for_action;
    for (const auto* flat_header_info : headers_for_rule) {
      headers_for_action.emplace_back(*flat_header_info);
    }

    return headers_for_action;
  };

  std::vector<RequestAction> actions;
  for (const auto* rule : rules) {
    const flat::UrlRuleMetadata* metadata =
        metadata_list.LookupByKey(rule->id());

    DCHECK(metadata);
    DCHECK_EQ(metadata->id(), rule->id());

    RequestAction action =
        CreateRequestAction(RequestAction::Type::MODIFY_HEADERS, *rule);
    action.request_headers_to_modify =
        get_headers_for_action(*metadata->request_headers());
    action.response_headers_to_modify =
        get_headers_for_action(*metadata->response_headers());

    actions.push_back(std::move(action));
  }

  return actions;
}

RequestAction RulesetMatcherBase::CreateRequestAction(
    RequestAction::Type type,
    const flat_rule::UrlRule& rule) const {
  return RequestAction(type, rule.id(), rule.priority(), ruleset_id(),
                       extension_id());
}

std::optional<RequestAction> RulesetMatcherBase::GetAllowlistedFrameAction(
    content::GlobalRenderFrameHostId frame_id) const {
  auto it = allowlisted_frames_.find(frame_id);
  if (it == allowlisted_frames_.end()) {
    return std::nullopt;
  }

  return it->second.Clone();
}

}  // namespace extensions::declarative_net_request