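// Revision 910e62b5, created January 15 (text carried over from the commit
// history view; not part of the original source file).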
// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "extensions/browser/api/declarative_net_request/request_params.h"

#include <algorithm>
#include <optional>
#include <string_view>

#include "base/check.h"
#include "base/containers/flat_map.h"
#include "base/dcheck_is_on.h"
#include "base/functional/bind.h"
#include "base/no_destructor.h"
#include "base/strings/pattern.h"
#include "base/strings/string_util.h"
#include "content/public/browser/render_frame_host.h"
#include "content/public/browser/render_process_host.h"
#include "content/public/browser/web_contents.h"
#include "extensions/browser/api/declarative_net_request/constants.h"
#include "extensions/browser/api/declarative_net_request/flat/extension_ruleset_generated.h"
#include "extensions/browser/api/declarative_net_request/utils.h"
#include "extensions/browser/api/web_request/web_request_info.h"
#include "extensions/browser/api/web_request/web_request_resource_type.h"
#include "extensions/browser/extensions_browser_client.h"
#include "extensions/common/constants.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "net/http/http_response_headers.h"
#include "third_party/blink/public/mojom/loader/resource_load_info.mojom-shared.h"
#include "url/gurl.h"

namespace extensions::declarative_net_request {

namespace {
namespace flat_rule = url_pattern_index::flat;

// Reports whether a request for `url` is third-party with respect to
// `document_origin`. An opaque `document_origin` always yields true; otherwise
// the request is third-party exactly when SameDomainOrHost() (evaluated with
// INCLUDE_PRIVATE_REGISTRIES) says `url` and `document_origin` differ.
// TODO(crbug.com/40508457): Look into caching this.
bool IsThirdPartyRequest(const GURL& url, const url::Origin& document_origin) {
  // Short-circuits so that the domain comparison is skipped for opaque
  // origins.
  const bool is_first_party =
      !document_origin.opaque() &&
      net::registry_controlled_domains::SameDomainOrHost(
          url, document_origin,
          net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
  return !is_first_party;
}

// Origin-based overload: reports whether `origin` is third-party with respect
// to `document_origin`. An opaque `document_origin` always yields true;
// otherwise the result is the negation of SameDomainOrHost() evaluated with
// INCLUDE_PRIVATE_REGISTRIES.
bool IsThirdPartyRequest(const url::Origin& origin,
                         const url::Origin& document_origin) {
  // Short-circuits so that the domain comparison is skipped for opaque
  // origins.
  const bool is_first_party =
      !document_origin.opaque() &&
      net::registry_controlled_domains::SameDomainOrHost(
          origin, document_origin,
          net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
  return !is_first_party;
}

// Returns the global id of `host`, or a default-constructed
// `content::GlobalRenderFrameHostId` when `host` is null.
content::GlobalRenderFrameHostId GetFrameRoutingId(
    content::RenderFrameHost* host) {
  return host ? host->GetGlobalId() : content::GlobalRenderFrameHostId();
}

// Returns whether any value of `header` in `response_headers` matches the
// value pattern stored in `flat_pattern`.
// Note: Matching is case-insensitive and supports * (0 or more characters)
// and ? (0 or 1 characters) wildcards.
// Only the header value is lower-cased here; the pattern is used as-is, so it
// is presumably already lower-case when stored (TODO confirm at indexing).
// `flat_pattern` must be non-null: it is dereferenced unconditionally.
bool HasHeaderValue(const net::HttpResponseHeaders& response_headers,
                    std::string_view header,
                    const flatbuffers::String* flat_pattern) {
  const auto value_pattern = CreateString<std::string_view>(*flat_pattern);

  // `cursor` is the enumeration state threaded through successive
  // EnumerateHeader() calls.
  size_t cursor = 0;
  while (true) {
    const std::optional<std::string_view> value =
        response_headers.EnumerateHeader(&cursor, header);
    if (!value) {
      // Every value of `header` was inspected without a match.
      return false;
    }
    if (base::MatchPattern(base::ToLowerASCII(*value), value_pattern)) {
      return true;
    }
  }
}

// Returns true if `response_headers` satisfies at least one entry of
// `header_conditions`. A condition is satisfied when:
// - its header is present in the response AND
// - if condition->values() is specified, some value of that header matches
//   one of them AND
// - if condition->excluded_values() is specified, no value of that header
//   matches any of them.
bool MatchesHeaderConditions(
    const net::HttpResponseHeaders& response_headers,
    const flatbuffers::Vector<flatbuffers::Offset<flat::HeaderCondition>>&
        header_conditions) {
  for (const flat::HeaderCondition* condition : header_conditions) {
    const std::string_view header_name =
        CreateString<std::string_view>(*condition->header());
    if (!response_headers.HasHeader(header_name)) {
      continue;
    }

    const auto* wanted_values = condition->values();
    const auto* unwanted_values = condition->excluded_values();

    // With neither list specified, the mere presence of the header is enough.
    if (!wanted_values && !unwanted_values) {
      return true;
    }

    const auto response_has_value =
        [&response_headers,
         header_name](const flatbuffers::String* value_pattern) {
          return HasHeaderValue(response_headers, header_name, value_pattern);
        };

    // An excluded value disqualifies this condition; try the next one.
    if (unwanted_values &&
        std::ranges::any_of(*unwanted_values, response_has_value)) {
      continue;
    }

    // No excluded value was hit. The condition is satisfied when there is no
    // list of wanted values, or when at least one wanted value is present.
    if (!wanted_values ||
        std::ranges::any_of(*wanted_values, response_has_value)) {
      return true;
    }
  }

  return false;
}

// Evaluates the `flat::EmbedderConditions` serialized in `conditions_buffer`
// against a request. Returns true only when all of the following hold:
// - `tab_id` is in tab_ids_included() (if specified) and not in
//   tab_ids_excluded() (if specified);
// - `top_level_frame_or_initiator_host` is accepted by
//   DoesHostMatchDomainLists() for the included/excluded top domain lists;
// - if `response_headers` is non-null: no condition in
//   excluded_response_headers() matches and, when response_headers() is
//   specified and non-empty, at least one of its conditions matches.
// Header conditions are not evaluated when `response_headers` is null.
bool DoEmbedderConditionsMatch(
    int tab_id,
    const std::string& top_level_frame_or_initiator_host,
    scoped_refptr<const net::HttpResponseHeaders> response_headers,
    const flatbuffers::Vector<uint8_t>& conditions_buffer) {
#if DCHECK_IS_ON()
  // Sanity check (not a security check) that `conditions_buffer` holds a
  // valid Flatbuffer rooted at `flat::EmbedderConditions`. Two cases:
  //  - File backed ruleset: the file checksum is already verified on ruleset
  //    load, so the nested flatbuffer shouldn't be corrupted. On-disk
  //    modification of stored artifacts is outside Chrome's security model
  //    anyway.
  //  - Non-file backed (session-scoped) ruleset: the ruleset only lives in
  //    memory, hence there shouldn't be corruption risk.
  flatbuffers::Verifier buffer_verifier(conditions_buffer.Data(),
                                        conditions_buffer.size());
  CHECK(buffer_verifier.VerifyBuffer<flat::EmbedderConditions>(
      kEmbedderConditionsBufferIdentifier));
#endif  // DCHECK_IS_ON()

  const auto* conditions =
      flatbuffers::GetRoot<flat::EmbedderConditions>(conditions_buffer.Data());
  DCHECK(conditions);

  // Tab ID matching. The lists are expected to be sorted, which allows a
  // binary search.
  const auto contains_tab_id =
      [tab_id](const flatbuffers::Vector<int32_t>& sorted_ids) {
        DCHECK(std::is_sorted(sorted_ids.begin(), sorted_ids.end()));
        return std::binary_search(sorted_ids.begin(), sorted_ids.end(),
                                  tab_id);
      };

  const auto* included_tab_ids = conditions->tab_ids_included();
  if (included_tab_ids && !contains_tab_id(*included_tab_ids)) {
    return false;
  }

  const auto* excluded_tab_ids = conditions->tab_ids_excluded();
  if (excluded_tab_ids && contains_tab_id(*excluded_tab_ids)) {
    return false;
  }

  // Top-level frame domain matching.
  const auto* included_top_domains = conditions->top_domains_included();
  const auto* excluded_top_domains = conditions->top_domains_excluded();

#if DCHECK_IS_ON()
  // Both domain lists are expected to be sorted according to
  // url_pattern_index::CompareDomains().
  const auto domain_less = [](const flatbuffers::String* lhs,
                              const flatbuffers::String* rhs) {
    return url_pattern_index::CompareDomains(
               std::string_view(lhs->c_str(), lhs->size()),
               std::string_view(rhs->c_str(), rhs->size())) < 0;
  };
  if (included_top_domains) {
    CHECK(std::is_sorted(included_top_domains->begin(),
                         included_top_domains->end(), domain_less));
  }
  if (excluded_top_domains) {
    CHECK(std::is_sorted(excluded_top_domains->begin(),
                         excluded_top_domains->end(), domain_less));
  }
#endif  // DCHECK_IS_ON()

  if (!url_pattern_index::DoesHostMatchDomainLists(
          top_level_frame_or_initiator_host, included_top_domains,
          excluded_top_domains)) {
    return false;
  }

  // Without response headers there is nothing left to check.
  if (!response_headers) {
    return true;
  }

  // Do not match the rule if any excluded header condition matches.
  const auto* excluded_header_conditions =
      conditions->excluded_response_headers();
  if (excluded_header_conditions &&
      MatchesHeaderConditions(*response_headers,
                              *excluded_header_conditions)) {
    return false;
  }

  // Do not match the rule if header conditions were specified but none of
  // them matches.
  const auto* required_header_conditions = conditions->response_headers();
  if (required_header_conditions && required_header_conditions->size() &&
      !MatchesHeaderConditions(*response_headers,
                               *required_header_conditions)) {
    return false;
  }

  return true;
}

}  // namespace

// Builds the matching parameters for the network request described by `info`.
// `url` stores the address of `info.url`, so `info` must outlive this object.
// `response_headers` may be null; it is handed to the embedder conditions
// matcher, which skips header conditions when it is null.
RequestParams::RequestParams(
    const WebRequestInfo& info,
    scoped_refptr<const net::HttpResponseHeaders> response_headers)
    : url(&info.url),
      first_party_origin(info.initiator.value_or(url::Origin())),
      element_type(GetElementType(info.web_request_type)),
      method(GetRequestMethod(info.url.SchemeIsHTTPOrHTTPS(), info.method)),
      parent_routing_id(info.parent_routing_id) {
  // Allow/allowAllRequest rules matched in earlier rule matching stages can
  // influence rule matches for later matching stages. Hence this information
  // is needed from `info`. Each present action is duplicated via Clone().
  for (const auto& [key, allow_action] : info.max_priority_allow_action) {
    max_priority_allow_action.emplace(
        key, allow_action.has_value()
                 ? std::make_optional(allow_action->Clone())
                 : std::nullopt);
  }

  is_third_party = IsThirdPartyRequest(*url, first_party_origin);

  // Determine the top-level frame or initiator host. This is the request host
  // for main-frame requests, the host of the outer-most frame of the request
  // initiator if available, otherwise the host of the request initiator. When
  // none of those are available, fall back to an opaque origin.
  std::string top_level_frame_or_initiator_host;
  if (info.web_request_type == WebRequestResourceType::MAIN_FRAME) {
    top_level_frame_or_initiator_host = info.url.GetHost();
  } else {
    url::Origin top_level_frame_or_initiator_origin = first_party_origin;

    content::RenderFrameHost* initiator_host =
        content::RenderFrameHost::FromID(info.parent_routing_id);
    if (initiator_host) {
      url::Origin top_origin =
          initiator_host->GetOutermostMainFrame()->GetLastCommittedOrigin();

      // An opaque top-level origin is ignored; the initiator origin chosen
      // above stays in effect.
      if (!top_origin.opaque()) {
        top_level_frame_or_initiator_origin = top_origin;
      }
    }

    top_level_frame_or_initiator_host =
        top_level_frame_or_initiator_origin.host();
  }

  // Both by-value arguments are moved on their last use, so binding does not
  // copy the string or add an extra reference to the headers.
  embedder_conditions_matcher = base::BindRepeating(
      DoEmbedderConditionsMatch, info.frame_data.tab_id,
      std::move(top_level_frame_or_initiator_host),
      std::move(response_headers));
}

// Builds the matching parameters from the document currently committed in
// `host`. `host` must be non-null (it is dereferenced unconditionally) and
// `url` stores the address of the URL returned by
// `host->GetLastCommittedURL()`. The method is POST when `is_post_navigation`
// is true and GET otherwise. `response_headers` may be null; it is handed to
// the embedder conditions matcher, which skips header conditions when it is
// null.
RequestParams::RequestParams(
    content::RenderFrameHost* host,
    bool is_post_navigation,
    scoped_refptr<const net::HttpResponseHeaders> response_headers)
    : url(&host->GetLastCommittedURL()),
      method(is_post_navigation ? flat_rule::RequestMethod_POST
                                : flat_rule::RequestMethod_GET),
      parent_routing_id(GetFrameRoutingId(host->GetParentOrOuterDocument())) {
  if (content::RenderFrameHost* parent = host->GetParentOrOuterDocument()) {
    // Note the discrepancy with the WebRequestInfo constructor. For a
    // navigation request, we'd use the request initiator as the
    // `first_party_origin`. But here we use the origin of the parent frame.
    // This is the same as crbug.com/996998.
    first_party_origin = parent->GetLastCommittedOrigin();
    element_type = url_pattern_index::flat::ElementType_SUBDOCUMENT;
  } else {
    // No parent or outer document: treat `host` as a main frame with a
    // default-constructed first party origin.
    first_party_origin = url::Origin();
    element_type = url_pattern_index::flat::ElementType_MAIN_FRAME;
  }
  is_third_party =
      IsThirdPartyRequest(host->GetLastCommittedOrigin(), first_party_origin);

  // Use the outermost main frame's host for top-domain matching unless its
  // origin is opaque (or the frame is unavailable), in which case use the
  // host of `first_party_origin`.
  auto* outermost_main_frame = host->GetOutermostMainFrame();
  url::Origin top_origin = outermost_main_frame
                               ? outermost_main_frame->GetLastCommittedOrigin()
                               : url::Origin();

  std::string top_level_frame_or_initiator_host =
      top_origin.opaque() ? first_party_origin.host() : top_origin.host();

  // Only the tab id is needed; the window id is a required out-parameter.
  int window_id_unused = extension_misc::kUnknownWindowId;
  int tab_id = extension_misc::kUnknownTabId;
  ExtensionsBrowserClient::Get()->GetTabAndWindowIdForWebContents(
      content::WebContents::FromRenderFrameHost(host), &tab_id,
      &window_id_unused);

  // Both by-value arguments are moved on their last use, so binding does not
  // copy the string or add an extra reference to the headers.
  embedder_conditions_matcher = base::BindRepeating(
      DoEmbedderConditionsMatch, tab_id,
      std::move(top_level_frame_or_initiator_host),
      std::move(response_headers));
}

// Builds the matching parameters from explicitly supplied values instead of a
// live request or frame. `url` stores the address of the `url` argument, so
// the caller must keep that GURL alive for as long as this object is used.
// The host used for top-domain matching is that of `top_origin`, or that of
// `initiator` when `top_origin` is opaque. `parent_routing_id` keeps its
// default value. `response_headers` may be null; it is handed to the embedder
// conditions matcher, which skips header conditions when it is null.
RequestParams::RequestParams(
    const GURL& url,
    const url::Origin& initiator,
    const url::Origin& top_origin,
    const api::declarative_net_request::ResourceType request_type,
    const api::declarative_net_request::RequestMethod request_method,
    int tab_id,
    scoped_refptr<const net::HttpResponseHeaders> response_headers)
    : url(&url),
      first_party_origin(initiator),
      element_type(GetElementType(request_type)),
      // Computed from the `initiator` argument (the value copied into
      // `first_party_origin`) so the result does not depend on the order in
      // which the members are declared and therefore initialized.
      is_third_party(IsThirdPartyRequest(url, initiator)),
      method(GetRequestMethod(url.SchemeIsHTTPOrHTTPS(), request_method)),
      // `response_headers` is moved on its last use to avoid an extra
      // reference count round trip.
      embedder_conditions_matcher(base::BindRepeating(
          DoEmbedderConditionsMatch,
          tab_id,
          top_origin.opaque() ? initiator.host() : top_origin.host(),
          std::move(response_headers))) {}

// Default constructor, defaulted out of line: members receive whatever
// initial values their declarations provide.
RequestParams::RequestParams() = default;
// Destructor, defaulted out of line.
RequestParams::~RequestParams() = default;

}  // namespace extensions::declarative_net_request