// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "extensions/common/content_script_injection_url_getter.h"

#include "base/containers/contains.h"
#include "base/containers/flat_set.h"
#include "base/notreached.h"
#include "base/trace_event/typed_macros.h"
#include "extensions/common/mojom/match_origin_as_fallback.mojom-shared.h"
#include "url/scheme_host_port.h"

namespace extensions {

// Resolves the URL that stands in for `document_url` when the frame described
// by `context_data` may need to fall back to its (precursor) origin. Returns:
//  - `document_url` itself when the fallback doesn't apply or can't be
//    resolved,
//  - the URL of the frame's origin-or-precursor-origin for `kAlways`, or
//  - the URL of the closest non-about: parent/opener that shares that origin
//    tuple for `kMatchForAboutSchemeAndClimbTree`.
// Unless `allow_inaccessible_parents` is true, the frame must also be able to
// access whatever origin / ancestor ends up being used as the fallback.
// static
GURL ContentScriptInjectionUrlGetter::Get(
    const FrameContextData& context_data,
    const GURL& document_url,
    mojom::MatchOriginAsFallbackBehavior match_origin_as_fallback,
    bool allow_inaccessible_parents) {
  // Document schemes that fall back to the origin when the
  // `match_origin_as_fallback` behavior is to always match.
  // NOTE(devlin): This list is not exhaustive: some schemes may be missing,
  // and more may be added in the future. Would a blocklist make more sense?
  // Doing this for every opaque scheme *should* be safe, since we still have
  // a permission check against the precursor origin. It would only be a
  // problem if an extension-accessible precursor origin could create an
  // opaque-origin frame that *shouldn't* be accessible.
  static const char* const kOriginFallbackSchemes[] = {
      url::kAboutScheme,
      url::kBlobScheme,
      url::kDataScheme,
      url::kFileSystemScheme,
  };

  // TODO(crbug.com/40055997): Consider reducing tracing instrumentation
  // in the main function body and in the lambda below (once the bug is
  // understood and fixed).
  //
  // Decides whether the origin is relevant at all for this combination of
  // fallback behavior and document scheme, tracing both the branch taken and
  // the final verdict.
  const auto origin_fallback_applies = [&]() -> bool {
    bool consider = false;
    switch (match_origin_as_fallback) {
      case mojom::MatchOriginAsFallbackBehavior::kNever: {
        TRACE_EVENT_INSTANT("extensions",
                            "ContentScriptInjectionUrlGetter::Get/"
                            "should_consider_origin: origin-never");
        // Never fall back; `consider` keeps its initial `false`.
        break;
      }
      case mojom::MatchOriginAsFallbackBehavior::
          kMatchForAboutSchemeAndClimbTree: {
        TRACE_EVENT_INSTANT("extensions",
                            "ContentScriptInjectionUrlGetter::Get/"
                            "should_consider_origin: origin-climb");
        consider = document_url.SchemeIs(url::kAboutScheme);
        break;
      }
      case mojom::MatchOriginAsFallbackBehavior::kAlways: {
        TRACE_EVENT_INSTANT("extensions",
                            "ContentScriptInjectionUrlGetter::Get/"
                            "should_consider_origin: origin-always");
        consider =
            base::Contains(kOriginFallbackSchemes, document_url.GetScheme());
        break;
      }
    }

    if (!consider) {
      TRACE_EVENT_INSTANT("extensions",
                          "ContentScriptInjectionUrlGetter::Get/"
                          "should_consider_origin=false");
      return false;
    }
    TRACE_EVENT_INSTANT("extensions",
                        "ContentScriptInjectionUrlGetter::Get/"
                        "should_consider_origin=true");
    return true;
  };

  // Nothing more to do when the origin is irrelevant.
  const bool consider_origin = origin_fallback_applies();
  if (!consider_origin) {
    TRACE_EVENT_INSTANT(
        "extensions", "ContentScriptInjectionUrlGetter::Get/!consider-origin");
    return document_url;
  }

  // Look up the frame's security origin. An about: frame takes the origin of
  // its controlling frame (e.g., about:blank on https://example.com has the
  // security origin https://example.com). Other frames, such as data: frames,
  // have an opaque origin; for those the precursor origin is used instead.
  const url::Origin document_origin = context_data.GetOrigin();
  const url::SchemeHostPort& fallback_tuple =
      document_origin.GetTupleOrPrecursorTupleIfOpaque();

  // Without a valid tuple (e.g. after a browser-initiated navigation to an
  // opaque URL) there is no origin to fall back to.
  if (!fallback_tuple.IsValid()) {
    TRACE_EVENT_INSTANT("extensions",
                        "ContentScriptInjectionUrlGetter::Get/invalid-tuple");
    return document_url;
  }

  const url::Origin fallback_origin =
      url::Origin::Create(fallback_tuple.GetURL());

  // The access check is skipped entirely when inaccessible parents are
  // allowed.
  const bool precursor_usable = allow_inaccessible_parents ||
                                context_data.CanAccess(fallback_origin);
  if (!precursor_usable) {
    // The `context_data` can't access its precursor.
    TRACE_EVENT_INSTANT(
        "extensions",
        "ContentScriptInjectionUrlGetter::Get/no-precursor-access");
    return document_url;
  }

  // Note: Even though the frame origin can theoretically access its precursor
  // origin, further restrictions may apply in practice - for example when the
  // frame has the disallowdocumentaccess attribute. Ignoring that is fine for
  // context classification, because (unlike e.g. a sandboxed frame) it isn't
  // meant as an origin boundary.

  // At this point the initiator origin is an appropriate fallback.

  const bool use_origin_directly =
      match_origin_as_fallback == mojom::MatchOriginAsFallbackBehavior::kAlways;
  if (use_origin_directly) {
    // Simple case: the origin itself is the answer.
    TRACE_EVENT_INSTANT(
        "extensions",
        "ContentScriptInjectionUrlGetter::Get/origin-or-precursor");
    return fallback_origin.GetURL();
  }

  DCHECK_EQ(
      mojom::MatchOriginAsFallbackBehavior::kMatchForAboutSchemeAndClimbTree,
      match_origin_as_fallback);

  // The remaining case requires climbing the frame tree, because match
  // patterns may carry paths and not just origins. For example, an extension
  // that runs on google.com/maps/* with match_about_blank true should run on
  // about:-scheme frames created by google.com/maps, but not on about:-scheme
  // frames created by google.com (which is all the precursor tuple origin
  // would tell us).

  // Walk up the frame/window hierarchy to the closest non-about: page that has
  // the same origin as the precursor, and return that page's URL.
  // TODO(crbug.com/40753677): This can return the incorrect result, e.g.
  // if a parent frame navigates a grandchild frame to about:blank.
  base::flat_set<uintptr_t> seen_frame_ids;
  GURL ancestor_url;
  std::unique_ptr<FrameContextData> ancestor =
      context_data.CloneFrameContextData();
  while (true) {
    seen_frame_ids.insert(ancestor->GetId());
    ancestor = ancestor->GetLocalParentOrOpener();

    // Either the ancestor chain ended without a valid parent, or we hit a
    // remote web frame (which means a different origin). Keep the original
    // URL.
    if (!ancestor) {
      TRACE_EVENT_INSTANT(
          "extensions", "ContentScriptInjectionUrlGetter::Get/no-more-parents");
      return document_url;
    }

    // Guard against cycles - see https://crbug.com/568432 and
    // https://crbug.com/883526.
    if (base::Contains(seen_frame_ids, ancestor->GetId())) {
      TRACE_EVENT_INSTANT("extensions",
                          "ContentScriptInjectionUrlGetter::Get/infinite-loop");
      return document_url;
    }

    const url::Origin ancestor_origin = ancestor->GetOrigin();
    const url::SchemeHostPort& ancestor_tuple =
        ancestor_origin.GetTupleOrPrecursorTupleIfOpaque();
    if (!ancestor_tuple.IsValid() || ancestor_tuple != fallback_tuple) {
      // The ancestor's tuple origin differs from the frame's. This is an edge
      // case that arises when a parent navigates an iframe or popup belonging
      // to a child frame at a different origin. [1] There is then no full URL
      // (i.e., one including the path) with the same security origin to use
      // for the frame in question, so give up.
      // [1] Consider a frame tree like:
      // <html> <!--example.com-->
      //   <iframe id="a" src="a.com">
      //     <iframe id="b" src="b.com"></iframe>
      //   </iframe>
      // </html>
      // Frame "a" is cross-origin from the top-level frame, so the
      // example.com top-level frame can't directly access frame "b". It can
      // still navigate it, though, through
      // window.frames[0].frames[0].location.href = 'about:blank';
      // Afterwards, the precursor tuple origin of frame "b" is example.com,
      // while the parent tuple origin is a.com.
      // Usually this would already have bailed above on a remote frame, but
      // it may not if we're at the process limit.
      TRACE_EVENT_INSTANT("extensions",
                          "ContentScriptInjectionUrlGetter::Get/tuple-diff");
      return document_url;
    }

    // When inaccessible parents aren't allowed, the security origin may still
    // be off limits if the author blocked same-origin access with the
    // disallowdocumentaccess attribute on the iframe.
    if (!(allow_inaccessible_parents || context_data.CanAccess(*ancestor))) {
      // The frame can't access this ancestor.
      TRACE_EVENT_INSTANT(
          "extensions",
          "ContentScriptInjectionUrlGetter::Get/no-parent-access");
      return document_url;
    }

    ancestor_url = ancestor->GetUrl();
    // Keep climbing only while the ancestor is itself an about: document.
    if (!ancestor_url.SchemeIs(url::kAboutScheme)) {
      break;
    }
  }

  DCHECK(!ancestor_url.is_empty());

  // The frame is expected to be able to access the ancestor document (unless
  // that requirement was explicitly waived): the ancestor shares the frame's
  // tuple origin, and frame access was verified above.
  TRACE_EVENT_INSTANT("extensions",
                      "ContentScriptInjectionUrlGetter::Get/parent-url");
  DCHECK(allow_inaccessible_parents ||
         context_data.CanAccess(ancestor->GetOrigin()));
  return ancestor_url;
}

}  // namespace extensions