// 910e62b5, created on January 15 (commit history).
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/browser/btm/btm_page_visit_observer.h"

#include <set>
#include <utility>
#include <vector>

#include "base/check.h"
#include "base/debug/alias.h"
#include "base/debug/dump_without_crashing.h"
#include "base/metrics/histogram_functions.h"
#include "content/browser/btm/btm_bounce_detector.h"
#include "content/browser/btm/btm_utils.h"
#include "content/browser/btm/cookie_access_filter.h"
#include "content/public/browser/btm_redirect_info.h"
#include "content/public/browser/cookie_access_details.h"
#include "content/public/browser/navigation_handle.h"
#include "content/public/browser/navigation_handle_user_data.h"
#include "content/public/browser/render_frame_host.h"
#include "url/gurl_debug.h"

namespace content {

// Snapshots the reported fields of a navigation from `navigation_handle`:
// whether it was user-initiated and/or renderer-initiated, its page
// transition, its URL, and the value of `GetNextPageUkmSourceId()`.
//
// The navigation counts as user-initiated when it was not renderer-initiated
// or when the handle reports a user gesture.
//
// `navigation_handle` must have committed. This is enforced by the CHECK in
// the constructor body, which runs after the member initializers have already
// read from the handle.
BtmNavigationInfo::BtmNavigationInfo(NavigationHandle& navigation_handle)
    : was_user_initiated(!navigation_handle.IsRendererInitiated() ||
                         navigation_handle.HasUserGesture()),
      was_renderer_initiated(navigation_handle.IsRendererInitiated()),
      page_transition(navigation_handle.GetPageTransition()),
      destination_url(navigation_handle.GetURL()),
      destination_source_id(navigation_handle.GetNextPageUkmSourceId()) {
  CHECK(navigation_handle.HasCommitted());
}
// Move construction, move assignment and destruction all use the
// compiler-generated implementations; they are defined out of line here.
BtmNavigationInfo::BtmNavigationInfo(BtmNavigationInfo&&) = default;
BtmNavigationInfo& BtmNavigationInfo::operator=(BtmNavigationInfo&&) = default;
BtmNavigationInfo::~BtmNavigationInfo() = default;

// Starts observing `web_contents`.
//
// The current page is seeded from the primary main frame's last committed URL
// and page UKM source id, and the page-change timestamp is initialized from
// `clock`.
//
// `callback` is what ReportVisit() runs for every reported visit.
// `clock` must be non-null (enforced by CHECK_DEREF).
BtmPageVisitObserver::BtmPageVisitObserver(WebContents* web_contents,
                                           VisitCallback callback,
                                           base::Clock* clock)
    : WebContentsObserver(web_contents),
      // `callback` was received by value, so hand it over to the member rather
      // than making a second copy of it.
      callback_(std::move(callback)),
      current_page_{
          .url = web_contents->GetPrimaryMainFrame()->GetLastCommittedURL(),
          .source_id =
              web_contents->GetPrimaryMainFrame()->GetPageUkmSourceId()},
      clock_(CHECK_DEREF(clock)),
      last_page_change_time_(clock_->Now()) {}

BtmPageVisitObserver::~BtmPageVisitObserver() {
  // Once destroyed we can no longer receive late cookie access notifications,
  // so the best we can do is report every visit that is still queued, right
  // now.
  while (true) {
    if (pending_visits_.empty()) {
      break;
    }
    ReportVisit();
  }
}

namespace {

// Returns true iff `t` is `kWrite` or `kReadWrite`.
inline bool IsWrite(BtmDataAccessType t) {
  switch (t) {
    case BtmDataAccessType::kWrite:
    case BtmDataAccessType::kReadWrite:
      return true;
    default:
      return false;
  }
}

// Per-navigation bookkeeping stored on a NavigationHandle: the cookie accesses
// reported for the navigation and the redirect-chain positions that were
// server redirects. CreateNavigationInfo() turns this into the
// BtmNavigationInfo that BtmPageVisitObserver passes to its callback.
class NavigationState
    : public content::NavigationHandleUserData<NavigationState> {
 public:
  explicit NavigationState(NavigationHandle&) {}

  // Forwards a cookie access of kind `op` on `url` to the access filter.
  void RecordCookieAccess(const GURL& url, CookieOperation op) {
    filter_.AddAccess(url, op);
  }

  // Remembers that the redirect-chain entry at `redirect_chain_index` was a
  // server redirect. Recording the same index more than once has no further
  // effect.
  void RecordServerRedirectAtChainIndex(size_t redirect_chain_index) {
    server_redirect_chain_indices_.insert(redirect_chain_index);
  }

  // Builds the BtmNavigationInfo for `navigation_handle` and returns it
  // together with the cookie access type of the navigation's final (i.e.
  // committed) URL.
  // Precondition: `navigation_handle.HasCommitted()` must be `true`.
  std::pair<BtmNavigationInfo, BtmDataAccessType> CreateNavigationInfo(
      NavigationHandle& navigation_handle) {
    BtmNavigationInfo info(navigation_handle);

    // Gather the URL of every recorded server redirect, in ascending
    // chain-index order.
    const std::vector<GURL>& chain = navigation_handle.GetRedirectChain();
    std::vector<GURL> filter_urls;
    for (const size_t chain_index : server_redirect_chain_indices_) {
      filter_urls.push_back(chain[chain_index]);
    }
    // `filter_.Filter()` requires its URL list to contain every URL that
    // `filter_` recorded an access type for, so the committed URL goes last.
    filter_urls.push_back(navigation_handle.GetURL());

    // Cookie accesses can race each other, so their order may not match the
    // order of the navigations. In that case Filter() returns false and
    // assumes kUnknown for all accesses.
    //
    // TODO: crbug.com/407710083 - `CHECK` the result of `filter_.Filter` once
    // the race is fixed.
    std::vector<BtmDataAccessType> access_types;
    const bool all_matched = filter_.Filter(filter_urls, access_types);
    base::UmaHistogramBoolean(
        "Privacy.DIPS.PageVisitObserver.AllAccessesMatched", all_matched);

    // Populate `info.server_redirects`. `position` walks `filter_urls` and
    // `access_types` in step with the set of recorded chain indices.
    size_t position = 0;
    for (const size_t chain_index : server_redirect_chain_indices_) {
      info.server_redirects.emplace_back(
          filter_urls[position],
          btm::GetRedirectSourceId(&navigation_handle, chain_index),
          IsWrite(access_types[position]));
      ++position;
    }

    // The last entry belongs to the committed URL appended above.
    BtmDataAccessType committed_access = access_types.back();
    return {std::move(info), committed_access};
  }

  NAVIGATION_HANDLE_USER_DATA_KEY_DECL();

 private:
  CookieAccessFilter filter_;
  // A set rather than a vector: several callers may record server redirects on
  // the same `NavigationState`, so recording the same redirect repeatedly has
  // to be idempotent.
  std::set<size_t> server_redirect_chain_indices_;
};

// Counterpart of the NAVIGATION_HANDLE_USER_DATA_KEY_DECL() inside
// NavigationState; presumably provides the definition of the user-data key
// declared there.
NAVIGATION_HANDLE_USER_DATA_KEY_IMPL(NavigationState);

}  // namespace

void BtmPageVisitObserver::DidStartNavigation(
    NavigationHandle* navigation_handle) {
  // Only cross-document navigations in the primary page get a
  // NavigationState attached; everything else is ignored.
  const bool should_track = IsInPrimaryPage(*navigation_handle) &&
                            !navigation_handle->IsSameDocument();
  if (should_track) {
    NavigationState::CreateForNavigationHandle(*navigation_handle);
  }
}

// Records, on the navigation's NavigationState, the redirect-chain index of
// the URL that just issued a server redirect.
void BtmPageVisitObserver::DidRedirectNavigation(
    NavigationHandle* navigation_handle) {
  // Ignore irrelevant navigations.
  if (navigation_handle->IsSameDocument() ||
      !navigation_handle->IsInPrimaryMainFrame()) {
    return;
  }

  NavigationState* navigation_state =
      NavigationState::GetForNavigationHandle(*navigation_handle);
  if (!navigation_state) {
    // We've started observing this navigation after it started. We have no idea
    // if we've missed redirects already or not, so we skip recording anything
    // so as not to give bad info.
    return;
  }

  // The last item in the redirect chain is the current navigation target (the
  // destination of the redirect). The most recent redirector is the one before
  // that, so the chain must hold at least two entries.
  const size_t chain_length = navigation_handle->GetRedirectChain().size();
  if (chain_length < 2) {
    // There is no redirector to record. Without this guard the unsigned
    // subtraction below would wrap around and record a bogus index, which
    // CreateNavigationInfo() would later use to index the redirect chain.
    return;
  }
  navigation_state->RecordServerRedirectAtChainIndex(chain_length - 2);
}

void BtmPageVisitObserver::DidFinishNavigation(
    NavigationHandle* navigation_handle) {
  // Only a committed, cross-document navigation of the primary main frame ends
  // the current page visit; ignore everything else.
  const bool ends_page_visit = navigation_handle->IsInPrimaryMainFrame() &&
                               navigation_handle->HasCommitted() &&
                               !navigation_handle->IsSameDocument();
  if (!ends_page_visit) {
    return;
  }

  NavigationState* navigation_state =
      NavigationState::GetForNavigationHandle(*navigation_handle);
  if (navigation_state == nullptr) {
    // We began observing this WebContents after the navigation had already
    // started and are only seeing its end. There isn't enough info to report,
    // so ignore it.
    return;
  }

  // Close out the page we are leaving.
  const base::Time finish_time = clock_->Now();
  current_page_.visit_duration = finish_time - last_page_change_time_;

  auto info_and_access =
      navigation_state->CreateNavigationInfo(*navigation_handle);
  const BtmDataAccessType committed_url_access = info_and_access.second;

  // Rather than reporting immediately, queue the visit and report it after a
  // short delay, in case late cookie notifications still arrive for it.
  pending_visits_.emplace_back(std::move(current_page_),
                               std::move(info_and_access.first));
  const base::TimeDelta report_delay = base::Seconds(1);
  base::SequencedTaskRunner::GetCurrentDefault()->PostDelayedTask(
      FROM_HERE,
      base::BindOnce(&BtmPageVisitObserver::ReportVisit,
                     weak_factory_.GetWeakPtr()),
      report_delay);

  // Start tracking the page that was just committed.
  current_page_ = BtmPageVisitInfo{
      .url = navigation_handle->GetURL(),
      .source_id = navigation_handle->GetNextPageUkmSourceId(),
      .had_active_storage_access = IsWrite(committed_url_access)};
  last_page_change_time_ = finish_time;
}

void BtmPageVisitObserver::ReportVisit() {
  CHECK(!pending_visits_.empty());
  VisitTuple visit = std::move(pending_visits_.front());
  pending_visits_.pop_front();
  callback_.Run(std::move(visit.prev_page), std::move(visit.navigation));
}

// Marks the current page as having had active storage access, provided the
// access came from a frame in the primary page and was not blocked.
// `storage_type` is not consulted.
void BtmPageVisitObserver::NotifyStorageAccessed(
    RenderFrameHost* render_frame_host,
    blink::mojom::StorageTypeAccessed storage_type,
    bool blocked) {
  const bool in_primary_page = render_frame_host->GetPage().IsPrimary();
  if (in_primary_page && !blocked) {
    current_page_.had_active_storage_access = true;
  }
}

void BtmPageVisitObserver::OnCookiesAccessed(
    RenderFrameHost* render_frame_host,
    const CookieAccessDetails& details) {
  const bool from_navigation =
      details.source == CookieAccessDetails::Source::kNavigation;

  // Drop irrelevant cookie accesses: ones blocked by policy, passive ones
  // (navigational reads), and ones from frames that are not, and were not, in
  // the primary page.
  if (details.blocked_by_policy) {
    return;
  }
  if (from_navigation && details.type == CookieAccessDetails::Type::kRead) {
    return;
  }
  if (!btm::IsOrWasInPrimaryPage(*render_frame_host)) {
    return;
  }

  // Accesses by iframes and other subresources are attributed to the
  // first-party page that embeds them.
  const GURL& first_party_url = GetFirstPartyURL(*render_frame_host);

  // BTM is only turned on when non-CHIPS 3PCs are blocked. Mirror that by
  // only keeping accesses that are CHIPS or same-site with the first party.
  if (!(HasCHIPS(details.cookie_access_result_list) ||
        IsSameSiteForBtm(first_party_url, details.url))) {
    return;
  }

  // This may be a late report for a redirect of a visit that is still pending.
  // Only Navigation cookie accesses are attributed to redirects.
  if (from_navigation) {
    for (VisitTuple& pending : pending_visits_) {
      for (BtmServerRedirectInfo& hop : pending.navigation.server_redirects) {
        if (details.url != hop.url) {
          continue;
        }
        hop.did_write_cookies = true;
        return;
      }
    }
  }

  // Cookie access within the current page.
  if (render_frame_host->GetMainFrame()->IsInPrimaryMainFrame()) {
    current_page_.had_active_storage_access = true;
    return;
  }

  // Otherwise the cookie was accessed by a subresource request in a
  // now-bfcached page; try to find that page's pending visit and mark it.
  for (VisitTuple& pending : pending_visits_) {
    if (first_party_url != pending.prev_page.url) {
      continue;
    }
    pending.prev_page.had_active_storage_access = true;
    return;
  }
}

void BtmPageVisitObserver::OnCookiesAccessed(
    NavigationHandle* navigation_handle,
    const CookieAccessDetails& details) {
  // Drop irrelevant cookie accesses. Anything that is not a cookie change is
  // dropped too, which covers navigational cookie reads: those are passive
  // storage accesses.
  if (details.blocked_by_policy) {
    return;
  }
  if (details.type != CookieAccessDetails::Type::kChange) {
    return;
  }
  if (!IsInPrimaryPage(*navigation_handle)) {
    return;
  }

  if (!navigation_handle->IsInMainFrame()) {
    // Subframe navigation.
    const GURL& first_party_url = GetFirstPartyURL(*navigation_handle);
    // BTM is only turned on when non-CHIPS 3PCs are blocked. Mirror that by
    // only counting accesses that are CHIPS or same-site with the first party.
    const bool counts_as_access =
        HasCHIPS(details.cookie_access_result_list) ||
        IsSameSiteForBtm(first_party_url, details.url);
    if (counts_as_access) {
      // Subframe storage accesses are attributed to the top-level page.
      current_page_.had_active_storage_access = true;
    }
    return;
  }

  // Non-navigational cookie accesses reported through this event are ignored:
  // subresource accesses can't be reliably attributed to the URL that is
  // loading the subresource.
  // TODO - https://crbug.com/408168195: Attribute non-navigation accesses e.g.
  // from Early Hints, to the correct URL.
  if (details.source == CookieAccessDetails::Source::kNonNavigation) {
    return;
  }

  // A missing state means we started observing this WebContents after the
  // navigation started. Just ignore it; we'll handle the next navigation.
  if (NavigationState* navigation_state =
          NavigationState::GetForNavigationHandle(*navigation_handle)) {
    navigation_state->RecordCookieAccess(details.url, details.type);
  }
}

// Records a user activation on the current page when it is received by the
// primary main frame.
void BtmPageVisitObserver::FrameReceivedUserActivation(
    RenderFrameHost* render_frame_host) {
  if (render_frame_host->IsInPrimaryMainFrame()) {
    current_page_.received_user_activation = true;
    return;
  }

  // Any other frame is not recorded, but its outermost main frame (or
  // embedder) is required to be the primary main frame.
  CHECK(render_frame_host->GetOutermostMainFrameOrEmbedder()
            ->IsInPrimaryMainFrame());
}

// Records a successful WebAuthn assertion on the current page when it is
// reported for the primary main frame.
void BtmPageVisitObserver::WebAuthnAssertionRequestSucceeded(
    RenderFrameHost* render_frame_host) {
  // Notifications for any other frame are currently dropped.
  // TODO: crbug.com/448047352 - Investigate (and handle, if applicable) late
  //   WAA notifications.
  if (render_frame_host->IsInPrimaryMainFrame()) {
    current_page_.had_successful_web_authn_assertion = true;
  }
}

}  // namespace content