// Snapshot of commit 910e62b5 (created January 15; taken from the commit history view).
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/browsing_topics/browsing_topics_state.h"

#include "base/base64.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/json/json_file_value_serializer.h"
#include "base/json/json_writer.h"
#include "base/json/values_util.h"
#include "base/metrics/histogram_functions.h"
#include "base/strings/string_number_conversions.h"
#include "base/task/task_traits.h"
#include "base/task/thread_pool.h"
#include "components/browsing_topics/common/common_types.h"
#include "components/browsing_topics/util.h"
#include "third_party/blink/public/common/features.h"

namespace browsing_topics {

namespace {

// Upper bound on how frequently the state file is written to disk.
constexpr base::TimeDelta kSaveDelay = base::Milliseconds(2500);

// Keys used in the serialized state dictionary.
constexpr char kEpochsNameKey[] = "epochs";
constexpr char kNextScheduledCalculationTimeNameKey[] =
    "next_scheduled_calculation_time";
constexpr char kHexEncodedHmacKeyNameKey[] = "hex_encoded_hmac_key";

// `config_version` is a deprecated key. Do not reuse.

// Loads the state file at `file_path` and deserializes it as JSON. Returns a
// `LoadResult` that records whether the file exists and carries the
// deserialized value; the value is null when the file does not exist, and is
// whatever the deserializer returned otherwise.
std::unique_ptr<BrowsingTopicsState::LoadResult> LoadFileOnBackendTaskRunner(
    const base::FilePath& file_path) {
  if (!base::PathExists(file_path)) {
    return std::make_unique<BrowsingTopicsState::LoadResult>(
        /*file_exists=*/false, nullptr);
  }

  // Error details are not requested; the caller only distinguishes between a
  // present and an absent value.
  JSONFileValueDeserializer file_deserializer(file_path);
  std::unique_ptr<base::Value> parsed_value =
      file_deserializer.Deserialize(/*error_code=*/nullptr,
                                    /*error_message=*/nullptr);

  return std::make_unique<BrowsingTopicsState::LoadResult>(
      /*file_exists=*/true, std::move(parsed_value));
}

// Returns whether epoch data stored under config version `preexisting` can be
// used when the running config version is `current`.
bool AreConfigVersionsCompatible(int preexisting, int current) {
  // A failed topics calculation is stored with config version 0, so 0 is a
  // legal preexisting value; the current version is always a real one.
  CHECK_GE(preexisting, 0);
  CHECK_GE(current, 1);
  CHECK_LE(current, ConfigVersion::kMaxValue);

  // A stored version newer than anything this build knows about can occur in
  // the rare case where Chrome rolls back to an earlier version.
  if (preexisting > ConfigVersion::kMaxValue) {
    return false;
  }

  // An epoch from a failed calculation (version 0) is compatible with any
  // version, and identical versions are trivially compatible.
  if (preexisting == 0 || preexisting == current) {
    return true;
  }

  // Versions 1 and 2 are forward and backward compatible.
  const bool initial_to_prioritized =
      preexisting == ConfigVersion::kInitial &&
      current == ConfigVersion::kUsePrioritizedTopicsList;
  const bool prioritized_to_initial =
      preexisting == ConfigVersion::kUsePrioritizedTopicsList &&
      current == ConfigVersion::kInitial;
  return initial_to_prioritized || prioritized_to_initial;
}

}  // namespace

// Bundles the outcome of loading the state file: whether the file existed and
// the deserialized value (which may be null). Takes ownership of `value`.
BrowsingTopicsState::LoadResult::LoadResult(bool file_exists,
                                            std::unique_ptr<base::Value> value)
    : file_exists(file_exists), value(std::move(value)) {}

BrowsingTopicsState::LoadResult::~LoadResult() = default;

// Sets up the file writer for the "BrowsingTopicsState" file under
// `profile_path` and kicks off an asynchronous load of that file.
// `loaded_callback` is handed to `DidLoadFile()`, which runs it once loading
// has finished.
BrowsingTopicsState::BrowsingTopicsState(const base::FilePath& profile_path,
                                         base::OnceClosure loaded_callback)
    // File reads and writes share one sequenced, blocking-allowed task runner.
    : backend_task_runner_(base::ThreadPool::CreateSequencedTaskRunner(
          {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
           base::TaskShutdownBehavior::BLOCK_SHUTDOWN})),
      writer_(profile_path.Append(FILE_PATH_LITERAL("BrowsingTopicsState")),
              backend_task_runner_,
              kSaveDelay,
              /*histogram_suffix=*/"BrowsingTopicsState") {
  // Read the file on the backend runner and reply to `DidLoadFile()`. The
  // reply is bound to a weak pointer, so it is dropped if `this` is destroyed
  // first.
  backend_task_runner_->PostTaskAndReplyWithResult(
      FROM_HERE, base::BindOnce(&LoadFileOnBackendTaskRunner, writer_.path()),
      base::BindOnce(&BrowsingTopicsState::DidLoadFile,
                     weak_ptr_factory_.GetWeakPtr(),
                     std::move(loaded_callback)));
}

// Performs any write that is still pending so that scheduled state changes are
// not dropped when this object goes away.
BrowsingTopicsState::~BrowsingTopicsState() {
  if (writer_.HasPendingWrite()) {
    writer_.DoScheduledWrite();
  }
}

// Drops every stored epoch. A save is scheduled only if something was actually
// removed.
void BrowsingTopicsState::ClearAllTopics() {
  DCHECK(loaded_);

  // Nothing stored means nothing to clear and nothing new to persist.
  if (epochs_.empty()) {
    return;
  }

  epochs_.clear();
  ScheduleSave();
}

void BrowsingTopicsState::ClearOneEpoch(size_t epoch_index) {
  DCHECK(loaded_);

  epochs_[epoch_index].ClearTopics();
  ScheduleSave();
}

// Clears `topic` from every stored epoch and schedules a save.
void BrowsingTopicsState::ClearTopic(Topic topic) {
  // Same precondition as the other mutators; checking it here surfaces a
  // violation before any epoch is modified rather than only in
  // `ScheduleSave()`.
  DCHECK(loaded_);

  for (EpochTopics& epoch : epochs_) {
    epoch.ClearTopic(topic);
  }

  ScheduleSave();
}

void BrowsingTopicsState::ClearContextDomain(
    const HashedDomain& hashed_context_domain) {
  for (EpochTopics& epoch : epochs_) {
    epoch.ClearContextDomain(hashed_context_domain);
  }

  ScheduleSave();
}

// Appends `epoch_topics` as the newest epoch, schedules its expiration, and
// schedules a save. If the number of stored epochs then exceeds
// `kBrowsingTopicsNumberOfEpochsToExpose` + 1, the oldest epoch is removed and
// returned; otherwise returns std::nullopt.
std::optional<EpochTopics> BrowsingTopicsState::AddEpoch(
    EpochTopics epoch_topics) {
  DCHECK(loaded_);

  epochs_.push_back(std::move(epoch_topics));

  // The expiration callback identifies the epoch by its calculation time.
  EpochTopics& newest_epoch = epochs_.back();
  newest_epoch.ScheduleExpiration(base::BindOnce(
      &BrowsingTopicsState::OnEpochExpired, weak_ptr_factory_.GetWeakPtr(),
      newest_epoch.calculation_time()));

  // Drop the oldest epoch once it is no longer useful.
  const size_t max_stored_epochs =
      static_cast<size_t>(
          blink::features::kBrowsingTopicsNumberOfEpochsToExpose.Get()) +
      1;

  std::optional<EpochTopics> removed_epoch_topics;
  if (epochs_.size() > max_stored_epochs) {
    removed_epoch_topics = std::move(epochs_.front());
    epochs_.pop_front();
  }

  ScheduleSave();
  return removed_epoch_topics;
}

void BrowsingTopicsState::ScheduleEpochsExpiration() {
  base::Time expired_calculation_time =
      base::Time::Now() -
      blink::features::kBrowsingTopicsEpochRetentionDuration.Get();

  // Remove expired epochs synchronously.
  base::EraseIf(epochs_, [&expired_calculation_time](const EpochTopics& epoch) {
    return epoch.calculation_time() <= expired_calculation_time;
  });

  for (EpochTopics& epoch : epochs_) {
    epoch.ScheduleExpiration(base::BindOnce(
        &BrowsingTopicsState::OnEpochExpired, weak_ptr_factory_.GetWeakPtr(),
        epoch.calculation_time()));
  }

  ScheduleSave();
}

// Expiration callback: removes the epoch(s) calculated at `calculation_time`
// and schedules a save.
void BrowsingTopicsState::OnEpochExpired(base::Time calculation_time) {
  // Calculation times are typically unique, but erase every match so that
  // potential duplicates are handled as well.
  auto calculated_at_expired_time =
      [calculation_time](const EpochTopics& candidate) {
        return candidate.calculation_time() == calculation_time;
      };
  base::EraseIf(epochs_, calculated_at_expired_time);

  ScheduleSave();
}

// Sets the next scheduled calculation time to `delay` from now and schedules a
// save. `delay` must not be negative.
void BrowsingTopicsState::UpdateNextScheduledCalculationTime(
    base::TimeDelta delay) {
  DCHECK(loaded_);
  DCHECK(!delay.is_negative());

  const base::Time current_time = base::Time::Now();
  next_scheduled_calculation_time_ = current_time + delay;

  ScheduleSave();
}

std::vector<const EpochTopics*> BrowsingTopicsState::EpochsForSite(
    const std::string& top_domain) const {
  DCHECK(loaded_);

  if (epochs_.empty()) {
    return {};
  }

  const size_t kNumberOfEpochsToExpose = static_cast<size_t>(
      blink::features::kBrowsingTopicsNumberOfEpochsToExpose.Get());

  DCHECK_GT(kNumberOfEpochsToExpose, 0u);

  base::Time now = base::Time::Now();

  // Derive a per-user per-site per-epoch time delta in the range of
  // [0, `kBrowsingTopicsMaxEpochIntroductionDelay`). The latest epoch will only
  // be used after `site_epoch_sticky_introduction_delay` has elapsed since the
  // last calculation finish time (i.e. `next_scheduled_calculation_time_` -
  // `kBrowsingTopicsTimePeriodPerEpoch`). This way, each site will see a
  // different epoch switch time.
  base::TimeDelta site_epoch_sticky_introduction_delay =
      CalculateSiteStickyIntroductionDelay(top_domain);

  size_t end_epoch_index = 0;
  if (now <= next_scheduled_calculation_time_ -
                 blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get() +
                 site_epoch_sticky_introduction_delay) {
    if (epochs_.size() < 2) {
      return {};
    }

    end_epoch_index = epochs_.size() - 2;
  } else {
    end_epoch_index = epochs_.size() - 1;
  }

  size_t start_epoch_index = (end_epoch_index + 1 >= kNumberOfEpochsToExpose)
                                 ? end_epoch_index + 1 - kNumberOfEpochsToExpose
                                 : 0;

  std::vector<const EpochTopics*> result;

  for (size_t i = start_epoch_index; i <= end_epoch_index; ++i) {
    const EpochTopics& epoch = epochs_[i];

    base::Time earliest_valid_epoch_time =
        now + CalculateSiteStickyPhaseOutTimeOffset(top_domain, epoch) -
        blink::features::kBrowsingTopicsEpochRetentionDuration.Get();

    if (epoch.calculation_time() > earliest_valid_epoch_time) {
      result.emplace_back(&epoch);
    }
  }

  return result;
}

// Test-only accessor: returns whether the file writer currently has a pending
// write.
bool BrowsingTopicsState::HasScheduledSaveForTesting() const {
  return writer_.HasPendingWrite();
}

// Returns the delay after which the latest epoch is introduced to
// `top_domain`. The delay is derived from a hash over `hmac_key_`, the latest
// epoch's calculation time and `top_domain`, reduced modulo
// `kBrowsingTopicsMaxEpochIntroductionDelay` in seconds. Requires at least one
// stored epoch.
base::TimeDelta BrowsingTopicsState::CalculateSiteStickyIntroductionDelay(
    const std::string& top_domain) const {
  CHECK(!epochs_.empty());

  const EpochTopics& latest_epoch = epochs_.back();

  const uint64_t introduction_hash =
      HashTopDomainForEpochIntroductionTimeDecision(
          hmac_key_, latest_epoch.calculation_time(), top_domain);

  // The random-over period cannot exceed an epoch, because:
  // 1. Only the last `kNumberOfEpochsToExpose` + 1 epochs are kept. A longer
  //    random-over period would require storing more historical epochs to
  //    meet the `kNumberOfEpochsToExpose` configuration.
  // 2. For past, non-latest epochs, the exact delimitation times (i.e.
  //    calculation finish times) are not stored. Using the calculation start
  //    time as an approximation is not 100% accurate.
  DCHECK_LE(blink::features::kBrowsingTopicsMaxEpochIntroductionDelay.Get(),
            blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get());

  // The delay bound is used as a modulus below, so it must be positive.
  DCHECK_GT(blink::features::kBrowsingTopicsMaxEpochIntroductionDelay.Get()
                .InSeconds(),
            0);

  // A manually triggered calculation makes the latest epoch available
  // immediately, for testing purposes.
  if (latest_epoch.from_manually_triggered_calculation()) {
    return base::Seconds(0);
  }

  return base::Seconds(
      introduction_hash %
      blink::features::kBrowsingTopicsMaxEpochIntroductionDelay.Get()
          .InSeconds());
}

// Returns the phase-out time offset of `epoch` for `top_domain`. The offset is
// derived from a hash over `hmac_key_`, the epoch's calculation time and
// `top_domain`, reduced modulo `kBrowsingTopicsMaxEpochPhaseOutTimeOffset` in
// seconds.
base::TimeDelta BrowsingTopicsState::CalculateSiteStickyPhaseOutTimeOffset(
    const std::string& top_domain,
    const EpochTopics& epoch) const {
  uint64_t epoch_phase_out_time_decision_hash =
      HashTopDomainForEpochPhaseOutTimeDecision(
          hmac_key_, epoch.calculation_time(), top_domain);

  const auto max_phase_out_offset_seconds =
      blink::features::kBrowsingTopicsMaxEpochPhaseOutTimeOffset.Get()
          .InSeconds();

  // The value is used as a modulus below, so it must be positive. This
  // mirrors the check done for the introduction delay.
  DCHECK_GT(max_phase_out_offset_seconds, 0);

  return base::Seconds(epoch_phase_out_time_decision_hash %
                       max_phase_out_offset_seconds);
}

// Returns a callback that serializes a snapshot of the current state to
// pretty-printed JSON. The snapshot is taken here, when the callback is
// created; the callback yields std::nullopt if JSON writing fails.
base::ImportantFileWriter::BackgroundDataProducerCallback
BrowsingTopicsState::GetSerializedDataProducerForBackgroundSequence() {
  DCHECK(loaded_);

  return base::BindOnce(
      [](base::Value snapshot) -> std::optional<std::string> {
        // This runs on the background sequence.
        std::string json;
        const bool written = base::JSONWriter::WriteWithOptions(
            snapshot, base::JSONWriter::OPTIONS_PRETTY_PRINT, &json);
        if (written) {
          return json;
        }
        return std::nullopt;
      },
      base::Value(ToDictValue()));
}

// Serializes the state into a dictionary holding the list of epochs, the next
// scheduled calculation time, and the hex-encoded HMAC key.
base::Value::Dict BrowsingTopicsState::ToDictValue() const {
  DCHECK(loaded_);

  base::Value::List epochs_list;
  for (const EpochTopics& epoch : epochs_) {
    epochs_list.Append(epoch.ToDictValue());
  }

  base::Value::Dict result_dict;
  result_dict.Set(kEpochsNameKey, std::move(epochs_list));

  result_dict.Set(kNextScheduledCalculationTimeNameKey,
                  base::TimeToValue(next_scheduled_calculation_time_));

  // Encode the key once, directly into the dictionary entry.
  result_dict.Set(kHexEncodedHmacKeyNameKey, base::HexEncode(hmac_key_));

  return result_dict;
}

// Asks the file writer to schedule a write of the current state, with `this`
// acting as the background data serializer. Must only be called once the
// state has been loaded.
void BrowsingTopicsState::ScheduleSave() {
  DCHECK(loaded_);
  writer_.ScheduleWriteWithBackgroundDataSerializer(this);
}

// Reply for the asynchronous file load. Initializes the in-memory state from
// `load_result`, records the load status histogram, marks the state as loaded,
// schedules a save when the on-disk file needs to be (re)written, and finally
// runs `loaded_callback`.
void BrowsingTopicsState::DidLoadFile(base::OnceClosure loaded_callback,
                                      std::unique_ptr<LoadResult> load_result) {
  DCHECK(load_result);
  DCHECK(!loaded_);

  bool load_succeeded = false;
  bool needs_save = false;

  if (load_result->file_exists && load_result->value) {
    // JSON deserialization succeeded in general. Parse the value into the
    // individual fields.
    const ParseResult parsed = ParseValue(*(load_result->value));
    load_succeeded = parsed.success;
    needs_save = parsed.should_save_state_to_file;
  } else {
    // Either there is no file yet, or it could not be read / deserialized.
    // In both cases the file is (re)written.
    needs_save = true;

    if (!load_result->file_exists) {
      // First load for this profile: generate the `hmac_key_` and save it.
      // This ensures the key is only generated once per profile, as data
      // derived from the key may be subsequently stored elsewhere.
      hmac_key_ = GenerateRandomHmacKey();
      load_succeeded = true;
    }
  }

  base::UmaHistogramBoolean(
      "BrowsingTopics.BrowsingTopicsState.LoadFinishStatus", load_succeeded);

  loaded_ = true;

  if (needs_save) {
    ScheduleSave();
  }

  std::move(loaded_callback).Run();
}

// Populates `hmac_key_`, `epochs_` and `next_scheduled_calculation_time_` from
// the deserialized file content `value`.
//
// Returns a `ParseResult` whose `success` is false when `value` is malformed
// (not a dictionary, a missing or invalid HMAC key, a missing epochs list, a
// non-dictionary epoch entry, or a missing or invalid next scheduled
// calculation time); in all of those cases `should_save_state_to_file` is
// true. If any stored epoch has an incompatible config version, the epochs
// are cleared and the result is a success that still requests a save.
BrowsingTopicsState::ParseResult BrowsingTopicsState::ParseValue(
    const base::Value& value) {
  DCHECK(!loaded_);

  const base::Value::Dict* dict_value = value.GetIfDict();
  if (!dict_value) {
    return ParseResult{.success = false, .should_save_state_to_file = true};
  }

  const std::string* hex_encoded_hmac_key =
      dict_value->FindString(kHexEncodedHmacKeyNameKey);
  if (!hex_encoded_hmac_key) {
    return ParseResult{.success = false, .should_save_state_to_file = true};
  }

  if (!base::HexStringToSpan(*hex_encoded_hmac_key, hmac_key_)) {
    // `HexStringToSpan` may partially fill the `hmac_key_` up until the
    // failure. Reset it to empty.
    hmac_key_.fill(0);
    return ParseResult{.success = false, .should_save_state_to_file = true};
  }

  const base::Value::List* epochs_value = dict_value->FindList(kEpochsNameKey);
  if (!epochs_value) {
    return ParseResult{.success = false, .should_save_state_to_file = true};
  }

  for (const base::Value& epoch_value : *epochs_value) {
    const base::Value::Dict* epoch_dict_value = epoch_value.GetIfDict();
    if (!epoch_dict_value) {
      return ParseResult{.success = false, .should_save_state_to_file = true};
    }

    epochs_.push_back(EpochTopics::FromDictValue(*epoch_dict_value));
  }

  for (const EpochTopics& epoch : epochs_) {
    // If any preexisting epoch's version is incompatible with the current
    // version, start with a fresh `epochs_`. Returning right after `clear()`
    // means the invalidated loop iterators are never touched again.
    if (!AreConfigVersionsCompatible(epoch.config_version(),
                                     CurrentConfigVersion())) {
      epochs_.clear();
      return ParseResult{.success = true, .should_save_state_to_file = true};
    }
  }

  const base::Value* next_scheduled_calculation_time_value =
      dict_value->Find(kNextScheduledCalculationTimeNameKey);
  if (!next_scheduled_calculation_time_value) {
    return ParseResult{.success = false, .should_save_state_to_file = true};
  }

  // The entry comes from a file on disk and may not hold a valid time.
  // Treat that as a parse failure rather than unconditionally unwrapping the
  // optional.
  const std::optional<base::Time> parsed_next_scheduled_calculation_time =
      base::ValueToTime(next_scheduled_calculation_time_value);
  if (!parsed_next_scheduled_calculation_time) {
    return ParseResult{.success = false, .should_save_state_to_file = true};
  }

  next_scheduled_calculation_time_ = *parsed_next_scheduled_calculation_time;

  return ParseResult{.success = true, .should_save_state_to_file = false};
}

}  // namespace browsing_topics