// Revision 910e62b5, created on January 15 (from the commit history view).
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

module actor.mojom;

import "mojo/public/mojom/base/time.mojom";
import "third_party/blink/public/mojom/content_extraction/ai_page_content.mojom";
import "ui/gfx/geometry/mojom/geometry.mojom";

// This interface is meant to largely mirror the
// BrowserAction::ActionInformation proto.

// Tool-specific target: identifies where a tool invocation should be applied.
// As a mojom union, exactly one of the members below is set at a time.
union ToolTarget {
  // DOMNodeId for the node this invocation should be applied to.
  int32 dom_node_id;

  // An x,y pair representing a location relative to the origin at the top-left
  // corner of the local root frame (i.e. the blink::WebWidget/RenderWidget).
  // Expressed in device-independent pixels (DIP).
  gfx.mojom.Point coordinate_dip;
};

// Attributes of the Node target returned from a hit test on the last observed
// APC (presumably "AI Page Content", matching the AIPageContentAttributes type
// used below -- confirm).
struct ObservedToolTarget {
  // Page-content attributes of the node that the hit test returned.
  blink.mojom.AIPageContentAttributes node_attribute;
};

// Information specific to a click action. The location of the click is not
// part of this struct; see the `target` member of ToolInvocation.
struct ClickAction {
  // Corresponds to ClickAction.ClickType.
  // NOTE(review): presumably selects the left vs. right mouse button --
  // confirm against the proto definition.
  enum Type {
    kLeft = 1,
    kRight = 2,
  };

  // Corresponds to ClickAction.ClickCount.
  // NOTE(review): presumably single-click vs. double-click -- confirm against
  // the proto definition.
  enum Count {
    kSingle = 1,
    kDouble = 2,
  };

  // The kind of click to perform.
  Type type;

  // How many clicks to perform.
  Count count;
};

// Information specific to a mouse move action.
// Intentionally has no members: the target, which is carried by
// ToolInvocation, is the only parameter this action needs.
struct MouseMoveAction {
};

// Information specific to a scroll-to action.
// Intentionally has no members: the target, which is carried by
// ToolInvocation, is the only parameter this action needs.
struct ScrollToAction {
};

// Information specific to the type (keyboard input) action.
//
// Note: TypeAction may be implemented in multiple steps (e.g. individual key
// events). In some situations, part of the action may complete while another
// part fails. TypeAction currently returns failure if any part of the action
// fails.
// TODO(crbug.com/409333494): Consider how to handle partial success cases like
// this.
struct TypeAction {
  // How the text should be inserted into the target.
  // NOTE(review): judging by the names, kDeleteExisting replaces the target's
  // current content while kPrepend/kAppend insert before/after it -- confirm
  // against the implementation.
  enum Mode {
    kDeleteExisting = 0,
    kPrepend = 1,
    kAppend = 2,
  };

  // Insertion mode to use for `text`.
  Mode mode;

  // The text to type. Should be ASCII only for now.
  string text;

  // NOTE(review): presumably requests an Enter key press after `text` has been
  // typed -- confirm against the implementation.
  bool follow_by_enter;
};

// Scroll action performs a scroll on the page's viewport or on a specified
// target element.
//
// It currently reports success even for a partial scroll, i.e. when the page
// scrolled in the given direction but by less than the given distance.
// NOTE(review): the original comment states that it "will provide more precise
// information about how much it scrolls"; this reads as a planned improvement
// rather than current behavior -- confirm.
struct ScrollAction {
  // Corresponds to ScrollAction.ScrollDirection.
  // This must be kept in sync with ScrollAction.ScrollDirection enum in
  // components/optimization_guide/proto/features/actions_data.proto.
  enum ScrollDirection {
    // Scroll left.
    kLeft = 1,
    // Scroll right.
    kRight = 2,
    // Scroll up.
    kUp = 3,
    // Scroll down.
    kDown = 4,
  };

  // Direction in which to scroll.
  ScrollDirection direction;

  // Scroll distance in device-independent pixels (DIPs). Should always be
  // positive; the direction is conveyed by `direction`, not by the sign.
  float distance;
};

// Action for selecting an option from a <select> element. The <select> element
// itself is identified by the target carried in ToolInvocation.
struct SelectAction {
  // The value of the option to set as the currently selected option. The action
  // fails if the given value doesn't match the value of one of the element's
  // <option> children. The comparison is case-sensitive.
  string value;
};

// Performs a left mouse button down on the from_target, then moves to and
// releases on the to_target.
// NOTE(review): there is no `from_target` member in this struct; presumably
// the starting point is the `target` member of the enclosing ToolInvocation --
// confirm.
struct DragAndReleaseAction {
  // Where the mouse is moved to and the button is released.
  ToolTarget to_target;
};

// Executes a script tool associated with the Document.
struct ScriptToolAction {
  // Name of the script tool to execute.
  string name;

  // Arguments passed to the script tool.
  // NOTE(review): the serialization format of this string (e.g. JSON) is not
  // visible in this file -- confirm against the caller.
  string input_arguments;
};

// Union of tool-specific actions. Exactly one member is set, which selects the
// tool to run and carries that tool's parameters. Members are declared in this
// order; do not reorder them, since declaration order determines the implicit
// ordinals.
union ToolAction {
  ClickAction click;
  DragAndReleaseAction drag_and_release;
  MouseMoveAction mouse_move;
  ScrollAction scroll;
  SelectAction select;
  TypeAction type;
  ScriptToolAction script_tool;
  ScrollToAction scroll_to;
};



// LINT.IfChange(ActionResultCode)

// Result codes for outcomes of an action.
// kOk     - Action was successful.
// 10-99   - An error occurred that is not specific to the tool.
// X00-X99 - A tool-specific error; each tool gets a reserved range of values.
//
// These codes will be used in UMA histograms and in
// optimization_guide.proto.BrowserActionResult.  New codes may be added to
// unused values, but existing codes should not be removed, reordered, or
// renumbered.
// Googlers: When making additions, also add the new value here:
// http://shortn/_gLyPxrRm6p
enum ActionResultCode {
  kOk = 0,

  // DEPRECATED. Use explicit error codes instead.
  // A generic error that does not have a more specific code. Avoid using this.
  // kError = 10,

  // Actions are not allowed on the current URL.
  kUrlBlocked = 11,

  // The task requires opening a new tab, and we were unable to do so.
  kNewTabCreationFailed = 12,

  // The tab we were trying to act on no longer exists.
  kTabWentAway = 13,

  // The task for the action no longer exists.
  kTaskWentAway = 14,

  // The tab we were trying to act on performed a cross-origin navigation and is
  // no longer able to be acted upon.
  kCrossOriginNavigation = 15,

  // An unknown tool was requested.
  kToolUnknown = 16,

  // The target frame in the renderer no longer exists or is shutting down.
  kFrameWentAway = 17,

  // The target DOMNodeId does not exist in the document.
  kInvalidDomNodeId = 18,

  // The targeted element cannot be modified because it is disabled.
  kElementDisabled = 19,

  // The target element is off screen or otherwise clipped by ancestor elements.
  kElementOffscreen = 20,

  // When using coordinate target, a supplied coordinate is outside the
  // viewport.
  kCoordinatesOutOfBounds = 21,

  // Arguments supplied to the tool are invalid.
  kArgumentsInvalid = 22,

  // The task for the action was paused.
  kTaskPaused = 23,

  // The tool executor in the renderer was destroyed before the tool finished
  // executing.
  kExecutorDestroyed = 24,

  // The target window no longer exists.
  kWindowWentAway = 25,

  // The current frame target under supplied coordinate does not match the
  // frame under that coordinate during time of observation.
  kFrameLocationChangedSinceObservation = 26,

  // A tool caused a navigation, but the navigation was blocked.
  kTriggeredNavigationBlocked = 27,

  // The requested tool actions array was empty.
  kEmptyActionSequence = 28,

  // The tool executor was busy with another tool, so could not invoke another.
  kExecutorBusy = 29,

  // The target element from hit test observed page content cannot be found in
  // the live DOM.
  kObservedTargetElementDestroyed  = 30,

  // The target element from hit test observed page content is different from
  // hit test on the live DOM.
  kObservedTargetElementChanged = 31,

  // The target node's interaction point is obscured by other elements in hit
  // testing.
  kTargetNodeInteractionPointObscured = 32,

  // The tool took too long to execute.
  kToolTimeout = 33,

  // The task triggered a file picker.
  kFilePickerTriggered = 34,

  // File picker was confirmed.
  kFilePickerConfirmed = 35,

  // File picker was cancelled.
  kFilePickerCancelled = 36,

  // The execution engine already had an existing action sequence when a new
  // sequence was provided.
  kExecutionEngineExistingAction = 37,

  // Navigation to an external protocol was blocked.
  kExternalProtocolNavigationBlocked = 38,

  // The renderer process crashed.
  kRendererCrashed = 39,

  // Only used as a placeholder for planned but unimplemented features. Do not
  // use as a generic error code.
  kNotImplemented = 40,

  // The PageTool invocation was canceled.
  kInvokeCanceled = 41,

  // Please see the comment above about adding new values.

  ///////////////////////////////////////////////////////////////////////
  // Codes 100-199: Errors for navigation. (Not part of the ToolAction union
  // as it's a browser-side tool.)

  // The requested URL was not valid.
  kNavigateInvalidUrl = 100,

  // The navigation failed before it started.
  kNavigateFailedToStart = 101,

  // The navigation committed to an error page.
  kNavigateCommittedErrorPage = 102,

  // Please see the comment above about adding new values.

  ///////////////////////////////////////////////////////////////////////
  // Codes 200-299: Errors for ClickAction.

  // The click event was suppressed.
  kClickSuppressed = 200,

  // Please see the comment above about adding new values.

  ///////////////////////////////////////////////////////////////////////
  // Codes 300-399: Errors for DragAndRelease.

  // The `from` coordinate is outside of the viewport bounds.
  kDragAndReleaseFromOffscreen = 300,

  // The `to` coordinate is outside of the viewport bounds.
  kDragAndReleaseToOffscreen = 301,

  // The initial mouse move event was suppressed.
  kDragAndReleaseFromMoveSuppressed = 302,

  // The mouse down event was suppressed.
  kDragAndReleaseDownSuppressed = 303,

  // The mouse move event to the target was suppressed.
  kDragAndReleaseToMoveSuppressed = 304,

  // The mouse up event was suppressed.
  kDragAndReleaseUpSuppressed = 305,

  // Please see the comment above about adding new values.

  ///////////////////////////////////////////////////////////////////////
  // Codes 400-499: Errors for MouseMoveAction.

  // The mouse move event was suppressed.
  kMouseMoveEventSuppressed = 400,

  // Please see the comment above about adding new values.

  ///////////////////////////////////////////////////////////////////////
  // Codes 500-599: Errors for ScrollAction.

  // The tool is targeting the viewport but the document doesn't have a
  // scrolling element for the viewport. (Note: this is a rare edge case in
  // quirks mode. This does NOT mean that the viewport isn't scrollable.)
  kScrollNoScrollingElement = 500,

  // The targeted element either has no overflow or the overflow is not user
  // scrollable.
  kScrollTargetNotUserScrollable = 501,

  // The offset of the target element didn't change after scrolling.
  kScrollOffsetDidNotChange = 502,

  // Please see the comment above about adding new values.

  ///////////////////////////////////////////////////////////////////////
  // Codes 600-699: Errors for SelectAction.

  // Target element was not a <select>.
  kSelectInvalidElement = 600,

  // Specified value to select does not exist as an <option> in the <select>.
  kSelectNoSuchOption = 601,

  // Specified value to select does exist but is disabled.
  kSelectOptionDisabled = 602,

  // Invoking the tool resulted in an unexpected value.
  // DEPRECATED: Unused
  // kSelectUnexpectedValue = 603,

  // Please see the comment above about adding new values.

  ///////////////////////////////////////////////////////////////////////
  // Codes 700-799: Errors for TypeAction.

  // The target provided exists but is not an Element.
  kTypeTargetNotElement = 700,

  // The target element is not focusable.
  kTypeTargetNotFocusable = 701,

  // The type tool does not support the requested characters.
  kTypeUnsupportedCharacters = 702,

  // Failed to map a character in the input string to a key.
  kTypeFailedMappingCharToKey = 703,

  // The key down event for one of the chars was suppressed by Blink.
  kTypeKeyDownSuppressed = 704,

  // The text to type is not a valid UTF-8 string.
  kTypeInvalidTextEncoding = 705,

  // Please see the comment above about adding new values.

  ///////////////////////////////////////////////////////////////////////
  // Codes 800-899: Errors for history tool. (Not part of the ToolAction union
  // as it's a browser-side tool.)

  // Calling GoBack/GoForward resulted in no navigations being created.
  kHistoryNoNavigationsCreated = 800,

  // Navigations were created but asynchronously cancelled before being started.
  kHistoryCancelledBeforeStart = 801,

  // Back traversal was requested when at beginning of session history.
  kHistoryNoBackEntries = 802,

  // Forward traversal was requested when at end of session history.
  kHistoryNoForwardEntries = 803,

  // History navigation was aborted before commit.
  kHistoryFailedBeforeCommit = 804,

  // History navigation committed to an error page.
  kHistoryErrorPage = 805,

  // The navigation entry at the current offset changed between validation and
  // time of use.
  kHistoryNavigationEntryChanged = 806,

  // Please see the comment above about adding new values.

  ///////////////////////////////////////////////////////////////////////
  // Codes 900-999: Errors for attempt login.

  // The login attempt failed because no credentials were available.
  kLoginNoCredentialsAvailable = 900,

  // The login attempt failed because the current page is not a login page.
  // The caller needs to first navigate to a login page.
  kLoginNotLoginPage = 901,

  // The login attempt failed because the credentials were entered, but another
  // action is needed to complete the login.
  kLoginCredentialsEnteredButAnotherActionNeeded = 902,

  // Neither the username, nor the password field could be filled.
  kLoginNoFillableFields = 903,

  // The use of the password manager is not allowed on the page (e.g. blocked
  // by enterprise policy).
  kLoginFillingNotAllowed = 904,

  // The page changed before the credential selected by the user was filled.
  kLoginPageChangedDuringSelection = 905,

  // The task is in a background tab and filling requires device reauth. The
  // user needs to focus that tab first.
  kLoginDeviceReauthRequired = 906,

  // The screen-lock-based authentication attempt failed (likely the user
  // cancelled it).
  kLoginDeviceReauthFailed = 907,

  // The login attempt failed because the feature is disabled.
  kLoginFeatureDisabled = 908,

  // Multiple login requests were made at the same time.
  kLoginTooManyRequests = 909,

  // Please see the comment above about adding new values.

  ///////////////////////////////////////////////////////////////////////
  // Codes 1000-1099: Errors for media control.

  // The media control action failed because there is no media on the page.
  kMediaControlNoMedia = 1000,

  // Please see the comment above about adding new values.

  ///////////////////////////////////////////////////////////////////////
  // Codes 1100-1199: Errors for form filling.

  // Autofill is not available.
  kFormFillingAutofillUnavailable = 1100,

  // No autofill suggestions are available for the fields.
  kFormFillingNoSuggestionsAvailable = 1101,

  // The user could not choose a suggestion because of an error showing the
  // suggestion selection dialog.
  kFormFillingDialogError = 1102,

  // The trigger field for form filling was not found. It could be that the page
  // has changed since the action was requested.
  kFormFillingFieldNotFound = 1103,

  // An error occurred during autofill.
  kFormFillingUnknownAutofillError = 1104,

  // No tab observation was found when the TOCTOU (time-of-check to
  // time-of-use) check was performed.
  kFormFillingNoLastTabObservation = 1105,

  // The suggestion ID selected by the user was invalid.
  kFormFillingInvalidSuggestionId = 1106,

  // Please see the comment above about adding new values.

  ///////////////////////////////////////////////////////////////////////
  // Codes 1200-1299: Errors for script tool.

  // The document did not return a response for the script tool.
  kScriptToolNoResponse = 1200,

  // Please see the comment above about adding new values.

  ///////////////////////////////////////////////////////////////////////
  // Codes 1300-1399: Errors for Chrome UI.

  // The Chrome Actor UI could not be shown.
  kActorUiError = 1300,

  // Please see the comment above about adding new values.
};

// LINT.ThenChange(//tools/metrics/histograms/metadata/actor/enums.xml:ActionResultCode)

// The task ID associated with the tool invocation. Wrapping the integer in a
// struct gives it a distinct type from other int32 values in this file (e.g.
// DOM node ids).
struct TaskId {
  // Raw integer value of the task ID.
  int32 id;
};

// All information required to invoke a tool in the renderer: which task is
// asking, what to do, and where to do it.
struct ToolInvocation {
  // The task ID owning this invocation.
  TaskId task_id;

  // The action to carry out in this invocation. The union member that is set
  // selects the tool.
  ToolAction action;

  // Target of this tool invocation (a DOM node or a coordinate).
  ToolTarget target;

  // Target from hit test on the last observed page content.
  // This is optional (nullable) and is used for checking if the action target
  // has changed since the last observation.
  ObservedToolTarget? observed_target;
};

// The outcome of a tool invocation.
struct ActionResult {
  // A code with the outcome of the tool invocation.
  ActionResultCode code;

  // Whether any action was taken on the page. This is used to know if we need
  // to wait for the page to settle.
  bool requires_page_stabilization;

  // If the code is not kOk, an English language message describing the error.
  string message;

  // If the action is a script tool, provides the result of that execution. This
  // is only populated if the execution was successful; otherwise it is null.
  string? script_tool_response;

  // Time at which execution of the action was complete and page
  // stabilization was started. This field may not be available (null) if the
  // action's execution did not fully complete.
  mojo_base.mojom.TimeTicks? execution_end_time;
};

// The type of the journal entry.
// NOTE(review): judging by the names, kBegin and kEnd bracket an event that has
// a duration while kInstant marks a single point in time -- confirm against
// the journal implementation.
enum JournalEntryType {
  kBegin,
  kEnd,
  kInstant,
};

// Details of a journal entry, expressed as a single key/value pair of strings.
// A JournalEntry carries an array of these.
struct JournalDetails {
  // Name of the detail.
  string key;

  // Value of the detail.
  string value;
};

// Represents a journal entry. This should only be used for logging and
// debugging. It should not be used to make logic decisions since a
// compromised renderer could lie about events (such as mismatched
// or missing begin and end events).
struct JournalEntry {
  // What type of entry this is.
  JournalEntryType type;

  // The task ID associated with this entry.
  TaskId task_id;

  // The time the event occurred at.
  mojo_base.mojom.Time timestamp;

  // The event name.
  string event;

  // Track UUID.
  // NOTE(review): presumably identifies the track on which this entry is
  // grouped when the journal is visualized -- confirm against the journal
  // implementation.
  uint64 track_uuid;

  // Specific details of the event logged, as key/value pairs.
  array<JournalDetails> details;
};

// Used to listen for new journal entries. Lives in the browser process and
// is used to receive entries from renderer processes.
interface JournalClient {
  // Delivers new journal entries. New log messages are sent in batches to
  // limit the frequency of calls. The method has no reply.
  AddEntriesToJournal(array<JournalEntry> entries);
};

// Used to monitor page stability in the renderer. Implemented by
// `actor::PageStabilityMonitor` and lives in the renderer process. The browser
// queries this interface to learn when the current page associated with
// `actor::PageStabilityMonitor` is stable.
//
// We CHECK that only one `NotifyWhenStable()` call is in flight at any given
// time.
interface PageStabilityMonitor {
  // Asks to be notified when the page is stable; the empty reply serves as
  // that notification.
  //
  // `observation_delay` is the amount of time to wait when observing tool
  // execution before starting to wait for page stability.
  NotifyWhenStable(mojo_base.mojom.TimeDelta observation_delay) => ();
};