#ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_
#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_
#include <memory>
#include <optional>
#include "base/containers/flat_map.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/weak_ptr.h"
#include "content/common/content_export.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "content/public/browser/speech_recognition_manager.h"
#include "content/public/browser/speech_recognition_session_config.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "media/mojo/mojom/speech_recognition.mojom.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "mojo/public/cpp/bindings/pending_receiver.h"
#include "mojo/public/cpp/bindings/remote.h"
#include "third_party/blink/public/mojom/mediastream/media_stream.mojom-forward.h"
// Forward declaration only: this header refers to media::AudioSystem solely
// through pointer types (a constructor parameter and a raw_ptr<> member), so
// the full definition is not needed here.
namespace media {
class AudioSystem;
}  // namespace media
namespace content {
// Forward declarations of content:: types that this header uses only by
// pointer, smart pointer, or friend declaration:
//   BrowserMainLoop                  - friend of SpeechRecognitionManagerImpl.
//   MediaStreamManager               - held as raw_ptr<>.
//   MediaStreamUIProxy               - held/passed as std::unique_ptr<>.
//   SpeechRecognitionManagerDelegate - held as std::unique_ptr<>.
//   SpeechRecognizer                 - held as scoped_refptr<>.
class BrowserMainLoop;
class MediaStreamManager;
class MediaStreamUIProxy;
class SpeechRecognitionManagerDelegate;
class SpeechRecognizer;
// Concrete SpeechRecognitionManager that also derives from
// SpeechRecognitionEventListener, so the same object exposes the
// session-management API and receives per-session recognition events.
// Sessions are identified by an int id and stored in |sessions_|. The
// constructor and destructor are protected; creation and deletion are limited
// to the friends declared below.
//
// NOTE(review): this header uses std::vector, mojo::PendingRemote,
// scoped_refptr and base::DeleteHelper without a matching direct #include in
// the visible include list; they are presumably pulled in transitively —
// consider including them explicitly.
class CONTENT_EXPORT SpeechRecognitionManagerImpl
: public SpeechRecognitionManager,
public SpeechRecognitionEventListener {
public:
// Static accessor for the manager instance.
// NOTE(review): whether this can return nullptr (e.g. before the instance is
// constructed) is determined in the .cc file — confirm before dereferencing.
static SpeechRecognitionManagerImpl* GetInstance();
// Session-management overrides. The CreateSession() overloads return an int;
// presumably the id of the newly created session that the other methods take
// as |session_id| — confirm in the .cc file.
int CreateSession(const SpeechRecognitionSessionConfig& config) override;
// Overload that additionally takes the mojo session receiver, the client
// remote, and an optional audio forwarder configuration.
int CreateSession(
const SpeechRecognitionSessionConfig& config,
mojo::PendingReceiver<media::mojom::SpeechRecognitionSession>
session_receiver,
mojo::PendingRemote<media::mojom::SpeechRecognitionSessionClient>
client_remote,
std::optional<SpeechRecognitionAudioForwarderConfig>
audio_forwarder_config) override;
void StartSession(int session_id) override;
void AbortSession(int session_id) override;
// Takes a (render process id, render frame id) pair instead of a session id.
void AbortAllSessionsForRenderFrame(int render_process_id,
int render_frame_id) override;
void StopAudioCaptureForSession(int session_id) override;
void UpdateRecognitionContextForSession(
int session_id,
const media::SpeechRecognitionRecognitionContext& recognition_context)
override;
// Returns a reference to the session's config; the lifetime of the referenced
// object is not visible from this header — do not hold it across calls that
// may delete the session (TODO confirm in the .cc file).
const SpeechRecognitionSessionConfig& GetSessionConfig(
int session_id) override;
// Returns the session's context by value.
SpeechRecognitionSessionContext GetSessionContext(int session_id) override;
bool UseOnDeviceSpeechRecognition(
const SpeechRecognitionSessionConfig& config) override;
// Recognition event overrides. Every callback carries the id of the session
// the event belongs to.
void OnRecognitionStart(int session_id) override;
void OnAudioStart(int session_id) override;
void OnSoundStart(int session_id) override;
void OnSoundEnd(int session_id) override;
void OnAudioEnd(int session_id) override;
void OnRecognitionEnd(int session_id) override;
void OnRecognitionResults(
int session_id,
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result)
override;
void OnRecognitionError(
int session_id,
const media::mojom::SpeechRecognitionError& error) override;
void OnAudioLevelsChange(int session_id,
float volume,
float noise_volume) override;
// CreateSession() overload that is not marked `override`. It takes the same
// four arguments as the overload above plus |can_render_frame_use_on_device|.
int CreateSession(
const SpeechRecognitionSessionConfig& config,
mojo::PendingReceiver<media::mojom::SpeechRecognitionSession>
session_receiver,
mojo::PendingRemote<media::mojom::SpeechRecognitionSessionClient>
client_remote,
std::optional<SpeechRecognitionAudioForwarderConfig>
audio_forwarder_config,
bool can_render_frame_use_on_device);
// Non-owning pointer to the delegate held in |delegate_|; nullptr when
// |delegate_| is empty.
SpeechRecognitionManagerDelegate* delegate() const { return delegate_.get(); }
protected:
// The constructor and destructor are protected, so these friends are what can
// create and destroy the object: BrowserMainLoop, the unit-test fixture, and
// the std::default_delete / base::DeleteHelper deleters.
friend class BrowserMainLoop;
friend class SpeechRecognitionManagerImplTest;
friend std::default_delete<SpeechRecognitionManagerImpl>;
friend class base::DeleteHelper<content::SpeechRecognitionManagerImpl>;
// Both arguments are stored as non-owning raw_ptr<> members
// (|audio_system_|, |media_stream_manager_|); presumably they must outlive
// this object — confirm with the caller.
SpeechRecognitionManagerImpl(media::AudioSystem* audio_system,
MediaStreamManager* media_stream_manager);
~SpeechRecognitionManagerImpl() override;
private:
// States of the per-session state machine (see GetSessionState()).
// SESSION_STATE_MAX_VALUE aliases the last real state.
enum FSMState {
SESSION_STATE_IDLE = 0,
SESSION_STATE_CAPTURING_AUDIO,
SESSION_STATE_WAITING_FOR_RESULT,
SESSION_STATE_MAX_VALUE = SESSION_STATE_WAITING_FOR_RESULT
};
// Events accepted by DispatchEvent(). EVENT_MAX_VALUE aliases the last real
// event.
enum FSMEvent {
EVENT_ABORT = 0,
EVENT_START,
EVENT_UPDATE_RECOGNITION_CONTEXT,
EVENT_STOP_CAPTURE,
EVENT_AUDIO_ENDED,
EVENT_RECOGNITION_ENDED,
EVENT_MAX_VALUE = EVENT_RECOGNITION_ENDED
};
// Per-session record stored in |sessions_|.
// NOTE(review): |id|, |abort_requested| and |use_microphone| have no in-class
// initializers; they rely on the out-of-line Session() constructor or on the
// code that creates the session to set them — verify in the .cc file.
struct Session {
Session();
~Session();
int id;
bool abort_requested;
SpeechRecognitionSessionConfig config;
SpeechRecognitionSessionContext context;
// Ref-counted recognizer associated with this session.
scoped_refptr<SpeechRecognizer> recognizer;
// Owned UI proxy; see MediaRequestPermissionCallback(), which receives one.
std::unique_ptr<MediaStreamUIProxy> ui;
bool use_microphone;
media::SpeechRecognitionRecognitionContext recognition_context;
};
// Private helper for aborting the session with the given id; how it relates
// to the public AbortSession() is defined in the .cc file.
void AbortSessionImpl(int session_id);
// Callback carrying two booleans for |session_id|; presumably the outcome of
// a permission check (whether the user must be asked, whether recognition is
// allowed) — confirm in the .cc file.
void RecognitionAllowedCallback(int session_id,
bool ask_user,
bool is_allowed);
// Callback for a media permission request. Takes ownership of |stream_ui|
// (passed by value as std::unique_ptr).
void MediaRequestPermissionCallback(
int session_id,
const blink::mojom::StreamDevicesSet& stream_devices_set,
std::unique_ptr<MediaStreamUIProxy> stream_ui);
// Feeds |event| into the state machine for |session_id|.
void DispatchEvent(int session_id, FSMEvent event);
// NOTE: despite the "AndGetNextState" suffix, this is declared to return
// void; callers cannot obtain a next state from it.
void ExecuteTransitionAndGetNextState(Session* session,
FSMState session_state,
FSMEvent event);
FSMState GetSessionState(int session_id) const;
// Per-session actions. All take the session by const reference except
// SessionDelete(), which takes a mutable pointer.
void SessionStart(const Session& session);
void SessionUpdateRecognitionContext(const Session& session);
void SessionAbort(const Session& session);
void SessionStopAudioCapture(const Session& session);
void ResetCapturingSessionId(const Session& session);
void SessionDelete(Session* session);
// Presumably invoked for an event that is not valid in the session's current
// state — confirm in the .cc file.
void NotFeasible(const Session& session, FSMEvent event);
bool SessionExists(int session_id) const;
// Const member that nevertheless returns a mutable Session*.
Session* GetSession(int session_id) const;
SpeechRecognitionEventListener* GetListener(int session_id) const;
SpeechRecognitionEventListener* GetDelegateListener() const;
// Non-const: presumably advances |last_session_id_| — confirm in the .cc file.
int GetNextSessionID();
// Static counter shared by all instances; presumably the source of
// |requester_id_| — confirm in the .cc file.
static int next_requester_id_;
// Non-owning pointers supplied to the constructor.
raw_ptr<media::AudioSystem> audio_system_;
raw_ptr<MediaStreamManager> media_stream_manager_;
// Owns every Session, keyed by session id.
base::flat_map<int, std::unique_ptr<Session>> sessions_;
// Both ids start as kSessionIDInvalid (declared outside this header).
int microphone_session_id_ = kSessionIDInvalid;
int last_session_id_ = kSessionIDInvalid;
bool is_dispatching_event_ = false;
std::unique_ptr<SpeechRecognitionManagerDelegate> delegate_;
// Const, so it is fixed at construction time.
const int requester_id_;
mojo::Remote<media::mojom::SpeechRecognitionContext>
speech_recognition_context_;
// Keep this the last data member: members are destroyed in reverse
// declaration order, so the factory is destroyed before every other member.
base::WeakPtrFactory<SpeechRecognitionManagerImpl> weak_factory_{this};
};
}
#endif