root/chrome/browser/speech/tts_linux.cc

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. Initialize
  2. Reset
  3. PlatformImplAvailable
  4. Speak
  5. StopSpeaking
  6. Pause
  7. Resume
  8. IsSpeaking
  9. GetVoices
  10. OnSpeechEvent
  11. NotificationCallback
  12. IndexMarkCallback
  13. GetInstance
  14. GetInstance

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <math.h>

#include <map>

#include "base/command_line.h"
#include "base/debug/leak_annotations.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/singleton.h"
#include "base/synchronization/lock.h"
#include "chrome/browser/speech/tts_platform.h"
#include "chrome/common/chrome_switches.h"
#include "content/public/browser/browser_thread.h"

#include "library_loaders/libspeechd.h"

using content::BrowserThread;

namespace {

const char kNotSupportedError[] =
    "Native speech synthesis not supported on this platform.";

struct SPDChromeVoice {
  std::string name;
  std::string module;
};

}  // namespace

class TtsPlatformImplLinux : public TtsPlatformImpl {
 public:
  virtual bool PlatformImplAvailable() OVERRIDE;
  virtual bool Speak(
      int utterance_id,
      const std::string& utterance,
      const std::string& lang,
      const VoiceData& voice,
      const UtteranceContinuousParameters& params) OVERRIDE;
  virtual bool StopSpeaking() OVERRIDE;
  virtual void Pause() OVERRIDE;
  virtual void Resume() OVERRIDE;
  virtual bool IsSpeaking() OVERRIDE;
  virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;

  void OnSpeechEvent(SPDNotificationType type);

  // Get the single instance of this class.
  static TtsPlatformImplLinux* GetInstance();

 private:
  TtsPlatformImplLinux();
  virtual ~TtsPlatformImplLinux();

  // Initiate the connection with the speech dispatcher.
  void Initialize();

  // Resets the connection with speech dispatcher.
  void Reset();

  static void NotificationCallback(size_t msg_id,
                                   size_t client_id,
                                   SPDNotificationType type);

  static void IndexMarkCallback(size_t msg_id,
                                size_t client_id,
                                SPDNotificationType state,
                                char* index_mark);

  static SPDNotificationType current_notification_;

  base::Lock initialization_lock_;
  LibSpeechdLoader libspeechd_loader_;
  SPDConnection* conn_;

  // These apply to the current utterance only.
  std::string utterance_;
  int utterance_id_;

  // Map a string composed of a voicename and module to the voicename. Used to
  // uniquely identify a voice across all available modules.
  scoped_ptr<std::map<std::string, SPDChromeVoice> > all_native_voices_;

  friend struct DefaultSingletonTraits<TtsPlatformImplLinux>;

  DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplLinux);
};

// static
SPDNotificationType TtsPlatformImplLinux::current_notification_ =
    SPD_EVENT_END;

TtsPlatformImplLinux::TtsPlatformImplLinux()
    : utterance_id_(0) {
  const CommandLine& command_line = *CommandLine::ForCurrentProcess();
  if (!command_line.HasSwitch(switches::kEnableSpeechDispatcher))
    return;

  BrowserThread::PostTask(BrowserThread::FILE,
                          FROM_HERE,
                          base::Bind(&TtsPlatformImplLinux::Initialize,
                                     base::Unretained(this)));
}

void TtsPlatformImplLinux::Initialize() {
  base::AutoLock lock(initialization_lock_);

  if (!libspeechd_loader_.Load("libspeechd.so.2"))
    return;

  {
    // spd_open has memory leaks which are hard to suppress.
    // http://crbug.com/317360
    ANNOTATE_SCOPED_MEMORY_LEAK;
    conn_ = libspeechd_loader_.spd_open(
        "chrome", "extension_api", NULL, SPD_MODE_SINGLE);
  }
  if (!conn_)
    return;

  // Register callbacks for all events.
  conn_->callback_begin =
    conn_->callback_end =
    conn_->callback_cancel =
    conn_->callback_pause =
    conn_->callback_resume =
    &NotificationCallback;

  conn_->callback_im = &IndexMarkCallback;

  libspeechd_loader_.spd_set_notification_on(conn_, SPD_BEGIN);
  libspeechd_loader_.spd_set_notification_on(conn_, SPD_END);
  libspeechd_loader_.spd_set_notification_on(conn_, SPD_CANCEL);
  libspeechd_loader_.spd_set_notification_on(conn_, SPD_PAUSE);
  libspeechd_loader_.spd_set_notification_on(conn_, SPD_RESUME);
}

TtsPlatformImplLinux::~TtsPlatformImplLinux() {
  base::AutoLock lock(initialization_lock_);
  if (conn_) {
    libspeechd_loader_.spd_close(conn_);
    conn_ = NULL;
  }
}

void TtsPlatformImplLinux::Reset() {
  base::AutoLock lock(initialization_lock_);
  if (conn_)
    libspeechd_loader_.spd_close(conn_);
  conn_ = libspeechd_loader_.spd_open(
      "chrome", "extension_api", NULL, SPD_MODE_SINGLE);
}

bool TtsPlatformImplLinux::PlatformImplAvailable() {
  if (!initialization_lock_.Try())
    return false;
  bool result = libspeechd_loader_.loaded() && (conn_ != NULL);
  initialization_lock_.Release();
  return result;
}

bool TtsPlatformImplLinux::Speak(
    int utterance_id,
    const std::string& utterance,
    const std::string& lang,
    const VoiceData& voice,
    const UtteranceContinuousParameters& params) {
  if (!PlatformImplAvailable()) {
    error_ = kNotSupportedError;
    return false;
  }

  // Speech dispatcher's speech params are around 3x at either limit.
  float rate = params.rate > 3 ? 3 : params.rate;
  rate = params.rate < 0.334 ? 0.334 : rate;
  float pitch = params.pitch > 3 ? 3 : params.pitch;
  pitch = params.pitch < 0.334 ? 0.334 : pitch;

  std::map<std::string, SPDChromeVoice>::iterator it =
      all_native_voices_->find(voice.name);
  if (it != all_native_voices_->end()) {
    libspeechd_loader_.spd_set_output_module(conn_, it->second.module.c_str());
    libspeechd_loader_.spd_set_synthesis_voice(conn_, it->second.name.c_str());
  }

  // Map our multiplicative range to Speech Dispatcher's linear range.
  // .334 = -100.
  // 3 = 100.
  libspeechd_loader_.spd_set_voice_rate(conn_, 100 * log10(rate) / log10(3));
  libspeechd_loader_.spd_set_voice_pitch(conn_, 100 * log10(pitch) / log10(3));

  utterance_ = utterance;
  utterance_id_ = utterance_id;

  if (libspeechd_loader_.spd_say(conn_, SPD_TEXT, utterance.c_str()) == -1) {
    Reset();
    return false;
  }
  return true;
}

bool TtsPlatformImplLinux::StopSpeaking() {
  if (!PlatformImplAvailable())
    return false;
  if (libspeechd_loader_.spd_stop(conn_) == -1) {
    Reset();
    return false;
  }
  return true;
}

void TtsPlatformImplLinux::Pause() {
  if (!PlatformImplAvailable())
    return;
  libspeechd_loader_.spd_pause(conn_);
}

void TtsPlatformImplLinux::Resume() {
  if (!PlatformImplAvailable())
    return;
  libspeechd_loader_.spd_resume(conn_);
}

bool TtsPlatformImplLinux::IsSpeaking() {
  return current_notification_ == SPD_EVENT_BEGIN;
}

void TtsPlatformImplLinux::GetVoices(
    std::vector<VoiceData>* out_voices) {
  if (!all_native_voices_.get()) {
    all_native_voices_.reset(new std::map<std::string, SPDChromeVoice>());
    char** modules = libspeechd_loader_.spd_list_modules(conn_);
    if (!modules)
      return;
    for (int i = 0; modules[i]; i++) {
      char* module = modules[i];
      libspeechd_loader_.spd_set_output_module(conn_, module);
      SPDVoice** native_voices =
          libspeechd_loader_.spd_list_synthesis_voices(conn_);
      if (!native_voices) {
        free(module);
        continue;
      }
      for (int j = 0; native_voices[j]; j++) {
        SPDVoice* native_voice = native_voices[j];
        SPDChromeVoice native_data;
        native_data.name = native_voice->name;
        native_data.module = module;
        std::string key;
        key.append(native_data.name);
        key.append(" ");
        key.append(native_data.module);
        all_native_voices_->insert(
            std::pair<std::string, SPDChromeVoice>(key, native_data));
        free(native_voices[j]);
      }
      free(modules[i]);
    }
  }

  for (std::map<std::string, SPDChromeVoice>::iterator it =
           all_native_voices_->begin();
       it != all_native_voices_->end();
       it++) {
    out_voices->push_back(VoiceData());
    VoiceData& voice = out_voices->back();
    voice.native = true;
    voice.name = it->first;
    voice.events.insert(TTS_EVENT_START);
    voice.events.insert(TTS_EVENT_END);
    voice.events.insert(TTS_EVENT_CANCELLED);
    voice.events.insert(TTS_EVENT_MARKER);
    voice.events.insert(TTS_EVENT_PAUSE);
    voice.events.insert(TTS_EVENT_RESUME);
  }
}

void TtsPlatformImplLinux::OnSpeechEvent(SPDNotificationType type) {
  TtsController* controller = TtsController::GetInstance();
  switch (type) {
  case SPD_EVENT_BEGIN:
    controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, std::string());
    break;
  case SPD_EVENT_RESUME:
    controller->OnTtsEvent(utterance_id_, TTS_EVENT_RESUME, 0, std::string());
    break;
  case SPD_EVENT_END:
    controller->OnTtsEvent(
        utterance_id_, TTS_EVENT_END, utterance_.size(), std::string());
    break;
  case SPD_EVENT_PAUSE:
    controller->OnTtsEvent(
        utterance_id_, TTS_EVENT_PAUSE, utterance_.size(), std::string());
    break;
  case SPD_EVENT_CANCEL:
    controller->OnTtsEvent(
        utterance_id_, TTS_EVENT_CANCELLED, 0, std::string());
    break;
  case SPD_EVENT_INDEX_MARK:
    controller->OnTtsEvent(utterance_id_, TTS_EVENT_MARKER, 0, std::string());
    break;
  }
}

// static
void TtsPlatformImplLinux::NotificationCallback(
    size_t msg_id, size_t client_id, SPDNotificationType type) {
  // We run Speech Dispatcher in threaded mode, so these callbacks should always
  // be in a separate thread.
  if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
    current_notification_ = type;
    BrowserThread::PostTask(
        BrowserThread::UI,
        FROM_HERE,
        base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
                   base::Unretained(TtsPlatformImplLinux::GetInstance()),
                   type));
  }
}

// static
void TtsPlatformImplLinux::IndexMarkCallback(size_t msg_id,
                                                      size_t client_id,
                                                      SPDNotificationType state,
                                                      char* index_mark) {
  // TODO(dtseng): index_mark appears to specify an index type supplied by a
  // client. Need to explore how this is used before hooking it up with existing
  // word, sentence events.
  // We run Speech Dispatcher in threaded mode, so these callbacks should always
  // be in a separate thread.
  if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
    current_notification_ = state;
    BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
        base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
        base::Unretained(TtsPlatformImplLinux::GetInstance()),
        state));
  }
}

// static
TtsPlatformImplLinux* TtsPlatformImplLinux::GetInstance() {
  return Singleton<TtsPlatformImplLinux,
                   LeakySingletonTraits<TtsPlatformImplLinux> >::get();
}

// static
TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
  return TtsPlatformImplLinux::GetInstance();
}

/* [<][>][^][v][top][bottom][index][help] */