[Sugar-devel] [Sugar] Implement text to speech in Sugar feature - v2
Simon Schampijer
simon at schampijer.de
Wed Jan 18 03:05:52 EST 2012
On 17/01/12 22:49, godiard at sugarlabs.org wrote:
> From: Gonzalo Odiard<godiard at gmail.com>
>
> Added controls to pause/stop and addressed suggestions
> in the review process.
Thanks for the update!
- one thing that is not solved yet is the item that gets created in the
clipboard tray
- when I do not have text selected I can still click on the 'play' icon
- sometimes the sensitivity of the buttons is out of sync (the stop button is
still active even though nothing is being played back). If it is too
complicated to get this right, maybe we can make the buttons always available
and find another way of indicating that something is being played back; not
sure yet how/if, just a random thought.
- nitpick: for the copyright, I think it should be either yours or OLPC's.
I think OLPC is the one to pick in this case, and welcome to 2012 :)
Regards,
Simon
> ---
> data/sugar.schemas.in | 28 ++++
> extensions/deviceicon/Makefile.am | 1 +
> extensions/deviceicon/speech.py | 204 ++++++++++++++++++++++++++++
> extensions/globalkey/Makefile.am | 1 +
> extensions/globalkey/speech.py | 24 ++++
> src/jarabe/model/Makefile.am | 1 +
> src/jarabe/model/speech.py | 266 +++++++++++++++++++++++++++++++++++++
> src/jarabe/view/keyhandler.py | 29 +----
> 8 files changed, 526 insertions(+), 28 deletions(-)
> create mode 100644 extensions/deviceicon/speech.py
> create mode 100644 extensions/globalkey/speech.py
> create mode 100644 src/jarabe/model/speech.py
>
> diff --git a/data/sugar.schemas.in b/data/sugar.schemas.in
> index 8b3e1ad..66d3391 100644
> --- a/data/sugar.schemas.in
> +++ b/data/sugar.schemas.in
> @@ -368,5 +368,33 @@
> </locale>
> </schema>
>
> +<schema>
> +<key>/schemas/desktop/sugar/speech/pitch</key>
> +<applyto>/desktop/sugar/speech/pitch</applyto>
> +<owner>sugar</owner>
> +<type>int</type>
> +<default>50</default>
> +<locale name="C">
> +<short>Default pitch to the speech sugar service</short>
> +<long>Pitch value used by the speech service in Sugar,
> + can be changed by the user, with controls in a icon
> + in the frame</long>
> +</locale>
> +</schema>
> +
> +<schema>
> +<key>/schemas/desktop/sugar/speech/rate</key>
> +<applyto>/desktop/sugar/speech/rate</applyto>
> +<owner>sugar</owner>
> +<type>int</type>
> +<default>170</default>
> +<locale name="C">
> +<short>Default rate to the speech sugar service</short>
> +<long>Rate value used by the speech service in Sugar,
> + can be changed by the user, with controls in a icon
> + in the frame</long>
> +</locale>
> +</schema>
> +
> </schemalist>
> </gconfschemafile>
> diff --git a/extensions/deviceicon/Makefile.am b/extensions/deviceicon/Makefile.am
> index 118d866..7ed1f77 100644
> --- a/extensions/deviceicon/Makefile.am
> +++ b/extensions/deviceicon/Makefile.am
> @@ -5,5 +5,6 @@ sugar_PYTHON = \
> battery.py \
> network.py \
> speaker.py \
> + speech.py \
> touchpad.py \
> volume.py
> diff --git a/extensions/deviceicon/speech.py b/extensions/deviceicon/speech.py
> new file mode 100644
> index 0000000..4528e98
> --- /dev/null
> +++ b/extensions/deviceicon/speech.py
> @@ -0,0 +1,204 @@
> +# Copyright (C) 2011 One Laptop Per Child
> +# Copyright (C) 2011 Gonzalo Odiard
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write to the Free Software
> +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
> +
> +from gettext import gettext as _
> +import gconf
> +
> +import glib
> +import gtk
> +
> +from sugar.graphics.icon import Icon
> +from sugar.graphics.menuitem import MenuItem
> +from sugar.graphics.tray import TrayIcon
> +from sugar.graphics.palette import Palette
> +from sugar.graphics.xocolor import XoColor
> +from sugar.graphics.toolbutton import ToolButton
> +
> +from jarabe.frame.frameinvoker import FrameWidgetInvoker
> +from jarabe.model import speech
> +
> +
> +_ICON_NAME = 'microphone'
> +
> +
> +class SpeechDeviceView(TrayIcon):
> +
> + FRAME_POSITION_RELATIVE = 105
> +
> + def __init__(self):
> + client = gconf.client_get_default()
> + self._color = XoColor(client.get_string('/desktop/sugar/user/color'))
> +
> + TrayIcon.__init__(self, icon_name=_ICON_NAME, xo_color=self._color)
> +
> + self.set_palette_invoker(FrameWidgetInvoker(self))
> +
> + self._manager = speech.get_speech_manager()
> +
> + self.connect('expose-event', self.__expose_event_cb)
> +
> + self._icon_widget.connect('button-release-event',
> + self.__button_release_event_cb)
> +
> + def create_palette(self):
> + label = glib.markup_escape_text(_('Speech'))
> + palette = SpeechPalette(label, manager=self._manager)
> + palette.set_group_id('frame')
> + return palette
> +
> + def __button_release_event_cb(self, widget, event):
> + if event.button != 1:
> + return False
> +
> + self.palette_invoker.notify_right_click()
> + return True
> +
> + def __expose_event_cb(self, *args):
> + self._update_info()
> +
> +
> +class SpeechPalette(Palette):
> +
> + def __init__(self, primary_text, manager):
> + Palette.__init__(self, label=primary_text)
> +
> +
> +
> + self._manager = manager
> + self._manager.connect('play', self._set_buttons_state, 'play')
> + self._manager.connect('stop', self._set_buttons_state, 'stop')
> + self._manager.connect('pause', self._set_buttons_state, 'pause')
> +
> + vbox = gtk.VBox()
> + self.set_content(vbox)
> +
> + """
> + self._play_item = MenuItem('Say selected text')
> + self._play_icon = Icon(icon_name='player_play',
> + icon_size=gtk.ICON_SIZE_MENU)
> + self._pause_icon = Icon(icon_name='player_pause',
> + icon_size=gtk.ICON_SIZE_MENU)
> + self._play_item.set_image(self._play_icon)
> + self.menu.append(self._play_item)
> + self._play_item.show()
> + self._play_item.connect('activate', self.__play_clicked_cb)
> +
> + self._stop_item = MenuItem('Stop talking')
> + self._stop_icon = Icon(icon_name='player_stop',
> + icon_size=gtk.ICON_SIZE_MENU)
> + self._stop_item.set_image(self._stop_icon)
> + self.menu.append(self._stop_item)
> + self._stop_item.show()
> + self._stop_item.connect('activate', self.__stop_clicked_cb)
> + self._stop_item.set_sensitive(False)
> + """
> + hbox_play_pause = gtk.HBox()
> + self._play_pause_button = ToolButton(icon_name='player_play')
> + self._play_pause_button.connect('clicked', self.__play_clicked_cb)
> + hbox_play_pause.pack_start(self._play_pause_button)
> + self._stop_button = ToolButton(icon_name='player_stop')
> + self._stop_button.connect('clicked', self.__stop_clicked_cb)
> + self._stop_button.set_sensitive(False)
> + hbox_play_pause.pack_start(self._stop_button)
> + hbox_play_pause.pack_start(gtk.Label(_('Say selected text')))
> + vbox.add(hbox_play_pause)
> +
> + vbox.add(gtk.HSeparator())
> +
> + pitch_step = 10
> + self._adj_pitch = gtk.Adjustment(value=self._manager.get_pitch(),
> + lower=self._manager.MIN_PITCH,
> + upper=self._manager.MAX_PITCH,
> + step_incr=pitch_step,
> + page_incr=pitch_step,
> + page_size=pitch_step)
> + self._hscale_pitch = gtk.HScale(self._adj_pitch)
> + self._hscale_pitch.set_digits(0)
> + self._hscale_pitch.set_draw_value(False)
> +
> + hbox_pitch = gtk.HBox()
> + hbox_pitch.pack_start(gtk.Label(_('Pitch')))
> + hbox_pitch.pack_start(self._hscale_pitch)
> + vbox.add(hbox_pitch)
> +
> + rate_step = 10
> + self._adj_rate = gtk.Adjustment(value=self._manager.get_rate(),
> + lower=self._manager.MIN_RATE,
> + upper=self._manager.MAX_RATE,
> + step_incr=rate_step,
> + page_incr=rate_step,
> + page_size=rate_step)
> + self._hscale_rate = gtk.HScale(self._adj_rate)
> + self._hscale_rate.set_digits(0)
> + self._hscale_rate.set_draw_value(False)
> +
> + hbox_rate = gtk.HBox()
> + hbox_rate.pack_start(gtk.Label(_('Rate')))
> + hbox_rate.pack_start(self._hscale_rate)
> + vbox.add(hbox_rate)
> + vbox.show_all()
> +
> + self._adj_pitch.connect('value_changed', self.__adj_pitch_changed_cb)
> + self._adj_rate.connect('value_changed', self.__adj_rate_changed_cb)
> +
> + def __adj_pitch_changed_cb(self, adjustement):
> + self._manager.set_pitch(int(adjustement.value))
> +
> + def __adj_rate_changed_cb(self, adjustement):
> + self._manager.set_rate(int(adjustement.value))
> +
> + def __play_clicked_cb(self, widget):
> + if self._manager.is_paused:
> + self._manager.restart()
> + else:
> + if not self._manager.is_playing:
> + self._manager.say_selected_text()
> + else:
> + self._manager.pause()
> +
> + def __stop_clicked_cb(self, widget):
> + self._manager.stop()
> +
> + def _set_buttons_state(self, manager, signal):
> + if signal == 'play':
> + """
> + self._play_item.set_image(self._pause_icon)
> + self._play_item.set_label('Pause talking selected text')
> + self._stop_item.set_sensitive(True)
> + """
> + self._play_pause_button.set_icon('player_pause')
> + self._stop_button.set_sensitive(True)
> +
> + elif signal == 'pause':
> + """
> + self._play_item.set_image(self._play_icon)
> + self._play_item.set_label('Say selected text')
> + self._stop_item.set_sensitive(True)
> + """
> + self._play_pause_button.set_icon('player_play')
> + self._stop_button.set_sensitive(True)
> +
> + elif signal == 'stop':
> + """
> + self._play_item.set_image(self._play_icon)
> + self._stop_item.set_sensitive(False)
> + """
> + self._play_pause_button.set_icon('player_play')
> + self._stop_button.set_sensitive(False)
> +
> +def setup(tray):
> + tray.add_device(SpeechDeviceView())
> diff --git a/extensions/globalkey/Makefile.am b/extensions/globalkey/Makefile.am
> index 69afac2..b6cbbd6 100644
> --- a/extensions/globalkey/Makefile.am
> +++ b/extensions/globalkey/Makefile.am
> @@ -3,4 +3,5 @@ sugardir = $(pkgdatadir)/extensions/globalkey
> sugar_PYTHON = \
> __init__.py \
> screenshot.py \
> + speech.py \
> viewsource.py
> diff --git a/extensions/globalkey/speech.py b/extensions/globalkey/speech.py
> new file mode 100644
> index 0000000..1e7ea66
> --- /dev/null
> +++ b/extensions/globalkey/speech.py
> @@ -0,0 +1,24 @@
> +# Copyright (C) 2011 One Laptop Per Child
> +# Copyright (C) 2011 Gonzalo Odiard
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write to the Free Software
> +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
> +
> +from jarabe.model import speech
> +
> +BOUND_KEYS = ['<alt>s']
> +
> +
> +def handle_key_press(key):
> + speech.get_speech_manager().say_selected_text()
> diff --git a/src/jarabe/model/Makefile.am b/src/jarabe/model/Makefile.am
> index 92e8712..2fc6b1c 100644
> --- a/src/jarabe/model/Makefile.am
> +++ b/src/jarabe/model/Makefile.am
> @@ -16,4 +16,5 @@ sugar_PYTHON = \
> screen.py \
> session.py \
> sound.py \
> + speech.py \
> telepathyclient.py
> diff --git a/src/jarabe/model/speech.py b/src/jarabe/model/speech.py
> new file mode 100644
> index 0000000..6330b0f
> --- /dev/null
> +++ b/src/jarabe/model/speech.py
> @@ -0,0 +1,266 @@
> +# Copyright (C) 2011 One Laptop Per Child
> +# Copyright (C) 2011 Gonzalo Odiard
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write to the Free Software
> +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
> +
> +import gconf
> +
> +import gst
> +import gtk
> +import gobject
> +
> +import os
> +from gettext import gettext as _
> +import logging
> +
> +# TRANS: The language pitch (range [0 - 99], default 50 for English)
> +# Look at http://espeak.sourceforge.net/commands.html for details
> +DEFAULT_PITCH = int(_('50'))
> +
> +
> +# TRANS: The diction speed, in average words per minute (range [80 - 390],
> +# default 170 for English).
> +# Look at http://espeak.sourceforge.net/commands.html for details
> +DEFAULT_RATE = int(_('170'))
> +
> +_speech_manager = None
> +
> +
> +class SpeechManager(gobject.GObject):
> +
> + __gtype_name__ = 'SpeechManager'
> +
> + __gsignals__ = {
> + 'play': (gobject.SIGNAL_RUN_FIRST, None, []),
> + 'pause': (gobject.SIGNAL_RUN_FIRST, None, []),
> + 'stop': (gobject.SIGNAL_RUN_FIRST, None, [])
> + }
> +
> + MIN_PITCH = 0
> + MAX_PITCH = 99
> +
> + MIN_RATE = 80
> + MAX_RATE = 390
> +
> + def __init__(self, **kwargs):
> + gobject.GObject.__init__(self, **kwargs)
> + self._player = AudioGrabGst()
> + self._player.connect('play', self._emit_signal, 'play')
> + self._player.connect('stop', self._emit_signal, 'stop')
> + self._player.connect('pause', self._emit_signal, 'pause')
> + logging.debug('SpeechManager setting default parameters')
> + self._voice_name = self._player.get_default_voice()
> + self._pitch = DEFAULT_PITCH
> + self._rate = DEFAULT_RATE
> + self._is_playing = False
> + self._is_paused = False
> +
> + try:
> + self._loading = True
> + self.restore()
> + self._loading = False
> + except:
> + pass
> +
> + def _emit_signal(self, player, signal):
> + self._is_playing = (signal == 'play')
> + self._is_paused = (signal == 'pause')
> + self.emit(signal)
> +
> + def get_is_playing(self):
> + return self._is_playing
> +
> + is_playing = gobject.property(type=bool, getter=get_is_playing,
> + setter=None, default=False)
> +
> + def get_is_paused(self):
> + return self._is_paused
> +
> + is_paused = gobject.property(type=bool, getter=get_is_paused,
> + setter=None, default=False)
> +
> + def get_pitch(self):
> + return self._pitch
> +
> + def get_rate(self):
> + return self._rate
> +
> + def set_pitch(self, pitch):
> + self._pitch = pitch
> + if not self._loading:
> + self.save()
> +
> + def set_rate(self, rate):
> + self._rate = rate
> + if not self._loading:
> + self.save()
> +
> + def say_text(self, text):
> + if text:
> + self._player.speak(self._pitch, self._rate, self._voice_name, text)
> +
> + def say_selected_text(self):
> + clipboard = gtk.clipboard_get(selection='PRIMARY')
> + clipboard.request_text(self._primary_selection_cb)
> +
> + def pause(self):
> + self._player.pause_sound_device()
> +
> + def restart(self):
> + self._player.restart_sound_device()
> +
> + def stop(self):
> + self._player.stop_sound_device()
> +
> + def _primary_selection_cb(self, clipboard, text, user_data):
> + logging.debug('SpeechManager._primary_selection_cb: %r', text)
> + self.say_text(text)
> +
> + def save(self):
> + client = gconf.client_get_default()
> + client.set_int('/desktop/sugar/speech/pitch', self.get_pitch())
> + client.set_int('/desktop/sugar/speech/rate',
> + self.get_rate())
> + logging.debug('saving speech configuration pitch %s rate %s' %
> + (self._pitch, self._rate))
> +
> + def restore(self):
> + client = gconf.client_get_default()
> + self.set_pitch(client.get_int('/desktop/sugar/speech/pitch'))
> + self.set_rate(client.get_int('/desktop/sugar/speech/rate'))
> + logging.debug('loading speech configuration pitch %s rate %s' %
> + (self._pitch, self._rate))
> +
> +
> +class AudioGrabGst(gobject.GObject):
> +
> + __gsignals__ = {
> + 'play': (gobject.SIGNAL_RUN_FIRST, None, []),
> + 'pause': (gobject.SIGNAL_RUN_FIRST, None, []),
> + 'stop': (gobject.SIGNAL_RUN_FIRST, None, [])
> + }
> +
> + def __init__(self):
> + gobject.GObject.__init__(self)
> + self._pipeline = None
> + self._quiet = True
> +
> + def restart_sound_device(self):
> + if self._pipeline is None:
> + return
> +
> + self._quiet = False
> + self._pipeline.set_state(gst.STATE_PLAYING)
> + self.emit('play')
> +
> + def pause_sound_device(self):
> + if self._pipeline is None:
> + return
> +
> + self._pipeline.set_state(gst.STATE_PAUSED)
> + self.emit('pause')
> + self._quiet = True
> +
> + def stop_sound_device(self):
> + if self._pipeline is None:
> + return
> +
> + self._pipeline.set_state(gst.STATE_NULL)
> + self.emit('stop')
> +
> + self._quiet = True
> +
> + def make_pipeline(self, cmd):
> + if self._pipeline is not None:
> + self.stop_sound_device()
> + del self._pipeline
> +
> + self._pipeline = gst.parse_launch(cmd)
> +
> + bus = self._pipeline.get_bus()
> + bus.add_signal_watch()
> + bus.connect('message::element', self.__pipe_message_cb)
> +
> + def __pipe_message_cb(self, bus, message):
> + logging.error('Gst message %s' % message)
> + if message.structure.get_name() == 'espeak-mark' and \
> + message.structure['mark'] == 'end':
> + self.emit('stop')
> +
> + def speak(self, pitch, rate, voice_name, text):
> + # XXX workaround for http://bugs.sugarlabs.org/ticket/1801
> + if not [i for i in text if i.isalnum()]:
> + return
> + text = text + '<mark name="end>"></mark>'
> +
> + self.make_pipeline('espeak name=espeak ! autoaudiosink')
> + src = self._pipeline.get_by_name('espeak')
> +
> + logging.debug('pitch=%d rate=%d voice=%s text=%s' % (pitch, rate,
> + voice_name, text))
> +
> + src.props.text = text
> + src.props.pitch = pitch
> + src.props.rate = rate
> + src.props.voice = voice_name
> + src.props.track = 2 # track for marks
> +
> + self.restart_sound_device()
> +
> + def get_all_voices(self):
> + all_voices = {}
> + for i in gst.element_factory_make('espeak').props.voices:
> + name, language, dialect = i
> + #if name in ('en-rhotic','english_rp','english_wmids'):
> + # these voices don't produce sound
> + # continue
> + all_voices[language] = name
> + return all_voices
> +
> + def get_default_voice(self):
> + """Try to figure out the default voice, from the current locale ($LANG)
> + Fall back to espeak's voice called Default."""
> + voices = self.get_all_voices()
> +
> + try:
> + lang = os.environ['LANG']
> + if lang.find('.')> --1:
> + lang = lang[0:lang.find('.')]
> + lang = lang.replace('_', '-').lower()
> + except:
> + lang = ""
> +
> + best = "default"
> +
> + try:
> + best = voices[lang]
> + except:
> + try:
> + lang = lang[0:lang.find('-')]
> + best = voices[lang]
> + except:
> + pass
> +
> + logging.debug('Best voice for LANG %s seems to be %s' %
> + (lang, best))
> + return best
> +
> +
> +def get_speech_manager():
> + global _speech_manager
> +
> + if _speech_manager == None:
> + _speech_manager = SpeechManager()
> + return _speech_manager
> diff --git a/src/jarabe/view/keyhandler.py b/src/jarabe/view/keyhandler.py
> index d79bfe6..a71f260 100644
> --- a/src/jarabe/view/keyhandler.py
> +++ b/src/jarabe/view/keyhandler.py
> @@ -60,13 +60,9 @@ _actions_table = {
> '<alt><shift>f': 'frame',
> '<alt><shift>q': 'quit_emulator',
> 'XF86Search': 'open_search',
> - '<alt><shift>o': 'open_search',
> - '<alt><shift>s': 'say_text',
> + '<alt><shift>o': 'open_search'
> }
>
> -SPEECH_DBUS_SERVICE = 'org.laptop.Speech'
> -SPEECH_DBUS_PATH = '/org/laptop/Speech'
> -SPEECH_DBUS_INTERFACE = 'org.laptop.Speech'
>
> _instance = None
>
> @@ -77,7 +73,6 @@ class KeyHandler(object):
> self._key_pressed = None
> self._keycode_pressed = 0
> self._keystate_pressed = 0
> - self._speech_proxy = None
>
> self._key_grabber = KeyGrabber()
> self._key_grabber.connect('key-pressed',
> @@ -114,28 +109,6 @@ class KeyHandler(object):
> sound.set_volume(volume)
> sound.set_muted(volume == 0)
>
> - def _get_speech_proxy(self):
> - if self._speech_proxy is None:
> - bus = dbus.SessionBus()
> - speech_obj = bus.get_object(SPEECH_DBUS_SERVICE, SPEECH_DBUS_PATH,
> - follow_name_owner_changes=True)
> - self._speech_proxy = dbus.Interface(speech_obj,
> - SPEECH_DBUS_INTERFACE)
> - return self._speech_proxy
> -
> - def _on_speech_err(self, ex):
> - logging.error('An error occurred with the ESpeak service: %r', ex)
> -
> - def _primary_selection_cb(self, clipboard, text, user_data):
> - logging.debug('KeyHandler._primary_selection_cb: %r', text)
> - if text:
> - self._get_speech_proxy().SayText(text, reply_handler=lambda: None,
> - error_handler=self._on_speech_err)
> -
> - def handle_say_text(self, event_time):
> - clipboard = gtk.clipboard_get(selection='PRIMARY')
> - clipboard.request_text(self._primary_selection_cb)
> -
> def handle_previous_window(self, event_time):
> self._tabbing_handler.previous_activity(event_time)
>
More information about the Sugar-devel
mailing list