Commit a7eb0f0a authored by Jean-Baptiste Kempf's avatar Jean-Baptiste Kempf

Add a SAPI synthesizer for Windows

This is the work from Moti Zilberman, modified by me to build and
integrate in-tree

It's heavily inspired by the OS X one.

Be careful: you need a very recent Mingw-W64 to hope to compile it.

Ref #11893
parent e4ea3a00
......@@ -163,6 +163,7 @@ Misc
* remove ZPL playlist format
* Update libVLC doxygen modules
* Add a text-to-speech renderer for subtitles on OS X/iOS
* Add a text-to-speech renderer for subtitles on Windows
Removed modules
* Atmo video filter
......
......@@ -319,6 +319,7 @@ $Id$
* rv32: RV32 image format conversion module
* samplerate: Secret Rabbit Code (libsamplerate) audio resampler
* sap: Interface module to read SAP/SDP announcements
* sapi: Windows Text to Speech Synthesizer using the SAPI 5.1 API
* scale: Images rescaler
* scaletempo: Scale audio tempo in sync with playback rate
* scene: scene video filter
......
......@@ -42,6 +42,12 @@ libnsspeechsynthesizer_plugin_la_LDFLAGS = -Wl,-framework,Cocoa
text_LTLIBRARIES += libnsspeechsynthesizer_plugin.la
endif
libsapi_plugin_la_SOURCES = text_renderer/sapi.cpp
libsapi_plugin_la_LIBADD = -lole32
if HAVE_WIN32
text_LTLIBRARIES += libsapi_plugin.la
endif
libsvg_plugin_la_SOURCES = text_renderer/svg.c
libsvg_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) $(SVG_CFLAGS)
libsvg_plugin_la_LDFLAGS = $(AM_LDFLAGS) -rpath '$(textdir)'
......
/*****************************************************************************
* sapi.cpp: Simple text to Speech renderer for Windows, based on SAPI
*****************************************************************************
* Copyright (c) 2015 Moti Zilberman
*
* Authors: Moti Zilberman
* Jean-Baptiste Kempf
*
* The MIT License (MIT)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
/* VLC core API headers */
#include <vlc_common.h>
#include <vlc_plugin.h>
#include <vlc_filter.h>
#include <vlc_charset.h>
#define INITGUID
#include <windows.h>
#include <sapi.h>
#include <sphelper.h>
/* Forward declarations of the module entry points defined further down.
 * Create/Destroy are registered through set_callbacks() in the module
 * descriptor; RenderText is installed as p_filter->pf_render by Create. */
static int Create (vlc_object_t *);
static void Destroy(vlc_object_t *);
/* Arguments, in order: the filter, the output region (unused by the
 * implementation), the input region carrying the text segments, and the
 * chroma list (unused by the implementation). */
static int RenderText(filter_t *,
subpicture_region_t *,
subpicture_region_t *,
const vlc_fourcc_t *);
/* Module descriptor: declares this plugin as a "text renderer" with
 * Create/Destroy as its open/close callbacks. */
vlc_module_begin ()
set_description(N_("Speech synthesis for Windows"))
set_category(CAT_VIDEO)
set_subcategory(SUBCAT_VIDEO_SUBPIC)
/* Capability value 0 -- NOTE(review): presumably means the module is never
 * picked automatically and must be requested explicitly; confirm. */
set_capability("text renderer", 0)
set_callbacks(Create, Destroy)
/* "sapi-voice" is read in Create(): a value greater than -1 is used as an
 * index into the enumerated SAPI voices; the default of -1 leaves the voice
 * untouched. */
add_integer("sapi-voice", -1, "Voice Index", "Voice index", false)
vlc_module_end ()
/* Private state of the filter, allocated in Create() and freed in Destroy(). */
struct filter_sys_t
{
/* SAPI voice object obtained with CoCreateInstance() in Create() and
 * released in Destroy(); RenderText() calls Speak() on it. */
ISpVoice* cpVoice;
/* strdup()'d copy of the last text handed to Speak(), or nullptr; used by
 * RenderText() to skip a segment identical to the previous one. */
char* lastString;
};
/* MTA functions */
/**
 * Initializes COM on the calling thread with the multithreaded concurrency
 * model (COINIT_MULTITHREADED).
 *
 * On failure the HRESULT is logged on the given object.
 *
 * @param obj object used for error logging
 * @return 0 when COM was initialized, -1 otherwise
 */
static int TryEnterMTA(vlc_object_t *obj)
{
    int status = 0;
    HRESULT hr = CoInitializeEx(nullptr, COINIT_MULTITHREADED);

    if (unlikely(FAILED(hr)))
    {
        msg_Err (obj, "cannot initialize COM (error 0x%lx)", hr);
        status = -1;
    }

    return status;
}
/* Convenience wrapper so that callers can pass any VLC object type. */
#define TryEnterMTA(o) TryEnterMTA(VLC_OBJECT(o))
static void EnterMTA(void)
{
HRESULT hr = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
if (unlikely(FAILED(hr)))
abort();
}
/* Uninitializes COM on the calling thread. In this file it is called to
 * balance a preceding successful TryEnterMTA() or EnterMTA(). */
static void LeaveMTA(void)
{
CoUninitialize();
}
/**
 * Module open callback: allocates the filter state, creates the SAPI voice
 * and optionally selects the voice requested through "sapi-voice".
 *
 * Failing to enumerate or select a voice is not fatal: the filter is still
 * created and keeps whatever voice the SpVoice object has at that point.
 * Only a COM initialization failure, an allocation failure or a failure to
 * create the SpVoice object make the function fail.
 *
 * @param p_this the filter object being opened
 * @return VLC_SUCCESS on success, VLC_EGENERIC on failure
 */
static int Create (vlc_object_t *p_this)
{
    filter_t *p_filter = (filter_t *)p_this;
    filter_sys_t *p_sys;
    HRESULT hr;

    if (TryEnterMTA(p_this))
        return VLC_EGENERIC;

    p_filter->p_sys = p_sys = (filter_sys_t*) malloc(sizeof(filter_sys_t));
    if (!p_sys)
        goto error;

    p_sys->cpVoice = nullptr;
    p_sys->lastString = nullptr;

    hr = CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_INPROC_SERVER, IID_ISpVoice, (void**) &p_sys->cpVoice);
    if (SUCCEEDED(hr)) {
        /* These locals live in this nested scope on purpose: the "goto error"
         * statements above must not jump across initialized declarations. */
        ISpObjectToken* cpVoiceToken = nullptr;
        IEnumSpObjectTokens* cpEnum = nullptr;
        ULONG ulCount = 0;

        hr = SpEnumTokens(SPCAT_VOICES, nullptr, nullptr, &cpEnum);
        if (SUCCEEDED(hr))
        {
            // Get the number of voices.
            hr = cpEnum->GetCount(&ulCount);
            if (SUCCEEDED (hr))
            {
                int voiceIndex = var_InheritInteger(p_this, "sapi-voice");
                if (voiceIndex > -1)
                {
                    /* Valid indices are 0 .. ulCount - 1; an index equal to
                     * the count is already out of range. */
                    if ((unsigned)voiceIndex < ulCount) {
                        hr = cpEnum->Item(voiceIndex, &cpVoiceToken);
                        if (SUCCEEDED(hr)) {
                            hr = p_sys->cpVoice->SetVoice(cpVoiceToken);
                            if (SUCCEEDED(hr)) {
                                msg_Dbg(p_this, "Selected voice %d", voiceIndex);
                            }
                            else {
                                msg_Err(p_this, "Failed to set voice %d", voiceIndex);
                            }
                            cpVoiceToken->Release();
                            cpVoiceToken = nullptr;
                        }
                    }
                    else
                        msg_Err(p_this, "Voice index exceeds available count");
                }
            }
            cpEnum->Release();
            cpEnum = nullptr;

            /* Set Output */
            hr = p_sys->cpVoice->SetOutput(nullptr, TRUE);
        }
    }
    else
    {
        msg_Err(p_filter, "Could not create SpVoice");
        goto error;
    }

    LeaveMTA();

    p_filter->pf_render = RenderText;

    return VLC_SUCCESS;

error:
    /* Balance the successful TryEnterMTA() and drop the (possibly null)
     * state; no COM object is held when this label is reached. */
    LeaveMTA();
    free(p_sys);
    return VLC_EGENERIC;
}
/**
 * Module close callback: releases the SAPI voice, the remembered text and
 * the filter state allocated by Create().
 *
 * @param p_this the filter object being closed
 */
static void Destroy(vlc_object_t *p_this)
{
    filter_sys_t *p_sys = ((filter_t *)p_this)->p_sys;

    if (p_sys->cpVoice != nullptr) {
        p_sys->cpVoice->Release();
        p_sys->cpVoice = nullptr;
    }

    /* free() accepts a null pointer, so no guard is needed here. */
    free(p_sys->lastString);
    p_sys->lastString = nullptr;

    free(p_sys);
}
/**
 * Text renderer callback: speaks the text segments of the input region
 * through the SAPI voice instead of drawing them.
 *
 * Segments that are null, empty, a lone "\n", or identical to the last text
 * that was remembered are skipped. Each remaining segment is remembered in
 * p_sys->lastString and queued asynchronously with Speak(SPF_ASYNC).
 *
 * @param p_filter      the filter; its p_sys holds the voice and last text
 * @param p_region_out  unused
 * @param p_region_in   region whose p_text segment list is spoken
 * @param p_chroma_list unused
 * @return VLC_EGENERIC if the region has no text segment, VLC_SUCCESS
 *         otherwise (including when Speak() reports an error, which is
 *         only logged)
 */
static int RenderText(filter_t *p_filter,
                      subpicture_region_t *p_region_out,
                      subpicture_region_t *p_region_in,
                      const vlc_fourcc_t *p_chroma_list)
{
    VLC_UNUSED(p_region_out);
    VLC_UNUSED(p_chroma_list);

    filter_sys_t *p_sys = p_filter->p_sys;
    text_segment_t *p_segment = p_region_in->p_text;

    if (!p_segment)
        return VLC_EGENERIC;

    for (const text_segment_t *s = p_segment; s != nullptr; s = s->p_next ) {
        /* Nothing to say for a missing or empty string. */
        if (!s->psz_text || s->psz_text[0] == '\0')
            continue;

        try {
            /* Do not repeat the text that was spoken last. */
            if (p_sys->lastString && !strcmp(p_sys->lastString, s->psz_text))
                continue;

            if (!strcmp(s->psz_text, "\n"))
                continue;

            /* Release the previously remembered text before replacing it;
             * otherwise every new segment leaks one allocation. */
            free(p_sys->lastString);
            p_sys->lastString = strdup(s->psz_text);
            if (p_sys->lastString) {
                msg_Dbg(p_filter, "Speaking '%s'", s->psz_text);

                EnterMTA();
                wchar_t* wideText = ToWide(s->psz_text);
                HRESULT hr = p_sys->cpVoice->Speak(wideText, SPF_ASYNC, nullptr);
                free(wideText);
                if (!SUCCEEDED(hr)) {
                    msg_Err(p_filter, "Speak() error");
                }
                LeaveMTA();
            }
        }
        catch (...) {
            msg_Err(p_filter, "Caught an exception!");
        }
    }

    return VLC_SUCCESS;
}
......@@ -1063,6 +1063,7 @@ modules/stream_out/transcode/transcode.c
modules/text_renderer/freetype.c
modules/text_renderer/nsspeechsynthesizer.m
modules/text_renderer/quartztext.c
modules/text_renderer/sapi.cpp
modules/text_renderer/svg.c
modules/text_renderer/tdummy.c
modules/text_renderer/win32text.c
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment