diff --git a/modules/MODULES_LIST b/modules/MODULES_LIST index 2743013b104f9ae8483515cdc9405f19c147fe9d..127d2495909b9baefae4abb9a0e9c58c978c0c31 100644 --- a/modules/MODULES_LIST +++ b/modules/MODULES_LIST @@ -385,7 +385,6 @@ $Id$ * stream_out_transcode: audio & video transcoder * subsdec: a codec to output textual subtitles * subsdelay: subtitles delay filter - * substtml: TTML subtitles decoder * substx3g: tx3g styled subtitles decoder * subsusf: a demuxer for USF subtitles * subtitle: a demuxer for subtitle files @@ -408,7 +407,7 @@ $Id$ * trivial_channel_mixer: Simple channel mixer plugin * ts: MPEG-TS demuxer * tta: Lossless True Audio parser - * ttml: a TTML subtitles demuxer + * ttml: a TTML subtitles demuxer and decoder * twolame: a mp1 mp2 audio encoder based on twolame * ty: TY demuxer * udev: udev probing module diff --git a/modules/codec/Makefile.am b/modules/codec/Makefile.am index 985e00bfdf27bf17a4528b147173de8df1c3271b..21549065e611669a4e85d6939e60b5f552136ac1 100644 --- a/modules/codec/Makefile.am +++ b/modules/codec/Makefile.am @@ -215,8 +215,10 @@ codec_LTLIBRARIES += libsubsdec_plugin.la libsubsusf_plugin_la_SOURCES = codec/subsusf.c codec_LTLIBRARIES += libsubsusf_plugin.la -libsubsttml_plugin_la_SOURCES = codec/substtml.c -codec_LTLIBRARIES += libsubsttml_plugin.la +libttml_plugin_la_SOURCES = codec/ttml/substtml.c \ + demux/ttml.c \ + codec/ttml/ttml.h codec/ttml/ttml.c +codec_LTLIBRARIES += libttml_plugin.la libsvcdsub_plugin_la_SOURCES = codec/svcdsub.c codec_LTLIBRARIES += libsvcdsub_plugin.la diff --git a/modules/codec/substtml.c b/modules/codec/substtml.c deleted file mode 100644 index 0e293534343bc13f0d9cd3958a09c5b48fc904ef..0000000000000000000000000000000000000000 --- a/modules/codec/substtml.c +++ /dev/null @@ -1,859 +0,0 @@ -/***************************************************************************** - * substtml.c : TTML subtitles decoder - ***************************************************************************** - * 
Copyright (C) 2015 VLC authors and VideoLAN - * - * Authors: Hugo Beauzée-Luyssen <hugo@beauzee.fr> - * Sushma Reddy <sushma.reddy@research.iiit.ac.in> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. - *****************************************************************************/ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include <vlc_common.h> -#include <vlc_plugin.h> -#include <vlc_modules.h> -#include <vlc_codec.h> -#include <vlc_xml.h> -#include <vlc_stream.h> -#include <vlc_text_style.h> -#include <vlc_charset.h> - -#include "substext.h" - -#include <ctype.h> - -#define ALIGN_TEXT N_("Subtitle justification") -#define ALIGN_LONGTEXT N_("Set the justification of subtitles") - -/***************************************************************************** - * Module descriptor. 
- *****************************************************************************/ -static int OpenDecoder ( vlc_object_t * ); -static void CloseDecoder ( vlc_object_t * ); - -static text_segment_t *ParseTTMLSubtitles( decoder_t *, subpicture_updater_sys_t *, - const uint8_t *, size_t ); - -vlc_module_begin () - set_capability( "decoder", 10 ) - set_shortname( N_("TTML decoder")) - set_description( N_("TTML subtitles decoder") ) - set_callbacks( OpenDecoder, CloseDecoder ) - set_category( CAT_INPUT ) - set_subcategory( SUBCAT_INPUT_SCODEC ) - add_integer( "ttml-align", 0, ALIGN_TEXT, ALIGN_LONGTEXT, false ) -vlc_module_end (); - -/***************************************************************************** - * Local prototypes - *****************************************************************************/ - -typedef struct -{ - char* psz_styleid; - text_style_t* font_style; - int i_align; - int i_margin_h; - int i_margin_v; - int i_margin_percent_h; - int i_margin_percent_v; - int i_direction; - bool b_direction_set; -} ttml_style_t; - -struct decoder_sys_t -{ - int i_align; - ttml_style_t** pp_styles; - size_t i_styles; -}; - -enum -{ - UNICODE_BIDI_LTR = 0, - UNICODE_BIDI_RTL = 1, - UNICODE_BIDI_EMBEDDED = 2, - UNICODE_BIDI_OVERRIDE = 4, -}; - -static int tagnamecmp( char const* tagname, char const* needle ) -{ - if( !strncasecmp( "tt:", tagname, 3 ) ) - tagname += 3; - - return strcasecmp( tagname, needle ); -} - -static void MergeTTMLStyle( ttml_style_t *p_dst, const ttml_style_t *p_src) -{ - text_style_Merge( p_dst->font_style, p_src->font_style, false ); - if( !( p_dst->i_align & SUBPICTURE_ALIGN_MASK ) ) - p_dst->i_align |= p_src->i_align; - - if( !p_dst->i_margin_h ) - p_dst->i_margin_h = p_src->i_margin_h; - - if( !p_dst->i_margin_v ) - p_dst->i_margin_v = p_src->i_margin_v; - - if( !p_dst->i_margin_percent_h ) - p_dst->i_margin_percent_h = p_src->i_margin_percent_h; - - if( !p_dst->i_margin_percent_v ) - p_dst->i_margin_percent_v = 
p_src->i_margin_percent_v; - - if( !p_dst->b_direction_set ) - { - p_dst->i_direction = p_src->i_direction; - p_dst->b_direction_set = p_src->b_direction_set; - } -} - -static ttml_style_t* DuplicateStyle( ttml_style_t* p_style_src ) -{ - ttml_style_t* p_style = calloc( 1, sizeof( *p_style ) ); - if( unlikely( p_style == NULL ) ) - return NULL; - - *p_style = *p_style_src; - p_style->psz_styleid = strdup( p_style_src->psz_styleid ); - if( unlikely( p_style->psz_styleid == NULL ) ) - { - free( p_style ); - return NULL; - } - - p_style->font_style = text_style_Duplicate( p_style_src->font_style ); - if( unlikely( p_style->font_style == NULL ) ) - { - free( p_style->psz_styleid ); - free( p_style ); - return NULL; - } - return p_style; -} - -static void CleanupStyle( ttml_style_t* p_ttml_style ) -{ - text_style_Delete( p_ttml_style->font_style ); - free( p_ttml_style->psz_styleid ); - free( p_ttml_style ); -} - -static ttml_style_t *FindTextStyle( decoder_t *p_dec, const char *psz_style ) -{ - decoder_sys_t *p_sys = p_dec->p_sys; - - for( size_t i = 0; i < p_sys->i_styles; i++ ) - { - if( !strcmp( p_sys->pp_styles[i]->psz_styleid, psz_style ) ) - return DuplicateStyle( p_sys->pp_styles[i] ); - - } - return NULL; -} - -typedef struct style_stack_t -{ - ttml_style_t* p_style; - struct style_stack_t* p_next; -} style_stack_t ; - -static bool PushStyle( style_stack_t **pp_stack, ttml_style_t* p_style ) -{ - style_stack_t* p_entry = malloc( sizeof( *p_entry ) ); - if( unlikely( p_entry == NULL ) ) - return false; - p_entry->p_style = p_style; - p_entry->p_next = *pp_stack; - *pp_stack = p_entry; - return true; -} - -static void PopStyle( style_stack_t** pp_stack ) -{ - if( *pp_stack == NULL ) - return; - style_stack_t* p_next = (*pp_stack)->p_next; - CleanupStyle( (*pp_stack)->p_style ); - free( *pp_stack ); - *pp_stack = p_next; -} - -static void ClearStack( style_stack_t* p_stack ) -{ - while( p_stack != NULL ) - { - style_stack_t* p_next = p_stack->p_next; - 
CleanupStyle( p_stack->p_style ); - free( p_stack ); - p_stack = p_next; - } -} - -static text_style_t* CurrentStyle( style_stack_t* p_stack ) -{ - if( p_stack == NULL ) - return text_style_Create( STYLE_NO_DEFAULTS ); - - return text_style_Duplicate( p_stack->p_style->font_style ); -} - -static ttml_style_t* ParseTTMLStyle( decoder_t *p_dec, xml_reader_t* p_reader, const char* psz_node_name ) -{ - decoder_sys_t* p_sys = p_dec->p_sys; - ttml_style_t *p_ttml_style = NULL; - ttml_style_t *p_base_style = NULL; - - p_ttml_style = calloc( 1, sizeof( ttml_style_t ) ); - if( unlikely( !p_ttml_style ) ) - return NULL; - - p_ttml_style->font_style = text_style_Create( STYLE_NO_DEFAULTS ); - if( unlikely( !p_ttml_style->font_style ) ) - { - free( p_ttml_style ); - return NULL; - } - - const char *attr, *val; - - while( (attr = xml_ReaderNextAttr( p_reader, &val ) ) ) - { - /* searching previous styles for inheritence */ - if( !strcasecmp( attr, "style" ) || !strcasecmp( attr, "region" ) ) - { - if( !tagnamecmp( psz_node_name, "style" ) || !tagnamecmp( psz_node_name, "region" ) ) - { - for( size_t i = 0; i < p_sys->i_styles; i++ ) - { - if( !strcasecmp( p_sys->pp_styles[i]->psz_styleid, val ) ) - { - p_base_style = p_sys->pp_styles[i]; - break; - } - } - } - /* - * In p nodes, style attribute has this format : - * style="style1 style2 style3" where style1 and style2 are - * style applied on the parents of p in that order. - * - * In span node, we can apply several styles in the same order than - * in p nodes with the same inheritance order. - * - * In order to preserve this style predominance, we merge the styles - * in the from right to left ( the right one being predominant ) . 
- */ - else if( !tagnamecmp( psz_node_name, "p" ) || !tagnamecmp( psz_node_name, "span" ) ) - { - char *tmp; - char *value = strdup( val ); - if( unlikely( value == NULL ) ) - { - CleanupStyle( p_ttml_style ); - return NULL; - } - - char *token = strtok_r( value , " ", &tmp ); - - if( token == NULL ) - { - msg_Warn( p_dec, "No IDREF specified in attribute " - "'%s' on tag '%s', ignoring.", attr, - psz_node_name ); - free( value ); - continue; - } - - ttml_style_t* p_style = FindTextStyle( p_dec, token ); - if( p_style == NULL ) - { - msg_Warn( p_dec, "IDREF '%s' in '%s' not found", token, attr ); - free( value ); - break; - } - - while( ( token = strtok_r( NULL, " ", &tmp) ) != NULL ) - { - ttml_style_t* p_next_style = FindTextStyle( p_dec, token ); - if( p_next_style == NULL ) - { - msg_Warn( p_dec, "IDREF '%s' in '%s' not found", token, attr ); - break; - } - MergeTTMLStyle( p_next_style, p_style ); - CleanupStyle( p_style ); - p_style = p_next_style; - } - MergeTTMLStyle( p_style, p_ttml_style ); - free( value ); - CleanupStyle( p_ttml_style ); - p_ttml_style = p_style; - } - else - { - ttml_style_t* p_style = FindTextStyle( p_dec, val ); - if( p_style == NULL ) - { - msg_Warn( p_dec, "IDREF '%s' in '%s' not found", val, attr ); - break; - } - MergeTTMLStyle( p_style , p_ttml_style ); - CleanupStyle( p_ttml_style ); - p_ttml_style = p_style; - } - } - else if( !strcasecmp( "xml:id", attr ) ) - { - free( p_ttml_style->psz_styleid ); - p_ttml_style->psz_styleid = strdup( val ); - } - else if( !strcasecmp ( "tts:fontFamily", attr ) ) - { - free( p_ttml_style->font_style->psz_fontname ); - p_ttml_style->font_style->psz_fontname = strdup( val ); - if( unlikely( p_ttml_style->font_style->psz_fontname == NULL ) ) - { - CleanupStyle( p_ttml_style ); - return NULL; - } - } - else if( !strcasecmp( "tts:opacity", attr ) ) - { - p_ttml_style->font_style->i_background_alpha = atoi( val ); - p_ttml_style->font_style->i_font_alpha = atoi( val ); - 
p_ttml_style->font_style->i_features |= STYLE_HAS_BACKGROUND_ALPHA | STYLE_HAS_FONT_ALPHA; - } - else if( !strcasecmp( "tts:fontSize", attr ) ) - { - char* psz_end = NULL; - float size = us_strtof( val, &psz_end ); - if( *psz_end == '%' ) - p_ttml_style->font_style->f_font_relsize = size; - else - p_ttml_style->font_style->i_font_size = (int)( size + 0.5 ); - } - else if( !strcasecmp( "tts:color", attr ) ) - { - unsigned int i_color = vlc_html_color( val, NULL ); - p_ttml_style->font_style->i_font_color = (i_color & 0xffffff); - p_ttml_style->font_style->i_font_alpha = (i_color & 0xFF000000) >> 24; - p_ttml_style->font_style->i_features |= STYLE_HAS_FONT_COLOR | STYLE_HAS_FONT_ALPHA; - } - else if( !strcasecmp( "tts:backgroundColor", attr ) ) - { - unsigned int i_color = vlc_html_color( val, NULL ); - p_ttml_style->font_style->i_background_color = i_color & 0xFFFFFF; - p_ttml_style->font_style->i_background_alpha = (i_color & 0xFF000000) >> 24; - p_ttml_style->font_style->i_features |= STYLE_HAS_BACKGROUND_COLOR - | STYLE_HAS_BACKGROUND_ALPHA; - p_ttml_style->font_style->i_style_flags |= STYLE_BACKGROUND; - } - else if( !strcasecmp( "tts:textAlign", attr ) ) - { - if( !strcasecmp ( "left", val ) ) - p_ttml_style->i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_LEFT; - else if( !strcasecmp ( "right", val ) ) - p_ttml_style->i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_RIGHT; - else if( !strcasecmp ( "center", val ) ) - p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM; - else if( !strcasecmp ( "start", val ) ) - p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT; - else if( !strcasecmp ( "end", val ) ) - p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT; - } - else if( !strcasecmp( "tts:fontStyle", attr ) ) - { - if( !strcasecmp ( "italic", val ) || !strcasecmp ( "oblique", val ) ) - p_ttml_style->font_style->i_style_flags |= STYLE_ITALIC; - else - p_ttml_style->font_style->i_style_flags &= ~STYLE_ITALIC; - 
p_ttml_style->font_style->i_features |= STYLE_HAS_FLAGS; - } - else if( !strcasecmp ( "tts:fontWeight", attr ) ) - { - if( !strcasecmp ( "bold", val ) ) - p_ttml_style->font_style->i_style_flags |= STYLE_BOLD; - else - p_ttml_style->font_style->i_style_flags &= ~STYLE_BOLD; - p_ttml_style->font_style->i_features |= STYLE_HAS_FLAGS; - } - else if( !strcasecmp ( "tts:textDecoration", attr ) ) - { - if( !strcasecmp ( "underline", val ) ) - p_ttml_style->font_style->i_style_flags |= STYLE_UNDERLINE; - else if( !strcasecmp ( "noUnderline", val ) ) - p_ttml_style->font_style->i_style_flags &= ~STYLE_UNDERLINE; - if( !strcasecmp ( "lineThrough", val ) ) - p_ttml_style->font_style->i_style_flags |= STYLE_STRIKEOUT; - else if( !strcasecmp ( "noLineThrough", val ) ) - p_ttml_style->font_style->i_style_flags &= ~STYLE_STRIKEOUT; - p_ttml_style->font_style->i_features |= STYLE_HAS_FLAGS; - } - else if( !strcasecmp ( "tts:origin", attr ) ) - { - const char *psz_token = val; - while( isspace( *psz_token ) ) - psz_token++; - - const char *psz_separator = strchr( psz_token, ' ' ); - if( psz_separator == NULL ) - { - msg_Warn( p_dec, "Invalid origin attribute: \"%s\"", val ); - continue; - } - const char *psz_percent_sign = strchr( psz_token, '%' ); - - if( psz_percent_sign != NULL && psz_percent_sign < psz_separator ) - { - p_ttml_style->i_margin_h = 0; - p_ttml_style->i_margin_percent_h = atoi( psz_token ); - } - else - { - p_ttml_style->i_margin_h = atoi( psz_token ); - p_ttml_style->i_margin_percent_h = 0; - } - while( isspace( *psz_separator ) ) - psz_separator++; - psz_token = psz_separator; - psz_percent_sign = strchr( psz_token, '%' ); - if( psz_percent_sign != NULL ) - { - p_ttml_style->i_margin_v = 0; - p_ttml_style->i_margin_percent_v = atoi( val ); - } - else - { - p_ttml_style->i_margin_v = atoi( val ); - p_ttml_style->i_margin_percent_v = 0; - } - } - else if( !strcasecmp( "tts:textOutline", attr ) ) - { - char *value = strdup( val ); - char* psz_saveptr = NULL; - 
char* token = strtok_r( value, " ", &psz_saveptr ); - // <color>? <length> <length>? - bool b_ok = false; - unsigned int color = vlc_html_color( token, &b_ok ); - if( b_ok ) - { - p_ttml_style->font_style->i_outline_color = color & 0xFFFFFF; - p_ttml_style->font_style->i_outline_alpha = (color & 0xFF000000) >> 24; - token = strtok_r( NULL, " ", &psz_saveptr ); - } - char* psz_end = NULL; - int i_outline_width = strtol( token, &psz_end, 10 ); - if( psz_end != token ) - { - // Assume unit is pixel, and ignore border radius - p_ttml_style->font_style->i_outline_width = i_outline_width; - } - free( value ); - } - else if( !strcasecmp( "tts:direction", attr ) ) - { - if( !strcasecmp( "rtl", val ) ) - { - p_ttml_style->i_direction |= UNICODE_BIDI_RTL; - p_ttml_style->b_direction_set = true; - } - else if( !strcasecmp( "ltr", val ) ) - { - p_ttml_style->i_direction |= UNICODE_BIDI_LTR; - p_ttml_style->b_direction_set = true; - } - } - else if( !strcasecmp( "tts:unicodeBidi", attr ) ) - { - if( !strcasecmp( "bidiOverride", val ) ) - p_ttml_style->i_direction |= UNICODE_BIDI_OVERRIDE & ~UNICODE_BIDI_EMBEDDED; - else if( !strcasecmp( "embed", val ) ) - p_ttml_style->i_direction |= UNICODE_BIDI_EMBEDDED & ~UNICODE_BIDI_OVERRIDE; - } - else if( !strcasecmp( "tts:writingMode", attr ) ) - { - if( !strcasecmp( "rl", val ) || !strcasecmp( "rltb", val ) ) - { - p_ttml_style->i_direction = UNICODE_BIDI_RTL | UNICODE_BIDI_OVERRIDE; - p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT; - p_ttml_style->b_direction_set = true; - } - else if( !strcasecmp( "lr", val ) || !strcasecmp( "lrtb", val ) ) - { - p_ttml_style->i_direction = UNICODE_BIDI_LTR | UNICODE_BIDI_OVERRIDE; - p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT; - p_ttml_style->b_direction_set = true; - } - } - } - if( p_base_style != NULL ) - { - MergeTTMLStyle( p_ttml_style, p_base_style ); - } - if( p_ttml_style->psz_styleid == NULL ) - { - CleanupStyle( p_ttml_style ); - return 
NULL; - } - return p_ttml_style; -} - -static void ParseTTMLStyles( decoder_t* p_dec ) -{ - stream_t* p_stream = vlc_stream_MemoryNew( p_dec, (uint8_t*)p_dec->fmt_in.p_extra, p_dec->fmt_in.i_extra, true ); - if( unlikely( p_stream == NULL ) ) - return ; - - xml_reader_t* p_reader = xml_ReaderCreate( p_dec, p_stream ); - if( unlikely( p_reader == NULL ) ) - { - vlc_stream_Delete( p_stream ); - return ; - } - const char* psz_node_name; - int i_type = xml_ReaderNextNode( p_reader, &psz_node_name ); - - if( i_type == XML_READER_STARTELEM && !tagnamecmp( psz_node_name, "tt" ) ) - { - int i_type = xml_ReaderNextNode( p_reader, &psz_node_name ); - - while( i_type != XML_READER_STARTELEM || tagnamecmp( psz_node_name, "head" ) ) - i_type = xml_ReaderNextNode( p_reader, &psz_node_name ); - - do - { - /* region and style tag are respectively inside layout and styling tags */ - if( !tagnamecmp( psz_node_name, "styling" ) || !tagnamecmp( psz_node_name, "layout" ) ) - { - i_type = xml_ReaderNextNode( p_reader, &psz_node_name ); - while( i_type != XML_READER_ENDELEM ) - { - ttml_style_t* p_ttml_style = ParseTTMLStyle( p_dec, p_reader, psz_node_name ); - if ( p_ttml_style == NULL ) - { - xml_ReaderDelete( p_reader ); - vlc_stream_Delete( p_stream ); - return; - } - decoder_sys_t* p_sys = p_dec->p_sys; - TAB_APPEND( p_sys->i_styles, p_sys->pp_styles, p_ttml_style ); - i_type = xml_ReaderNextNode( p_reader, &psz_node_name ); - } - } - i_type = xml_ReaderNextNode( p_reader, &psz_node_name ); - }while( i_type != XML_READER_ENDELEM || tagnamecmp( psz_node_name, "head" ) ); - } - xml_ReaderDelete( p_reader ); - vlc_stream_Delete( p_stream ); -} - -static text_segment_t *ParseTTMLSubtitles( decoder_t *p_dec, subpicture_updater_sys_t *p_update_sys, - const uint8_t *p_buffer, size_t i_buffer ) -{ - stream_t* p_sub = NULL; - xml_reader_t* p_xml_reader = NULL; - text_segment_t* p_first_segment = NULL; - text_segment_t* p_current_segment = NULL; - style_stack_t* p_style_stack = NULL; - 
ttml_style_t* p_style = NULL; - - p_sub = vlc_stream_MemoryNew( p_dec, (uint8_t*) p_buffer, i_buffer, true ); - if( unlikely( p_sub == NULL ) ) - return NULL; - - p_xml_reader = xml_ReaderCreate( p_dec, p_sub ); - if( unlikely( p_xml_reader == NULL ) ) - { - vlc_stream_Delete( p_sub ); - return NULL; - } - - const char *node; - int i_type; - - i_type = xml_ReaderNextNode( p_xml_reader, &node ); - while( i_type != XML_READER_NONE && i_type > 0 ) - { - /* - * We parse the styles and put them on the style stack - * until we reach a text node. - */ - if( i_type == XML_READER_STARTELEM && ( !tagnamecmp( node, "p") || !tagnamecmp( node, "span" ) ) ) - { - p_style = ParseTTMLStyle( p_dec, p_xml_reader, node ); - if( unlikely( p_style == NULL ) ) - goto fail; - - if( p_style_stack != NULL && p_style_stack->p_style != NULL ) - MergeTTMLStyle( p_style, p_style_stack->p_style ); - - if( PushStyle( &p_style_stack, p_style ) == false ) - { - CleanupStyle( p_style ); - goto fail; - } - - } - else if( i_type == XML_READER_TEXT ) - { - /* - * Once we have a text node, we create a segment, apply the - * latest style put on the style stack and fill it with the - * content of the node. 
- */ - text_segment_t* p_segment = text_segment_New( NULL ); - if( unlikely( p_segment == NULL ) ) - goto fail; - - p_segment->psz_text = strdup( node ); - if( unlikely( p_segment->psz_text == NULL ) ) - { - text_segment_Delete( p_segment ); - goto fail; - } - - vlc_xml_decode( p_segment->psz_text ); - if( p_segment->style == NULL && p_style_stack == NULL ) - { - p_segment->style = text_style_Create( STYLE_NO_DEFAULTS ); - } - else if( p_segment->style == NULL ) - { - p_segment->style = CurrentStyle( p_style_stack ); - if( p_segment->style->f_font_relsize && !p_segment->style->i_font_size ) - p_segment->style->i_font_size = (int)( ( p_segment->style->f_font_relsize * STYLE_DEFAULT_FONT_SIZE / 100 ) + 0.5 ); - - if( p_style_stack->p_style->i_margin_h ) - p_update_sys->x = p_style_stack->p_style->i_margin_h; - else - p_update_sys->x = p_style_stack->p_style->i_margin_percent_h; - - if( p_style_stack->p_style->i_margin_v ) - p_update_sys->y = p_style_stack->p_style->i_margin_v; - else - p_update_sys->y = p_style_stack->p_style->i_margin_percent_v; - - p_update_sys->align |= p_style_stack->p_style->i_align; - /* - * For bidirectionnal support, we use different enum - * to recognize different cases, en then we add the - * corresponding unicode character to the text of - * the text_segment. 
- */ - int i_direction = p_style_stack->p_style->i_direction; - static const struct - { - const char* psz_uni_start; - const char* psz_uni_end; - }p_bidi[] = { - { "\u2066", "\u2069" }, - { "\u2067", "\u2069" }, - { "\u202A", "\u202C" }, - { "\u202B", "\u202C" }, - { "\u202D", "\u202C" }, - { "\u202E", "\u202C" }, - }; - if( p_style_stack->p_style->b_direction_set ) - { - char* psz_text = NULL; - if( asprintf( &psz_text, "%s%s%s", p_bidi[i_direction].psz_uni_start, p_segment->psz_text, p_bidi[i_direction].psz_uni_end ) < 0 ) - { - text_segment_Delete( p_segment ); - goto fail; - } - - free( p_segment->psz_text ); - p_segment->psz_text = psz_text; - } - } - if( p_first_segment == NULL ) - { - p_first_segment = p_segment; - p_current_segment = p_segment; - } - else if( p_current_segment->psz_text != NULL ) - { - p_current_segment->p_next = p_segment; - p_current_segment = p_segment; - } - else - { - /* - * If p_first_segment isn't NULL but p_current_segment->psz_text is NULL - * this means that something went wrong in the decoding of the - * first segment text: - * - * Indeed, to allocate p_first_segment ( aka non NULL ), we must have - * - i_type == XML_READER_TEXT - * - passed the allocation of p_segment->psz_text without any error - * - * This would mean that vlc_xml_decode failed and p_first_segment->psz_text - * is NULL. 
- */ - text_segment_Delete( p_segment ); - goto fail; - } - } - else if( i_type == XML_READER_ENDELEM && !tagnamecmp( node, "span" ) ) - { - if( p_style_stack->p_next ) - PopStyle( &p_style_stack); - } - else if( i_type == XML_READER_ENDELEM && !tagnamecmp( node, "p" ) ) - { - PopStyle( &p_style_stack ); - p_current_segment->p_next = NULL; - } - else if( i_type == XML_READER_STARTELEM && !strcasecmp( node, "br" ) ) - { - if( p_current_segment != NULL && p_current_segment->psz_text != NULL ) - { - char* psz_text = NULL; - if( asprintf( &psz_text, "%s\n", p_current_segment->psz_text ) != -1 ) - { - free( p_current_segment->psz_text ); - p_current_segment->psz_text = psz_text; - } - } - } - i_type = xml_ReaderNextNode( p_xml_reader, &node ); - } - ClearStack( p_style_stack ); - xml_ReaderDelete( p_xml_reader ); - vlc_stream_Delete( p_sub ); - - return p_first_segment; - -fail: - text_segment_ChainDelete( p_first_segment ); - ClearStack( p_style_stack ); - xml_ReaderDelete( p_xml_reader ); - vlc_stream_Delete( p_sub ); - return NULL; -} - -static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) -{ - decoder_sys_t *p_sys = p_dec->p_sys; - subpicture_t *p_spu = NULL; - - if( p_block->i_flags & BLOCK_FLAG_CORRUPTED ) - return NULL; - - /* We cannot display a subpicture with no date */ - if( p_block->i_pts <= VLC_TS_INVALID ) - { - msg_Warn( p_dec, "subtitle without a date" ); - return NULL; - } - - /* Check validity of packet data */ - /* An "empty" line containing only \0 can be used to force - and ephemer picture from the screen */ - - if( p_block->i_buffer < 1 ) - { - msg_Warn( p_dec, "no subtitle data" ); - return NULL; - } - - /* Create the subpicture unit */ - p_spu = decoder_NewSubpictureText( p_dec ); - if( !p_spu ) - return NULL; - - p_spu->i_start = p_block->i_pts; - p_spu->i_stop = p_block->i_pts + p_block->i_length; - p_spu->b_ephemer = (p_block->i_length == 0); - p_spu->b_absolute = false; - - subpicture_updater_sys_t *p_spu_sys = 
p_spu->updater.p_sys; - - p_spu_sys->align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align; - p_spu_sys->p_segments = ParseTTMLSubtitles( p_dec, p_spu_sys, p_block->p_buffer, p_block->i_buffer ); - - return p_spu; -} - - - -/**************************************************************************** - * DecodeBlock: the whole thing - ****************************************************************************/ -static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block ) -{ - if( !pp_block || *pp_block == NULL ) - return NULL; - - block_t* p_block = *pp_block; - subpicture_t *p_spu = ParseText( p_dec, p_block ); - - block_Release( p_block ); - *pp_block = NULL; - - return p_spu; -} - -/***************************************************************************** - * OpenDecoder: probe the decoder and return score - *****************************************************************************/ -static int OpenDecoder( vlc_object_t *p_this ) -{ - decoder_t *p_dec = (decoder_t*)p_this; - decoder_sys_t *p_sys; - - if( p_dec->fmt_in.i_codec != VLC_CODEC_TTML ) - return VLC_EGENERIC; - - /* Allocate the memory needed to store the decoder's structure */ - p_dec->p_sys = p_sys = calloc( 1, sizeof( *p_sys ) ); - if( unlikely( p_sys == NULL ) ) - return VLC_ENOMEM; - - if( p_dec->fmt_in.p_extra != NULL && p_dec->fmt_in.i_extra > 0 ) - ParseTTMLStyles( p_dec ); - - p_dec->pf_decode_sub = DecodeBlock; - p_dec->fmt_out.i_cat = SPU_ES; - p_sys->i_align = var_InheritInteger( p_dec, "ttml-align" ); - - return VLC_SUCCESS; -} - -/***************************************************************************** - * CloseDecoder: clean up the decoder - *****************************************************************************/ -static void CloseDecoder( vlc_object_t *p_this ) -{ - decoder_t *p_dec = (decoder_t *)p_this; - decoder_sys_t *p_sys = p_dec->p_sys; - - for( size_t i = 0; i < p_sys->i_styles; ++i ) - { - free( p_sys->pp_styles[i]->psz_styleid ); - text_style_Delete( 
p_sys->pp_styles[i]->font_style ); - free( p_sys->pp_styles[i] ); - } - TAB_CLEAN( p_sys->i_styles, p_sys->pp_styles ); - - free( p_sys ); -} diff --git a/modules/codec/ttml/substtml.c b/modules/codec/ttml/substtml.c new file mode 100644 index 0000000000000000000000000000000000000000..316cd1db7bc9f3bc6b83babb46f878a7102e23eb --- /dev/null +++ b/modules/codec/ttml/substtml.c @@ -0,0 +1,699 @@ +/***************************************************************************** + * substtml.c : TTML subtitles decoder + ***************************************************************************** + * Copyright (C) 2015-2017 VLC authors and VideoLAN + * + * Authors: Hugo Beauzée-Luyssen <hugo@beauzee.fr> + * Sushma Reddy <sushma.reddy@research.iiit.ac.in> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. 
+ *****************************************************************************/ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include <vlc_common.h> +#include <vlc_codec.h> +#include <vlc_xml.h> +#include <vlc_stream.h> +#include <vlc_text_style.h> +#include <vlc_charset.h> + +#include <ctype.h> +#include <assert.h> + +#include "substext.h" +#include "ttml.h" + +/***************************************************************************** + * Local prototypes + *****************************************************************************/ + +typedef struct +{ + char* psz_styleid; + text_style_t* font_style; + int i_align; + int i_margin_h; + int i_margin_v; + int i_margin_percent_h; + int i_margin_percent_v; + int i_direction; + bool b_direction_set; +} ttml_style_t; + +typedef struct +{ + tt_node_t * p_rootnode; /* for now. FIXME: split header */ +} ttml_context_t; + +struct decoder_sys_t +{ + int i_align; +}; + +enum +{ + UNICODE_BIDI_LTR = 0, + UNICODE_BIDI_RTL = 1, + UNICODE_BIDI_EMBEDDED = 2, + UNICODE_BIDI_OVERRIDE = 4, +}; + +static text_segment_t *ParseTTML( decoder_t *, const uint8_t *, size_t, + ttml_style_t ** ); + +static void ttml_style_Delete( ttml_style_t* p_ttml_style ) +{ + text_style_Delete( p_ttml_style->font_style ); + free( p_ttml_style->psz_styleid ); + free( p_ttml_style ); +} + +static ttml_style_t * ttml_style_New( ) +{ + ttml_style_t *p_ttml_style = calloc( 1, sizeof( ttml_style_t ) ); + if( unlikely( !p_ttml_style ) ) + return NULL; + + p_ttml_style->font_style = text_style_Create( STYLE_NO_DEFAULTS ); + if( unlikely( !p_ttml_style->font_style ) ) + { + free( p_ttml_style ); + return NULL; + } + return p_ttml_style; +} + +static tt_node_t * FindNode( tt_node_t *p_node, const char *psz_nodename, + size_t i_maxdepth, const char *psz_id ) +{ + if( !tt_node_NameCompare( p_node->psz_node_name, psz_nodename ) ) + { + if( psz_id != NULL ) + { + char *psz = vlc_dictionary_value_for_key( &p_node->attr_dict, "xml:id" ); + if( psz && 
!strcmp( psz, psz_id ) ) + return p_node; + } + else return p_node; + } + + if( i_maxdepth == 0 ) + return NULL; + + for( tt_basenode_t *p_child = p_node->p_child; + p_child; p_child = p_child->p_next ) + { + if( p_child->i_type == TT_NODE_TYPE_TEXT ) + continue; + + p_node = FindNode( (tt_node_t *) p_child, psz_nodename, i_maxdepth - 1, psz_id ); + if( p_node ) + return p_node; + } + + return NULL; +} + +static void FillTextStyle( const char *psz_attr, const char *psz_val, + text_style_t *p_text_style ) +{ + if( !strcasecmp ( "tts:fontFamily", psz_attr ) ) + { + free( p_text_style->psz_fontname ); + p_text_style->psz_fontname = strdup( psz_val ); + } + else if( !strcasecmp( "tts:opacity", psz_attr ) ) + { + p_text_style->i_background_alpha = atoi( psz_val ); + p_text_style->i_font_alpha = atoi( psz_val ); + p_text_style->i_features |= STYLE_HAS_BACKGROUND_ALPHA | STYLE_HAS_FONT_ALPHA; + } + else if( !strcasecmp( "tts:fontSize", psz_attr ) ) + { + char* psz_end = NULL; + float size = us_strtof( psz_val, &psz_end ); + if( *psz_end == '%' ) + p_text_style->f_font_relsize = size; + else + p_text_style->i_font_size = (int)( size + 0.5 ); + } + else if( !strcasecmp( "tts:color", psz_attr ) ) + { + unsigned int i_color = vlc_html_color( psz_val, NULL ); + p_text_style->i_font_color = (i_color & 0xffffff); + p_text_style->i_font_alpha = (i_color & 0xFF000000) >> 24; + p_text_style->i_features |= STYLE_HAS_FONT_COLOR | STYLE_HAS_FONT_ALPHA; + } + else if( !strcasecmp( "tts:backgroundColor", psz_attr ) ) + { + unsigned int i_color = vlc_html_color( psz_val, NULL ); + p_text_style->i_background_color = i_color & 0xFFFFFF; + p_text_style->i_background_alpha = (i_color & 0xFF000000) >> 24; + p_text_style->i_features |= STYLE_HAS_BACKGROUND_COLOR + | STYLE_HAS_BACKGROUND_ALPHA; + p_text_style->i_style_flags |= STYLE_BACKGROUND; + } + else if( !strcasecmp( "tts:fontStyle", psz_attr ) ) + { + if( !strcasecmp ( "italic", psz_val ) || !strcasecmp ( "oblique", psz_val ) ) + 
p_text_style->i_style_flags |= STYLE_ITALIC; + else + p_text_style->i_style_flags &= ~STYLE_ITALIC; + p_text_style->i_features |= STYLE_HAS_FLAGS; + } + else if( !strcasecmp ( "tts:fontWeight", psz_attr ) ) + { + if( !strcasecmp ( "bold", psz_val ) ) + p_text_style->i_style_flags |= STYLE_BOLD; + else + p_text_style->i_style_flags &= ~STYLE_BOLD; + p_text_style->i_features |= STYLE_HAS_FLAGS; + } + else if( !strcasecmp ( "tts:textDecoration", psz_attr ) ) + { + if( !strcasecmp ( "underline", psz_val ) ) + p_text_style->i_style_flags |= STYLE_UNDERLINE; + else if( !strcasecmp ( "noUnderline", psz_val ) ) + p_text_style->i_style_flags &= ~STYLE_UNDERLINE; + if( !strcasecmp ( "lineThrough", psz_val ) ) + p_text_style->i_style_flags |= STYLE_STRIKEOUT; + else if( !strcasecmp ( "noLineThrough", psz_val ) ) + p_text_style->i_style_flags &= ~STYLE_STRIKEOUT; + p_text_style->i_features |= STYLE_HAS_FLAGS; + } + else if( !strcasecmp( "tts:textOutline", psz_attr ) ) + { + char *value = strdup( psz_val ); + char* psz_saveptr = NULL; + char* token = strtok_r( value, " ", &psz_saveptr ); + // <color>? <length> <length>? 
+ /* token is NULL when the value is empty or only spaces, and again after a lone color: never hand NULL to the parsers */ + bool b_ok = false; + unsigned int color = token ? vlc_html_color( token, &b_ok ) : 0; + if( b_ok ) + { + p_text_style->i_outline_color = color & 0xFFFFFF; + p_text_style->i_outline_alpha = (color & 0xFF000000) >> 24; + token = strtok_r( NULL, " ", &psz_saveptr ); + } + if( token != NULL ) + { + char* psz_end = NULL; + int i_outline_width = strtol( token, &psz_end, 10 ); + if( psz_end != token ) + { + /* Assume unit is pixel, and ignore border radius */ + p_text_style->i_outline_width = i_outline_width; + } + } + free( value ); + } +} +/* Applies one TTML attribute to p_ttml_style: tts:textAlign, tts:origin, tts:direction, tts:unicodeBidi and tts:writingMode are handled here, every other attribute is forwarded to FillTextStyle() for the font style. */ +static void FillTTMLStyle( const char *psz_attr, const char *psz_val, + ttml_style_t *p_ttml_style ) +{ + if( !strcasecmp( "tts:textAlign", psz_attr ) ) + { + if( !strcasecmp ( "left", psz_val ) ) + p_ttml_style->i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_LEFT; + else if( !strcasecmp ( "right", psz_val ) ) + p_ttml_style->i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_RIGHT; + else if( !strcasecmp ( "center", psz_val ) ) + p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM; + else if( !strcasecmp ( "start", psz_val ) ) + p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT; + else if( !strcasecmp ( "end", psz_val ) ) + p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT; + } + else if( !strcasecmp ( "tts:origin", psz_attr ) ) + { + /* Value is "<horizontal> <vertical>"; each part is a percentage when it carries a '%' sign. The <ctype.h> classifiers need an unsigned char value, hence the casts */ + const char *psz_token = psz_val; + while( isspace( (unsigned char) *psz_token ) ) + psz_token++; + + const char *psz_separator = strchr( psz_token, ' ' ); + if( psz_separator == NULL ) + return; + const char *psz_percent_sign = strchr( psz_token, '%' ); + + if( psz_percent_sign != NULL && psz_percent_sign < psz_separator ) + { + p_ttml_style->i_margin_h = 0; + p_ttml_style->i_margin_percent_h = atoi( psz_token ); + } + else + { + p_ttml_style->i_margin_h = atoi( psz_token ); + p_ttml_style->i_margin_percent_h = 0; + } + while( isspace( (unsigned char) *psz_separator ) ) + psz_separator++; + psz_token = psz_separator; + psz_percent_sign = strchr( psz_token, '%' ); + if( psz_percent_sign != NULL ) + { + p_ttml_style->i_margin_v = 0; + 
p_ttml_style->i_margin_percent_v = atoi( psz_token ); /* vertical value: parse the second token, not the start of the attribute */ + } + else + { + p_ttml_style->i_margin_v = atoi( psz_token ); + p_ttml_style->i_margin_percent_v = 0; + } + } + else if( !strcasecmp( "tts:direction", psz_attr ) ) + { + if( !strcasecmp( "rtl", psz_val ) ) + { + p_ttml_style->i_direction |= UNICODE_BIDI_RTL; + p_ttml_style->b_direction_set = true; + } + else if( !strcasecmp( "ltr", psz_val ) ) + { + p_ttml_style->i_direction |= UNICODE_BIDI_LTR; + p_ttml_style->b_direction_set = true; + } + } + else if( !strcasecmp( "tts:unicodeBidi", psz_attr ) ) + { + /* Override and embed are mutually exclusive: set one bit and clear the other ("|= A & ~B" only ever set A) */ + if( !strcasecmp( "bidiOverride", psz_val ) ) + p_ttml_style->i_direction = ( p_ttml_style->i_direction | UNICODE_BIDI_OVERRIDE ) & ~UNICODE_BIDI_EMBEDDED; + else if( !strcasecmp( "embed", psz_val ) ) + p_ttml_style->i_direction = ( p_ttml_style->i_direction | UNICODE_BIDI_EMBEDDED ) & ~UNICODE_BIDI_OVERRIDE; + } + else if( !strcasecmp( "tts:writingMode", psz_attr ) ) + { + if( !strcasecmp( "rl", psz_val ) || !strcasecmp( "rltb", psz_val ) ) + { + p_ttml_style->i_direction = UNICODE_BIDI_RTL | UNICODE_BIDI_OVERRIDE; + p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT; + p_ttml_style->b_direction_set = true; + } + else if( !strcasecmp( "lr", psz_val ) || !strcasecmp( "lrtb", psz_val ) ) + { + p_ttml_style->i_direction = UNICODE_BIDI_LTR | UNICODE_BIDI_OVERRIDE; + p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT; + p_ttml_style->b_direction_set = true; + } + } + else FillTextStyle( psz_attr, psz_val, p_ttml_style->font_style ); +} +/* Copies every entry of p_src whose key starts with "tts:" into p_dst unless p_dst already has that key, so values inserted earlier win. Values are inserted by pointer, not duplicated. */ +static void DictionnaryMerge( const vlc_dictionary_t *p_src, vlc_dictionary_t *p_dst ) +{ + for( int i = 0; i < p_src->i_size; ++i ) + { + for ( const vlc_dictionary_entry_t* p_entry = p_src->p_entries[i]; + p_entry != NULL; p_entry = p_entry->p_next ) + { + if( !strncmp( "tts:", p_entry->psz_key, 4 ) && + !vlc_dictionary_has_key( p_dst, p_entry->psz_key ) ) + vlc_dictionary_insert( p_dst, p_entry->psz_key, p_entry->p_value ); + } + } +} +/* Merges the tts: attributes of the "style" element whose xml:id is psz_id into p_dst. */ +static void DictMergeWithStyleID( ttml_context_t *p_ctx, const 
char *psz_id, + vlc_dictionary_t *p_dst ) +{ + assert(p_ctx->p_rootnode); + if( psz_id && p_ctx->p_rootnode ) + { + /* Lookup referenced style ID */ + const tt_node_t *p_node = FindNode( p_ctx->p_rootnode, + "style", -1, psz_id ); + if( p_node ) + DictionnaryMerge( &p_node->attr_dict, p_dst ); + } +} +/* Merges the styling of the "region" element whose xml:id is psz_id into p_dst: first the attributes of its "style" children, then the region's own attributes. */ +static void DictMergeWithRegionID( ttml_context_t *p_ctx, const char *psz_id, + vlc_dictionary_t *p_dst ) +{ + assert(p_ctx->p_rootnode); + if( psz_id && p_ctx->p_rootnode ) + { + const tt_node_t *p_regionnode = FindNode( p_ctx->p_rootnode, + "region", -1, psz_id ); + if( !p_regionnode ) + return; + + /* First fill with style elements */ + for( const tt_basenode_t *p_child = p_regionnode->p_child; + p_child; p_child = p_child->p_next ) + { + if( unlikely( p_child->i_type == TT_NODE_TYPE_TEXT ) ) + continue; + + const tt_node_t *p_node = (const tt_node_t *) p_child; + if( !tt_node_NameCompare( p_node->psz_node_name, "style" ) ) + { + DictionnaryMerge( &p_node->attr_dict, p_dst ); + } + } + + /* Merge region attributes */ + DictionnaryMerge( &p_regionnode->attr_dict, p_dst ); + } +} +/* Resolves the style that applies to p_node by collecting the tts: attributes of the node and of all its ancestors, then converting them into a ttml_style_t. Returns NULL when no attribute applies or on allocation failure; the caller owns the result. */ +static ttml_style_t * InheritTTMLStyles( ttml_context_t *p_ctx, tt_node_t *p_node ) +{ + ttml_style_t *p_ttml_style = NULL; + vlc_dictionary_t merged; + vlc_dictionary_init( &merged, 0 ); + + /* Merge dics backwards without overwriting: the first value stored for a key wins, so each node's own attributes go in before the style and the region it references, and before anything from its ancestors */ + for( ; p_node; p_node = p_node->p_parent ) + { + DictionnaryMerge( &p_node->attr_dict, &merged ); + + const char *psz_styleid = (const char *) + vlc_dictionary_value_for_key( &p_node->attr_dict, "style" ); + if( psz_styleid ) + DictMergeWithStyleID( p_ctx, psz_styleid, &merged ); + + const char *psz_regionid = (const char *) + vlc_dictionary_value_for_key( &p_node->attr_dict, "region" ); + if( psz_regionid ) + DictMergeWithRegionID( p_ctx, psz_regionid, &merged ); + } + + /* Build a style whenever the dictionary has buckets at all: looking only at bucket 0 dropped every style whose keys hashed to another bucket */ + if( merged.i_size && merged.p_entries && (p_ttml_style = ttml_style_New()) ) + { + for( int i = 0; i < merged.i_size; ++i ) + { + for ( vlc_dictionary_entry_t* p_entry = 
merged.p_entries[i]; + p_entry != NULL; p_entry = p_entry->p_next ) + { + FillTTMLStyle( p_entry->psz_key, p_entry->p_value, p_ttml_style ); + } + } + } + /* Values were inserted by pointer from the node dictionaries (see DictionnaryMerge); NULL is passed as the free callback */ + vlc_dictionary_clear( &merged, NULL, NULL ); + + return p_ttml_style; +} +/* Reads the XML stream and builds the node tree into *pp_rootnode. The top-level element must compare equal to "tt" and may appear only once. Returns VLC_SUCCESS when a root was read, VLC_EGENERIC otherwise; on error *pp_rootnode may still hold a tree that the caller has to delete. */ +static int ParseTTMLChunk( xml_reader_t *p_reader, tt_node_t **pp_rootnode ) +{ + const char* psz_node_name; + + do + { + int i_type = xml_ReaderNextNode( p_reader, &psz_node_name ); + + if( i_type <= XML_READER_NONE ) + break; + + switch(i_type) + { + default: + break; + + case XML_READER_STARTELEM: + if( tt_node_NameCompare( psz_node_name, "tt" ) || + *pp_rootnode != NULL ) + return VLC_EGENERIC; + + *pp_rootnode = tt_node_New( p_reader, NULL, psz_node_name ); + if( !*pp_rootnode || + tt_nodes_Read( p_reader, *pp_rootnode ) != VLC_SUCCESS ) + return VLC_EGENERIC; + break; + + case XML_READER_ENDELEM: + if( !*pp_rootnode || + tt_node_NameCompare( psz_node_name, (*pp_rootnode)->psz_node_name ) ) + return VLC_EGENERIC; + break; + } + + } while( 1 ); + + if( *pp_rootnode == NULL ) + return VLC_EGENERIC; + + return VLC_SUCCESS; +} +/* Wraps the text of p_segment in the control characters selected by i_direction; values outside the table leave the segment untouched. */ +static void BIDIConvert( text_segment_t *p_segment, int i_direction ) +{ + /* + * For bidirectional support, we use different enum values + * to recognize the different cases, and then we add the + * corresponding Unicode characters to the text of + * the text_segment. 
+ */ + static const struct + { + const char* psz_uni_start; + const char* psz_uni_end; + } p_bidi[] = { + { "\u2066", "\u2069" }, + { "\u2067", "\u2069" }, + { "\u202A", "\u202C" }, + { "\u202B", "\u202C" }, + { "\u202D", "\u202C" }, + { "\u202E", "\u202C" }, + }; + + if( unlikely((size_t)i_direction >= ARRAY_SIZE(p_bidi)) ) + return; + + char *psz_text = NULL; + if( asprintf( &psz_text, "%s%s%s", p_bidi[i_direction].psz_uni_start, + p_segment->psz_text, p_bidi[i_direction].psz_uni_end ) < 0 ) + { + free( p_segment->psz_text ); + p_segment->psz_text = psz_text; + } +} + +static text_segment_t * ConvertNodesToSegments( ttml_context_t *p_ctx, const tt_node_t *p_node, + bool b_has_prev_text, ttml_style_t **pp_ret_ttml_style ) +{ + text_segment_t *p_head = NULL; + text_segment_t **pp_last = &p_head; + + /* awkward paragraph handling */ + if( !tt_node_NameCompare( p_node->psz_node_name, "p" ) && b_has_prev_text ) + { + *pp_last = text_segment_New( "\n" ); + if( (*pp_last) ) + pp_last = &(*pp_last)->p_next; + } + + for( const tt_basenode_t *p_child = p_node->p_child; + p_child; p_child = p_child->p_next ) + { + if( p_child->i_type == TT_NODE_TYPE_TEXT ) + { + const tt_textnode_t *p_ttnode = (const tt_textnode_t *) p_child; + *pp_last = text_segment_New( p_ttnode->psz_text ); + ttml_style_t *s = InheritTTMLStyles( p_ctx, p_child->p_parent ); + (*pp_last)->style = s->font_style; + s->font_style = NULL; + if( s->b_direction_set ) + BIDIConvert( *pp_last, s->i_direction ); + /* FIXME: This is carried from broken prev code feat + * as there can be multiple regions. 
Return first ttml style + * to apply to default SPU region for now */ + if( *pp_ret_ttml_style == NULL ) + *pp_ret_ttml_style = s; + else + ttml_style_Delete( s ); + } + else + { + const tt_node_t *p_childnode = (const tt_node_t *) p_child; + if( !tt_node_NameCompare( p_childnode->psz_node_name, "br" ) ) + { + *pp_last = text_segment_New( "\n" ); + } + else + { + *pp_last = ConvertNodesToSegments( p_ctx, p_childnode, + p_head != NULL, pp_ret_ttml_style ); + } + } + /* Move to the end of whatever was just appended (possibly a whole sub-chain) */ + while( (*pp_last) ) + pp_last = &(*pp_last)->p_next; + } + + return p_head; +} +/* Parses the TTML document held in p_buffer and returns the content of its "body" element as a chain of text segments (NULL on failure, or when the root is not "tt" or has no body). The first style found is returned through *p_ret_ttml_style. */ +static text_segment_t *ParseTTML( decoder_t *p_dec, const uint8_t *p_buffer, size_t i_buffer, + ttml_style_t **p_ret_ttml_style ) +{ + stream_t* p_sub = NULL; + xml_reader_t* p_xml_reader = NULL; + text_segment_t* p_segments = NULL; + + p_sub = vlc_stream_MemoryNew( p_dec, (uint8_t*) p_buffer, i_buffer, true ); + if( unlikely( p_sub == NULL ) ) + return NULL; + + p_xml_reader = xml_ReaderCreate( p_dec, p_sub ); + if( unlikely( p_xml_reader == NULL ) ) + { + vlc_stream_Delete( p_sub ); + return NULL; + } + + tt_node_t *p_rootnode = NULL; + if( ParseTTMLChunk( p_xml_reader, &p_rootnode ) != VLC_SUCCESS ) + { + if( p_rootnode ) /* a partially read tree may have been left behind */ + tt_node_RecursiveDelete( p_rootnode ); + goto end; + } + + if( !tt_node_NameCompare( p_rootnode->psz_node_name, "tt" ) ) + { + const tt_node_t *p_bodynode = FindNode( p_rootnode, "body", 1, NULL ); /* depth 1: body is looked up among the root's direct children only */ + if( p_bodynode ) + { + ttml_context_t context; + context.p_rootnode = p_rootnode; + p_segments = ConvertNodesToSegments( &context, p_bodynode, + false, p_ret_ttml_style ); + } + } + else if ( !tt_node_NameCompare( p_rootnode->psz_node_name, "div" ) || + !tt_node_NameCompare( p_rootnode->psz_node_name, "p" ) ) + { + /* TODO */ + } + + tt_node_RecursiveDelete( p_rootnode ); + +end: + xml_ReaderDelete( p_xml_reader ); + vlc_stream_Delete( p_sub ); + + return p_segments; +} +/* Turns one input block into a text subpicture: parses its TTML payload, then fills in timing, position and alignment. Returns NULL for corrupted or undated blocks and when nothing could be parsed. */ +static subpicture_t *ParseBlock( decoder_t *p_dec, const block_t *p_block ) +{ + decoder_sys_t *p_sys = p_dec->p_sys; + subpicture_t 
*p_spu = NULL; + + if( p_block->i_flags & BLOCK_FLAG_CORRUPTED ) + return NULL; + + /* We cannot display a subpicture with no date */ + if( p_block->i_pts <= VLC_TS_INVALID ) + { + msg_Warn( p_dec, "subtitle without a date" ); + return NULL; + } + + ttml_style_t *p_ttml_style = NULL; + text_segment_t *p_segments = ParseTTML( p_dec, p_block->p_buffer, p_block->i_buffer, + &p_ttml_style ); + if( p_segments ) + { + /* Create the subpicture unit */ + p_spu = decoder_NewSubpictureText( p_dec ); + if( !p_spu ) + { + /* Nothing took ownership of the parsed data: release it before bailing out */ + text_segment_ChainDelete( p_segments ); + if( p_ttml_style ) + ttml_style_Delete( p_ttml_style ); + return NULL; + } + + p_spu->i_start = p_block->i_pts; + p_spu->i_stop = p_block->i_pts + p_block->i_length; + p_spu->b_ephemer = (p_block->i_length == 0); + p_spu->b_absolute = false; + + subpicture_updater_sys_t *p_spu_sys = p_spu->updater.p_sys; + + /* Broken stuff. See comments */ + if( p_ttml_style ) + { + p_spu_sys->x = ( p_ttml_style->i_margin_h ) ? p_ttml_style->i_margin_h + : p_ttml_style->i_margin_percent_h; + + p_spu_sys->y = ( p_ttml_style->i_margin_v ) ? p_ttml_style->i_margin_v + : p_ttml_style->i_margin_percent_v; + + p_spu_sys->align |= p_ttml_style->i_align; + } + + /* No alignment from the document: fall back to bottom plus the configured alignment */ + if( (p_spu_sys->align & SUBPICTURE_ALIGN_MASK) == 0 ) + p_spu_sys->align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align; + + /* The subpicture now owns the segment chain */ + p_spu_sys->p_segments = p_segments; + } + + if( p_ttml_style ) + ttml_style_Delete( p_ttml_style ); + + return p_spu; +} + + + +/**************************************************************************** + * DecodeBlock: the whole thing + ****************************************************************************/ +static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block ) +{ + if( !pp_block || *pp_block == NULL ) + return NULL; + + block_t* p_block = *pp_block; + subpicture_t *p_spu = ParseBlock( p_dec, p_block ); + + /* The block is consumed whether or not a subpicture was produced */ + block_Release( p_block ); + *pp_block = NULL; + + return p_spu; +} + +/***************************************************************************** + * OpenDecoder: probe the decoder and return score + 
*****************************************************************************/ +int OpenDecoder( vlc_object_t *p_this ) +{ + decoder_t *p_dec = (decoder_t*)p_this; + decoder_sys_t *p_sys; + + if( p_dec->fmt_in.i_codec != VLC_CODEC_TTML ) + return VLC_EGENERIC; + + /* Allocate the memory needed to store the decoder's structure */ + p_dec->p_sys = p_sys = calloc( 1, sizeof( *p_sys ) ); + if( unlikely( p_sys == NULL ) ) + return VLC_ENOMEM; + + p_dec->pf_decode_sub = DecodeBlock; + p_dec->fmt_out.i_cat = SPU_ES; + p_sys->i_align = var_InheritInteger( p_dec, "ttml-align" ); + + return VLC_SUCCESS; +} + +/***************************************************************************** + * CloseDecoder: clean up the decoder + *****************************************************************************/ +void CloseDecoder( vlc_object_t *p_this ) +{ + decoder_t *p_dec = (decoder_t *)p_this; + decoder_sys_t *p_sys = p_dec->p_sys; + + free( p_sys ); +} diff --git a/modules/codec/ttml/ttml.c b/modules/codec/ttml/ttml.c index 3d40445bdac2fd4af5f096be03321423ed9692bb..3958c01e2523bb79230da14e3812f2bb2f313530 100644 --- a/modules/codec/ttml/ttml.c +++ b/modules/codec/ttml/ttml.c @@ -22,6 +22,7 @@ #endif #include <vlc_common.h> +#include <vlc_plugin.h> #include <vlc_xml.h> #include <vlc_strings.h> @@ -30,6 +31,34 @@ #include "ttml.h" +#define ALIGN_TEXT N_("Subtitle justification") +#define ALIGN_LONGTEXT N_("Set the justification of subtitles") + +/***************************************************************************** + * Modules descriptor. 
+ *****************************************************************************/ + +vlc_module_begin () + set_capability( "decoder", 10 ) + set_shortname( N_("TTML decoder")) + set_description( N_("TTML subtitles decoder") ) + set_callbacks( OpenDecoder, CloseDecoder ) + set_category( CAT_INPUT ) + set_subcategory( SUBCAT_INPUT_SCODEC ) + add_integer( "ttml-align", 0, ALIGN_TEXT, ALIGN_LONGTEXT, false ) + + add_submodule() + set_shortname( N_("TTML") ) + set_description( N_("TTML demuxer") ) + set_capability( "demux", 2 ) + set_category( CAT_INPUT ) + set_subcategory( SUBCAT_INPUT_DEMUX ) + set_callbacks( OpenDemux, CloseDemux ) + add_shortcut( "ttml", "subtitle" ) + +vlc_module_end () + + int tt_node_NameCompare( const char* psz_tagname, const char* psz_pattern ) { if( !strncasecmp( "tt:", psz_tagname, 3 ) ) diff --git a/modules/codec/ttml/ttml.h b/modules/codec/ttml/ttml.h index 794d2e7b318c85c3292369e6224d46bc4ffc0ad6..478787828698d78110f99569d3d02c171431fe77 100644 --- a/modules/codec/ttml/ttml.h +++ b/modules/codec/ttml/ttml.h @@ -18,6 +18,12 @@ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. 
*****************************************************************************/ +int OpenDemux( vlc_object_t* p_this ); +void CloseDemux( demux_t* p_demux ); + +int OpenDecoder ( vlc_object_t * ); +void CloseDecoder ( vlc_object_t * ); + enum { TT_TIMINGS_UNSPEC = 0, diff --git a/modules/demux/Makefile.am b/modules/demux/Makefile.am index c06f5d9847174c9856a5674ee1760da08904409c..271af342eac6235c5c387cf0c40b7fd2df680a8e 100644 --- a/modules/demux/Makefile.am +++ b/modules/demux/Makefile.am @@ -458,7 +458,3 @@ libadaptive_plugin_la_LIBADD += $(GCRYPT_LIBS) endif demux_LTLIBRARIES += libadaptive_plugin.la -libttml_plugin_la_SOURCES = demux/ttml.c \ - codec/ttml/ttml.h codec/ttml/ttml.c -demux_LTLIBRARIES += libttml_plugin.la - diff --git a/modules/demux/ttml.c b/modules/demux/ttml.c index 52524fd14cbee4048a25f6cb84beb4e2f6c5ed59..1274142d42dc0962ff30a42098d347f8f56ce60e 100644 --- a/modules/demux/ttml.c +++ b/modules/demux/ttml.c @@ -26,7 +26,6 @@ #endif #include <vlc_common.h> -#include <vlc_plugin.h> #include <vlc_demux.h> #include <vlc_xml.h> #include <vlc_strings.h> @@ -40,19 +39,6 @@ //#define TTML_DEMUX_DEBUG -static int Open( vlc_object_t* p_this ); -static void Close( demux_t* p_demux ); - -vlc_module_begin () - set_shortname( N_("TTML") ) - set_description( N_("TTML demuxer") ) - set_capability( "demux", 2 ) - set_category( CAT_INPUT ) - set_subcategory( SUBCAT_INPUT_DEMUX ) - set_callbacks( Open, Close ) - add_shortcut( "ttml", "subtitle" ) -vlc_module_end () - struct demux_sys_t { xml_t* p_xml; @@ -408,7 +394,7 @@ static int Demux( demux_t* p_demux ) return VLC_DEMUXER_SUCCESS; } -static int Open( vlc_object_t* p_this ) +int OpenDemux( vlc_object_t* p_this ) { demux_t *p_demux = (demux_t*)p_this; demux_sys_t *p_sys; @@ -490,12 +476,12 @@ static int Open( vlc_object_t* p_this ) return VLC_SUCCESS; error: - Close( p_demux ); + CloseDemux( p_demux ); return VLC_EGENERIC; } -static void Close( demux_t* p_demux ) +void CloseDemux( demux_t* p_demux ) { 
demux_sys_t* p_sys = p_demux->p_sys; diff --git a/po/POTFILES.in b/po/POTFILES.in index 291063b73fc4a8fd644ef6e6aeefabbcb9fe7e5a..68010c715e2b57a74fc19827564af2e6894dec52 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -404,7 +404,7 @@ modules/codec/svcdsub.c modules/codec/t140.c modules/codec/telx.c modules/codec/theora.c -modules/codec/substtml.c +modules/codec/ttml/ttml.c modules/codec/twolame.c modules/codec/uleaddvaudio.c modules/codec/videotoolbox.m