/*****************************************************************************
 * ttml.c : TTML subtitles demux
 *****************************************************************************
 * Copyright (C) 2015-2017 VLC authors and VideoLAN
 *
 * Authors: Hugo Beauzée-Luyssen <hugo@beauzee.fr>
 *          Sushma Reddy <sushma.reddy@research.iiit.ac.in>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
 *****************************************************************************/

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

#include <vlc_common.h>
#include <vlc_demux.h>
#include <vlc_xml.h>
#include <vlc_strings.h>
#include <vlc_memory.h>
#include <vlc_memstream.h>
#include <vlc_es_out.h>
#include <vlc_charset.h>          /* FromCharset */

#include <assert.h>
#include <inttypes.h>             /* PRId64 */
#include <stdlib.h>
#include <ctype.h>

#include "../codec/ttml/ttml.h"

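/* Uncomment to keep XML reader messages, dump the parsed document at open
 * and annotate every serialized element with its resolved timings. */
//#define TTML_DEMUX_DEBUG
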
struct demux_sys_t
{
47 48 49 50
    xml_t*          p_xml;
    xml_reader_t*   p_reader;
    es_out_id_t*    p_es;
    int64_t         i_next_demux_time;
51 52
    bool            b_slave;
    bool            b_first_time;
53

54
    tt_node_t         *p_rootnode;
55

56
    tt_timings_t    temporal_extent;
57

58 59 60 61 62 63
    /*
     * All timings are stored unique and ordered.
     * Being begin or end times of sub sequence,
     * we use them as 'point of change' for output filtering.
    */
    struct
64
    {
65
        tt_time_t *p_array;
66 67 68 69
        size_t   i_count;
        size_t   i_current;
    } times;
};
70

71
/**
 * Formats a time as a clock expression.
 *
 * An invalid time is printed as zero. The output is one of:
 *  - "HH:MM:SS.fraction" when the sub-second part is not zero
 *    (leading zeroes kept, trailing zeroes removed),
 *  - "HH:MM:SS:FF" when there is no sub-second part but a frame count,
 *  - "HH:MM:SS" otherwise.
 *
 * \return a heap allocated string the caller must free(), or NULL if the
 *         allocation failed
 */
static char *tt_genTiming( tt_time_t t )
{
    if( !tt_time_Valid( &t ) )
        t.base = 0;

    /* split into sub-second remainder and whole seconds */
    unsigned i_fraction = t.base % CLOCK_FREQ;
    t.base /= CLOCK_FREQ;

    const unsigned i_hours   = t.base / 3600;
    const unsigned i_minutes = t.base % 3600 / 60;
    const unsigned i_seconds = t.base % 60;

    char *psz_timing;
    int i_printed;

    if( i_fraction != 0 )
    {
        static const char zeroes[] = "000000";
        /* starts on the terminator, i.e. as an empty padding string */
        const char *psz_padding = &zeroes[6];

        /* one leading zero per decimal position the fraction does not fill */
        unsigned i_scaled = 10 * i_fraction;
        while( i_scaled < CLOCK_FREQ )
        {
            psz_padding--;
            i_scaled *= 10;
        }

        /* trailing zeroes carry no information */
        while( i_fraction > 0 && (i_fraction % 10) == 0 )
            i_fraction /= 10;

        i_printed = asprintf( &psz_timing, "%02u:%02u:%02u.%s%u",
                              i_hours, i_minutes, i_seconds,
                              psz_padding, i_fraction );
    }
    else if( t.frames )
    {
        const char *psz_padding = ( t.frames < 10 ) ? "0" : "";
        i_printed = asprintf( &psz_timing, "%02u:%02u:%02u:%s%u",
                              i_hours, i_minutes, i_seconds,
                              psz_padding, t.frames );
    }
    else
    {
        i_printed = asprintf( &psz_timing, "%02u:%02u:%02u",
                              i_hours, i_minutes, i_seconds );
    }

    if( i_printed < 0 )
        return NULL;
    return psz_timing;
}

109
static void tt_node_AttributesToText( struct vlc_memstream *p_stream, const tt_node_t* p_node )
110
{
111
    bool b_timed_node = false;
112
    const vlc_dictionary_t* p_attr_dict = &p_node->attr_dict;
113
    for( int i = 0; i < p_attr_dict->i_size; ++i )
114
    {
115 116 117
        for ( vlc_dictionary_entry_t* p_entry = p_attr_dict->p_entries[i];
                                      p_entry != NULL; p_entry = p_entry->p_next )
        {
118 119
            const char *psz_value = NULL;

120 121 122
            if( !strcmp(p_entry->psz_key, "begin") ||
                !strcmp(p_entry->psz_key, "end") ||
                !strcmp(p_entry->psz_key, "dur") )
123
            {
124 125 126
                b_timed_node = true;
                /* will remove duration */
                continue;
127
            }
128
            else if( !strcmp(p_entry->psz_key, "timeContainer") )
129
            {
130
                /* also remove sequential timings info (all abs now) */
131 132 133 134 135 136 137 138 139 140
                continue;
            }
            else
            {
                psz_value = (char const*)p_entry->p_value;
            }

            if( psz_value == NULL )
                continue;

141
            vlc_memstream_printf( p_stream, " %s=\"%s\"",
142
                                  p_entry->psz_key, psz_value );
143 144
        }
    }
145

146 147
    if( b_timed_node )
    {
148
        if( tt_time_Valid( &p_node->timings.begin ) )
149
        {
150
            char *psz = tt_genTiming( p_node->timings.begin );
151 152 153 154
            vlc_memstream_printf( p_stream, " begin=\"%s\"", psz );
            free( psz );
        }

155
        if( tt_time_Valid( &p_node->timings.end ) )
156
        {
157
            char *psz = tt_genTiming( p_node->timings.end );
158 159
            vlc_memstream_printf( p_stream, " end=\"%s\"", psz );
            free( psz );
160
        }
161 162 163
    }
}

164
static void tt_node_ToText( struct vlc_memstream *p_stream, const tt_basenode_t *p_basenode,
165
                            const tt_time_t *playbacktime )
166
{
167
    if( p_basenode->i_type == TT_NODE_TYPE_ELEMENT )
168
    {
169
        const tt_node_t *p_node = (const tt_node_t *) p_basenode;
170

171 172
        if( tt_time_Valid( playbacktime ) &&
           !tt_timings_Contains( &p_node->timings, playbacktime ) )
173
            return;
174

175 176
        vlc_memstream_putc( p_stream, '<' );
        vlc_memstream_puts( p_stream, p_node->psz_node_name );
177

178
        tt_node_AttributesToText( p_stream, p_node );
179

180 181
        if( tt_node_HasChild( p_node ) )
        {
182
            vlc_memstream_putc( p_stream, '>' );
183 184

#ifdef TTML_DEMUX_DEBUG
185
            vlc_memstream_printf( p_stream, "<!-- starts %ld ends %ld -->",
186 187
                                  tt_time_Convert( &p_node->timings.begin ),
                                  tt_time_Convert( &p_node->timings.end ) );
188
#endif
189

190 191 192
            for( const tt_basenode_t *p_child = p_node->p_child;
                                   p_child; p_child = p_child->p_next )
            {
193
                tt_node_ToText( p_stream, p_child, playbacktime );
194
            }
195

196
            vlc_memstream_printf( p_stream, "</%s>", p_node->psz_node_name );
197 198
        }
        else
199
            vlc_memstream_puts( p_stream, "/>" );
200 201
    }
    else
202
    {
203
        const tt_textnode_t *p_textnode = (const tt_textnode_t *) p_basenode;
204
        vlc_memstream_puts( p_stream, p_textnode->psz_text );
205 206 207
    }
}

208
/**
 * Demuxer control callback.
 *
 * Handles seek capability, time/position/length queries, seeking by time
 * or position, and DEMUX_SET_NEXT_DEMUX_TIME. Seeking only moves the index
 * of the next time point to output and requests a new PCR; it is refused
 * when the document has no time point.
 *
 * \return VLC_SUCCESS when the query was handled, VLC_EGENERIC otherwise
 */
static int Control( demux_t* p_demux, int i_query, va_list args )
{
    demux_sys_t *p_sys = p_demux->p_sys;

    switch( i_query )
    {
        case DEMUX_CAN_SEEK:
        {
            bool *pb_can_seek = va_arg( args, bool * );
            *pb_can_seek = true;
            return VLC_SUCCESS;
        }

        case DEMUX_GET_TIME:
        {
            int64_t *pi_time = va_arg( args, int64_t * );
            *pi_time = p_sys->i_next_demux_time;
            return VLC_SUCCESS;
        }

        case DEMUX_SET_TIME:
        {
            const int64_t i_time = va_arg( args, int64_t );
            if( p_sys->times.i_count == 0 )
                break;

            bool b_unused;
            tt_time_t target = tt_time_Create( i_time - VLC_TS_0 );
            p_sys->times.i_current =
                tt_timings_FindLowerIndex( p_sys->times.p_array,
                                           p_sys->times.i_count,
                                           target, &b_unused );
            p_sys->b_first_time = true;
            return VLC_SUCCESS;
        }

        case DEMUX_SET_NEXT_DEMUX_TIME:
        {
            p_sys->i_next_demux_time = va_arg( args, int64_t );
            p_sys->b_slave = true;
            return VLC_SUCCESS;
        }

        case DEMUX_GET_LENGTH:
        {
            int64_t *pi_length = va_arg( args, int64_t * );
            if( p_sys->times.i_count == 0 )
                break;

            /* last time point, relative to the start of the extent */
            tt_time_t span = tt_time_Sub( p_sys->times.p_array[p_sys->times.i_count - 1],
                                          p_sys->temporal_extent.begin );
            *pi_length = tt_time_Convert( &span );
            return VLC_SUCCESS;
        }

        case DEMUX_GET_POSITION:
        {
            double *pf_position = va_arg( args, double * );
            if( p_sys->times.i_current >= p_sys->times.i_count )
            {
                /* everything was output (or there is nothing to output) */
                *pf_position = 1.0;
            }
            else
            {
                /* i_current < i_count, so there is at least one entry */
                const int64_t i_last =
                    tt_time_Convert( &p_sys->times.p_array[p_sys->times.i_count - 1] );
                /* + 0.5 keeps the divisor non null for a zero last time */
                *pf_position = (double) p_sys->i_next_demux_time / (i_last + 0.5);
            }
            return VLC_SUCCESS;
        }

        case DEMUX_SET_POSITION:
        {
            const double f_position = va_arg( args, double );
            if( p_sys->times.i_count == 0 )
                break;

            bool b_unused;
            const int64_t i_time =
                f_position * tt_time_Convert( &p_sys->times.p_array[p_sys->times.i_count - 1] );
            tt_time_t target = tt_time_Create( i_time );
            p_sys->times.i_current =
                tt_timings_FindLowerIndex( p_sys->times.p_array,
                                           p_sys->times.i_count,
                                           target, &b_unused );
            p_sys->b_first_time = true;
            return VLC_SUCCESS;
        }

        /* DEMUX_GET_PTS_DELAY, DEMUX_GET_FPS, DEMUX_GET_META,
         * DEMUX_GET_ATTACHMENTS, DEMUX_GET_TITLE_INFO,
         * DEMUX_HAS_UNSUPPORTED_META, DEMUX_CAN_RECORD and anything else
         * are not handled. */
        default:
            break;
    }

    return VLC_EGENERIC;
}

294
/**
 * Reads the whole document from the XML reader into p_sys->p_rootnode.
 *
 * The only element accepted at this level is a single "tt" root; its
 * content is loaded with tt_nodes_Read() unless the element is empty.
 *
 * \return VLC_SUCCESS if a root node was created, VLC_EGENERIC on any
 *         unexpected element, read failure or missing root
 */
static int ReadTTML( demux_t* p_demux )
{
    demux_sys_t* p_sys = p_demux->p_sys;
    const char* psz_node_name;

    for( ;; )
    {
        const int i_type = xml_ReaderNextNode( p_sys->p_reader, &psz_node_name );
        const bool b_empty = xml_ReaderIsEmptyElement( p_sys->p_reader );

        /* end of document or reader error */
        if( i_type <= XML_READER_NONE )
            break;

        if( i_type == XML_READER_STARTELEM )
        {
            /* must be "tt", and must be the first one */
            if( tt_node_NameCompare( psz_node_name, "tt" ) ||
                p_sys->p_rootnode != NULL )
                return VLC_EGENERIC;

            p_sys->p_rootnode = tt_node_New( p_sys->p_reader, NULL, psz_node_name );
            if( b_empty )
                continue; /* nothing to read below the root */

            if( !p_sys->p_rootnode ||
                tt_nodes_Read( p_sys->p_reader, p_sys->p_rootnode ) != VLC_SUCCESS )
                return VLC_EGENERIC;
        }
        else if( i_type == XML_READER_ENDELEM )
        {
            /* only the end of the root element is expected here */
            if( !p_sys->p_rootnode ||
                tt_node_NameCompare( psz_node_name, p_sys->p_rootnode->psz_node_name ) )
                return VLC_EGENERIC;
        }
        /* other node types are ignored */
    }

    return ( p_sys->p_rootnode != NULL ) ? VLC_SUCCESS : VLC_EGENERIC;
}

340
/**
 * Demux callback.
 *
 * For every time point that is due (not later than i_next_demux_time),
 * serializes the document as filtered for that time point and sends it as
 * one block lasting until just before the following time point. The last
 * time point never starts a block, as it must be an end time.
 *
 * When not slaved, also sets the PCR and moves i_next_demux_time forward
 * by CLOCK_FREQ / 8.
 *
 * \return VLC_DEMUXER_EOF once no time point is left to start a block,
 *         VLC_DEMUXER_EGENERIC if the memory stream could not be opened,
 *         VLC_DEMUXER_SUCCESS otherwise
 */
static int Demux( demux_t* p_demux )
{
    demux_sys_t* p_sys = p_demux->p_sys;

    /* Last one must be an end time */
    while( p_sys->times.i_current + 1 < p_sys->times.i_count )
    {
        const tt_time_t *p_from = &p_sys->times.p_array[p_sys->times.i_current];
        const tt_time_t *p_to   = &p_sys->times.p_array[p_sys->times.i_current + 1];

        const int64_t i_start = tt_time_Convert( p_from );
        if( i_start > p_sys->i_next_demux_time )
            break; /* not due yet */

        const int64_t i_stop = tt_time_Convert( p_to ) - 1;

        if ( !p_sys->b_slave && p_sys->b_first_time )
        {
            es_out_SetPCR( p_demux->out, VLC_TS_0 + i_start );
            p_sys->b_first_time = false;
        }

        struct vlc_memstream stream;
        if( vlc_memstream_open( &stream ) )
            return VLC_DEMUXER_EGENERIC;

        tt_node_ToText( &stream, (tt_basenode_t *) p_sys->p_rootnode, p_from );

        if( vlc_memstream_close( &stream ) == VLC_SUCCESS )
        {
            /* the block wraps the text buffer of the stream */
            block_t *p_block = block_heap_Alloc( stream.ptr, stream.length );
            if( p_block != NULL )
            {
                p_block->i_pts = VLC_TS_0 + i_start;
                p_block->i_dts = p_block->i_pts;
                p_block->i_length = i_stop - i_start;

                es_out_Send( p_demux->out, p_sys->p_es, p_block );
            }
        }

        p_sys->times.i_current++;
    }

    if ( !p_sys->b_slave )
    {
        es_out_SetPCR( p_demux->out, VLC_TS_0 + p_sys->i_next_demux_time );
        p_sys->i_next_demux_time += CLOCK_FREQ / 8;
    }

    return ( p_sys->times.i_current + 1 >= p_sys->times.i_count )
           ? VLC_DEMUXER_EOF : VLC_DEMUXER_SUCCESS;
}

395
/**
 * Probes the stream for a TTML document and, on success, sets the demuxer up.
 *
 * Probing works on the first peeked bytes only (at most 2048, and more
 * than 32 are required): the data must start like an XML or "<tt" document
 * in UTF-8 or UTF-16 (UTF-16 is transcoded first), contain a "tt" element
 * name and one of the known TTML namespace declarations.
 *
 * The document is then fully parsed, its timings resolved into the sorted
 * list of time points, and a VLC_CODEC_TTML SPU elementary stream created.
 *
 * \return VLC_SUCCESS, VLC_ENOMEM if the private data could not be
 *         allocated, VLC_EGENERIC otherwise
 */
int tt_OpenDemux( vlc_object_t* p_this )
{
    demux_t     *p_demux = (demux_t*)p_this;
    demux_sys_t *p_sys;

    const uint8_t *p_peek;
    ssize_t i_peek = vlc_stream_Peek( p_demux->s, &p_peek, 2048 );
    if( unlikely( i_peek <= 32 ) )
        return VLC_EGENERIC;

    const char *psz_xml = (const char *) p_peek;
    size_t i_xml  = i_peek;

    /* Try to probe without xml module/loading the full document */
    char *psz_alloc = NULL;
    switch( GetQWBE(p_peek) )
    {
        /* See RFC 3023 Part 4 */
        case UINT64_C(0xFFFE3C003F007800): /* UTF16 BOM<? */
        case UINT64_C(0xFFFE3C003F007400): /* UTF16 BOM<t */
        case UINT64_C(0xFEFF003C003F0078): /* UTF16 BOM<? */
        case UINT64_C(0xFEFF003C003F0074): /* UTF16 BOM<t */
            psz_alloc = FromCharset( "UTF-16", p_peek, i_peek );
            break;
        case UINT64_C(0x3C003F0078006D00): /* UTF16-LE <?xm */
        case UINT64_C(0x3C003F0074007400): /* UTF16-LE <tt */
            psz_alloc = FromCharset( "UTF-16LE", p_peek, i_peek );
            break;
        case UINT64_C(0x003C003F0078006D): /* UTF16-BE <?xm */
        case UINT64_C(0x003C003F00740074): /* UTF16-BE <tt */
            psz_alloc = FromCharset( "UTF-16BE", p_peek, i_peek );
            break;
        case UINT64_C(0xEFBBBF3C3F786D6C): /* UTF8 BOM<?xml */
        case UINT64_C(0x3C3F786D6C207665): /* UTF8 <?xml ve */
        case UINT64_C(0xEFBBBF3C74742078): /* UTF8 BOM<tt x*/
            break;
        default:
            if(GetDWBE(p_peek) != UINT32_C(0x3C747420)) /* tt node without xml document marker */
                return VLC_EGENERIC;
            break;
    }

    /* From here on, probe the transcoded copy when there is one */
    if( psz_alloc )
    {
        psz_xml = psz_alloc;
        i_xml = strlen( psz_alloc );
    }

    /* Simplified probing. Valid TTML must have a namespace declaration */
    const char *psz_tt = strnstr( psz_xml, "tt", i_xml );
    if( !psz_tt || psz_tt == psz_xml ||
        /* offsets must be taken in the probed buffer, which is not the
         * peeked one after transcoding; this also keeps psz_tt[2] below
         * from being read past the end */
        (size_t)(&psz_tt[2] - psz_xml) >= i_xml ||
        /* <ctype.h> functions require an unsigned char value */
        isalpha( (unsigned char) psz_tt[2] ) ||
        (psz_tt[-1] != ':' && psz_tt[-1] != '<') )
    {
        free( psz_alloc );
        return VLC_EGENERIC;
    }
    else
    {
        const char * const rgsz[] =
        {
            "=\"http://www.w3.org/ns/ttml\"",
            "=\"http://www.w3.org/2004/11/ttaf1\"",
            "=\"http://www.w3.org/2006/04/ttaf1\"",
            "=\"http://www.w3.org/2006/10/ttaf1\"",
        };
        /* The declaration follows the element name: search the remaining
         * bytes from there, not a shortened window from the buffer start
         * (which would miss a declaration close to the end). */
        const size_t i_remaining = i_xml - (size_t)(psz_tt - psz_xml);
        const char *psz_ns = NULL;
        for( size_t i=0; i<ARRAY_SIZE(rgsz) && !psz_ns; i++ )
            psz_ns = strnstr( psz_tt, rgsz[i], i_remaining );

        free( psz_alloc );
        if( !psz_ns )
            return VLC_EGENERIC;
    }

    p_demux->p_sys = p_sys = calloc( 1, sizeof( *p_sys ) );
    if( unlikely( p_sys == NULL ) )
        return VLC_ENOMEM;

    p_sys->b_first_time = true;
    p_sys->temporal_extent.i_type = TT_TIMINGS_PARALLEL;
    tt_time_Init( &p_sys->temporal_extent.begin );
    tt_time_Init( &p_sys->temporal_extent.end );
    tt_time_Init( &p_sys->temporal_extent.dur );
    p_sys->temporal_extent.begin.base = 0;

    p_sys->p_xml = xml_Create( p_demux );
    if( !p_sys->p_xml )
        goto error;

    p_sys->p_reader = xml_ReaderCreate( p_sys->p_xml, p_demux->s );
    if( !p_sys->p_reader )
        goto error;

#ifndef TTML_DEMUX_DEBUG
    p_sys->p_reader->obj.flags |= OBJECT_FLAGS_QUIET;
#endif

    if( ReadTTML( p_demux ) != VLC_SUCCESS )
        goto error;

    tt_timings_Resolve( (tt_basenode_t *) p_sys->p_rootnode, &p_sys->temporal_extent,
                        &p_sys->times.p_array, &p_sys->times.i_count );

#ifdef TTML_DEMUX_DEBUG
    {
        struct vlc_memstream stream;

        if( vlc_memstream_open( &stream ) )
            goto error;

        /* an invalid time disables filtering: dump the whole document */
        tt_time_t t;
        tt_time_Init( &t );
        tt_node_ToText( &stream, (tt_basenode_t*)p_sys->p_rootnode, &t /* invalid */ );

        vlc_memstream_putc( &stream, '\0' );

        if( vlc_memstream_close( &stream ) == VLC_SUCCESS )
        {
            msg_Dbg( p_demux, "%s", stream.ptr );
            free( stream.ptr );
        }
    }
#endif

    p_demux->pf_demux = Demux;
    p_demux->pf_control = Control;

    es_format_t fmt;
    es_format_Init( &fmt, SPU_ES, VLC_CODEC_TTML );
    p_sys->p_es = es_out_Add( p_demux->out, &fmt );
    /* the format is no longer needed, whether or not the ES was created */
    es_format_Clean( &fmt );
    if( !p_sys->p_es )
        goto error;

    return VLC_SUCCESS;

error:
    tt_CloseDemux( p_demux );

    return VLC_EGENERIC;
}

540
/**
 * Releases everything tt_OpenDemux() set up, including the private data.
 *
 * Also used as the error path of tt_OpenDemux(), so every member may still
 * be unset (the private data is zero-initialized).
 */
void tt_CloseDemux( demux_t* p_demux )
{
    demux_sys_t* p_sys = p_demux->p_sys;

    /* parsed document */
    if( p_sys->p_rootnode != NULL )
    {
        tt_node_RecursiveDelete( p_sys->p_rootnode );
    }

    /* output stream */
    if( p_sys->p_es != NULL )
    {
        es_out_Del( p_demux->out, p_sys->p_es );
    }

    /* XML reader first, then the XML context it was created from */
    if( p_sys->p_reader != NULL )
    {
        xml_ReaderDelete( p_sys->p_reader );
    }

    if( p_sys->p_xml != NULL )
    {
        xml_Delete( p_sys->p_xml );
    }

    free( p_sys->times.p_array );
    free( p_sys );
}