/*****************************************************************************
 * subtitle.c: Demux for subtitle text files.
 *****************************************************************************
 * Copyright (C) 1999-2007 VLC authors and VideoLAN
 * $Id$
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Derk-Jan Hartman <hartman at videolan dot org>
 *          Jean-Baptiste Kempf <jb@videolan.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
 *****************************************************************************/

/*****************************************************************************
 * Preamble
 *****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

#include <vlc_common.h>
#include <vlc_plugin.h>
#include <vlc_input.h>
#include <vlc_memory.h>

#include <ctype.h>
#include <math.h>
#include <assert.h>

#include <vlc_demux.h>
#include <vlc_charset.h>

/*****************************************************************************
 * Module descriptor
 *****************************************************************************/
/* Module entry points, registered with set_callbacks() in the module
 * descriptor. */
static int  Open ( vlc_object_t *p_this );
static void Close( vlc_object_t *p_this );

/* Long help texts attached to the module's configuration options. */
#define SUB_DELAY_LONGTEXT \
    N_("Apply a delay to all subtitles (in 1/10s, eg 100 means 10s).")
#define SUB_FPS_LONGTEXT \
    N_("Override the normal frames per second settings. " \
    "This will only work with MicroDVD and SubRIP (SRT) subtitles.")
#define SUB_TYPE_LONGTEXT \
    N_("Force the subtiles format. Selecting \"auto\" means autodetection and should always work.")
#define SUB_DESCRIPTION_LONGTEXT \
    N_("Override the default track description.")

/* Choices offered for the "sub-type" option. The same array is passed to
 * change_string_list() both as the list of values and as the list of
 * labels. */
static const char *const ppsz_sub_type[] =
{
    "auto", "microdvd", "subrip", "subviewer", "ssa1",
    "ssa2-4", "ass", "vplayer", "sami", "dvdsubtitle", "mpl2",
    "aqt", "pjs", "mpsub", "jacosub", "psb", "realtext", "dks",
    "subviewer1", "sbv"
};

70 71
vlc_module_begin ()
    set_shortname( N_("Subtitles"))
72
    set_description( N_("Text subtitle parser") )
73 74 75
    set_capability( "demux", 0 )
    set_category( CAT_INPUT )
    set_subcategory( SUBCAT_INPUT_DEMUX )
76
    add_float( "sub-fps", 0.0,
77
               N_("Frames per Second"),
Rémi Denis-Courmont's avatar
Rémi Denis-Courmont committed
78
               SUB_FPS_LONGTEXT, true )
79
    add_integer( "sub-delay", 0,
80
               N_("Subtitle delay"),
Rémi Denis-Courmont's avatar
Rémi Denis-Courmont committed
81
               SUB_DELAY_LONGTEXT, true )
82
    add_string( "sub-type", "auto", N_("Subtitle format"),
Rémi Denis-Courmont's avatar
Rémi Denis-Courmont committed
83
                SUB_TYPE_LONGTEXT, true )
84
        change_string_list( ppsz_sub_type, ppsz_sub_type )
85
    add_string( "sub-description", NULL, N_("Subtitle description"),
86
                SUB_DESCRIPTION_LONGTEXT, true )
87
    set_callbacks( Open, Close )
88

89 90
    add_shortcut( "subtitle" )
vlc_module_end ()
91 92

/*****************************************************************************
 * Prototypes:
 *****************************************************************************/

/* Subtitle file formats known to this demuxer. SUB_TYPE_UNKNOWN (-1) means
 * "not determined"; the remaining values are consecutive from 0. */
enum subtitle_type_e
{
    SUB_TYPE_UNKNOWN = -1,
    SUB_TYPE_MICRODVD,
    SUB_TYPE_SUBRIP,
    SUB_TYPE_SSA1,
    SUB_TYPE_SSA2_4,
    SUB_TYPE_ASS,
    SUB_TYPE_VPLAYER,
    SUB_TYPE_SAMI,
    SUB_TYPE_SUBVIEWER, /* SUBVIEWER 2 */
    SUB_TYPE_DVDSUBTITLE, /* Mplayer calls it subviewer2 */
    SUB_TYPE_MPL2,
    SUB_TYPE_AQT,
    SUB_TYPE_PJS,
    SUB_TYPE_MPSUB,
    SUB_TYPE_JACOSUB,
    SUB_TYPE_PSB,
    SUB_TYPE_RT,
    SUB_TYPE_DKS,
    SUB_TYPE_SUBVIEW1, /* SUBVIEWER 1 - mplayer calls it subrip09,
                         and Gnome subtitles SubViewer 1.0 */
    SUB_TYPE_SBV,
    SUB_TYPE_SCC,      /* Scenarist Closed Caption */
};

/* In-memory copy of the subtitle file, filled by TextLoad() and released by
 * TextUnload(). */
typedef struct
{
    size_t  i_line_count;   /* presumably the number of entries in line[] */
    size_t  i_line;         /* presumably the current read position */
    char    **line;
} text_t;

128 129
static int  TextLoad( text_t *, stream_t *s );
static void TextUnload( text_t * );
130

/* One parsed subtitle. Demux() only sends entries whose i_start is >= 0 and
 * only sets a duration when i_stop is >= i_start. */
typedef struct
{
    int64_t i_start;    /* display time, added to VLC_TS_0 when sent */
    int64_t i_stop;     /* end time; i_stop - i_start becomes the length */

    char    *psz_text;  /* heap-allocated payload, freed in Close() */
} subtitle_t;

139
typedef struct
140
{
141
    enum subtitle_type_e i_type;
142
    int64_t     i_microsecperframe;
143

144
    char        *psz_header; /* SSA */
145 146 147 148 149 150 151 152 153

    struct
    {
        bool b_inited;

        int i_comment;
        int i_time_resolution;
        int i_time_shift;
    } jss;
154

155 156 157 158 159 160 161
    struct
    {
        bool  b_inited;

        float f_total;
        float f_factor;
    } mpsub;
162

163 164 165 166 167
    struct
    {
        const char *psz_start;
    } sami;

168 169 170 171 172
} subs_properties_t;

struct demux_sys_t
{
    es_out_id_t *es;
173 174
    bool        b_slave;
    bool        b_first_time;
175 176 177 178 179 180 181 182 183 184 185 186 187 188

    int64_t     i_next_demux_date;

    struct
    {
        subtitle_t *p_array;
        size_t      i_count;
        size_t      i_current;
    } subtitles;

    int64_t     i_length;

    /* */
    subs_properties_t props;
189 190

    block_t * (*pf_convert)( const subtitle_t * );
191 192
};

/* Per-format parsers. They share one signature so that they can be stored in
 * sub_read_subtitle_function[] and called through a single pointer. Open()
 * passes the subtitle slot to fill and its index, and stops parsing as soon
 * as a parser returns non-zero. */
static int  ParseMicroDvd   ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseSubRip     ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseSubViewer  ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseSSA        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseVplayer    ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseSami       ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseDVDSubtitle( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseMPL2       ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseAQT        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParsePJS        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseMPSub      ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseJSS        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParsePSB        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseRealText   ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseDKS        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseSubViewer1 ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseCommonSBV  ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );
static int  ParseSCC        ( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t *, size_t );

212
static const struct
213
{
214
    const char *psz_type_name;
215
    int  i_type;
216
    const char *psz_name;
217
    int  (*pf_read)( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t*, size_t );
218 219
} sub_read_subtitle_function [] =
{
220 221 222 223 224 225 226 227 228
    { "microdvd",   SUB_TYPE_MICRODVD,    "MicroDVD",    ParseMicroDvd },
    { "subrip",     SUB_TYPE_SUBRIP,      "SubRIP",      ParseSubRip },
    { "subviewer",  SUB_TYPE_SUBVIEWER,   "SubViewer",   ParseSubViewer },
    { "ssa1",       SUB_TYPE_SSA1,        "SSA-1",       ParseSSA },
    { "ssa2-4",     SUB_TYPE_SSA2_4,      "SSA-2/3/4",   ParseSSA },
    { "ass",        SUB_TYPE_ASS,         "SSA/ASS",     ParseSSA },
    { "vplayer",    SUB_TYPE_VPLAYER,     "VPlayer",     ParseVplayer },
    { "sami",       SUB_TYPE_SAMI,        "SAMI",        ParseSami },
    { "dvdsubtitle",SUB_TYPE_DVDSUBTITLE, "DVDSubtitle", ParseDVDSubtitle },
229
    { "mpl2",       SUB_TYPE_MPL2,        "MPL2",        ParseMPL2 },
230
    { "aqt",        SUB_TYPE_AQT,         "AQTitle",     ParseAQT },
231
    { "pjs",        SUB_TYPE_PJS,         "PhoenixSub",  ParsePJS },
Jean-Baptiste Kempf's avatar
Jean-Baptiste Kempf committed
232
    { "mpsub",      SUB_TYPE_MPSUB,       "MPSub",       ParseMPSub },
233
    { "jacosub",    SUB_TYPE_JACOSUB,     "JacoSub",     ParseJSS },
234
    { "psb",        SUB_TYPE_PSB,         "PowerDivx",   ParsePSB },
235
    { "realtext",   SUB_TYPE_RT,          "RealText",    ParseRealText },
236
    { "dks",        SUB_TYPE_DKS,         "DKS",         ParseDKS },
237
    { "subviewer1", SUB_TYPE_SUBVIEW1,    "Subviewer 1", ParseSubViewer1 },
238
    { "sbv",        SUB_TYPE_SBV,         "SBV",         ParseCommonSBV },
239
    { "scc",        SUB_TYPE_SCC,         "SCC",         ParseSCC },
240
    { NULL,         SUB_TYPE_UNKNOWN,     "Unknown",     NULL }
241
};
242 243 244
/* When adding support for more formats, be sure to add their file extension
 * to src/input/subtitles.c to enable auto-detection.
 */
245

/* Demuxer callbacks installed by Open(). */
static int Demux( demux_t * );
static int Control( demux_t *, int, va_list );

/* Helpers used by Open(). */
static void Fix( demux_t * );
static char * get_language_from_filename( const char * );

/*****************************************************************************
 * Decoder format output function
 *****************************************************************************/

/* Default converter: copies the subtitle text, terminating NUL included,
 * into a freshly allocated block. Returns NULL when the text is empty or the
 * block cannot be allocated. */
static block_t *ToTextBlock( const subtitle_t *p_subtitle )
{
    /* Payload size, counting the terminating NUL. */
    const size_t i_size = strlen( p_subtitle->psz_text ) + 1;

    /* Only the terminator: nothing worth sending. */
    if( i_size <= 1 )
        return NULL;

    block_t *p_out = block_Alloc( i_size );
    if( p_out == NULL )
        return NULL;

    memcpy( p_out->p_buffer, p_subtitle->psz_text, i_size );
    return p_out;
}

/* Converter used for SCC files: turns the subtitle text into byte triplets.
 *
 * The text is read as space-separated tokens. Each token is parsed with
 * "%2x%2x" into two bytes and produces three output bytes: 0xFC followed by
 * the two parsed bytes. Conversion stops at the first token that does not
 * yield two values, or when the output block is full.
 *
 * The text is scanned in place without being modified. A tokenizer that
 * writes NUL separators into the string (strtok_r) would truncate the stored
 * subtitle, so that converting the same subtitle again after a seek would
 * only see its first token.
 *
 * Returns the new block (its i_buffer is the number of bytes produced), or
 * NULL when the text is shorter than 4 characters or allocation fails. */
static block_t *ToEIA608Block( const subtitle_t *p_subtitle )
{
    block_t *p_block;
    const size_t i_len = strlen( p_subtitle->psz_text );
    /* Sized for tokens of four hex digits plus one separator; the capacity
     * check in the loop below covers texts made of shorter tokens. */
    const size_t i_block = (1 + i_len / 5) * 3;

    if( i_len < 4 || !(p_block = block_Alloc( i_block )) )
        return NULL;

    p_block->i_buffer = 0;

    const char *psz_cur = p_subtitle->psz_text;
    while( i_block - p_block->i_buffer >= 3 )
    {
        /* Skip separators; stop at the end of the text. */
        while( *psz_cur == ' ' )
            psz_cur++;
        if( *psz_cur == '\0' )
            break;

        /* Length of the current token. */
        size_t i_tok = 0;
        while( psz_cur[i_tok] != '\0' && psz_cur[i_tok] != ' ' )
            i_tok++;

        /* %n reports how many characters were consumed: a parse that ran
         * past the end of the token borrowed digits from the next one and
         * must be rejected, as it would be on an isolated token. */
        unsigned a, b;
        int i_used = 0;
        if( sscanf( psz_cur, "%2x%2x%n", &a, &b, &i_used ) != 2 ||
            (size_t) i_used > i_tok )
            break;

        uint8_t *p_data = &p_block->p_buffer[p_block->i_buffer];
        p_data[0] = 0xFC;
        p_data[1] = a;
        p_data[2] = b;
        p_block->i_buffer += 3;

        psz_cur += i_tok;
    }

    return p_block;
}

298
/*****************************************************************************
299
 * Module initializer
300
 *****************************************************************************/
301
static int Open ( vlc_object_t *p_this )
302
{
303 304 305 306 307
    demux_t        *p_demux = (demux_t*)p_this;
    demux_sys_t    *p_sys;
    es_format_t    fmt;
    float          f_fps;
    char           *psz_type;
308
    int  (*pf_read)( vlc_object_t *, subs_properties_t *, text_t *, subtitle_t*, size_t );
Eric Petit's avatar
Eric Petit committed
309

310
    if( !p_demux->obj.force )
311
    {
312
        msg_Dbg( p_demux, "subtitle demux discarded" );
313 314
        return VLC_EGENERIC;
    }
315

316 317 318
    p_demux->pf_demux = Demux;
    p_demux->pf_control = Control;
    p_demux->p_sys = p_sys = malloc( sizeof( demux_sys_t ) );
319 320 321
    if( p_sys == NULL )
        return VLC_ENOMEM;

322 323 324 325
    p_sys->b_slave = false;
    p_sys->b_first_time = true;
    p_sys->i_next_demux_date = 0;

326 327
    p_sys->pf_convert = ToTextBlock;

328 329 330
    p_sys->subtitles.i_current= 0;
    p_sys->subtitles.i_count  = 0;
    p_sys->subtitles.p_array  = NULL;
331

332 333 334 335
    p_sys->props.psz_header         = NULL;
    p_sys->props.i_microsecperframe = 40000;
    p_sys->props.jss.b_inited       = false;
    p_sys->props.mpsub.b_inited     = false;
336
    p_sys->props.sami.psz_start     = NULL;
337

338
    /* Get the FPS */
339
    f_fps = var_CreateGetFloat( p_demux, "sub-original-fps" ); /* FIXME */
340
    if( f_fps >= 1.f )
341
        p_sys->props.i_microsecperframe = llroundf( 1000000.f / f_fps );
342

343
    msg_Dbg( p_demux, "Movie fps: %f", (double) f_fps );
344

345 346
    /* Check for override of the fps */
    f_fps = var_CreateGetFloat( p_demux, "sub-fps" );
347
    if( f_fps >= 1.f )
348
    {
349
        p_sys->props.i_microsecperframe = llroundf( 1000000.f / f_fps );
350
        msg_Dbg( p_demux, "Override subtitle fps %f", (double) f_fps );
351 352
    }

353
    /* Get or probe the type */
354
    p_sys->props.i_type = SUB_TYPE_UNKNOWN;
355
    psz_type = var_CreateGetString( p_demux, "sub-type" );
356
    if( psz_type && *psz_type )
357
    {
358
        for( int i = 0; ; i++ )
359
        {
360 361
            if( sub_read_subtitle_function[i].psz_type_name == NULL )
                break;
362

363
            if( !strcmp( sub_read_subtitle_function[i].psz_type_name,
364
                         psz_type ) )
365
            {
366
                p_sys->props.i_type = sub_read_subtitle_function[i].i_type;
367 368
                break;
            }
369 370
        }
    }
371
    free( psz_type );
372

373
#ifndef NDEBUG
374
    const uint64_t i_start_pos = vlc_stream_Tell( p_demux->s );
375 376
#endif

377 378 379
    size_t i_peek;
    const uint8_t *p_peek;
    if( vlc_stream_Peek( p_demux->s, &p_peek, 16 ) < 16 )
380 381
    {
        free( p_sys );
382
        return VLC_EGENERIC;
383
    }
384 385 386 387 388 389 390 391 392 393 394

    enum
    {
        UTF8BOM,
        UTF16LE,
        UTF16BE,
        NOBOM,
    } e_bom = NOBOM;
    const char *psz_bom = NULL;

    i_peek = 4096;
395
    /* Detect Unicode while skipping the UTF-8 Byte Order Mark */
396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418
    if( !memcmp( p_peek, "\xEF\xBB\xBF", 3 ) )
    {
        e_bom = UTF8BOM;
        psz_bom = "UTF-8";
    }
    else if( !memcmp( p_peek, "\xFF\xFE", 2 ) )
    {
        e_bom = UTF16LE;
        psz_bom = "UTF-16LE";
        i_peek *= 2;
    }
    else if( !memcmp( p_peek, "\xFE\xFF", 2 ) )
    {
        e_bom = UTF16BE;
        psz_bom = "UTF-16BE";
        i_peek *= 2;
    }

    if( e_bom != NOBOM )
        msg_Dbg( p_demux, "detected %s Byte Order Mark", psz_bom );

    i_peek = vlc_stream_Peek( p_demux->s, &p_peek, i_peek );
    if( unlikely(i_peek < 16) )
419 420
    {
        free( p_sys );
421
        return VLC_EGENERIC;
422
    }
423 424 425 426 427 428

    stream_t *p_probestream = NULL;
    if( e_bom != UTF8BOM && e_bom != NOBOM )
    {
        if( i_peek > 16 )
        {
429 430 431 432 433
            char *p_outbuf = FromCharset( psz_bom, p_peek, i_peek );
            if( p_outbuf != NULL )
                p_probestream = vlc_stream_MemoryNew( p_demux, (uint8_t *)p_outbuf,
                                                      strlen( p_outbuf ),
                                                      false ); /* free p_outbuf on release */
434 435 436
        }
    }
    else
437
    {
438 439 440
        const size_t i_skip = (e_bom == UTF8BOM) ? 3 : 0;
        p_probestream = vlc_stream_MemoryNew( p_demux, (uint8_t *) &p_peek[i_skip],
                                              i_peek - i_skip, true );
441 442
    }

443
    if( p_probestream == NULL )
444 445
    {
        free( p_sys );
446
        return VLC_EGENERIC;
447
    }
448

449
    /* Probe if unknown type */
450
    if( p_sys->props.i_type == SUB_TYPE_UNKNOWN )
451
    {
452
        int     i_try;
453
        char    *s = NULL;
454

455 456
        msg_Dbg( p_demux, "autodetecting subtitle format" );
        for( i_try = 0; i_try < 256; i_try++ )
457 458
        {
            int i_dummy;
459
            char p_dummy;
460

461
            if( (s = vlc_stream_ReadLine( p_probestream ) ) == NULL )
462 463
                break;

464
            if( strcasestr( s, "<SAMI>" ) )
465
            {
466
                p_sys->props.i_type = SUB_TYPE_SAMI;
467 468 469 470
                break;
            }
            else if( sscanf( s, "{%d}{%d}", &i_dummy, &i_dummy ) == 2 ||
                     sscanf( s, "{%d}{}", &i_dummy ) == 1)
471
            {
472
                p_sys->props.i_type = SUB_TYPE_MICRODVD;
473 474
                break;
            }
475
            else if( sscanf( s, "%d:%d:%d,%d --> %d:%d:%d,%d",
476
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
477 478 479 480 481
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy ) == 8 ||
                     sscanf( s, "%d:%d:%d --> %d:%d:%d,%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
                     sscanf( s, "%d:%d:%d,%d --> %d:%d:%d",
482
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
483 484 485 486 487 488 489 490 491 492 493 494 495
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
                     sscanf( s, "%d:%d:%d.%d --> %d:%d:%d.%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy ) == 8 ||
                     sscanf( s, "%d:%d:%d --> %d:%d:%d.%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
                     sscanf( s, "%d:%d:%d.%d --> %d:%d:%d",
                             &i_dummy,&i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 7 ||
                     sscanf( s, "%d:%d:%d --> %d:%d:%d",
                             &i_dummy,&i_dummy,&i_dummy,
                             &i_dummy,&i_dummy,&i_dummy ) == 6 )
496
            {
497
                p_sys->props.i_type = SUB_TYPE_SUBRIP;
498 499
                break;
            }
500
            else if( !strncasecmp( s, "!: This is a Sub Station Alpha v1", 33 ) )
501
            {
502
                p_sys->props.i_type = SUB_TYPE_SSA1;
503
                break;
504
            }
505
            else if( !strncasecmp( s, "ScriptType: v4.00+", 18 ) )
506
            {
507
                p_sys->props.i_type = SUB_TYPE_ASS;
508 509 510 511
                break;
            }
            else if( !strncasecmp( s, "ScriptType: v4.00", 17 ) )
            {
512
                p_sys->props.i_type = SUB_TYPE_SSA2_4;
513
                break;
514
            }
515
            else if( !strncasecmp( s, "Dialogue: Marked", 16  ) )
516
            {
517
                p_sys->props.i_type = SUB_TYPE_SSA2_4;
518 519 520 521
                break;
            }
            else if( !strncasecmp( s, "Dialogue:", 9  ) )
            {
522
                p_sys->props.i_type = SUB_TYPE_ASS;
523 524
                break;
            }
525
            else if( strcasestr( s, "[INFORMATION]" ) )
526
            {
527
                p_sys->props.i_type = SUB_TYPE_SUBVIEWER; /* I hope this will work */
528 529
                break;
            }
530 531 532 533
            else if( sscanf( s, "%d:%d:%d.%d %d:%d:%d",
                                 &i_dummy, &i_dummy, &i_dummy, &i_dummy,
                                 &i_dummy, &i_dummy, &i_dummy ) == 7 ||
                     sscanf( s, "@%d @%d", &i_dummy, &i_dummy) == 2)
534
            {
535
                p_sys->props.i_type = SUB_TYPE_JACOSUB;
536
                break;
537
            }
538 539 540 541
            else if( sscanf( s, "%d:%d:%d.%d,%d:%d:%d.%d",
                                 &i_dummy, &i_dummy, &i_dummy, &i_dummy,
                                 &i_dummy, &i_dummy, &i_dummy, &i_dummy ) == 8 )
            {
542
                p_sys->props.i_type = SUB_TYPE_SBV;
543 544
                break;
            }
545 546
            else if( sscanf( s, "%d:%d:%d:", &i_dummy, &i_dummy, &i_dummy ) == 3 ||
                     sscanf( s, "%d:%d:%d ", &i_dummy, &i_dummy, &i_dummy ) == 3 )
547
            {
548
                p_sys->props.i_type = SUB_TYPE_VPLAYER;
549
                break;
550
            }
551 552 553
            else if( sscanf( s, "{T %d:%d:%d:%d", &i_dummy, &i_dummy,
                             &i_dummy, &i_dummy ) == 4 )
            {
554
                p_sys->props.i_type = SUB_TYPE_DVDSUBTITLE;
555 556
                break;
            }
557 558 559
            else if( sscanf( s, "[%d:%d:%d]%c",
                     &i_dummy, &i_dummy, &i_dummy, &p_dummy ) == 4 )
            {
560
                p_sys->props.i_type = SUB_TYPE_DKS;
561 562 563 564
                break;
            }
            else if( strstr( s, "*** START SCRIPT" ) )
            {
565
                p_sys->props.i_type = SUB_TYPE_SUBVIEW1;
566 567
                break;
            }
568 569 570
            else if( sscanf( s, "[%d][%d]", &i_dummy, &i_dummy ) == 2 ||
                     sscanf( s, "[%d][]", &i_dummy ) == 1)
            {
571
                p_sys->props.i_type = SUB_TYPE_MPL2;
572
                break;
573
            }
574 575 576
            else if( sscanf (s, "FORMAT=%d", &i_dummy) == 1 ||
                     ( sscanf (s, "FORMAT=TIM%c", &p_dummy) == 1
                       && p_dummy =='E' ) )
Jean-Baptiste Kempf's avatar
Jean-Baptiste Kempf committed
577
            {
578
                p_sys->props.i_type = SUB_TYPE_MPSUB;
579
                break;
Jean-Baptiste Kempf's avatar
Jean-Baptiste Kempf committed
580
            }
581
            else if( sscanf( s, "-->> %d", &i_dummy) == 1 )
582
            {
583
                p_sys->props.i_type = SUB_TYPE_AQT;
584
                break;
585
            }
586 587
            else if( sscanf( s, "%d,%d,", &i_dummy, &i_dummy ) == 2 )
            {
588
                p_sys->props.i_type = SUB_TYPE_PJS;
589
                break;
590
            }
591 592
            else if( sscanf( s, "{%d:%d:%d}",
                                &i_dummy, &i_dummy, &i_dummy ) == 3 )
593
            {
594
                p_sys->props.i_type = SUB_TYPE_PSB;
595
                break;
596
            }
597 598
            else if( strcasestr( s, "<time" ) )
            {
599
                p_sys->props.i_type = SUB_TYPE_RT;
600
                break;
601
            }
Arun Pandian G's avatar
Arun Pandian G committed
602 603
            else if( !strncasecmp( s, "WEBVTT",6 ) )
            {
604
                /* FAIL */
Arun Pandian G's avatar
Arun Pandian G committed
605 606
                break;
            }
607 608 609 610 611 612
            else if( !strncasecmp( s, "Scenarist_SCC V1.0", 18 ) )
            {
                p_sys->props.i_type = SUB_TYPE_SCC;
                p_sys->pf_convert = ToEIA608Block;
                break;
            }
613 614 615

            free( s );
            s = NULL;
616
        }
617

618
        free( s );
619
    }
620

621 622
    vlc_stream_Delete( p_probestream );

623
    /* Quit on unknown subtitles */
624
    if( p_sys->props.i_type == SUB_TYPE_UNKNOWN )
625
    {
626 627
#ifndef NDEBUG
        /* Ensure it will work with non seekable streams */
628
        assert( i_start_pos == vlc_stream_Tell( p_demux->s ) );
629
#endif
630
        msg_Warn( p_demux, "failed to recognize subtitle type" );
631
        free( p_sys );
632
        return VLC_EGENERIC;
633 634
    }

635
    for( int i = 0; ; i++ )
636
    {
637
        if( sub_read_subtitle_function[i].i_type == p_sys->props.i_type )
638
        {
639 640 641
            msg_Dbg( p_demux, "detected %s format",
                     sub_read_subtitle_function[i].psz_name );
            pf_read = sub_read_subtitle_function[i].pf_read;
642 643
            break;
        }
644
    }
645

646 647
    msg_Dbg( p_demux, "loading all subtitles..." );

648 649
    if( e_bom == UTF8BOM && /* skip BOM */
        vlc_stream_Read( p_demux->s, NULL, 3 ) != 3 )
650 651 652 653
    {
        Close( p_this );
        return VLC_EGENERIC;
    }
654

655
    /* Load the whole file */
656 657
    text_t txtlines;
    TextLoad( &txtlines, p_demux->s );
658 659

    /* Parse it */
660
    for( size_t i_max = 0; i_max < SIZE_MAX - 500 * sizeof(subtitle_t); )
661
    {
662
        if( p_sys->subtitles.i_count >= i_max )
663
        {
664
            i_max += 500;
665 666
            subtitle_t *p_realloc = realloc( p_sys->subtitles.p_array, sizeof(subtitle_t) * i_max );
            if( p_realloc == NULL )
667
            {
668
                TextUnload( &txtlines );
669
                Close( p_this );
670
                return VLC_ENOMEM;
671
            }
672
            p_sys->subtitles.p_array = p_realloc;
673
        }
674

675
        if( pf_read( VLC_OBJECT(p_demux), &p_sys->props, &txtlines,
676
                     &p_sys->subtitles.p_array[p_sys->subtitles.i_count],
677
                     p_sys->subtitles.i_count ) )
678
            break;
679

680
        p_sys->subtitles.i_count++;
681
    }
682
    /* Unload */
683
    TextUnload( &txtlines );
684

685
    msg_Dbg(p_demux, "loaded %zu subtitles", p_sys->subtitles.i_count );
686

687
    /* Fix subtitle (order and time) *** */
688
    p_sys->subtitles.i_current = 0;
689
    p_sys->i_length = 0;
690 691
    if( p_sys->subtitles.i_count > 0 )
        p_sys->i_length = p_sys->subtitles.p_array[p_sys->subtitles.i_count-1].i_stop;
692

693
    /* *** add subtitle ES *** */
694 695 696
    if( p_sys->props.i_type == SUB_TYPE_SSA1 ||
             p_sys->props.i_type == SUB_TYPE_SSA2_4 ||
             p_sys->props.i_type == SUB_TYPE_ASS )
697
    {
698
        Fix( p_demux );
699
        es_format_Init( &fmt, SPU_ES, VLC_CODEC_SSA );
700
    }
701 702
    else if( p_sys->props.i_type == SUB_TYPE_SCC )
    {
703
        es_format_Init( &fmt, SPU_ES, VLC_CODEC_CEA608 );
704
        fmt.subs.cc.i_reorder_depth = -1;
705
    }
706
    else
707
        es_format_Init( &fmt, SPU_ES, VLC_CODEC_SUBT );
708 709 710 711 712 713 714 715 716 717 718

    /* Stupid language detection in the filename */
    char * psz_language = get_language_from_filename( p_demux->psz_file );

    if( psz_language )
    {
        fmt.psz_language = psz_language;
        msg_Dbg( p_demux, "detected language %s of subtitle: %s", psz_language,
                 p_demux->psz_location );
    }

719 720
    if( psz_bom )
        fmt.subs.psz_encoding = strdup( psz_bom );
721 722 723 724 725
    char *psz_description = var_InheritString( p_demux, "sub-description" );
    if( psz_description && *psz_description )
        fmt.psz_description = psz_description;
    else
        free( psz_description );
726 727
    if( p_sys->props.psz_header != NULL &&
       (fmt.p_extra = strdup( p_sys->props.psz_header )) )
728
    {
729
        fmt.i_extra = strlen( p_sys->props.psz_header ) + 1;
730
    }
731

732
    p_sys->es = es_out_Add( p_demux->out, &fmt );
733
    es_format_Clean( &fmt );
734 735 736 737 738
    if( p_sys->es == NULL )
    {
        Close( p_this );
        return VLC_EGENERIC;
    }
Derk-Jan Hartman's avatar
Derk-Jan Hartman committed
739

740
    return VLC_SUCCESS;
741 742 743
}

/*****************************************************************************
744
 * Close: Close subtitle demux
745
 *****************************************************************************/
746
static void Close( vlc_object_t *p_this )
747
{
748 749
    demux_t *p_demux = (demux_t*)p_this;
    demux_sys_t *p_sys = p_demux->p_sys;
750

751 752 753
    for( size_t i = 0; i < p_sys->subtitles.i_count; i++ )
        free( p_sys->subtitles.p_array[i].psz_text );
    free( p_sys->subtitles.p_array );
754
    free( p_sys->props.psz_header );
755 756 757 758 759 760 761 762 763 764 765 766 767 768 769

    free( p_sys );
}

/*****************************************************************************
 * Control:
 *****************************************************************************/
/* Answers demuxer control queries.
 *
 * Handled queries: DEMUX_CAN_SEEK, DEMUX_GET_LENGTH, DEMUX_GET_TIME,
 * DEMUX_SET_TIME, DEMUX_GET_POSITION, DEMUX_SET_POSITION and
 * DEMUX_SET_NEXT_DEMUX_TIME. Reported times are i_next_demux_date minus the
 * parent's "spu-delay", unless that difference is negative, in which case
 * i_next_demux_date itself is reported.
 *
 * Returns VLC_SUCCESS when the query was handled, VLC_EGENERIC for any other
 * query or when a seek target cannot be satisfied. */
static int Control( demux_t *p_demux, int i_query, va_list args )
{
    demux_sys_t *p_sys = p_demux->p_sys;
    int64_t *pi64, i64;
    double *pf, f;

    switch( i_query )
    {
        case DEMUX_CAN_SEEK:
            *va_arg( args, bool * ) = true;
            return VLC_SUCCESS;

        case DEMUX_GET_LENGTH:
            pi64 = va_arg( args, int64_t * );
            *pi64 = p_sys->i_length;
            return VLC_SUCCESS;

        case DEMUX_GET_TIME:
            pi64 = va_arg( args, int64_t * );
            *pi64 = p_sys->i_next_demux_date - var_GetInteger( p_demux->obj.parent, "spu-delay" );
            if( *pi64 < 0 )
               *pi64 = p_sys->i_next_demux_date;
            return VLC_SUCCESS;

        case DEMUX_SET_TIME:
            i64 = va_arg( args, int64_t );
            /* Resume from the entry just before the first subtitle (from
             * index 1 on) that starts at or after the requested time. */
            for( size_t i = 0; i + 1< p_sys->subtitles.i_count; i++ )
            {
                if( p_sys->subtitles.p_array[i + 1].i_start >= i64 )
                {
                    p_sys->subtitles.i_current = i;
                    p_sys->i_next_demux_date = i64;
                    p_sys->b_first_time = true;
                    return VLC_SUCCESS;
                }
            }
            break;

        case DEMUX_GET_POSITION:
            pf = va_arg( args, double * );
            if( p_sys->subtitles.i_current >= p_sys->subtitles.i_count )
            {
                *pf = 1.0;
            }
            else if( p_sys->subtitles.i_count > 0 && p_sys->i_length )
            {
                *pf = p_sys->i_next_demux_date - var_GetInteger( p_demux->obj.parent, "spu-delay" );
                if( *pf < 0 )
                    *pf = p_sys->i_next_demux_date;
                *pf /= p_sys->i_length;
            }
            else
            {
                *pf = 0.0;
            }
            return VLC_SUCCESS;

        case DEMUX_SET_POSITION:
            f = va_arg( args, double );
            if( p_sys->subtitles.i_count && p_sys->i_length )
            {
                /* Convert the position to a time and reuse DEMUX_SET_TIME. */
                i64 = VLC_TS_0 + f * p_sys->i_length;
                return demux_Control( p_demux, DEMUX_SET_TIME, i64 );
            }
            break;

        case DEMUX_SET_NEXT_DEMUX_TIME:
            p_sys->b_slave = true;
            p_sys->i_next_demux_date = va_arg( args, int64_t ) - VLC_TS_0;
            return VLC_SUCCESS;

        case DEMUX_GET_PTS_DELAY:
        case DEMUX_GET_FPS:
        case DEMUX_GET_META:
        case DEMUX_GET_ATTACHMENTS:
        case DEMUX_GET_TITLE_INFO:
        case DEMUX_HAS_UNSUPPORTED_META:
        case DEMUX_CAN_RECORD:
        default:
            break;

    }
    return VLC_EGENERIC;
}

/*****************************************************************************
 * Demux: Send subtitle to decoder
 *****************************************************************************
 * Computes a time barrier (next demux date minus the "spu-delay" variable
 * read from the parent object, falling back to the next demux date when the
 * result is negative) and sends every not-yet-sent subtitle starting at or
 * before that barrier to the ES output.
 *
 * When b_slave is not set, the PCR is updated and the next demux date is
 * advanced by CLOCK_FREQ / 8.
 *
 * Returns VLC_DEMUXER_EOF once every subtitle has been consumed,
 * VLC_DEMUXER_SUCCESS otherwise.
 *****************************************************************************/
static int Demux( demux_t *p_demux )
{
    demux_sys_t *p_sys = p_demux->p_sys;

    int64_t i_barrier = p_sys->i_next_demux_date - var_GetInteger( p_demux->obj.parent, "spu-delay" );
    if( i_barrier < 0 )
    {
        /* Applying the delay would go before time 0: ignore the delay */
        i_barrier = p_sys->i_next_demux_date;
    }

    for( ; p_sys->subtitles.i_current < p_sys->subtitles.i_count;
           p_sys->subtitles.i_current++ )
    {
        const subtitle_t *p_sub = &p_sys->subtitles.p_array[p_sys->subtitles.i_current];

        /* Entries are consumed in array order; stop at the first one that
         * starts after the barrier */
        if( p_sub->i_start > i_barrier )
            break;

        /* Set the PCR once before the first block when not slaved */
        if ( !p_sys->b_slave && p_sys->b_first_time )
        {
            es_out_SetPCR( p_demux->out, VLC_TS_0 + i_barrier );
            p_sys->b_first_time = false;
        }

        /* Entries with a negative start time are skipped, not sent */
        if( p_sub->i_start < 0 )
            continue;

        block_t *p_out = p_sys->pf_convert( p_sub );
        if( p_out == NULL )
            continue;

        p_out->i_pts = VLC_TS_0 + p_sub->i_start;
        p_out->i_dts = p_out->i_pts;

        /* Only set a length when the stop time is known and coherent */
        if( p_sub->i_stop >= 0 && p_sub->i_stop >= p_sub->i_start )
        {
            p_out->i_length = p_sub->i_stop - p_sub->i_start;
        }

        es_out_Send( p_demux->out, p_sys->es, p_out );
    }

    if ( !p_sys->b_slave )
    {
        es_out_SetPCR( p_demux->out, VLC_TS_0 + i_barrier );
        p_sys->i_next_demux_date += CLOCK_FREQ / 8;
    }

    return ( p_sys->subtitles.i_current >= p_sys->subtitles.i_count )
           ? VLC_DEMUXER_EOF
           : VLC_DEMUXER_SUCCESS;
}
897

898 899 900

static int subtitle_cmp( const void *first, const void *second )
{
901
    int64_t result = ((subtitle_t *)(first))->i_start - ((subtitle_t *)(second))->i_start;
902
    /* Return -1, 0 ,1, and not directly subtraction
903 904
     * as result can be > INT_MAX */
    return result == 0 ? 0 : result > 0 ? 1 : -1;
905
}
906
/*****************************************************************************
907
 * Fix: fix time stamp and order of subtitle
908
 *****************************************************************************/
909
static void Fix( demux_t *p_demux )
910
{
911
    demux_sys_t *p_sys = p_demux->p_sys;
912 913

    /* *** fix order (to be sure...) *** */
914
    qsort( p_sys->subtitles.p_array, p_sys->subtitles.i_count, sizeof( p_sys->subtitles.p_array[0] ), subtitle_cmp);
915 916 917 918
}

/**
 * Reads every line of a stream into a text_t line array.
 *
 * Lines are read with vlc_stream_ReadLine() until it returns NULL. The array
 * starts with room for 500 lines and grows by 100 entries at a time.
 *
 * \param txt text buffer to fill; its line array, line count and current
 *            line index are (re)initialised
 * \param s   stream to read the lines from
 * \return VLC_SUCCESS when at least one line was loaded,
 *         VLC_EGENERIC when the stream yielded no line (the array is
 *         released and txt->line reset to NULL),
 *         VLC_ENOMEM on allocation failure. When growing the array fails,
 *         every line read so far, including the last one, stays recorded in
 *         txt so that TextUnload() releases it.
 */
static int TextLoad( text_t *txt, stream_t *s )
{
    size_t i_line_max = 500;

    /* init txt */
    txt->i_line_count   = 0;
    txt->i_line         = 0;
    txt->line           = calloc( i_line_max, sizeof( *txt->line ) );
    if( txt->line == NULL )
        return VLC_ENOMEM;

    /* load the complete file */
    for( ;; )
    {
        char *psz = vlc_stream_ReadLine( s );
        if( psz == NULL )
            break;

        /* A free slot is always available here: the array is grown below as
         * soon as it becomes full. Counting the line immediately keeps it
         * owned by txt even if the growth fails. */
        txt->line[txt->i_line_count++] = psz;

        if( txt->i_line_count >= i_line_max )
        {
            const size_t i_new_max = i_line_max + 100;
            char **p_realloc = realloc( txt->line, i_new_max * sizeof( *txt->line ) );
            if( p_realloc == NULL )
                return VLC_ENOMEM; /* txt->line is still the valid old array */

            txt->line  = p_realloc;
            i_line_max = i_new_max;
        }
    }

    if( txt->i_line_count == 0 )
    {
        free( txt->line );
        txt->line = NULL; /* do not leave a dangling pointer behind */
        return VLC_EGENERIC;
    }

    return VLC_SUCCESS;
}
957 958
/**
 * Releases the lines held by a text_t and resets its counters.
 *
 * The line array and its strings are only freed when the line count is
 * non-zero; the current line index and the line count are reset in all cases.
 */
static void TextUnload( text_t *txt )
{
    if( txt->i_line_count != 0 )
    {
        char **pp_end = txt->line + txt->i_line_count;

        for( char **pp_line = txt->line; pp_line != pp_end; pp_line++ )
        {
            free( *pp_line );
        }
        free( txt->line );
    }

    txt->i_line_count = 0;
    txt->i_line       = 0;
}
968

969 970 971 972 973 974 975 976 977 978 979 980
/**
 * Returns the current line and advances to the next one.
 *
 * \return the line at the current index, or NULL when every line has
 *         already been returned (the index is then left untouched)
 */
static char *TextGetLine( text_t *txt )
{
    char *psz_line = NULL;

    if( txt->i_line < txt->i_line_count )
    {
        psz_line = txt->line[txt->i_line];
        txt->i_line++;
    }

    return psz_line;
}
/**
 * Steps the current line index back by one, unless it is already at the
 * first line.
 */
static void TextPreviousLine( text_t *txt )
{
    if( txt->i_line <= 0 )
        return;

    txt->i_line -= 1;
}
981 982

/*****************************************************************************
 * Specific Subtitle function
 *****************************************************************************/
985 986 987 988 989
/* ParseMicroDvd:
 *  Format:
 *      {n1}{n2}Line1|Line2|Line3....
 *  where n1 and n2 are the video frame number (n2 can be empty)
 */
990 991 992
static int ParseMicroDvd( vlc_object_t *p_obj, subs_properties_t *p_props,
                          text_t *txt, subtitle_t *p_subtitle,
                          size_t i_idx )
993
{
994
    VLC_UNUSED( i_idx );
995 996 997 998
    char *psz_text;
    int  i_start;
    int  i_stop;
    int  i;
999

1000 1001
    for( ;; )
    {
1002 1003 1004 1005 1006 1007 1008 1009
        const char *s = TextGetLine( txt );
        if( !s )
            return VLC_EGENERIC;

        psz_text = malloc( strlen(s) + 1 );
        if( !psz_text )
            return VLC_ENOMEM;

1010
        i_start = 0;
1011
        i_stop  = -1;
1012 1013
        if( sscanf( s, "{%d}{}%[^\r\n]", &i_start, psz_text ) == 2 ||
            sscanf( s, "{%d}{%d}%[^\r\n]", &i_start, &i_stop, psz_text ) == 3)
1014
        {
1015 1016 1017 1018 1019
            if( i_start != 1 || i_stop != 1 )
                break;

            /* We found a possible setting of the framerate "{1}{1}23.976" */
            /* Check if it's usable, and if the sub-fps is not set */
1020
            float f_fps = us_strtof( psz_text, NULL );
1021 1022
            if( f_fps > 0.f && var_GetFloat( p_obj, "sub-fps" ) <= 0.f )
                p_props->i_microsecperframe = llroundf(1000000.f / f_fps);
1023
        }
1024
        free( psz_text );
1025 1026
    }

1027
    /* replace | by \n */
1028
    for( i = 0; psz_text[i] != '\0'; i++ )
1029
    {
1030 1031
        if( psz_text[i] == '|' )
            psz_text[i] = '\n';
1032
    }
1033

1034
    /* */
1035 1036
    p_subtitle->i_start  = i_start * p_props->i_microsecperframe;
    p_subtitle->i_stop   = i_stop >= 0 ? (i_stop  * p_props->i_microsecperframe) : -1;
1037 1038
    p_subtitle->psz_text = psz_text;
    return VLC_SUCCESS;
1039 1040
}

1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056
/* ParseSubRipSubViewer
 *  Format SubRip
 *      n
 *      h1:m1:s1,d1 --> h2:m2:s2,d2
 *      Line1
 *      Line2
 *      ....
 *      [Empty line]
 *  Format SubViewer v1/v2
 *      h1:m1:s1.d1,h2:m2:s2.d2
 *      Line1[br]Line2
 *      Line3
 *      ...
 *      [empty line]
 *  We ignore line number for SubRip
 */
1057 1058
static int ParseSubRipSubViewer( vlc_object_t *p_obj, subs_properties_t *p_props,
                                 text_t *txt, subtitle_t *p_subtitle,
1059
                                 int (* pf_parse_timing)(subtitle_t *, const char *),
1060
                                 bool b_replace_br )
1061
{
1062 1063
    VLC_UNUSED(p_obj);
    VLC_UNUSED(p_props);
1064
    char    *psz_text;
1065

1066 1067
    for( ;; )
    {