substx3g.c 14.5 KB
Newer Older
François Cartegnie's avatar
François Cartegnie committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
/*****************************************************************************
 * substx3g.c : MP4 tx3g subtitles decoder
 *****************************************************************************
 * Copyright (C) 2014 VLC authors and VideoLAN
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
 *****************************************************************************/

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

#include <vlc_common.h>
#include <vlc_plugin.h>
#include <vlc_codec.h>
#include <vlc_sout.h>
29
#include <vlc_charset.h>
François Cartegnie's avatar
François Cartegnie committed
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82

#include "substext.h"

/*****************************************************************************
 * Module descriptor.
 *****************************************************************************/
/* Forward declarations of the two entry points defined later in this file:
 * Open() is referenced by set_callbacks() below and Decode() is installed by
 * Open() as the subtitle decoding callback. */
static int  Open ( vlc_object_t * );
static subpicture_t *Decode( decoder_t *, block_t ** );

/* Plugin description: declares a "decoder" capability with value 100, filed
 * under the input / subtitle-codec category. Open is passed as the first
 * callback and NULL as the second (presumably the close hook -- confirm
 * against the set_callbacks() definition in vlc_plugin.h). */
vlc_module_begin ()
    set_description( N_("tx3g subtitles decoder") )
    set_shortname( N_("tx3g subtitles") )
    set_capability( "decoder", 100 )
    set_category( CAT_INPUT )
    set_subcategory( SUBCAT_INPUT_SCODEC )
    set_callbacks( Open, NULL )
vlc_module_end ()

/****************************************************************************
 * Local structs
 ****************************************************************************/

/*****************************************************************************
 * Open: accept the stream only when it carries tx3g subtitles
 *****************************************************************************
 * On a codec match, installs Decode() as the subtitle decoding callback and
 * declares an SPU output with no specific codec (0).
 *
 * Returns VLC_SUCCESS when the decoder was set up, VLC_EGENERIC when the
 * input codec is not VLC_CODEC_TX3G.
 *****************************************************************************/
static int Open( vlc_object_t *p_this )
{
    decoder_t *p_decoder = (decoder_t *) p_this;

    if( p_decoder->fmt_in.i_codec == VLC_CODEC_TX3G )
    {
        p_decoder->pf_decode_sub = Decode;

        p_decoder->fmt_out.i_cat = SPU_ES;
        p_decoder->fmt_out.i_codec = 0;

        return VLC_SUCCESS;
    }

    return VLC_EGENERIC;
}

/*****************************************************************************
 * Local:
 *****************************************************************************/

#define FONT_FACE_BOLD      0x1
#define FONT_FACE_ITALIC    0x2
#define FONT_FACE_UNDERLINE 0x4

static int ConvertFlags( int i_atomflags )
{
    int i_vlcstyles_flags = 0;
    if ( i_atomflags & FONT_FACE_BOLD )
        i_vlcstyles_flags |= STYLE_BOLD;
83
    if ( i_atomflags & FONT_FACE_ITALIC )
François Cartegnie's avatar
François Cartegnie committed
84
        i_vlcstyles_flags |= STYLE_ITALIC;
85
    if ( i_atomflags & FONT_FACE_UNDERLINE )
François Cartegnie's avatar
François Cartegnie committed
86 87 88 89
        i_vlcstyles_flags |= STYLE_UNDERLINE;
    return i_vlcstyles_flags;
}

90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
/* Returns the number of characters in the NUL-terminated UTF-8 string
 * psz_string. Every byte that is not a continuation byte (bit pattern
 * 10xxxxxx) is counted as the start of one character. The pointer must not
 * be NULL. */
static size_t str8len( const char *psz_string )
{
    size_t i_chars = 0;

    for( const char *p_byte = psz_string; *p_byte != '\0'; ++p_byte )
        i_chars += ( (*p_byte & 0xC0) != 0x80 );

    return i_chars;
}

/* Duplicates a run of characters from the NUL-terminated UTF-8 string
 * psz_string.
 *
 *  psz_string  source string (must not be NULL)
 *  i_skip      number of characters to skip before the copy starts
 *  n           maximum number of characters to copy
 *
 * Returns a newly allocated string the caller must free(), or NULL when
 * fewer than i_skip + 1 characters are available or the allocation fails.
 *
 * Characters are counted on their lead byte, so after counting the last
 * character of a run the cursor still sits on that character's continuation
 * bytes. Both cursors are therefore advanced past any continuation bytes
 * (10xxxxxx) so that neither the start nor the end of the copy lands in the
 * middle of a multi-byte sequence. */
static char * str8indup( const char *psz_string, size_t i_skip, size_t n )
{
    if( i_skip > 0 )
    {
        while( i_skip && *psz_string )
        {
            if ( (*psz_string & 0xC0) != 0x80 ) i_skip--;
            psz_string++;
        }
        /* finish the last skipped character */
        while( (*psz_string & 0xC0) == 0x80 )
            psz_string++;
    }
    if ( ! *psz_string || i_skip ) return NULL;

    const char *psz_tmp = psz_string;
    if( n > 0 )
    {
        while( n && *psz_tmp )
        {
            if ( (*psz_tmp & 0xC0) != 0x80 ) n--;
            psz_tmp++;
        }
        /* include the continuation bytes of the last copied character */
        while( (*psz_tmp & 0xC0) == 0x80 )
            psz_tmp++;
    }
    return strndup( psz_string, (size_t)(psz_tmp - psz_string) );
}

120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
/* Singly linked wrapper around a text_segment_t that additionally remembers
 * the size of the wrapped text, so style ranges can be located without
 * rescanning the strings. */
typedef struct tx3g_segment_t
{
    text_segment_t        *s;        /* wrapped text segment */
    size_t                 i_size;   /* text size; the code in this file fills
                                        it with the str8len() character count */
    struct tx3g_segment_t *p_next3g; /* next wrapper in the chain, or NULL */
} tx3g_segment_t;

/* Allocates a wrapper whose text segment is created by
 * text_segment_New( psz_string ). The new wrapper has i_size 0 and no
 * successor. Returns NULL when either allocation fails. */
static tx3g_segment_t * tx3g_segment_New( const char *psz_string )
{
    tx3g_segment_t *p_wrapper = malloc( sizeof(*p_wrapper) );
    if( p_wrapper == NULL )
        return NULL;

    p_wrapper->s = text_segment_New( psz_string );
    if( p_wrapper->s == NULL )
    {
        free( p_wrapper );
        return NULL;
    }

    p_wrapper->i_size = 0;
    p_wrapper->p_next3g = NULL;
    return p_wrapper;
}

static void SegmentDoSplit( tx3g_segment_t *p_segment, uint16_t i_start, uint16_t i_end,
                            tx3g_segment_t **pp_segment_left,
                            tx3g_segment_t **pp_segment_middle,
                            tx3g_segment_t **pp_segment_right )
François Cartegnie's avatar
François Cartegnie committed
150
{
151 152 153
    tx3g_segment_t *p_segment_left = *pp_segment_left;
    tx3g_segment_t *p_segment_right = *pp_segment_right;
    tx3g_segment_t *p_segment_middle = *pp_segment_middle;
François Cartegnie's avatar
François Cartegnie committed
154 155 156 157 158 159 160
    p_segment_left = p_segment_middle = p_segment_right = NULL;

    if ( (p_segment->i_size - i_start < 1) || (p_segment->i_size - i_end < 1) )
        return;

    if ( i_start > 0 )
    {
161 162
        char* psz_text = str8indup( p_segment->s->psz_text, 0, i_start );
        p_segment_left = tx3g_segment_New( psz_text );
163
        free( psz_text );
François Cartegnie's avatar
François Cartegnie committed
164
        if ( !p_segment_left ) goto error;
165 166
        p_segment_left->s->style = text_style_Duplicate( p_segment->s->style );
        p_segment_left->i_size = str8len( p_segment_left->s->psz_text );
François Cartegnie's avatar
François Cartegnie committed
167 168
    }

169 170
    char* psz_text = str8indup( p_segment->s->psz_text, i_start, i_end - i_start + 1 );
    p_segment_middle = tx3g_segment_New( psz_text );
171
    free( psz_text );
François Cartegnie's avatar
François Cartegnie committed
172
    if ( !p_segment_middle ) goto error;
173 174
    p_segment_middle->s->style = text_style_Duplicate( p_segment->s->style );
    p_segment_middle->i_size = str8len( p_segment_middle->s->psz_text );
François Cartegnie's avatar
François Cartegnie committed
175 176 177

    if ( i_end < (p_segment->i_size - 1) )
    {
178 179
        char* psz_text = str8indup( p_segment->s->psz_text, i_end + 1, p_segment->i_size - i_end - 1 );
        p_segment_right = tx3g_segment_New( psz_text );
180
        free( psz_text );
François Cartegnie's avatar
François Cartegnie committed
181
        if ( !p_segment_right ) goto error;
182 183
        p_segment_right->s->style = text_style_Duplicate( p_segment->s->style );
        p_segment_right->i_size = str8len( p_segment_right->s->psz_text );
François Cartegnie's avatar
François Cartegnie committed
184 185
    }

186 187
    if ( p_segment_left ) p_segment_left->p_next3g = p_segment_middle;
    if ( p_segment_right ) p_segment_middle->p_next3g = p_segment_right;
François Cartegnie's avatar
François Cartegnie committed
188 189 190 191 192 193 194 195

    *pp_segment_left = p_segment_left;
    *pp_segment_middle = p_segment_middle;
    *pp_segment_right = p_segment_right;

    return;

error:
196 197 198 199 200 201
    text_segment_Delete( p_segment_left->s );
    free( p_segment_left );
    text_segment_Delete( p_segment_middle->s );
    free( p_segment_middle );
    text_segment_Delete( p_segment_right->s );
    free( p_segment_right );
François Cartegnie's avatar
François Cartegnie committed
202 203
}

204
static bool SegmentSplit( tx3g_segment_t *p_prev, tx3g_segment_t **pp_segment,
François Cartegnie's avatar
François Cartegnie committed
205
                          const uint16_t i_start, const uint16_t i_end,
206
                          const text_style_t *p_styles )
François Cartegnie's avatar
François Cartegnie committed
207
{
208
    tx3g_segment_t *p_segment_left = NULL, *p_segment_middle = NULL, *p_segment_right = NULL;
François Cartegnie's avatar
François Cartegnie committed
209 210 211 212 213 214 215 216 217 218

    if ( (*pp_segment)->i_size == 0 ) return false;
    if ( i_start > i_end ) return false;
    if ( (size_t)(i_end - i_start) > (*pp_segment)->i_size - 1 ) return false;
    if ( i_end > (*pp_segment)->i_size - 1 ) return false;

    SegmentDoSplit( *pp_segment, i_start, i_end, &p_segment_left, &p_segment_middle, &p_segment_right );
    if ( !p_segment_middle )
    {
        /* Failed */
219 220 221 222
        text_segment_Delete( p_segment_left->s );
        free( p_segment_left );
        text_segment_Delete( p_segment_right->s );
        free( p_segment_right );
François Cartegnie's avatar
François Cartegnie committed
223 224 225
        return false;
    }

226 227 228
    tx3g_segment_t *p_next3g = (*pp_segment)->p_next3g;
    text_segment_Delete( (*pp_segment)->s );
    free( *pp_segment );
François Cartegnie's avatar
François Cartegnie committed
229
    *pp_segment = ( p_segment_left ) ? p_segment_left : p_segment_middle ;
230
    if ( p_prev ) p_prev->p_next3g = *pp_segment;
François Cartegnie's avatar
François Cartegnie committed
231 232

    if ( p_segment_right )
233
        p_segment_right->p_next3g = p_next3g;
François Cartegnie's avatar
François Cartegnie committed
234
    else
235
        p_segment_middle->p_next3g = p_next3g;
François Cartegnie's avatar
François Cartegnie committed
236

237 238
    text_style_Delete( p_segment_middle->s->style );
    p_segment_middle->s->style = text_style_Duplicate( p_styles );
François Cartegnie's avatar
François Cartegnie committed
239 240 241 242 243 244

    return true;
}

/* Creates a new segment using the given style and split existing ones according
   to the start & end offsets */
245
static void ApplySegmentStyle( tx3g_segment_t **pp_segment, const uint16_t i_absstart,
246
                               const uint16_t i_absend, const text_style_t *p_styles )
François Cartegnie's avatar
François Cartegnie committed
247 248 249
{
    /* find the matching segment */
    uint16_t i_curstart = 0;
250 251
    tx3g_segment_t *p_prev = NULL;
    tx3g_segment_t *p_cur = *pp_segment;
François Cartegnie's avatar
François Cartegnie committed
252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267
    while ( p_cur )
    {
        uint16_t i_curend = i_curstart + p_cur->i_size - 1;
        if ( (i_absstart >= i_curstart) && (i_absend <= i_curend) )
        {
            /* segment found */
            if ( !SegmentSplit( p_prev, &p_cur, i_absstart - i_curstart,
                                i_absend - i_curstart, p_styles ) )
                return;
            if ( !p_prev ) *pp_segment = p_cur;
            break;
        }
        else
        {
            i_curstart += p_cur->i_size;
            p_prev = p_cur;
268
            p_cur = p_cur->p_next3g;
François Cartegnie's avatar
François Cartegnie committed
269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
        }
    }
}

/*****************************************************************************
 * Decode: turn one tx3g sample into a text subpicture
 *****************************************************************************
 * Sample layout as read here: a 16-bit big-endian text length, the text
 * itself (converted from UTF-16 when it starts with a byte order mark,
 * copied verbatim otherwise), then a sequence of boxes, each introduced by
 * a 32-bit big-endian size and a fourcc. 'styl', 'drpo' and 'drpt' boxes are
 * interpreted; every other box is skipped.
 *
 * Takes ownership of *pp_block (set to NULL) and releases it on every path.
 * Returns the new subpicture, or NULL when the block is flagged
 * discontinuous/corrupted, is malformed, or an allocation fails.
 *****************************************************************************/
static subpicture_t *Decode( decoder_t *p_dec, block_t **pp_block )
{
    if( ( pp_block == NULL ) || ( *pp_block == NULL ) ) return NULL;

    block_t *p_block = *pp_block;
    *pp_block = NULL;

    if( ( p_block->i_flags & (BLOCK_FLAG_DISCONTINUITY|BLOCK_FLAG_CORRUPTED) ) ||
          p_block->i_buffer < sizeof(uint16_t) )
    {
        block_Release( p_block );
        return NULL;
    }

    const uint8_t *p_buf = p_block->p_buffer;
    const uint8_t *const p_end = p_block->p_buffer + p_block->i_buffer;

    /* Read our raw string and create the styled segment for HTML */
    const uint16_t i_psz_bytelength = GetWBE( p_buf );
    const uint8_t *p_pszstart = p_buf + sizeof(uint16_t);

    /* The announced length comes from the stream: never read past the block */
    if( i_psz_bytelength > (size_t)(p_end - p_pszstart) )
    {
        block_Release( p_block );
        return NULL;
    }

    char *psz_subtitle;
    if ( i_psz_bytelength > 2 &&
         ( !memcmp( p_pszstart, "\xFE\xFF", 2 ) || !memcmp( p_pszstart, "\xFF\xFE", 2 ) )
       )
    {
        psz_subtitle = FromCharset( "UTF-16", p_pszstart, i_psz_bytelength );
    }
    else
    {
        psz_subtitle = malloc( i_psz_bytelength + 1 );
        if ( psz_subtitle )
        {
            memcpy( psz_subtitle, p_pszstart, i_psz_bytelength );
            psz_subtitle[ i_psz_bytelength ] = '\0';
        }
    }
    if ( !psz_subtitle )
    {
        block_Release( p_block );
        return NULL;
    }
    p_buf = p_pszstart + i_psz_bytelength;

    /* Walk up to the terminator rather than over i_psz_bytelength bytes:
     * a converted string does not have the length of the raw input. */
    for( char *psz_cur = psz_subtitle; *psz_cur; psz_cur++ )
        if ( *psz_cur == '\r' ) *psz_cur = '\n';

    /* The wrapper keeps its own copy of the text (SegmentDoSplit() relies on
     * the same behaviour), so the local buffer can go right away. */
    tx3g_segment_t *p_segment3g = tx3g_segment_New( psz_subtitle );
    free( psz_subtitle );
    if ( !p_segment3g )
    {
        block_Release( p_block );
        return NULL;
    }
    if ( !p_segment3g->s->psz_text )
    {
        text_segment_Delete( p_segment3g->s );
        free( p_segment3g );
        block_Release( p_block );
        return NULL;
    }

    p_segment3g->i_size = str8len( p_segment3g->s->psz_text );
    if ( p_dec->fmt_in.subs.p_style )
        p_segment3g->s->style = text_style_Duplicate( p_dec->fmt_in.subs.p_style );

    /* Total character count, fixed before any split. It never exceeds the
     * 16-bit byte length, so it fits the 16-bit offsets used below. */
    const size_t i_charcount = p_segment3g->i_size;

    /* Create the subpicture unit */
    subpicture_t *p_spu = decoder_NewSubpictureText( p_dec );
    if( !p_spu )
    {
        text_segment_Delete( p_segment3g->s );
        free( p_segment3g );
        block_Release( p_block );
        return NULL;
    }
    subpicture_updater_sys_t *p_spu_sys = p_spu->updater.p_sys;

    /* Parse our styles */
    while( (size_t)(p_end - p_buf) > 8 )
    {
        const uint32_t i_atomsize = GetDWBE( p_buf );
        const vlc_fourcc_t i_atomtype = VLC_FOURCC(p_buf[4],p_buf[5],p_buf[6],p_buf[7]);

        /* The box size covers its own 8-byte header and has to fit in what
         * is left of the block; stop at the first box that does not. */
        if( i_atomsize < 8 || i_atomsize > (size_t)(p_end - p_buf) )
            break;

        const uint8_t *p_data = p_buf + 8;
        const size_t i_data = i_atomsize - 8;

        switch( i_atomtype )
        {

        case VLC_FOURCC('s','t','y','l'):
        {
            if ( i_data < 2 ) break;
            const uint16_t i_nbrecords = GetWBE( p_data );
            const uint8_t *p_rec = p_data + 2;
            size_t i_left = i_data - 2;

            /* One record: start(2) end(2) font id(2) face flags(1)
             * font size(1) RGBA colour(4) = 12 bytes */
            for( uint16_t i_cur_record = 0;
                 i_cur_record < i_nbrecords && i_left >= 12;
                 i_cur_record++, p_rec += 12, i_left -= 12 )
            {
                const uint32_t i_rgba = GetDWBE( p_rec + 8 );

                text_style_t style;
                memset( &style, 0, sizeof(text_style_t) );
                style.i_style_flags = ConvertFlags( p_rec[6] );
                /* NOTE(review): integer arithmetic, the result is truncated
                 * before it is stored -- confirm this is intended. */
                style.f_font_relsize = p_rec[7] * 5 / 100; /* in % units of 0.05 height */
                style.i_font_color = i_rgba >> 8;// RGBA -> RGB
                style.i_font_alpha = i_rgba & 0xFF;
                style.i_features = STYLE_HAS_FONT_COLOR | STYLE_HAS_FONT_ALPHA;
                /* same rule as for the default style below */
                if ( style.i_style_flags )
                    style.i_features |= STYLE_HAS_FLAGS;

                if ( i_charcount > 0 )
                {
                    /* Segments are measured in characters, so the offsets
                     * are clamped to the last character, not the last byte. */
                    const uint16_t i_lastchar = (uint16_t)(i_charcount - 1);
                    const uint16_t i_start = __MIN( GetWBE( p_rec ), i_lastchar );
                    const uint16_t i_end = __MIN( GetWBE( p_rec + 2 ), i_lastchar );
                    ApplySegmentStyle( &p_segment3g, i_start, i_end, &style );
                }

                /* A lone record also becomes the default style */
                if ( i_nbrecords == 1 )
                {
                    text_style_t *p_default = p_spu_sys->p_default_style;
                    if ( p_rec[6] )
                    {
                        if( (p_default->i_style_flags = ConvertFlags( p_rec[6] )) )
                            p_default->i_features |= STYLE_HAS_FLAGS;
                    }
                    p_default->f_font_relsize = p_rec[7] * 5 / 100;
                    p_default->i_font_color = i_rgba >> 8;// RGBA -> RGB
                    /* same encoding as the per-run alpha above; the former
                     * "<< 24" moved the value out of the low byte */
                    p_default->i_font_alpha = i_rgba & 0xFF;
                    p_default->i_features |= (STYLE_HAS_FONT_COLOR | STYLE_HAS_FONT_ALPHA);
                }
            }
        }   break;

        case VLC_FOURCC('d','r','p','o'):
            if ( i_data < 4 ) break;
            p_spu_sys->p_default_style->i_shadow_width = __MAX( GetWBE(p_data), GetWBE(p_data+2) );
            break;

        case VLC_FOURCC('d','r','p','t'):
            if ( i_data < 2 ) break;
            p_spu_sys->p_default_style->i_shadow_alpha = GetWBE(p_data);
            p_spu_sys->p_default_style->i_features |= STYLE_HAS_SHADOW_ALPHA;
            break;

        default:
            break;

        }
        /* Jump to the next box, however much of this one was consumed */
        p_buf += i_atomsize;
    }

    p_spu->i_start    = p_block->i_pts;
    p_spu->i_stop     = p_block->i_pts + p_block->i_length;
    p_spu->b_ephemer  = (p_block->i_length == 0);
    p_spu->b_absolute = false;

    p_spu_sys->align = SUBPICTURE_ALIGN_BOTTOM;

    /* Unwrap: chain the text segments together and drop the wrappers */
    text_segment_t *p_text_segments = p_segment3g->s;
    text_segment_t *p_tail = p_text_segments;
    while( p_segment3g )
    {
        tx3g_segment_t * p_old = p_segment3g;
        p_segment3g = p_segment3g->p_next3g;
        free( p_old );
        if( p_segment3g )
        {
            p_tail->p_next = p_segment3g->s;
            p_tail = p_tail->p_next;
        }
    }

    p_spu_sys->p_segments = p_text_segments;

    block_Release( p_block );

    return p_spu;
}