/*****************************************************************************
 * subtitles.c
 *****************************************************************************
 * Copyright (C) 2003-2004 VideoLAN
 * $Id$
 *
 * Authors: Derk-Jan Hartman <hartman at videolan.org>
 * This is adapted code from the GPL'ed MPlayer (http://mplayerhq.hu)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/

/**
 *  \file
 *  This file contains functions to detect subtitle files.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include <vlc/vlc.h>
#include <vlc/input.h>

#include "ninput.h"

#ifdef HAVE_DIRENT_H
#   include <dirent.h>
#else
#   include "../extras/dirent.h"
#endif

/**
 * What's between a directory and a filename?
 */
#if defined( WIN32 )
#   define DIRECTORY_SEPARATOR '\\'
#else
#   define DIRECTORY_SEPARATOR '/'
#endif

/**
 * We are not going to autodetect more subtitle files than this.
 */
#define MAX_SUBTITLE_FILES 128

/**
 * The possible extensions for subtitle files we support.
 * Entries are compared case-sensitively against the text after the last
 * '.' of a file name; the list is terminated by a NULL entry.
 */
static const char *const sub_exts[] =
    { "utf", "utf8", "utf-8", "sub", "srt", "smi", "txt", "ssa", NULL };
/* extensions from unsupported types */
/* rt, aqt, jss, js, ass */

/**
 * Copy s to d in a normalised form suitable for fuzzy name comparison.
 *
 * Every run of alphanumeric characters is copied in lower case; every run
 * of other characters between two such words is collapsed into one space.
 * Leading and trailing non-alphanumeric characters are dropped.
 *
 * \param d destination buffer; the result is never longer than s, so
 *          strlen( s ) + 1 bytes are always sufficient.
 * \param s NUL-terminated source string (not modified).
 */
static void strcpy_trim( char *d, const char *s )
{
    /* <ctype.h> functions require a value representable as unsigned char;
     * a plain (possibly negative) char is undefined behaviour. */

    /* skip leading separators */
    while( *s && !isalnum( (unsigned char)*s ) )
    {
        s++;
    }

    for( ;; )
    {
        /* copy one word, lower-cased */
        while( *s && isalnum( (unsigned char)*s ) )
        {
            *d++ = (char)tolower( (unsigned char)*s );
            s++;
        }
        if( *s == 0 ) break;

        /* swallow the separators that follow the word */
        while( *s && !isalnum( (unsigned char)*s ) )
        {
            s++;
        }
        /* trailing separators are dropped, not turned into a space */
        if( *s == 0 ) break;

        *d++ = ' ';
    }
    *d = 0;
}

/**
 * Copy s to d without its extension.
 *
 * Everything from the last '.' onwards is dropped and the remaining text is
 * converted to lower case. When s contains no '.', it is copied verbatim
 * (NOTE: in that case the copy is NOT lower-cased).
 *
 * \param d destination buffer, at least strlen( s ) + 1 bytes.
 * \param s NUL-terminated source file name (not modified).
 */
static void strcpy_strip_ext( char *d, const char *s )
{
    const char *psz_dot = strrchr( s, '.' );
    size_t i_len;

    if( !psz_dot )
    {
        strcpy( d, s );
        return;
    }

    /* keep only the part in front of the last dot */
    i_len = (size_t)( psz_dot - s );
    memcpy( d, s, i_len );
    d[i_len] = 0;

    /* tolower() needs an unsigned char value; plain char may be negative */
    for( ; *d; d++ )
    {
        *d = (char)tolower( (unsigned char)*d );
    }
}

/**
 * Copy the extension of s to d.
 *
 * The extension is the text after the last '.' in s, copied unchanged
 * (no case conversion). If s contains no '.', d becomes the empty string.
 *
 * \param d destination buffer, at least strlen( s ) + 1 bytes.
 * \param s NUL-terminated source file name (not modified).
 */
static void strcpy_get_ext( char *d, const char *s )
{
    const char *psz_dot = strrchr( s, '.' );

    strcpy( d, psz_dot ? psz_dot + 1 : "" );
}

/**
 * Tell whether a string is free of alphanumeric characters.
 *
 * Despite the name, any non-alphanumeric character (punctuation included)
 * counts as "white" here.
 *
 * \param s NUL-terminated string to inspect (not modified).
 * \return 1 if s is empty or holds no alphanumeric character, 0 otherwise.
 */
static int whiteonly( const char *s )
{
    for( ; *s; s++ )
    {
        /* isalnum() needs an unsigned char value; plain char may be
         * negative */
        if( isalnum( (unsigned char)*s ) ) return 0;
    }
    return 1;
}

/**
 * A subtitle candidate together with its match priority.
 */
typedef struct _subfn
{
    int priority;    /* match quality; higher values sort first */
    char *psz_fname; /* path of the candidate file */
} subfn;

/**
 * qsort() comparator for arrays of subfn.
 *
 * Orders entries by decreasing priority; entries with equal priority are
 * ordered by strcoll() on their file names.
 *
 * \param a pointer to the first subfn
 * \param b pointer to the second subfn
 * \return negative, zero or positive as a sorts before, equal to or
 *         after b.
 */
static int compare_sub_priority( const void *a, const void *b )
{
    const subfn *p_a = (const subfn *)a;
    const subfn *p_b = (const subfn *)b;

    if( p_a->priority > p_b->priority )
    {
        return -1;
    }
    if( p_a->priority < p_b->priority )
    {
        return 1;
    }

    return strcoll( p_a->psz_fname, p_b->psz_fname );
}

153 154
/**
 * Detect subtitle files.
155
 *
156 157 158
 * When called this function will split up the psz_fname string into a
 * directory, filename and extension. It then opens the directory
 * in which the file resides and tries to find possible matches of
159
 * subtitles files.
160 161 162
 *
 * \ingroup Demux
 * \param p_this the calling \ref input_thread_t
163
 * \param psz_path a list of subdirectories (separated by a ',') to look in.
164
 * \param psz_fname the complete filename to base the search on.
165 166
 * \return a NULL terminated array of filenames with detected possible subtitles.
 * The array contains max MAX_SUBTITLE_FILES items and you need to free it after use.
167
 */
Gildas Bazin's avatar
 
Gildas Bazin committed
168 169
char **subtitles_Detect( input_thread_t *p_this, char *psz_path,
                         char *psz_fname )
170 171 172 173 174
{
    /* variables to be used for derivatives of psz_fname */
    char *f_dir, *f_fname, *f_fname_noext, *f_fname_trim, *tmp;
    /* variables to be used for derivatives FILE *f */
    char *tmp_fname_noext, *tmp_fname_trim, *tmp_fname_ext, *tmpresult;
175

176
    vlc_value_t fuzzy;
177 178
    int len, i, j, i_sub_count, i_nb_subdirs;
    unsigned int k, i_max_sub_len;
179 180
    subfn *result; /* unsorted results */
    char **result2; /* sorted results */
181
    char **subdirs; /* list of subdirectories to look in */
182

183 184 185 186
    FILE *f;
    DIR *d;
    struct dirent *de;

187 188 189 190 191 192 193 194 195 196
    i_nb_subdirs = 1;
    for( k = 0; k < strlen( psz_path ); k++ )
    {
        if( psz_path[k] == ',' ) 
        {
            i_nb_subdirs++;
        }
    }

    i_max_sub_len = 0;
197
    if( i_nb_subdirs > 0 )
198 199
    {
        char *psz_parser;
200

201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
        subdirs = (char**)malloc( sizeof(char*) * i_nb_subdirs );
        i = 0;
        psz_parser = psz_path;
        while( psz_parser && *psz_parser )
        {
            char *psz_subdir;
            psz_subdir = psz_parser;
            psz_parser = strchr( psz_subdir, ',' );
            if( psz_parser )
            {
                *psz_parser = '\0';
                psz_parser++;
                while( *psz_parser == ' ' )
                {
                    psz_parser++;
                }
            }
            subdirs[i] = strdup( psz_subdir );
            i++;
            if( strlen( psz_subdir ) > i_max_sub_len )
            {
                i_max_sub_len = strlen( psz_subdir );
            }
        }
    } 
    else 
    {
        i_nb_subdirs = -1;
        subdirs = NULL;
    }

232 233
    i_sub_count = 0;
    len = ( strlen( psz_fname ) > 256 ? strlen( psz_fname ) : 256 ) +
234
        ( i_max_sub_len > 256 ? i_max_sub_len : 256 ) + 2;
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254

    f_dir = (char*)malloc(len);
    f_fname = (char*)malloc(len);
    f_fname_noext = (char*)malloc(len);
    f_fname_trim = (char*)malloc(len);

    tmp_fname_noext = (char*)malloc(len);
    tmp_fname_trim = (char*)malloc(len);
    tmp_fname_ext = (char*)malloc(len);

    tmpresult = (char*)malloc(len);

    result = (subfn*)malloc( sizeof(subfn) * MAX_SUBTITLE_FILES );
    memset( result, 0, sizeof(subfn) * MAX_SUBTITLE_FILES );

    /* extract filename & dirname from psz_fname */
    tmp = strrchr( psz_fname, DIRECTORY_SEPARATOR );
    if( tmp )
    {
        int pos;
Gildas Bazin's avatar
 
Gildas Bazin committed
255 256 257 258 259
        strncpy( f_fname, tmp + 1, len - 1 );
        f_fname[len - 1] = 0;
        pos = tmp - psz_fname + 1;
        strncpy( f_dir, psz_fname, __MIN(pos,len-1) );
        f_dir[__MIN(pos,len-1)] = 0;
260 261 262
    }
    else
    {
Gildas Bazin's avatar
 
Gildas Bazin committed
263 264
        strncpy( f_fname, psz_fname, len - 1 );
        f_fname[len - 1] = 0;
265
        strcpy( f_dir, "" );
266 267 268 269
    }

    strcpy_strip_ext( f_fname_noext, f_fname );
    strcpy_trim( f_fname_trim, f_fname_noext );
270
    var_Get( p_this, "sub-autodetect-fuzzy", &fuzzy );
271 272


273
    for( j = -1; j < i_nb_subdirs; j++)
274
    {
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298
        if( j >= 0 )
        {
            if( subdirs[j] && subdirs[j][0] == '.' )
            {
                char* psz_dir;
                psz_dir = (char *)malloc( len );
                if( psz_dir ) 
                {
                    sprintf( psz_dir, "%s%s", f_dir, subdirs[j] );
                    d = opendir( psz_dir );
                    free( psz_dir );
                }
                else d = NULL;
            }
            else
            {
                d = opendir( subdirs[j] );
            }
        }
        else
        {
            d = opendir( f_dir );
        }

299
        if( d )
300 301
        {
            int b_found;
302 303
            msg_Dbg( p_this, "looking for a subtitle file in %s", 
                     j < 0 ? f_dir : subdirs[j] );
304
            while( ( de = readdir( d ) ) )
305
            {
306 307 308 309 310 311 312 313
                /* retrieve various parts of the filename */
                strcpy_strip_ext( tmp_fname_noext, de->d_name );
                strcpy_get_ext( tmp_fname_ext, de->d_name );
                strcpy_trim( tmp_fname_trim, tmp_fname_noext );

                /* does it end with a subtitle extension? */
                b_found = 0;
                for( i = 0; sub_exts[i]; i++ )
314
                {
315
                    if( strcmp( sub_exts[i], tmp_fname_ext ) == 0 )
316
                    {
317
                        b_found = 1;
Gildas Bazin's avatar
 
Gildas Bazin committed
318 319
                        msg_Dbg( p_this, "found a possible subtitle: %s",
                                 de->d_name );
320 321 322 323 324 325
                        break;
                    }
                }

                /* we have a (likely) subtitle file */
                if( b_found )
326
                {
327
                    int i_prio = 0;
Gildas Bazin's avatar
 
Gildas Bazin committed
328
                    if( !i_prio && !strcmp( tmp_fname_trim, f_fname_trim ) )
329
                    {
330 331 332
                        /* matches the movie name exactly */
                        i_prio = 4;
                    }
Gildas Bazin's avatar
 
Gildas Bazin committed
333 334
                    if( !i_prio &&
                        ( tmp = strstr( tmp_fname_trim, f_fname_trim ) ) )
335
                    {
336
                        /* contains the movie name */
337 338 339 340 341 342 343 344
                        tmp += strlen( f_fname_trim );
                        if( whiteonly( tmp ) )
                        {
                            /* chars in front of the movie name */
                            i_prio = 2;
                        }
                        else
                        {
Clément Stenac's avatar
Clément Stenac committed
345 346
                            /* chars after (and possibly in front of)
                             * the movie name */
347 348
                            i_prio = 3;
                        }
349 350
                    }
                    if( !i_prio )
351
                    {
352 353 354 355 356
                        /* doesn't contain the movie name */
                        if( j == 0 ) i_prio = 1;
                    }

                    if( i_prio >= fuzzy.i_int )
357
                    {
Gildas Bazin's avatar
 
Gildas Bazin committed
358 359 360 361
                        sprintf( tmpresult, "%s%s", j == 0 ? f_dir : psz_path,
                                 de->d_name );
                        msg_Dbg( p_this, "autodetected subtitle: %s with "
                                 "priority %d", de->d_name, i_prio );
362
                        if( ( f = fopen( tmpresult, "rt" ) ) )
363
                        {
364 365
                            fclose( f );
                            result[i_sub_count].priority = i_prio;
Gildas Bazin's avatar
 
Gildas Bazin committed
366
                            result[i_sub_count].psz_fname = strdup(tmpresult);
367 368 369 370 371 372 373 374
                            i_sub_count++;
                        }
                    }
                }
                if( i_sub_count >= MAX_SUBTITLE_FILES ) break;
            }
            closedir( d );
        }
375
    }
376

377 378
    if( subdirs ) free( subdirs );

379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
    free( f_dir );
    free( f_fname );
    free( f_fname_noext );
    free( f_fname_trim );

    free( tmp_fname_noext );
    free( tmp_fname_trim );
    free( tmp_fname_ext );

    free( tmpresult );

    qsort( result, i_sub_count, sizeof( subfn ), compare_sub_priority );

    result2 = (char**)malloc( sizeof(char*) * ( i_sub_count + 1 ) );
    memset( result2, 0, sizeof(char*) * ( i_sub_count + 1 ) );

    for( i = 0; i < i_sub_count; i++ )
    {
397
        result2[i] = result[i].psz_fname;
398 399 400 401 402
    }
    result2[i_sub_count] = NULL;
    free( result );
    return result2;
}