Commit f388eb55 authored by Derk-Jan Hartman's avatar Derk-Jan Hartman

* added a vlc_current_charset function. This tries to get the current charset

  in use by the OS. It's pretty nifty. If available, it uses nl_langinfo. Then
  it falls back to trying setlocale (except on OSX, where this function is useless)
  and then falls back to the LC_ALL, LC_CTYPE and LANG environment variables.
  If only the LANG variable is available (like on osx) it tries a countrycode
  to charset mapping (making an educated guess ).
  On windows it retrieves the charset with GetACP().
  It also has an aliases system to transform incompatible charset description
  strings to libiconv compatible charset descriptions. If you want you can even
  retrieve the charset on OS/2.
* modules/codec/subsdec/subsdec.c: By default we try to open a subtitle file in
  the system's charset (or language mapped to charset). It's no more than a guess,
  and if you open Latin1 subs on your Japanese system you will still need to
  specify the encoding by hand, but it's better than nothing.
* src/playlist/playlist.c: spelling error in a comment.
parent 190f4a1e
......@@ -69,6 +69,7 @@ HEADERS_include = \
include/beos_specific.h \
include/configuration.h \
include/darwin_specific.h \
include/charset.h \
include/codecs.h \
include/encoder.h \
include/ninput.h \
......@@ -314,6 +315,7 @@ SOURCES_libvlc_common = \
src/audio_output/output.c \
src/audio_output/intf.c \
src/stream_output/stream_output.c \
src/misc/charset.c \
src/misc/mtime.c \
src/misc/modules.c \
src/misc/threads.c \
......
dnl Autoconf settings for vlc
dnl $Id: configure.ac,v 1.65 2003/08/23 12:47:24 lool Exp $
dnl $Id: configure.ac,v 1.66 2003/08/23 12:59:31 hartman Exp $
AC_INIT(vlc,0.6.3-cvs)
......@@ -255,6 +255,22 @@ AC_CHECK_FUNCS(strdup strndup atof lseek)
AC_CHECK_FUNCS(strcasecmp,,[AC_CHECK_FUNCS(stricmp)])
AC_CHECK_FUNCS(strncasecmp,,[AC_CHECK_FUNCS(strnicmp)])
dnl Check for setlocale and langinfo
AC_CHECK_FUNCS(setlocale)
AC_CHECK_HEADERS(langinfo.h)
AC_CHECK_FUNCS(nl_langinfo)
AC_CACHE_CHECK([for nl_langinfo and CODESET], ac_cv_langinfo_codeset,
[AC_TRY_LINK([#include <langinfo.h>],
[char* cs = nl_langinfo(CODESET);],
ac_cv_langinfo_codeset=yes,
ac_cv_langinfo_codeset=no)
])
if test ${ac_cv_langinfo_codeset} = yes; then
AC_DEFINE(HAVE_LANGINFO_CODESET, 1,
[Define if you have <langinfo.h> and nl_langinfo(CODESET).])
fi
AC_CHECK_FUNCS(connect,,[
AC_CHECK_LIB(socket,connect,[
AX_ADD_LDFLAGS([vlc ipv4],-lsocket)
......
/*****************************************************************************
* charset.h: Determine a canonical name for the current locale's character encoding.
*****************************************************************************
* Copyright (C) 2003 VideoLAN
* $Id: charset.h,v 1.1 2003/08/23 12:59:31 hartman Exp $
*
* Author: Derk-Jan Hartman <thedj at users.sourceforge.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
/*****************************************************************************
 * vlc_current_charset: name of the current locale's character encoding.
 *****************************************************************************
 * When the argument is non-NULL, a pointer to the charset name is stored
 * through it. The string is not allocated for the caller: do not free it and
 * do not pre-allocate a buffer for it.
 * The return value is true when the charset is UTF-8 and false otherwise.
 *****************************************************************************/
VLC_EXPORT( vlc_bool_t, vlc_current_charset, ( char ** ) );
......@@ -2,7 +2,7 @@
* subsdec.c : SPU decoder thread
*****************************************************************************
* Copyright (C) 2000-2001 VideoLAN
* $Id: subsdec.c,v 1.5 2003/08/10 10:22:52 gbazin Exp $
* $Id: subsdec.c,v 1.6 2003/08/23 12:59:31 hartman Exp $
*
* Authors: Gildas Bazin <gbazin@netcourrier.com>
* Samuel Hocevar <sam@zoy.org>
......@@ -34,6 +34,7 @@
#include <osd.h>
#include "subsdec.h"
#include "charset.h"
/*****************************************************************************
* Local prototypes
......@@ -47,7 +48,8 @@ static vout_thread_t *FindVout( subsdec_thread_t * );
/*****************************************************************************
* Module descriptor.
*****************************************************************************/
static char *ppsz_encodings[] = { "ASCII", "ISO-8859-1", "ISO-8859-2", "ISO-8859-3",
static char *ppsz_encodings[] = { N_("System Default"),
"ASCII", "ISO-8859-1", "ISO-8859-2", "ISO-8859-3",
"ISO-8859-4", "ISO-8859-5", "ISO-8859-6", "ISO-8859-7", "ISO-8859-8",
"ISO-8859-9", "ISO-8859-10", "ISO-8859-13", "ISO-8859-14", "ISO-8859-15",
"ISO-8859-16", "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2", "ISO-2022-CN",
......@@ -78,7 +80,7 @@ vlc_module_begin();
add_integer( "subsdec-align", 0, NULL, ALIGN_TEXT, ALIGN_LONGTEXT, VLC_TRUE );
#if defined(HAVE_ICONV)
add_string_from_list( "subsdec-encoding", "ISO-8859-1", ppsz_encodings, NULL, ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
add_string_from_list( "subsdec-encoding", N_("System Default"), ppsz_encodings, NULL, ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
#endif
vlc_module_end();
......@@ -149,7 +151,16 @@ static int RunDecoder( decoder_fifo_t * p_fifo )
/* Here we are dealing with text subtitles */
#if defined(HAVE_ICONV)
var_Get( p_subsdec->p_fifo, "subsdec-encoding", &val );
p_subsdec->iconv_handle = iconv_open( "UTF-8", val.psz_string);
if( strcmp( val.psz_string, N_("System Default") ) == 0 )
{
char *psz_charset =(char*)malloc( 100 );
vlc_current_charset(&psz_charset);
p_subsdec->iconv_handle = iconv_open( "UTF-8", psz_charset );
}
else
{
p_subsdec->iconv_handle = iconv_open( "UTF-8", val.psz_string );
}
if( p_subsdec->iconv_handle == (iconv_t)-1 )
{
msg_Warn( p_subsdec->p_fifo, "Unable to do requested conversion" );
......
/*****************************************************************************
* charset.c: Determine a canonical name for the current locale's character encoding.
*****************************************************************************
* Copyright (C) 2003 VideoLAN
* $Id: charset.c,v 1.1 2003/08/23 12:59:31 hartman Exp $
*
* Authors: Derk-Jan Hartman <thedj at users.sf.net>
*
* vlc_current_charset() an adaption of mp_locale_charset():
*
* Copyright (C) 2001-2003 The Mape Project
* Written by Karel Zak <zakkr@zf.jcu.cz>.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#if !defined WIN32
# if HAVE_LANGINFO_CODESET
# include <langinfo.h>
# else
# if HAVE_SETLOCALE
# include <locale.h>
# endif
# endif
#elif defined WIN32
# include <windows.h>
#endif
#include <vlc/vlc.h>
#include "charset.h"
/* One entry of a charset alias table: maps a charset name as reported by the
 * platform onto the equivalent name that is handed to iconv.
 * Both members only ever point at string literals, hence the const. */
typedef struct VLCCharsetAlias
{
    const char *psz_alias;  /* platform-specific spelling to look for */
    const char *psz_name;   /* replacement name to return instead */
} VLCCharsetAlias;
/*
* libcharset loads all of its aliases from an external text file, which is a
* strange and slow solution; we rather use array(s) compiled into the source.
* With a "good" libc (for example on Linux) this is not needed at all.
*
* Please put only exotic aliases into vlc_charset_aliases(). The libc 'iconv'
* already knows a lot of basic aliases (check it first with iconv -l).
*
*/
/*
 * Make an educated guess at a charset from a language (and perhaps country)
 * code such as "ja" or "zh_TW".
 *
 * l: locale/language string to inspect; NULL is accepted.
 *
 * Returns a pointer to a string literal naming the guessed charset. When the
 * string is NULL or contains none of the known codes, "ISO-8859-1" is
 * returned.
 *
 * The codes are searched as substrings anywhere in the string, in table
 * order, and the first hit wins.
 */
static const char* vlc_encoding_from_language( const char *l )
{
    /* Order matters: "zh_TW" and "zh_HK" must be tried before plain "zh".
     * Each code appears once only; a repeated entry further down could
     * never be reached. */
    static const struct
    {
        const char *psz_lang;
        const char *psz_charset;
    } p_map[] =
    {
        { "zh_TW", "Big5" },
        { "zh_HK", "Big5HKSCS" },   /* no MIME charset */
        { "zh",    "GB2312" },
        { "th",    "TIS-620" },
        { "ja",    "EUC-JP" },
        { "ko",    "EUC-KR" },
        { "ru",    "KOI8-R" },
        { "uk",    "KOI8-U" },
        { "pl",    "ISO-8859-2" },
        { "hr",    "ISO-8859-2" },
        { "hu",    "ISO-8859-2" },
        { "cs",    "ISO-8859-2" },
        { "sk",    "ISO-8859-2" },
        { "sl",    "ISO-8859-2" },
        { "eo",    "ISO-8859-3" },
        { "mt",    "ISO-8859-3" },
        { "lt",    "ISO-8859-4" },
        { "la",    "ISO-8859-4" },
        { "bg",    "ISO-8859-5" },
        { "be",    "ISO-8859-5" },
        { "mk",    "ISO-8859-5" },
        { "ar",    "ISO-8859-6" },
        { "el",    "ISO-8859-7" },
        { "he",    "ISO-8859-8" },
        { "iw",    "ISO-8859-8" },
        { "tr",    "ISO-8859-9" },
        { "lv",    "ISO-8859-13" },
        { "cy",    "ISO-8859-14" },
        { "et",    "ISO-8859-15" }, /* all latin1 could be iso15 as well */
        { "ro",    "ISO-8859-2" },  /* or ISO-8859-16 */
        { "am",    "UTF-8" },
        { "vi",    "UTF-8" }
    };
    size_t i;

    /* Without a string there is nothing to search: go to the default. */
    if( l == NULL )
        return "ISO-8859-1";

    for( i = 0; i < sizeof( p_map ) / sizeof( p_map[0] ); i++ )
    {
        if( strstr( l, p_map[i].psz_lang ) != NULL )
            return p_map[i].psz_charset;
    }

    /* We don't know. This ain't working go to default. */
    return "ISO-8859-1";
}
/*
 * Translate a platform-specific charset name into the name that should be
 * handed to iconv.
 *
 * psz_name: charset name to look up (compared case-insensitively).
 *
 * Returns the replacement name from the table selected at compile time for
 * the current platform, or psz_name itself when the table has no entry for
 * it (this includes a NULL psz_name, which is returned unchanged).
 */
static const char* vlc_charset_aliases( const char *psz_name )
{
    const VLCCharsetAlias *a;

    /* The tables are constant, so keep them in static storage instead of
     * rebuilding them on the stack at every call. */
#if defined WIN32
    static const VLCCharsetAlias aliases[] =
    {
        { "CP936",      "GBK" },
        { "CP1361",     "JOHAB" },
        { "CP20127",    "ASCII" },
        { "CP20866",    "KOI8-R" },
        { "CP21866",    "KOI8-RU" },
        { "CP28591",    "ISO-8859-1" },
        { "CP28592",    "ISO-8859-2" },
        { "CP28593",    "ISO-8859-3" },
        { "CP28594",    "ISO-8859-4" },
        { "CP28595",    "ISO-8859-5" },
        { "CP28596",    "ISO-8859-6" },
        { "CP28597",    "ISO-8859-7" },
        { "CP28598",    "ISO-8859-8" },
        { "CP28599",    "ISO-8859-9" },
        { "CP28605",    "ISO-8859-15" },
        { NULL,         NULL }
    };
#elif SYS_AIX
    static const VLCCharsetAlias aliases[] =
    {
        { "IBM-850",    "CP850" },
        { "IBM-856",    "CP856" },
        { "IBM-921",    "ISO-8859-13" },
        { "IBM-922",    "CP922" },
        { "IBM-932",    "CP932" },
        { "IBM-943",    "CP943" },
        { "IBM-1046",   "CP1046" },
        { "IBM-1124",   "CP1124" },
        { "IBM-1129",   "CP1129" },
        { "IBM-1252",   "CP1252" },
        { "IBM-EUCCN",  "GB2312" },
        { "IBM-EUCJP",  "EUC-JP" },
        { "IBM-EUCKR",  "EUC-KR" },
        { "IBM-EUCTW",  "EUC-TW" },
        { NULL,         NULL }
    };
#elif SYS_HPUX
    static const VLCCharsetAlias aliases[] =
    {
        { "ROMAN8",     "HP-ROMAN8" },
        { "ARABIC8",    "HP-ARABIC8" },
        { "GREEK8",     "HP-GREEK8" },
        { "HEBREW8",    "HP-HEBREW8" },
        { "TURKISH8",   "HP-TURKISH8" },
        { "KANA8",      "HP-KANA8" },
        { "HP15CN",     "GB2312" },
        { NULL,         NULL }
    };
#elif SYS_IRIX
    static const VLCCharsetAlias aliases[] =
    {
        { "EUCCN",      "GB2312" },
        { NULL,         NULL }
    };
#elif SYS_OSF
    static const VLCCharsetAlias aliases[] =
    {
        { "KSC5601",    "CP949" },
        { "SDECKANJI",  "EUC-JP" },
        { "TACTIS",     "TIS-620" },
        { NULL,         NULL }
    };
#elif SYS_SOLARIS
    static const VLCCharsetAlias aliases[] =
    {
        { "646",        "ASCII" },
        { "CNS11643",   "EUC-TW" },
        { "5601",       "EUC-KR" },
        { "JOHAP92",    "JOHAB" },
        { "PCK",        "SHIFT_JIS" },
        { "2533",       "TIS-620" },
        { NULL,         NULL }
    };
#elif SYS_BSD
    static const VLCCharsetAlias aliases[] =
    {
        /* No leading blank in the replacement: " ASCII" is not a charset
         * name. */
        { "646",        "ASCII" },
        { "EUCCN",      "GB2312" },
        { NULL,         NULL }
    };
#else
    /* No exotic names known for this platform: sentinel only. */
    static const VLCCharsetAlias aliases[] = {{NULL, NULL}};
#endif

    /* Nothing to compare against; hand the (absent) name straight back. */
    if( psz_name == NULL )
        return psz_name;

    /* Every table ends with a { NULL, NULL } sentinel. */
    for( a = aliases; a->psz_alias != NULL; a++ )
    {
        if( strcasecmp( a->psz_alias, psz_name ) == 0 )
            return a->psz_name;
    }

    /* we return the original name because iconv() probably will know
     * something better about the name if we don't know it :-)
     */
    return psz_name;
}
/*
 * Returns charset from "language_COUNTRY.charset@modifier" string.
 *
 * psz_locale: locale string to parse; it is only read, never modified.
 *             NULL is accepted.
 *
 * Returns:
 *  - NULL when psz_locale is NULL (nothing can be derived from it);
 *  - a pointer into psz_locale, just past the '.', when the string has a
 *    charset part and no '@' modifier after it;
 *  - a pointer to a function-static buffer holding the charset part when an
 *    '@' modifier had to be cut off (the buffer is overwritten by the next
 *    such call);
 *  - otherwise, i.e. no '.' or a charset part too long for the buffer, the
 *    result of vlc_encoding_from_language() on the whole string.
 * None of the returned pointers may be freed or written through.
 */
static const char* vlc_encoding_from_locale( const char *psz_locale )
{
    const char *psz_dot;

    /* strchr() must not be handed a NULL pointer. */
    if( psz_locale == NULL )
        return NULL;

    psz_dot = strchr( psz_locale, '.' );
    if( psz_dot != NULL )
    {
        static char buf[2 + 10 + 1];
        const char *psz_modifier;
        size_t i_len;

        psz_dot++;

        /* Look for the possible @... trailer and remove it, if any. */
        psz_modifier = strchr( psz_dot, '@' );
        if( psz_modifier == NULL )
            return psz_dot;

        /* psz_modifier was found at or after psz_dot, so the difference is
         * never negative. */
        i_len = (size_t)( psz_modifier - psz_dot );
        if( i_len < sizeof( buf ) )
        {
            memcpy( buf, psz_dot, i_len );
            buf[i_len] = '\0';
            return buf;
        }
    }

    /* try language mapping */
    return vlc_encoding_from_language( psz_locale );
}
/*
 * Determine a canonical name for the current locale's character encoding.
 *
 * psz_charset: when non-NULL, receives a pointer to the charset name. The
 *              name is never an empty string. It lives in storage owned by
 *              libc, the environment, a function-static buffer or a string
 *              literal: the caller must not free or modify it and does not
 *              need to allocate anything beforehand. On WIN32 and OS/2 the
 *              static buffer is overwritten by the next call.
 *
 * Returns true (non-zero) when the charset is UTF-8 (spelled "UTF8" or
 * "UTF-8", in any case) and false (zero) otherwise.
 *
 * Lookup order: nl_langinfo( CODESET ) when available; otherwise setlocale
 * (not on Darwin) and the LC_ALL, LC_CTYPE and LANG environment variables;
 * GetACP() on WIN32; the environment and then DosQueryCp() on OS/2. The
 * result goes through vlc_charset_aliases(). If it is still unknown, the
 * CHARSET environment variable is tried and finally "ISO-8859-1" is used.
 */
vlc_bool_t vlc_current_charset( char **psz_charset )
{
    const char *psz_codeset;

#if !(defined WIN32 || defined OS2)

# if HAVE_LANGINFO_CODESET
    /* Most systems support nl_langinfo( CODESET ) nowadays. */
    psz_codeset = nl_langinfo( CODESET );
# else
    /* On old systems which lack it, use setlocale or getenv. */
    const char *psz_locale = NULL;

    /* But most old systems don't have a complete set of locales. Some
     * (like SunOS 4 or DJGPP) have only the C locale. Therefore we don't
     * use setlocale here; it would return "C" when it doesn't support the
     * locale name the user has set. Darwin's setlocale is broken.
     */
#  if HAVE_SETLOCALE && !SYS_DARWIN
    psz_locale = setlocale( LC_ALL, NULL );
#  endif
    if( psz_locale == NULL || psz_locale[0] == '\0' )
    {
        psz_locale = getenv( "LC_ALL" );
        if( psz_locale == NULL || psz_locale[0] == '\0' )
        {
            psz_locale = getenv( "LC_CTYPE" );
            if( psz_locale == NULL || psz_locale[0] == '\0' )
                psz_locale = getenv( "LANG" );
        }
    }

    /* On some old systems, one used to set locale = "iso8859_1". On others,
     * you set it to "language_COUNTRY.charset". Darwin only has LANG :(
     *
     * All of the sources above may have come up empty; in that case do not
     * parse a NULL pointer, leave the codeset unknown so that the CHARSET
     * fallback below gets its chance.
     */
    if( psz_locale != NULL && psz_locale[0] != '\0' )
        psz_codeset = vlc_encoding_from_locale( psz_locale );
    else
        psz_codeset = NULL;
# endif /* HAVE_LANGINFO_CODESET */

#elif defined WIN32

    /* "CP" + at most 10 digits of a 32 bits number + terminator */
    static char buf[2 + 10 + 1];

    /* Woe32 has a function returning the locale's codepage as a number. */
    sprintf( buf, "CP%u", GetACP() );
    psz_codeset = buf;

#elif defined OS2

    const char *psz_locale;
    /* "CP" + at most 10 digits of a 32 bits number + terminator */
    static char buf[2 + 10 + 1];
    ULONG cp[3];
    ULONG cplen;

    /* Allow user to override the codeset, as set in the operating system,
     * with standard language environment variables.
     */
    psz_locale = getenv( "LC_ALL" );
    if( psz_locale == NULL || psz_locale[0] == '\0' )
    {
        psz_locale = getenv( "LC_CTYPE" );
        if( psz_locale == NULL || psz_locale[0] == '\0' )
            psz_locale = getenv( "LANG" );
    }
    if( psz_locale != NULL && psz_locale[0] != '\0' )
        psz_codeset = vlc_encoding_from_locale( psz_locale );
    else
    {
        /* OS/2 has a function returning the locale's codepage as a number. */
        if( DosQueryCp( sizeof( cp ), cp, &cplen ) )
            psz_codeset = "";
        else
        {
            /* Convert explicitly so the argument matches the format. */
            sprintf( buf, "CP%lu", (unsigned long)cp[0] );
            psz_codeset = buf;
        }
    }

#endif

    if( psz_codeset == NULL )
        /* The canonical name cannot be determined. */
        psz_codeset = "";
    else
        psz_codeset = vlc_charset_aliases( psz_codeset );

    /* Don't return an empty string. GNU libc and GNU libiconv interpret
     * the empty string as denoting "the locale's character encoding",
     * thus GNU libiconv would call this function a second time.
     */
    if( psz_codeset[0] == '\0' )
    {
        /*
         * Last possibility is the 'CHARSET' environment variable. A variable
         * that is set but empty is as useless as one that is not set.
         */
        psz_codeset = getenv( "CHARSET" );
        if( psz_codeset == NULL || psz_codeset[0] == '\0' )
            psz_codeset = "ISO-8859-1";
    }

    if( psz_charset )
        *psz_charset = (char *)psz_codeset;

    /* Returned as a plain boolean expression (1 or 0) so that the function
     * does not depend on TRUE/FALSE macros being defined on every platform. */
    return strcasecmp( psz_codeset, "UTF8" ) == 0
        || strcasecmp( psz_codeset, "UTF-8" ) == 0;
}
......@@ -2,7 +2,7 @@
* playlist.c : Playlist management functions
*****************************************************************************
* Copyright (C) 1999-2001 VideoLAN
* $Id: playlist.c,v 1.45 2003/08/17 14:14:01 sigmunau Exp $
* $Id: playlist.c,v 1.46 2003/08/23 12:59:31 hartman Exp $
*
* Authors: Samuel Hocevar <sam@zoy.org>
*
......@@ -134,7 +134,7 @@ int playlist_Add( playlist_t *p_playlist, const char *psz_target,
* \param ppsz_options array of options
* \param i_options number of items in ppsz_options
* \param i_mode the mode used when adding
* \param i_pos the possition in the playlist where to add. If this is
* \param i_pos the position in the playlist where to add. If this is
* PLAYLIST_END the item will be added at the end of the playlist
* regardless of its size
* \return always returns 0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment