Commit 647cc799 authored by Rémi Denis-Courmont

- FromWide(): converts a wchar_t * to UTF-8 char *

  (if local charset is UTF-8, this is similar to wcstombs())
- FromUTF16(): converts a host-order UTF-16 byte sequence to UTF-8
parent e65878a6
...@@ -47,6 +47,16 @@ int utf8_fprintf( FILE *, const char *, ... ); ...@@ -47,6 +47,16 @@ int utf8_fprintf( FILE *, const char *, ... );
VLC_EXPORT( char *, EnsureUTF8, ( char * ) ); VLC_EXPORT( char *, EnsureUTF8, ( char * ) );
VLC_EXPORT( char *, FromUTF32, ( const uint32_t * ) ); VLC_EXPORT( char *, FromUTF32, ( const uint32_t * ) );
VLC_EXPORT( char *, FromUTF16, ( const uint16_t * ) );
/**
 * FromWide(): converts a wide character string to UTF-8.
 *
 * The conversion routine is picked from the size of wchar_t on the build
 * platform: FromUTF16() when wchar_t is 2 bytes wide, FromUTF32() otherwise.
 *
 * @param in wide character string, handed to the selected converter as-is
 *
 * @return whatever the selected converter returns.
 */
static inline char *FromWide( const wchar_t *in )
{
    /* sizeof() is a compile-time constant: only one branch survives */
    if( sizeof( wchar_t ) != 2 )
        return FromUTF32( (const uint32_t *)in );

    return FromUTF16( (const uint16_t *)in );
}
VLC_EXPORT( char *, __vlc_fix_readdir_charset, ( vlc_object_t *, const char * ) ); VLC_EXPORT( char *, __vlc_fix_readdir_charset, ( vlc_object_t *, const char * ) );
#define vlc_fix_readdir_charset(a,b) __vlc_fix_readdir_charset(VLC_OBJECT(a),b) #define vlc_fix_readdir_charset(a,b) __vlc_fix_readdir_charset(VLC_OBJECT(a),b)
......
...@@ -484,6 +484,7 @@ struct module_symbols_t ...@@ -484,6 +484,7 @@ struct module_symbols_t
char * (*convert_xml_special_chars_inner) (const char *psz_content); char * (*convert_xml_special_chars_inner) (const char *psz_content);
char * (*decode_encoded_URI_duplicate_inner) (const char *psz); char * (*decode_encoded_URI_duplicate_inner) (const char *psz);
void (*resolve_xml_special_chars_inner) (char *psz_value); void (*resolve_xml_special_chars_inner) (char *psz_value);
char * (*FromUTF16_inner) (const uint16_t *);
}; };
# if defined (__PLUGIN__) # if defined (__PLUGIN__)
# define aout_FiltersCreatePipeline (p_symbols)->aout_FiltersCreatePipeline_inner # define aout_FiltersCreatePipeline (p_symbols)->aout_FiltersCreatePipeline_inner
...@@ -948,6 +949,7 @@ struct module_symbols_t ...@@ -948,6 +949,7 @@ struct module_symbols_t
# define convert_xml_special_chars (p_symbols)->convert_xml_special_chars_inner # define convert_xml_special_chars (p_symbols)->convert_xml_special_chars_inner
# define decode_encoded_URI_duplicate (p_symbols)->decode_encoded_URI_duplicate_inner # define decode_encoded_URI_duplicate (p_symbols)->decode_encoded_URI_duplicate_inner
# define resolve_xml_special_chars (p_symbols)->resolve_xml_special_chars_inner # define resolve_xml_special_chars (p_symbols)->resolve_xml_special_chars_inner
# define FromUTF16 (p_symbols)->FromUTF16_inner
# elif defined (HAVE_DYNAMIC_PLUGINS) && !defined (__BUILTIN__) # elif defined (HAVE_DYNAMIC_PLUGINS) && !defined (__BUILTIN__)
/****************************************************************** /******************************************************************
* STORE_SYMBOLS: store VLC APIs into p_symbols for plugin access. * STORE_SYMBOLS: store VLC APIs into p_symbols for plugin access.
...@@ -1415,6 +1417,7 @@ struct module_symbols_t ...@@ -1415,6 +1417,7 @@ struct module_symbols_t
((p_symbols)->convert_xml_special_chars_inner) = convert_xml_special_chars; \ ((p_symbols)->convert_xml_special_chars_inner) = convert_xml_special_chars; \
((p_symbols)->decode_encoded_URI_duplicate_inner) = decode_encoded_URI_duplicate; \ ((p_symbols)->decode_encoded_URI_duplicate_inner) = decode_encoded_URI_duplicate; \
((p_symbols)->resolve_xml_special_chars_inner) = resolve_xml_special_chars; \ ((p_symbols)->resolve_xml_special_chars_inner) = resolve_xml_special_chars; \
((p_symbols)->FromUTF16_inner) = FromUTF16; \
(p_symbols)->net_ConvertIPv4_deprecated = NULL; \ (p_symbols)->net_ConvertIPv4_deprecated = NULL; \
(p_symbols)->__stats_CounterGet_deprecated = NULL; \ (p_symbols)->__stats_CounterGet_deprecated = NULL; \
(p_symbols)->__stats_TimerDumpAll_deprecated = NULL; \ (p_symbols)->__stats_TimerDumpAll_deprecated = NULL; \
......
/***************************************************************************** /*****************************************************************************
* unicode.c: UTF8 <-> locale functions * unicode.c: Unicode <-> locale functions
***************************************************************************** *****************************************************************************
* Copyright (C) 2005-2006 the VideoLAN team * Copyright (C) 2005-2006 the VideoLAN team
* $Id$ * $Id$
* *
* Authors: Rémi Denis-Courmont <rem # videolan.org> * Authors: Rémi Denis-Courmont <rem # videolan.org>
* *
* UTF16toUTF8() adapted from Perl 5 (also GPL'd)
* Copyright (C) 1998-2002, Larry Wall
*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
...@@ -655,9 +658,10 @@ error: ...@@ -655,9 +658,10 @@ error:
} }
/** /**
* UTF32toUTF8(): converts an array from UTF-32 to UTF-8. * UTF32toUTF8(): converts an array from UTF-32 (host byte order)
* to UTF-8.
* *
* @param src the UTF32 table to be converted * @param src the UTF-32 table to be converted
* @param len the number of code points to be converted from src * @param len the number of code points to be converted from src
* (ie. the number of uint32_t in the table pointed to by src) * (ie. the number of uint32_t in the table pointed to by src)
* @param newlen an optional pointer. If not NULL, *newlen will * @param newlen an optional pointer. If not NULL, *newlen will
...@@ -666,7 +670,8 @@ error: ...@@ -666,7 +670,8 @@ error:
* @return the result of the conversion (must be free'd()) * @return the result of the conversion (must be free'd())
* or NULL on error (in that case, *newlen is undefined). * or NULL on error (in that case, *newlen is undefined).
*/ */
char *UTF32toUTF8( const uint32_t *src, size_t len, size_t *newlen ) static char *
UTF32toUTF8( const uint32_t *src, size_t len, size_t *newlen )
{ {
char *res, *out; char *res, *out;
...@@ -725,17 +730,111 @@ char *UTF32toUTF8( const uint32_t *src, size_t len, size_t *newlen ) ...@@ -725,17 +730,111 @@ char *UTF32toUTF8( const uint32_t *src, size_t len, size_t *newlen )
/**
 * FromUTF32(): converts an UTF-32 string to UTF-8.
 *
 * @param src UTF-32 bytes sequence (host byte order), aligned on a 32-bits
 *            boundary and terminated by a zero code point.
 *
 * @return the result of the conversion (must be free()'d),
 * or NULL in case of error.
 */
char *FromUTF32( const uint32_t *src )
{
    const uint32_t *in;
    size_t len;

    /* Determine the size of the string, terminator included (len starts
     * at 1). Each element is tested as a whole host-order 32-bits value:
     * a 16-bits big-endian read would mistake code points whose leading
     * bytes are zero for the terminator. */
    for( len = 1, in = src; *in; len++ )
        in++;

    return UTF32toUTF8( src, len, NULL );
}
/**
 * UTF16toUTF8(): converts an array from UTF-16 (host byte order) to UTF-8.
 *
 * @param in the UTF-16 code units to be converted, aligned on a 16-bits
 *           boundary
 * @param len the number of uint16_t to convert from in
 * @param newlen an optional pointer. If not NULL, *newlen will receive
 *               the number of bytes written to the result.
 *
 * @return the result of the conversion (must be free()'d),
 * or NULL on error (in that case, *newlen is left untouched).
 *
 * The result is only NUL-terminated if the input range includes a zero
 * code unit. Unpaired surrogates are replaced with '?'.
 */
static char *
UTF16toUTF8( const uint16_t *in, size_t len, size_t *newlen )
{
    char *res, *out;

    /* A lone code unit needs at most 3 bytes and a surrogate pair
     * (2 units) needs 4, so 3 bytes per unit is always sufficient. */
    if( len > (size_t)-1 / 3 )
        return NULL; /* 3 * len would overflow */

    /* never request a zero-sized allocation */
    out = res = malloc( len ? 3 * len : 1 );
    if( res == NULL )
        return NULL;

    while( len > 0 )
    {
        uint32_t uv = *in++;
        len--;

        if( uv < 0x80 )
        {
            *out++ = (char)uv;
            continue;
        }

        if( uv < 0x800 )
        {
            *out++ = (char)(( uv >>  6)         | 0xc0);
            *out++ = (char)(( uv        & 0x3f) | 0x80);
            continue;
        }

        if( (uv >= 0xd800) && (uv <= 0xdbff) )
        {   /* high surrogate: must be followed by a low surrogate */
            if( (len == 0) || (*in < 0xdc00) || (*in > 0xdfff) )
            {
                /* Malformed pair. The next unit (if any) is NOT consumed,
                 * so a valid character or the terminator is not lost. */
                *out++ = '?';
                continue;
            }

            uv = ((uv - 0xd800) << 10) + (uint32_t)(*in - 0xdc00) + 0x10000;
            in++;
            len--;
        }
        else if( (uv >= 0xdc00) && (uv <= 0xdfff) )
        {   /* low surrogate without a preceding high surrogate */
            *out++ = '?';
            continue;
        }

        if( uv < 0x10000 )
        {
            *out++ = (char)(( uv >> 12)         | 0xe0);
            *out++ = (char)(((uv >>  6) & 0x3f) | 0x80);
            *out++ = (char)(( uv        & 0x3f) | 0x80);
        }
        else
        {
            *out++ = (char)(( uv >> 18)         | 0xf0);
            *out++ = (char)(((uv >> 12) & 0x3f) | 0x80);
            *out++ = (char)(((uv >>  6) & 0x3f) | 0x80);
            *out++ = (char)(( uv        & 0x3f) | 0x80);
        }
    }

    len = (size_t)(out - res);
    if( len > 0 )
    {
        /* Give back the unused tail. If shrinking fails, the original
         * (larger) buffer is still valid, so keep it rather than leak it. */
        char *shrunk = realloc( res, len );
        if( shrunk != NULL )
            res = shrunk;
    }

    if( newlen != NULL )
        *newlen = len;
    return res;
}
/**
 * FromUTF16(): converts an UTF-16 string to UTF-8.
 *
 * @param src UTF-16 bytes sequence (host byte order), aligned on a 16-bits
 *            boundary and terminated by a zero code unit.
 *
 * @return the result of the conversion (must be free()'d),
 * or NULL in case of error.
 */
char *FromUTF16( const uint16_t *src )
{
    const uint16_t *in;
    size_t len;

    /* Determine the size of the string in code units, terminator included
     * (len starts at 1) so that the terminator gets converted as well.
     * The scan advances one uint16_t at a time: stepping by two would
     * under-count the length and could jump over the terminator. */
    for( len = 1, in = src; *in; len++ )
        in++;

    return UTF16toUTF8( src, len, NULL );
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment