Commit 73508ddd authored by Rémi Denis-Courmont

- Reimplement URL component decoding separately from URL unescaping

- Replace crap URL escaping with URL encoding, which is what we really need
parent d4f0ff77
......@@ -488,6 +488,9 @@ struct module_symbols_t
const char * (*IsUTF8_inner) (const char *);
const char * (*GetFallbackEncoding_inner) (void);
int (*utf8_scandir_inner) (const char *dirname, char ***namelist, int (*select)( const char * ), int (*compar)( const char **, const char ** ));
char * (*decode_URI_duplicate_inner) (const char *psz);
void (*decode_URI_inner) (char *psz);
char * (*encode_URI_inner) (const char *psz);
};
# if defined (__PLUGIN__)
# define aout_FiltersCreatePipeline (p_symbols)->aout_FiltersCreatePipeline_inner
......@@ -956,6 +959,9 @@ struct module_symbols_t
/* Each public API name below expands to the matching "_inner" function
 * pointer read through p_symbols.
 * NOTE(review): presumably these still sit inside the
 * "# if defined (__PLUGIN__)" branch; the lines in between are not shown. */
# define IsUTF8 (p_symbols)->IsUTF8_inner
# define GetFallbackEncoding (p_symbols)->GetFallbackEncoding_inner
# define utf8_scandir (p_symbols)->utf8_scandir_inner
# define decode_URI_duplicate (p_symbols)->decode_URI_duplicate_inner
# define decode_URI (p_symbols)->decode_URI_inner
# define encode_URI (p_symbols)->encode_URI_inner
# elif defined (HAVE_DYNAMIC_PLUGINS) && !defined (__BUILTIN__)
/******************************************************************
* STORE_SYMBOLS: store VLC APIs into p_symbols for plugin access.
......@@ -1427,6 +1433,9 @@ struct module_symbols_t
((p_symbols)->IsUTF8_inner) = IsUTF8; \
((p_symbols)->GetFallbackEncoding_inner) = GetFallbackEncoding; \
((p_symbols)->utf8_scandir_inner) = utf8_scandir; \
((p_symbols)->decode_URI_duplicate_inner) = decode_URI_duplicate; \
((p_symbols)->decode_URI_inner) = decode_URI; \
((p_symbols)->encode_URI_inner) = encode_URI; \
(p_symbols)->net_ConvertIPv4_deprecated = NULL; \
(p_symbols)->__stats_CounterGet_deprecated = NULL; \
(p_symbols)->__stats_TimerDumpAll_deprecated = NULL; \
......
......@@ -171,90 +171,14 @@ static inline void vlc_UrlClean( vlc_url_t *url )
/* Unescape a URI-encoded string. unescape_URI() takes a writable buffer;
 * unescape_URI_duplicate() takes a const input and returns a char *.
 * NOTE(review): presumably a heap copy the caller must free() - its
 * definition is not visible here. */
VLC_EXPORT( char *, unescape_URI_duplicate, ( const char *psz ) );
VLC_EXPORT( void, unescape_URI, ( char *psz ) );
/* Decode a URI component. decode_URI() rewrites psz in place;
 * decode_URI_duplicate() works on a strdup()'d copy and returns it. */
VLC_EXPORT( char *, decode_URI_duplicate, ( const char *psz ) );
VLC_EXPORT( void, decode_URI, ( char *psz ) );
/* Encode a string for use inside a URI.
 * NOTE(review): the only encoder definition visible next to this declaration
 * is named encode_URI_component - confirm that a function actually called
 * encode_URI is defined, otherwise this exported symbol cannot be resolved. */
VLC_EXPORT( char *, encode_URI, ( const char *psz ) );
/**
 * Tells whether a character may be copied into a URL without being encoded.
 *
 * Only ASCII letters, ASCII digits and the three punctuation characters
 * '-', '_' and '.' are accepted. The nul character is never safe.
 *
 * @param c character to test
 * @return non-zero if c can be emitted as is, 0 if it has to be encoded
 */
static inline int isurlsafe( int c )
{
    return ( (unsigned char)( c - 'a' ) < 26 )
        || ( (unsigned char)( c - 'A' ) < 26 )
        || ( (unsigned char)( c - '0' ) < 10 )
        /* Hmm, we should not encode characters that are allowed in URLs
         * (even if they are not URL-safe), nor URL-safe characters.
         * We still encode some of them because of Microsoft's crap browser.
         *
         * Compared one by one rather than with strchr(): strchr() also
         * matches the terminating nul of its haystack, which would make
         * the nul character look URL-safe.
         */
        || ( c == '-' ) || ( c == '_' ) || ( c == '.' );
}
/**
 * Converts a small integer into an upper-case hexadecimal digit.
 *
 * @param c value to convert; the visible callers pass a nibble (0..15)
 * @return '0' + c for values below 10, 'A' + (c - 10) otherwise
 */
static inline char url_hexchar( int c )
{
    if( c >= 10 )
        return c - 10 + 'A';
    return '0' + c;
}
/*****************************************************************************
 * vlc_UrlEncode:
 *****************************************************************************
 * perform URL encoding
 * (you do NOT want to do URL decoding - it is not reversible - do NOT do it)
 *
 * Bytes accepted by isurlsafe() are copied unchanged. Every other byte is
 * read as the start of a UTF-8 sequence and written as "%XX" when the code
 * point is below 0xff, or as "%uXXXX" otherwise.
 *
 * Returns the result of strdup() on the encoded buffer.
 *
 * NOTE(review): as written, the first return statement ends the function, so
 * the trailing "return encode_URI( psz_url )" is never reached. This looks
 * like the removed and the added side of a diff shown together - confirm
 * which body is the intended one before touching the code.
 *****************************************************************************/
static inline char *vlc_UrlEncode( const char *psz_url )
{
/* Variable-length array sized for three output characters per input byte,
 * plus the terminator. Its size grows with the length of psz_url. */
char psz_enc[3 * strlen( psz_url ) + 1], *out = psz_enc;
const uint8_t *in;
for( in = (const uint8_t *)psz_url; *in; in++ )
{
uint8_t c = *in;
if( isurlsafe( c ) )
{
*out++ = (char)c;
}
else
{
uint16_t cp;
*out++ = '%';
/* UTF-8 to UCS-2 conversion */
if( ( c & 0x80 ) == 0 )
{
cp = c;
}
else if( ( c & 0xe0 ) == 0xc0 )
{
/* Two-byte sequence.
 * NOTE(review): in[1] is used without checking for the terminator; if the
 * string ends on a lone lead byte, in++ steps past the nul. */
cp = (((uint16_t)c & 0x1f) << 6) | (in[1] & 0x3f);
in++;
}
else if( ( c & 0xf0 ) == 0xe0 )
{
/* Three-byte sequence.
 * NOTE(review): same remark for in[1] and in[2] on a truncated sequence. */
cp = (((uint16_t)c & 0xf) << 12) | (((uint16_t)(in[1]) & 0x3f) << 6) | (in[2] & 0x3f);
in += 2;
}
else
{
/* cannot URL-encode code points outside the BMP */
/* better a wrong conversion than a crash */
/* (stray continuation bytes 0x80..0xbf also end up in this branch) */
cp = '?';
}
/* NOTE(review): the comparison is strict, so code point 0xff itself takes
 * the "%uXXXX" branch below. */
if( cp < 0xff )
{
/* Encode ISO-8859-1 characters */
*out++ = url_hexchar( cp >> 4 );
*out++ = url_hexchar( cp & 0xf );
}
else
{
/* Encode non-Latin-1 characters */
*out++ = 'u';
*out++ = url_hexchar( cp >> 12 );
*out++ = url_hexchar((cp >> 8) & 0xf );
*out++ = url_hexchar((cp >> 4) & 0xf );
*out++ = url_hexchar( cp & 0xf );
}
}
}
*out++ = '\0';
return strdup( psz_enc );
/* FIXME: do not encode / : ? and & _when_ not needed */
/* NOTE(review): unreachable, see the header comment. */
return encode_URI( psz_url );
}
/*****************************************************************************
......@@ -279,7 +203,10 @@ static inline int vlc_UrlIsNotEncoded( const char *psz_url )
ptr += 2;
}
else
if( !isurlsafe( c ) )
if( ( (unsigned char)( c - 'a' ) < 26 )
|| ( (unsigned char)( c - 'A' ) < 26 )
|| ( (unsigned char)( c - '0' ) < 10 )
|| ( strchr( "-_.", c ) != NULL ) )
return 1;
}
return 0; /* looks fine - but maybe it is not encoded */
......
......@@ -34,6 +34,7 @@
#include "vlc_strings.h"
#include "vlc_url.h"
#include "charset.h"
/**
* Unescape URI encoded string
......@@ -116,6 +117,111 @@ void unescape_URI( char *psz )
*out = '\0';
}
/**
 * Decode encoded URI string
 *
 * Works on a private copy, so the input is left untouched.
 *
 * \param psz nul-terminated encoded string
 * \return decoded duplicated string (to be released with free()),
 *         or NULL if the copy could not be allocated
 */
char *decode_URI_duplicate( const char *psz )
{
    char *psz_dup = strdup( psz );

    /* strdup() returns NULL on allocation failure, and the decoder
     * dereferences its argument unconditionally. */
    if( psz_dup != NULL )
        decode_URI( psz_dup ); /* decode, not merely unescape */

    return psz_dup;
}
/**
 * Decode encoded URI string in place
 *
 * "%XX" sequences are replaced by the byte they encode and '+' by a space.
 * Unencoded bytes below 32 or above 127 are replaced by '?'. If the string
 * ends in the middle of a "%XX" sequence, decoding stops there and the
 * output is terminated after the last complete character. The result is
 * finally passed to EnsureUTF8().
 *
 * \param psz nul-terminated string to decode; it is overwritten with the
 *            result, which is never longer than the input
 * \return nothing
 */
void decode_URI( char *psz )
{
    unsigned char *in = (unsigned char *)psz, *out = in, c;

    while( ( c = *in++ ) != '\0' )
    {
        if( c == '%' )
        {
            /* two hexadecimal digits plus the terminator strtoul() needs */
            char hex[3];

            if( ( ( hex[0] = *in++ ) == 0 )
             || ( ( hex[1] = *in++ ) == 0 ) )
                break; /* truncated escape: keep what was decoded so far */

            hex[2] = '\0';
            /* NOTE: strtoul() yields 0 when the two characters are not
             * hexadecimal digits, which ends the output string early. */
            *out++ = (unsigned char)strtoul( hex, NULL, 0x10 );
        }
        else if( c == '+' )
        {
            *out++ = ' ';
        }
        else
        {
            /* Inserting non-ASCII or non-printable characters is unsafe,
             * and no sane browser will send these unencoded */
            if( ( c < 32 ) || ( c > 127 ) )
                *out++ = '?';
            else
                *out++ = c;
        }
    }
    *out = '\0';
    EnsureUTF8( psz );
}
/**
 * Tells whether a character may be copied into a URL without being encoded.
 *
 * Only ASCII letters, ASCII digits and the three punctuation characters
 * '-', '_' and '.' are accepted. The nul character is never safe.
 *
 * @param c character to test
 * @return non-zero if c can be emitted as is, 0 if it has to be encoded
 */
static inline int isurlsafe( int c )
{
    return ( (unsigned char)( c - 'a' ) < 26 )
        || ( (unsigned char)( c - 'A' ) < 26 )
        || ( (unsigned char)( c - '0' ) < 10 )
        /* Hmm, we should not encode characters that are allowed in URLs
         * (even if they are not URL-safe), nor URL-safe characters.
         * We still encode some of them because of Microsoft's crap browser.
         *
         * Compared one by one rather than with strchr(): strchr() also
         * matches the terminating nul of its haystack, which would make
         * the nul character look URL-safe.
         */
        || ( c == '-' ) || ( c == '_' ) || ( c == '.' );
}
/**
 * Maps a nibble to its upper-case hexadecimal digit.
 *
 * @param c value to convert; the visible caller passes 0..15
 * @return '0'..'9' for values below 10, letters starting at 'A' otherwise
 */
static inline char url_hexchar( int c )
{
    char digit;

    if( c < 10 )
        digit = '0' + c;
    else
        digit = 'A' + c - 10;
    return digit;
}
/**
 * encode_URI_component
 * Encodes an URI component.
 *
 * Characters accepted by isurlsafe() are copied as is, a space becomes '+',
 * and every other byte becomes "%XX" with upper-case hexadecimal digits.
 *
 * NOTE(review): the header declares and exports this service under the name
 * encode_URI, not encode_URI_component - confirm which name is intended.
 *
 * @param psz_url nul-terminated UTF-8 representation of the component.
 * Obviously, you can't pass an URI containing a nul character, but you don't
 * want to do that, do you?
 *
 * @return encoded string (must be free()'d), or NULL if memory could not be
 * allocated
 */
char *encode_URI_component( const char *psz_url )
{
    const size_t i_len = strlen( psz_url );

    /* Each input byte expands to at most three characters ("%XX"); refuse
     * lengths for which that computation would overflow. */
    if( i_len > ( SIZE_MAX - 1 ) / 3 )
        return NULL;

    /* Encode straight into the heap buffer that is returned: the input
     * length is caller-controlled, so it must not size a stack array. */
    char *psz_enc = malloc( 3 * i_len + 1 );
    if( psz_enc == NULL )
        return NULL;

    char *out = psz_enc;
    for( const uint8_t *in = (const uint8_t *)psz_url; *in; in++ )
    {
        const uint8_t c = *in;

        if( isurlsafe( c ) )
            *out++ = (char)c;
        else if( c == ' ' )
            *out++ = '+';
        else
        {
            *out++ = '%';
            *out++ = url_hexchar( c >> 4 );
            *out++ = url_hexchar( c & 0xf );
        }
    }
    *out = '\0';

    return psz_enc;
}
/**
* Converts "&lt;", "&gt;" and "&amp;" to "<", ">" and "&"
* \param string to convert
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment