Commit 168549fa authored by Rémi Denis-Courmont's avatar Rémi Denis-Courmont

Support URL-escape non-ASCII characters properly as far as possible

(ie. all characters from the Basic Multilingual Plane from Unicode)
parent 798b0d97
/***************************************************************************** /*****************************************************************************
* vlc_url.h: URL related macros * vlc_url.h: URL related macros
***************************************************************************** *****************************************************************************
* Copyright (C) 2002-2005 the VideoLAN team * Copyright (C) 2002-2006 the VideoLAN team
* $Id$ * $Id$
* *
* Authors: Christophe Massiot <massiot@via.ecp.fr> * Authors: Christophe Massiot <massiot@via.ecp.fr>
...@@ -174,6 +174,7 @@ static inline int isurlsafe( int c ) ...@@ -174,6 +174,7 @@ static inline int isurlsafe( int c )
return ( (unsigned char)( c - 'a' ) < 26 ) return ( (unsigned char)( c - 'a' ) < 26 )
|| ( (unsigned char)( c - 'A' ) < 26 ) || ( (unsigned char)( c - 'A' ) < 26 )
|| ( (unsigned char)( c - '0' ) < 10 ) || ( (unsigned char)( c - '0' ) < 10 )
|| ( (unsigned char)( c ) > 127 )
/* Hmm, we should not encode character that are allowed in URLs /* Hmm, we should not encode character that are allowed in URLs
* (even if they are not URL-safe), nor URL-safe characters. * (even if they are not URL-safe), nor URL-safe characters.
* We still encode some of them because of Microsoft's crap browser. * We still encode some of them because of Microsoft's crap browser.
...@@ -181,6 +182,11 @@ static inline int isurlsafe( int c ) ...@@ -181,6 +182,11 @@ static inline int isurlsafe( int c )
|| ( strchr( "-_.", c ) != NULL ); || ( strchr( "-_.", c ) != NULL );
} }
static inline char url_hexchar( int c )
{
return ( c < 10 ) ? c + '0' : c + 'A' - 10;
}
/***************************************************************************** /*****************************************************************************
* vlc_UrlEncode: * vlc_UrlEncode:
***************************************************************************** *****************************************************************************
...@@ -189,14 +195,9 @@ static inline int isurlsafe( int c ) ...@@ -189,14 +195,9 @@ static inline int isurlsafe( int c )
*****************************************************************************/ *****************************************************************************/
static inline char *vlc_UrlEncode( const char *psz_url ) static inline char *vlc_UrlEncode( const char *psz_url )
{ {
char *psz_enc, *out; char psz_enc[3 * strlen( psz_url ) + 1], *out = psz_enc;
const unsigned char *in; const unsigned char *in;
psz_enc = (char *)malloc( 3 * strlen( psz_url ) + 1 );
if( psz_enc == NULL )
return NULL;
out = psz_enc;
for( in = (const unsigned char *)psz_url; *in; in++ ) for( in = (const unsigned char *)psz_url; *in; in++ )
{ {
unsigned char c = *in; unsigned char c = *in;
...@@ -205,16 +206,48 @@ static inline char *vlc_UrlEncode( const char *psz_url ) ...@@ -205,16 +206,48 @@ static inline char *vlc_UrlEncode( const char *psz_url )
*out++ = (char)c; *out++ = (char)c;
else else
{ {
uint16_t cp;
*out++ = '%'; *out++ = '%';
*out++ = ( ( c >> 4 ) >= 0xA ) ? 'A' + ( c >> 4 ) - 0xA /* UTF-8 to UCS-2 conversion */
: '0' + ( c >> 4 ); if( ( c & 0x7f ) == 0 )
*out++ = ( ( c & 0xf ) >= 0xA ) ? 'A' + ( c & 0xf ) - 0xA cp = c;
: '0' + ( c & 0xf ); else
if( ( c & 0xe0 ) == 0xc0 )
{
cp = ((c & 0x1f) << 6) | (in[1] & 0x3f);
in++;
}
else
if( ( c & 0xf0 ) == 0xe0 )
{
cp = ((c & 0xf) << 12) | ((in[1] & 0x3f) << 6) | (in[2] & 0x3f);
in += 2;
}
else
/* cannot URL-encode code points outside the BMP */
return NULL;
if( cp < 0xff )
{
/* Encode ISO-8859-1 characters */
*out++ = url_hexchar( cp >> 4 );
*out++ = url_hexchar( cp & 0xf );
}
else
{
/* Encode non-Latin-1 characters */
*out++ = 'u';
*out++ = url_hexchar( cp >> 12 );
*out++ = url_hexchar((cp >> 8) & 0xf );
*out++ = url_hexchar((cp >> 4) & 0xf );
*out++ = url_hexchar( cp & 0xf );
}
} }
} }
*out++ = '\0'; *out++ = '\0';
return (char *)realloc( psz_enc, out - psz_enc ); return strdup( psz_enc );
} }
/***************************************************************************** /*****************************************************************************
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment