Commit 7b331c54 authored by Rémi Denis-Courmont

Inline IsUTF8() and EnsureUTF8()

parent 89f83434
...@@ -50,6 +50,64 @@ ...@@ -50,6 +50,64 @@
*/ */
VLC_API size_t vlc_towc(const char *str, uint32_t *restrict pwc); VLC_API size_t vlc_towc(const char *str, uint32_t *restrict pwc);
/**
 * Checks UTF-8 validity.
 *
 * Checks whether a null-terminated string is a valid UTF-8 bytes sequence.
 * The string is decoded one sequence at a time with vlc_towc(): a return
 * value of 0 ends the scan, and a return value of (size_t)-1 makes the check
 * fail immediately.
 *
 * \param str string to check
 *
 * \retval str the string is a valid null-terminated UTF-8 sequence
 *             (the pointer passed by the caller is returned unchanged)
 * \retval NULL the string is not a UTF-8 sequence
 */
VLC_USED static inline const char *IsUTF8(const char *str)
{
    /* Scan with a separate cursor: advancing str itself would make the
     * function return a pointer to the end of the string instead of the
     * string that was checked. */
    const char *p = str;
    size_t n;
    uint32_t cp;

    while ((n = vlc_towc(p, &cp)) != 0)
    {
        if (likely(n != (size_t)-1))
            p += n;
        else
            return NULL;
    }
    return str;
}
/**
 * Removes non-UTF-8 sequences.
 *
 * Replaces invalid or <i>over-long</i> UTF-8 bytes sequences within a
 * null-terminated string with question marks, so that the string can be
 * printed at least partially. The string is modified in place: each byte at
 * which vlc_towc() reports an error is overwritten with '?', and decoding
 * resumes at the following byte.
 *
 * \warning Do not use this where correctness is critical. Use IsUTF8() and
 * handle the error case instead. This function is mainly for display or debug.
 *
 * \note Converting from Latin-1 to UTF-8 in place is not possible (the string
 * size would be increased). So it is not attempted even if it would otherwise
 * be less disruptive.
 *
 * \param str string to sanitize (modified in place)
 *
 * \retval str the string is a valid null-terminated UTF-8 sequence
 *             (i.e. no changes were made)
 * \retval NULL the string was not a valid UTF-8 sequence
 *              (i.e. at least one byte was replaced)
 */
static inline char *EnsureUTF8(char *str)
{
    char *result = str;
    char *cursor = str;
    uint32_t codepoint;

    for (;;)
    {
        const size_t len = vlc_towc(cursor, &codepoint);

        if (len == 0)
            break;

        if (likely(len != (size_t)-1))
        {
            /* Well-formed sequence: skip over it. */
            cursor += len;
            continue;
        }

        /* Invalid byte: blank it out and remember that the input was bad. */
        *cursor = '?';
        cursor++;
        result = NULL;
    }

    return result;
}
/* iconv wrappers (defined in src/extras/libc.c) */ /* iconv wrappers (defined in src/extras/libc.c) */
typedef void *vlc_iconv_t; typedef void *vlc_iconv_t;
VLC_API vlc_iconv_t vlc_iconv_open( const char *, const char * ) VLC_USED; VLC_API vlc_iconv_t vlc_iconv_open( const char *, const char * ) VLC_USED;
...@@ -62,9 +120,6 @@ VLC_API int utf8_vfprintf( FILE *stream, const char *fmt, va_list ap ); ...@@ -62,9 +120,6 @@ VLC_API int utf8_vfprintf( FILE *stream, const char *fmt, va_list ap );
VLC_API int utf8_fprintf( FILE *, const char *, ... ) VLC_FORMAT( 2, 3 ); VLC_API int utf8_fprintf( FILE *, const char *, ... ) VLC_FORMAT( 2, 3 );
VLC_API char * vlc_strcasestr(const char *, const char *) VLC_USED; VLC_API char * vlc_strcasestr(const char *, const char *) VLC_USED;
VLC_API char * EnsureUTF8( char * );
VLC_API const char * IsUTF8( const char * ) VLC_USED;
VLC_API char * FromCharset( const char *charset, const void *data, size_t data_size ) VLC_USED; VLC_API char * FromCharset( const char *charset, const void *data, size_t data_size ) VLC_USED;
VLC_API void * ToCharset( const char *charset, const char *in, size_t *outsize ) VLC_USED; VLC_API void * ToCharset( const char *charset, const char *in, size_t *outsize ) VLC_USED;
......
...@@ -107,7 +107,6 @@ dialog_Unregister ...@@ -107,7 +107,6 @@ dialog_Unregister
dialog_VFatal dialog_VFatal
encode_URI_component encode_URI_component
EndMD5 EndMD5
EnsureUTF8
es_format_Clean es_format_Clean
es_format_Copy es_format_Copy
es_format_Init es_format_Init
...@@ -233,7 +232,6 @@ input_Stop ...@@ -233,7 +232,6 @@ input_Stop
input_vaControl input_vaControl
input_Close input_Close
intf_Create intf_Create
IsUTF8
libvlc_InternalAddIntf libvlc_InternalAddIntf
libvlc_InternalPlay libvlc_InternalPlay
libvlc_InternalCleanup libvlc_InternalCleanup
......
...@@ -228,51 +228,6 @@ char *vlc_strcasestr (const char *haystack, const char *needle) ...@@ -228,51 +228,6 @@ char *vlc_strcasestr (const char *haystack, const char *needle)
return NULL; return NULL;
} }
/**
 * Replaces invalid/overlong UTF-8 sequences with question marks.
 *
 * The string is rewritten in place: every byte at which vlc_towc() reports
 * an error is overwritten with '?' and decoding resumes at the next byte.
 * Note that it is not possible to convert from Latin-1 to UTF-8 on the fly,
 * so we don't try that, even though it would be less disruptive.
 *
 * @param str nul-terminated string to be sanitized (modified in place)
 *
 * @return str if it was valid UTF-8 (nothing was changed), NULL if not.
 */
char *EnsureUTF8( char *str )
{
    char *start = str;
    int modified = 0;
    uint32_t cp;
    size_t n = vlc_towc (str, &cp);

    while (n != 0)
    {
        if (likely(n != (size_t)-1))
        {
            str += n;
        }
        else
        {
            /* Invalid byte: replace it and step past it. */
            *str = '?';
            str++;
            modified = 1;
        }
        n = vlc_towc (str, &cp);
    }

    return modified ? NULL : start;
}
/**
 * Checks whether a string is a valid UTF-8 byte sequence.
 *
 * The string is decoded one sequence at a time with vlc_towc(): a return
 * value of 0 ends the scan, and a return value of (size_t)-1 makes the check
 * fail immediately.
 *
 * @param str nul-terminated string to be checked
 *
 * @return str (the pointer passed by the caller, unchanged) if it was valid
 *         UTF-8, NULL if not.
 */
const char *IsUTF8( const char *str )
{
    /* Scan with a separate cursor: advancing str itself would make the
     * function return a pointer to the end of the string instead of the
     * string that was checked. */
    const char *p = str;
    size_t n;
    uint32_t cp;

    while ((n = vlc_towc (p, &cp)) != 0)
    {
        if (likely(n != (size_t)-1))
            p += n;
        else
            return NULL;
    }
    return str;
}
/** /**
* Converts a string from the given character encoding to utf-8. * Converts a string from the given character encoding to utf-8.
* *
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment