Inline IsUTF8() and EnsureUTF8()

7b331c54 · Rémi Denis-Courmont · 89f83434 · 7b331c54 · 7b331c54 · 7b331c54
Commit 7b331c54 authored Nov 29, 2015 by Rémi Denis-Courmont
Hide whitespace changes
Inline Side-by-side

Showing with 58 additions and 50 deletions

include/vlc_charset.h include/vlc_charset.h +58 -3

src/libvlccore.sym src/libvlccore.sym +0 -2

src/text/unicode.c src/text/unicode.c +0 -45

No files found.
--- a/include/vlc_charset.h
+++ b/include/vlc_charset.h
@@ -50,6 +50,64 @@
 */
 VLC_API size_t vlc_towc(const char *str, uint32_t *restrict pwc);
+/**
+ * Checks UTF-8 validity.
+ *
+ * Checks whether a null-terminated string is a valid UTF-8 byte sequence by
+ * decoding it one sequence at a time with vlc_towc().
+ *
+ * \param str string to check
+ *
+ * \return a non-NULL pointer if no invalid sequence was found, NULL otherwise
+ *
+ * \note On success the returned pointer is not the original \p str: it has
+ * been advanced past every decoded sequence, to the position where
+ * vlc_towc() returned 0 (presumably the terminating null byte; confirm
+ * against vlc_towc()). Callers that need the start of the string must keep
+ * their own copy of the pointer.
+ * \note Checking stops at the first sequence for which vlc_towc() returns
+ * (size_t)-1.
+ */
+VLC_USED static inline const char *IsUTF8(const char *str)
+{
+    size_t n;
+    uint32_t cp;
+    while ((n = vlc_towc(str, &cp)) != 0)
+        if (likely(n != (size_t)-1))
+            str += n;
+        else
+            return NULL;
+    return str;
+}
+/**
+ * Removes non-UTF-8 sequences.
+ *
+ * Replaces invalid or <i>over-long</i> UTF-8 byte sequences within a
+ * null-terminated string with question marks. This is so that the string can
+ * be printed at least partially.
+ *
+ * The string is modified in place: each time vlc_towc() returns (size_t)-1,
+ * the single byte at the current position is overwritten with '?' and
+ * scanning resumes at the following byte.
+ *
+ * \warning Do not use this where correctness is critical. Use IsUTF8() and
+ * handle the error case instead. This function is mainly for display or debug.
+ *
+ * \note Converting from Latin-1 to UTF-8 in place is not possible (the string
+ * size would be increased). So it is not attempted even if it would otherwise
+ * be less disruptive.
+ *
+ * \param str string to sanitize (modified in place)
+ *
+ * \retval str the string was already a valid null-terminated UTF-8 sequence
+ *             (i.e. no changes were made)
+ * \retval NULL the string was not valid UTF-8; at least one byte was replaced
+ *              with a question mark
+ */
+static inline char *EnsureUTF8(char *str)
+{
+    char *ret = str;
+    size_t n;
+    uint32_t cp;
+    while ((n = vlc_towc(str, &cp)) != 0)
+        if (likely(n != (size_t)-1))
+            str += n;
+        else
+        {
+            *str++ = '?';
+            ret = NULL;
+        }
+    return ret;
+}
 /* iconv wrappers (defined in src/extras/libc.c) */
 typedef void *vlc_iconv_t;
 VLC_API vlc_iconv_t vlc_iconv_open( const char *, const char * ) VLC_USED;
@@ -62,9 +120,6 @@ VLC_API int utf8_vfprintf( FILE *stream, const char *fmt, va_list ap );
 VLC_API int utf8_fprintf( FILE *, const char *, ... ) VLC_FORMAT( 2, 3 );
 VLC_API char * vlc_strcasestr(const char *, const char *) VLC_USED;
-VLC_API char * EnsureUTF8( char * );
-VLC_API const char * IsUTF8( const char * ) VLC_USED;
 VLC_API char * FromCharset( const char *charset, const void *data, size_t data_size ) VLC_USED;
 VLC_API void * ToCharset( const char *charset, const char *in, size_t *outsize ) VLC_USED;

--- a/src/libvlccore.sym
+++ b/src/libvlccore.sym
@@ -107,7 +107,6 @@ dialog_Unregister
 dialog_VFatal
 encode_URI_component
 EndMD5
-EnsureUTF8
 es_format_Clean
 es_format_Copy
 es_format_Init
@@ -233,7 +232,6 @@ input_Stop
 input_vaControl
 input_Close
 intf_Create
-IsUTF8
 libvlc_InternalAddIntf
 libvlc_InternalPlay
 libvlc_InternalCleanup

--- a/src/text/unicode.c
+++ b/src/text/unicode.c
@@ -228,51 +228,6 @@ char *vlc_strcasestr (const char *haystack, const char *needle)
    return NULL;
 }
-/**
- * Replaces invalid/overlong UTF-8 sequences with question marks.
- * Note that it is not possible to convert from Latin-1 to UTF-8 on the fly,
- * so we don't try that, even though it would be less disruptive.
- *
- * @return str if it was valid UTF-8, NULL if not.
- */
-char *EnsureUTF8( char *str )
-{
-    char *ret = str;
-    size_t n;
-    uint32_t cp;
-    while ((n = vlc_towc (str, &cp)) != 0)
-        if (likely(n != (size_t)-1))
-            str += n;
-        else
-        {
-            *str++ = '?';
-            ret = NULL;
-        }
-    return ret;
-}
-/**
- * Checks whether a string is a valid UTF-8 byte sequence.
- *
- * @param str nul-terminated string to be checked
- *
- * @return str if it was valid UTF-8, NULL if not.
- */
-const char *IsUTF8( const char *str )
-{
-    size_t n;
-    uint32_t cp;
-    while ((n = vlc_towc (str, &cp)) != 0)
-        if (likely(n != (size_t)-1))
-            str += n;
-        else
-            return NULL;
-    return str;
-}
 /**
 * Converts a string from the given character encoding to utf-8.
 *