Commit 1b49bb51 authored by Rémi Denis-Courmont

Use the return value of EnsureUTF8() to indicate whether the byte sequence was
valid UTF-8 or not (incomplete, incorrect non-ASCII, or overlong sequences)
parent 80e5477b
...@@ -428,6 +428,7 @@ int utf8_lstat( const char *filename, void *buf) ...@@ -428,6 +428,7 @@ int utf8_lstat( const char *filename, void *buf)
***************************************************************************** *****************************************************************************
* Not Todo : convert Latin1 to UTF-8 on the fly * Not Todo : convert Latin1 to UTF-8 on the fly
* It is not possible given UTF-8 needs more space * It is not possible given UTF-8 needs more space
* Returns str if it was valid UTF-8, NULL if not.
*****************************************************************************/ *****************************************************************************/
#define isutf8cont( c ) (((c) >= 0x80) && ((c) <= 0xBF)) #define isutf8cont( c ) (((c) >= 0x80) && ((c) <= 0xBF))
char *EnsureUTF8( char *str ) char *EnsureUTF8( char *str )
...@@ -451,7 +452,10 @@ char *EnsureUTF8( char *str ) ...@@ -451,7 +452,10 @@ char *EnsureUTF8( char *str )
if( isutf8cont( c ) ) if( isutf8cont( c ) )
ptr += 2; /* OK */ ptr += 2; /* OK */
else else
{
*ptr++ = '?'; /* invalid */ *ptr++ = '?'; /* invalid */
str = NULL;
}
} }
else else
/* 3 bytes */ /* 3 bytes */
...@@ -464,10 +468,16 @@ char *EnsureUTF8( char *str ) ...@@ -464,10 +468,16 @@ char *EnsureUTF8( char *str )
if( isutf8cont( c ) ) if( isutf8cont( c ) )
ptr += 3; /* OK */ ptr += 3; /* OK */
else else
{
*ptr++ = '?'; *ptr++ = '?';
str = NULL;
}
} }
else else
{
*ptr++ = '?'; *ptr++ = '?';
str = NULL;
}
} }
else else
if( ( ( c >= 0xE1 ) && ( c <= 0xEC ) ) || ( c == 0xEC ) if( ( ( c >= 0xE1 ) && ( c <= 0xEC ) ) || ( c == 0xEC )
...@@ -480,10 +490,16 @@ char *EnsureUTF8( char *str ) ...@@ -480,10 +490,16 @@ char *EnsureUTF8( char *str )
if( isutf8cont( c ) ) if( isutf8cont( c ) )
ptr += 3; /* OK */ ptr += 3; /* OK */
else else
{
*ptr++ = '?'; *ptr++ = '?';
str = NULL;
}
} }
else else
{
*ptr++ = '?'; *ptr++ = '?';
str = NULL;
}
} }
else else
if( c == 0xED ) if( c == 0xED )
...@@ -495,10 +511,16 @@ char *EnsureUTF8( char *str ) ...@@ -495,10 +511,16 @@ char *EnsureUTF8( char *str )
if( isutf8cont( c ) ) if( isutf8cont( c ) )
ptr += 3; /* OK */ ptr += 3; /* OK */
else else
{
*ptr++ = '?'; *ptr++ = '?';
str = NULL;
}
} }
else else
{
*ptr++ = '?'; *ptr++ = '?';
str = NULL;
}
} }
else else
/* 4 bytes */ /* 4 bytes */
...@@ -514,13 +536,22 @@ char *EnsureUTF8( char *str ) ...@@ -514,13 +536,22 @@ char *EnsureUTF8( char *str )
if( isutf8cont( c ) ) if( isutf8cont( c ) )
ptr += 4; /* OK */ ptr += 4; /* OK */
else else
{
*ptr++ = '?'; *ptr++ = '?';
str = NULL;
}
} }
else else
{
*ptr++ = '?'; *ptr++ = '?';
str = NULL;
}
} }
else else
{
*ptr++ = '?'; *ptr++ = '?';
str = NULL;
}
} }
else else
if( ( c >= 0xF1 ) && ( c <= 0xF3 ) ) if( ( c >= 0xF1 ) && ( c <= 0xF3 ) )
...@@ -535,13 +566,22 @@ char *EnsureUTF8( char *str ) ...@@ -535,13 +566,22 @@ char *EnsureUTF8( char *str )
if( isutf8cont( c ) ) if( isutf8cont( c ) )
ptr += 4; /* OK */ ptr += 4; /* OK */
else else
{
*ptr++ = '?'; *ptr++ = '?';
str = NULL;
}
} }
else else
{
*ptr++ = '?'; *ptr++ = '?';
str = NULL;
}
} }
else else
{
*ptr++ = '?'; *ptr++ = '?';
str = NULL;
}
} }
else else
if( c == 0xF4 ) if( c == 0xF4 )
...@@ -556,16 +596,28 @@ char *EnsureUTF8( char *str ) ...@@ -556,16 +596,28 @@ char *EnsureUTF8( char *str )
if( isutf8cont( c ) ) if( isutf8cont( c ) )
ptr += 4; /* OK */ ptr += 4; /* OK */
else else
{
*ptr++ = '?'; *ptr++ = '?';
str = NULL;
}
} }
else else
{
*ptr++ = '?'; *ptr++ = '?';
str = NULL;
}
} }
else else
{
*ptr++ = '?'; *ptr++ = '?';
str = NULL;
}
} }
else else
{
*ptr++ = '?'; *ptr++ = '?';
str = NULL;
}
} }
return str; return str;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment