Commit 79de2215 authored by Rémi Denis-Courmont

resolve_xml_special_chars handles non-ASCII Unicode code points

parent 7f7fc9ea
...@@ -384,23 +384,42 @@ void resolve_xml_special_chars( char *psz_value ) ...@@ -384,23 +384,42 @@ void resolve_xml_special_chars( char *psz_value )
{ {
if( *psz_value == '&' ) if( *psz_value == '&' )
{ {
const char *psz_value1 = psz_value + 1; if( psz_value[1] == '#' )
if( *psz_value1 == '#' ) { /* &#xxx; Unicode code point */
{
char *psz_end; char *psz_end;
int i = strtol( psz_value+2, &psz_end, 10 ); unsigned long cp = strtoul( psz_value+2, &psz_end, 10 );
if( *psz_end == ';' ) if( *psz_end == ';' )
{ {
if( i >= 32 && i <= 126 ) psz_value = psz_end + 1;
if( cp == 0 )
(void)0; /* skip nuls */
else
if( cp <= 0x7F )
{
*p_pos = cp;
}
else
/* Unicode code point outside ASCII.
* &#xxx; representation is longer than UTF-8 :) */
if( cp <= 0x7FF )
{ {
*p_pos = (char)i; *p_pos++ = 0xC0 | (cp >> 6);
psz_value = psz_end+1; *p_pos = 0x80 | (cp & 0x3F);
} }
else else
if( cp <= 0xFFFF )
{ {
/* Unhandled code, FIXME */ *p_pos++ = 0xE0 | (cp >> 12);
*p_pos = *psz_value; *p_pos++ = 0x80 | ((cp >> 6) & 0x3F);
psz_value++; *p_pos = 0x80 | (cp & 0x3F);
}
else
if( cp <= 0x1FFFFF ) /* Outside the BMP */
{ /* Unicode stops at 10FFFF, but who cares? */
*p_pos++ = 0xF0 | (cp >> 18);
*p_pos++ = 0x80 | ((cp >> 12) & 0x3F);
*p_pos++ = 0x80 | ((cp >> 6) & 0x3F);
*p_pos = 0x80 | (cp & 0x3F);
} }
} }
else else
...@@ -411,10 +430,10 @@ void resolve_xml_special_chars( char *psz_value ) ...@@ -411,10 +430,10 @@ void resolve_xml_special_chars( char *psz_value )
} }
} }
else else
{ { /* Well-known XML entity */
const struct xml_entity_s *ent; const struct xml_entity_s *ent;
ent = bsearch (psz_value1, xml_entities, ent = bsearch (psz_value + 1, xml_entities,
sizeof (xml_entities) / sizeof (*ent), sizeof (xml_entities) / sizeof (*ent),
sizeof (*ent), cmp_entity); sizeof (*ent), cmp_entity);
if (ent != NULL) if (ent != NULL)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment