Commit eaffe5c6 authored by Antoine Cellerier

Clean up and speed up resolve_xml_special_chars().

parent d966d7bf
...@@ -237,6 +237,132 @@ char *encode_URI_component( const char *psz_url ) ...@@ -237,6 +237,132 @@ char *encode_URI_component( const char *psz_url )
return strdup( psz_enc ); return strdup( psz_enc );
} }
/**
 * Lookup table mapping named character entities to their UTF-8 text.
 *
 * - psz_entity: entity text as it appears in the input, including the
 *               leading '&' and trailing ';'.
 * - i_length:   strlen( psz_entity ); used as the strncmp() length and as
 *               the number of input bytes consumed on a match.
 * - psz_char:   UTF-8 replacement string.
 *
 * IMPORTANT: entries must stay sorted in ascending strcmp() order of
 * psz_entity (byte order, so upper case sorts before lower case) because
 * resolve_xml_special_chars() searches this table by repeated halving.
 * That search also asserts that the table holds fewer than 128 entries.
 */
static const struct xml_entity_s
{
    const char *psz_entity;
    size_t i_length;
    const char *psz_char;
} p_xml_entities[] = {
    { "&AElig;", 7, "Æ" },
    { "&Aacute;", 8, "Á" },
    { "&Acirc;", 7, "Â" },
    { "&Agrave;", 8, "À" },
    { "&Aring;", 7, "Å" },
    { "&Atilde;", 8, "Ã" },
    { "&Auml;", 6, "Ä" },
    { "&Ccedil;", 8, "Ç" },
    { "&Dagger;", 8, "‡" },
    { "&ETH;", 5, "Ð" },
    { "&Eacute;", 8, "É" },
    { "&Ecirc;", 7, "Ê" },
    { "&Egrave;", 8, "È" },
    { "&Euml;", 6, "Ë" },
    { "&Iacute;", 8, "Í" },
    { "&Icirc;", 7, "Î" },
    { "&Igrave;", 8, "Ì" },
    { "&Iuml;", 6, "Ï" },
    { "&Ntilde;", 8, "Ñ" },
    { "&OElig;", 7, "Œ" },
    { "&Oacute;", 8, "Ó" },
    { "&Ocirc;", 7, "Ô" },
    { "&Ograve;", 8, "Ò" },
    { "&Oslash;", 8, "Ø" },
    { "&Otilde;", 8, "Õ" },
    { "&Ouml;", 6, "Ö" },
    { "&Scaron;", 8, "Š" },
    { "&THORN;", 7, "Þ" },
    { "&Uacute;", 8, "Ú" },
    { "&Ucirc;", 7, "Û" },
    { "&Ugrave;", 8, "Ù" },
    { "&Uuml;", 6, "Ü" },
    { "&Yacute;", 8, "Ý" },
    { "&Yuml;", 6, "Ÿ" },
    { "&aacute;", 8, "á" },
    { "&acirc;", 7, "â" },
    { "&acute;", 7, "´" },
    { "&aelig;", 7, "æ" },
    { "&agrave;", 8, "à" },
    { "&aring;", 7, "å" },
    { "&atilde;", 8, "ã" },
    { "&auml;", 6, "ä" },
    { "&bdquo;", 7, "„" },
    { "&brvbar;", 8, "¦" },
    { "&ccedil;", 8, "ç" },
    { "&cedil;", 7, "¸" },
    { "&cent;", 6, "¢" },
    { "&circ;", 6, "ˆ" },
    { "&copy;", 6, "©" },
    { "&curren;", 8, "¤" },
    { "&dagger;", 8, "†" },
    { "&deg;", 5, "°" },
    { "&divide;", 8, "÷" },
    { "&eacute;", 8, "é" },
    { "&ecirc;", 7, "ê" },
    { "&egrave;", 8, "è" },
    { "&eth;", 5, "ð" },
    { "&euml;", 6, "ë" },
    { "&euro;", 6, "€" },
    { "&frac12;", 8, "½" },
    { "&frac14;", 8, "¼" },
    { "&frac34;", 8, "¾" },
    { "&hellip;", 8, "…" },
    { "&iacute;", 8, "í" },
    { "&icirc;", 7, "î" },
    { "&iexcl;", 7, "¡" },
    { "&igrave;", 8, "ì" },
    { "&iquest;", 8, "¿" },
    { "&iuml;", 6, "ï" },
    { "&laquo;", 7, "«" },
    { "&ldquo;", 7, "“" },
    { "&lsaquo;", 8, "‹" },
    { "&lsquo;", 7, "‘" },
    { "&macr;", 6, "¯" },
    { "&mdash;", 7, "—" },
    { "&micro;", 7, "µ" },
    { "&middot;", 8, "·" },
    { "&ndash;", 7, "–" },
    { "&not;", 5, "¬" },
    { "&ntilde;", 8, "ñ" },
    { "&oacute;", 8, "ó" },
    { "&ocirc;", 7, "ô" },
    { "&oelig;", 7, "œ" },
    { "&ograve;", 8, "ò" },
    { "&ordf;", 6, "ª" },
    { "&ordm;", 6, "º" },
    { "&oslash;", 8, "ø" },
    { "&otilde;", 8, "õ" },
    { "&ouml;", 6, "ö" },
    { "&para;", 6, "¶" },
    { "&permil;", 8, "‰" },
    { "&plusmn;", 8, "±" },
    { "&pound;", 7, "£" },
    { "&raquo;", 7, "»" },
    { "&rdquo;", 7, "”" },
    { "&reg;", 5, "®" },
    { "&rsaquo;", 8, "›" },
    { "&rsquo;", 7, "’" },
    { "&sbquo;", 7, "‚" },
    { "&scaron;", 8, "š" },
    { "&sect;", 6, "§" },
    /* U+00AD SOFT HYPHEN, spelled as bytes because the glyph is invisible */
    { "&shy;", 5, "\xC2\xAD" },
    { "&sup1;", 6, "¹" },
    { "&sup2;", 6, "²" },
    { "&sup3;", 6, "³" },
    { "&szlig;", 7, "ß" },
    { "&thorn;", 7, "þ" },
    { "&tilde;", 7, "˜" },
    { "&times;", 7, "×" },
    { "&trade;", 7, "™" },
    { "&uacute;", 8, "ú" },
    { "&ucirc;", 7, "û" },
    { "&ugrave;", 8, "ù" },
    { "&uml;", 5, "¨" },
    { "&uuml;", 6, "ü" },
    { "&yacute;", 8, "ý" },
    { "&yen;", 5, "¥" },
    { "&yuml;", 6, "ÿ" },
};
/** /**
* Converts "&lt;", "&gt;" and "&amp;" to "<", ">" and "&" * Converts "&lt;", "&gt;" and "&amp;" to "<", ">" and "&"
* \param string to convert * \param string to convert
...@@ -255,18 +381,11 @@ void resolve_xml_special_chars( char *psz_value ) ...@@ -255,18 +381,11 @@ void resolve_xml_special_chars( char *psz_value )
*p_pos = dst; \ *p_pos = dst; \
psz_value += len; \ psz_value += len; \
} }
#define TRY_LONGCHAR( src, len, dst ) \
if( !strncmp( psz_value, src, len ) ) \
{ \
strncpy( p_pos, dst, strlen( dst ) ); \
p_pos += strlen( dst ) - 1; \
psz_value += len; \
}
TRY_CHAR( "&lt;", 4, '<' ) TRY_CHAR( "&lt;", 4, '<' )
else TRY_CHAR( "&gt;", 4, '>' )
else TRY_CHAR( "&amp;", 5, '&' ) else TRY_CHAR( "&amp;", 5, '&' )
else TRY_CHAR( "&quot;", 6, '"' )
else TRY_CHAR( "&apos;", 6, '\'' ) else TRY_CHAR( "&apos;", 6, '\'' )
else TRY_CHAR( "&gt;", 4, '>' )
else TRY_CHAR( "&quot;", 6, '"' )
else if( psz_value[1] == '#' ) else if( psz_value[1] == '#' )
{ {
char *psz_end; char *psz_end;
...@@ -292,128 +411,40 @@ void resolve_xml_special_chars( char *psz_value ) ...@@ -292,128 +411,40 @@ void resolve_xml_special_chars( char *psz_value )
psz_value++; psz_value++;
} }
} }
else TRY_LONGCHAR( "&Agrave;", 8, "À" )
else TRY_LONGCHAR( "&Aacute;", 8, "Á" )
else TRY_LONGCHAR( "&Acirc;", 7, "Â" )
else TRY_LONGCHAR( "&Atilde;", 8, "Ã" )
else TRY_LONGCHAR( "&Auml;", 6, "Ä" )
else TRY_LONGCHAR( "&Aring;", 7, "Å" )
else TRY_LONGCHAR( "&AElig;", 7, "Æ" )
else TRY_LONGCHAR( "&Ccedil;", 8, "Ç" )
else TRY_LONGCHAR( "&Egrave;", 8, "È" )
else TRY_LONGCHAR( "&Eacute;", 8, "É" )
else TRY_LONGCHAR( "&Ecirc;", 7, "Ê" )
else TRY_LONGCHAR( "&Euml;", 6, "Ë" )
else TRY_LONGCHAR( "&Igrave;", 8, "Ì" )
else TRY_LONGCHAR( "&Iacute;", 8, "Í" )
else TRY_LONGCHAR( "&Icirc;", 7, "Î" )
else TRY_LONGCHAR( "&Iuml;", 6, "Ï" )
else TRY_LONGCHAR( "&ETH;", 5, "Ð" )
else TRY_LONGCHAR( "&Ntilde;", 8, "Ñ" )
else TRY_LONGCHAR( "&Ograve;", 8, "Ò" )
else TRY_LONGCHAR( "&Oacute;", 8, "Ó" )
else TRY_LONGCHAR( "&Ocirc;", 7, "Ô" )
else TRY_LONGCHAR( "&Otilde;", 8, "Õ" )
else TRY_LONGCHAR( "&Ouml;", 6, "Ö" )
else TRY_LONGCHAR( "&Oslash;", 8, "Ø" )
else TRY_LONGCHAR( "&Ugrave;", 8, "Ù" )
else TRY_LONGCHAR( "&Uacute;", 8, "Ú" )
else TRY_LONGCHAR( "&Ucirc;", 7, "Û" )
else TRY_LONGCHAR( "&Uuml;", 6, "Ü" )
else TRY_LONGCHAR( "&Yacute;", 8, "Ý" )
else TRY_LONGCHAR( "&THORN;", 7, "Þ" )
else TRY_LONGCHAR( "&szlig;", 7, "ß" )
else TRY_LONGCHAR( "&agrave;", 8, "à" )
else TRY_LONGCHAR( "&aacute;", 8, "á" )
else TRY_LONGCHAR( "&acirc;", 7, "â" )
else TRY_LONGCHAR( "&atilde;", 8, "ã" )
else TRY_LONGCHAR( "&auml;", 6, "ä" )
else TRY_LONGCHAR( "&aring;", 7, "å" )
else TRY_LONGCHAR( "&aelig;", 7, "æ" )
else TRY_LONGCHAR( "&ccedil;", 8, "ç" )
else TRY_LONGCHAR( "&egrave;", 8, "è" )
else TRY_LONGCHAR( "&eacute;", 8, "é" )
else TRY_LONGCHAR( "&ecirc;", 7, "ê" )
else TRY_LONGCHAR( "&euml;", 6, "ë" )
else TRY_LONGCHAR( "&igrave;", 8, "ì" )
else TRY_LONGCHAR( "&iacute;", 8, "í" )
else TRY_LONGCHAR( "&icirc;", 7, "î" )
else TRY_LONGCHAR( "&iuml;", 6, "ï" )
else TRY_LONGCHAR( "&eth;", 5, "ð" )
else TRY_LONGCHAR( "&ntilde;", 8, "ñ" )
else TRY_LONGCHAR( "&ograve;", 8, "ò" )
else TRY_LONGCHAR( "&oacute;", 8, "ó" )
else TRY_LONGCHAR( "&ocirc;", 7, "ô" )
else TRY_LONGCHAR( "&otilde;", 8, "õ" )
else TRY_LONGCHAR( "&ouml;", 6, "ö" )
else TRY_LONGCHAR( "&oslash;", 8, "ø" )
else TRY_LONGCHAR( "&ugrave;", 8, "ù" )
else TRY_LONGCHAR( "&uacute;", 8, "ú" )
else TRY_LONGCHAR( "&ucirc;", 7, "û" )
else TRY_LONGCHAR( "&uuml;", 6, "ü" )
else TRY_LONGCHAR( "&yacute;", 8, "ý" )
else TRY_LONGCHAR( "&thorn;", 7, "þ" )
else TRY_LONGCHAR( "&yuml;", 6, "ÿ" )
else TRY_LONGCHAR( "&iexcl;", 7, "¡" )
else TRY_LONGCHAR( "&curren;", 8, "¤" )
else TRY_LONGCHAR( "&cent;", 6, "¢" )
else TRY_LONGCHAR( "&pound;", 7, "£" )
else TRY_LONGCHAR( "&yen;", 5, "¥" )
else TRY_LONGCHAR( "&brvbar;", 8, "¦" )
else TRY_LONGCHAR( "&sect;", 6, "§" )
else TRY_LONGCHAR( "&uml;", 5, "¨" )
else TRY_LONGCHAR( "&copy;", 6, "©" )
else TRY_LONGCHAR( "&ordf;", 6, "ª" )
else TRY_LONGCHAR( "&laquo;", 7, "«" )
else TRY_LONGCHAR( "&not;", 5, "¬" )
else TRY_LONGCHAR( "&shy;", 5, "­" )
else TRY_LONGCHAR( "&reg;", 5, "®" )
else TRY_LONGCHAR( "&trade;", 7, "™" )
else TRY_LONGCHAR( "&macr;", 6, "¯" )
else TRY_LONGCHAR( "&deg;", 5, "°" )
else TRY_LONGCHAR( "&plusmn;", 8, "±" )
else TRY_LONGCHAR( "&sup2;", 6, "²" )
else TRY_LONGCHAR( "&sup3;", 6, "³" )
else TRY_LONGCHAR( "&acute;", 7, "´" )
else TRY_LONGCHAR( "&micro;", 7, "µ" )
else TRY_LONGCHAR( "&para;", 6, "¶" )
else TRY_LONGCHAR( "&middot;", 8, "·" )
else TRY_LONGCHAR( "&cedil;", 7, "¸" )
else TRY_LONGCHAR( "&sup1;", 6, "¹" )
else TRY_LONGCHAR( "&ordm;", 6, "º" )
else TRY_LONGCHAR( "&raquo;", 7, "»" )
else TRY_LONGCHAR( "&frac14;", 8, "¼" )
else TRY_LONGCHAR( "&frac12;", 8, "½" )
else TRY_LONGCHAR( "&frac34;", 8, "¾" )
else TRY_LONGCHAR( "&iquest;", 8, "¿" )
else TRY_LONGCHAR( "&times;", 7, "×" )
else TRY_LONGCHAR( "&divide;", 8, "÷" )
else TRY_LONGCHAR( "&OElig;", 7, "Œ" )
else TRY_LONGCHAR( "&oelig;", 7, "œ" )
else TRY_LONGCHAR( "&Scaron;", 8, "Š" )
else TRY_LONGCHAR( "&scaron;", 8, "š" )
else TRY_LONGCHAR( "&Yuml;", 6, "Ÿ" )
else TRY_LONGCHAR( "&circ;", 6, "ˆ" )
else TRY_LONGCHAR( "&tilde;", 7, "˜" )
else TRY_LONGCHAR( "&ndash;", 7, "–" )
else TRY_LONGCHAR( "&mdash;", 7, "—" )
else TRY_LONGCHAR( "&lsquo;", 7, "‘" )
else TRY_LONGCHAR( "&rsquo;", 7, "’" )
else TRY_LONGCHAR( "&sbquo;", 7, "‚" )
else TRY_LONGCHAR( "&ldquo;", 7, "“" )
else TRY_LONGCHAR( "&rdquo;", 7, "”" )
else TRY_LONGCHAR( "&bdquo;", 7, "„" )
else TRY_LONGCHAR( "&dagger;", 8, "†" )
else TRY_LONGCHAR( "&Dagger;", 8, "‡" )
else TRY_LONGCHAR( "&hellip;", 8, "…" )
else TRY_LONGCHAR( "&permil;", 8, "‰" )
else TRY_LONGCHAR( "&lsaquo;", 8, "‹" )
else TRY_LONGCHAR( "&rsaquo;", 8, "›" )
else TRY_LONGCHAR( "&euro;", 6, "€" )
else else
{ {
*p_pos = *psz_value; const size_t i_entities = sizeof( p_xml_entities ) /
psz_value++; sizeof( p_xml_entities[0] );
assert( i_entities < 128 );
size_t step = 128>>1;
size_t i = step-1;
int cmp = -1;
while( step )
{
step >>= 1;
if( i >= i_entities )
cmp = -1;
else
cmp = strncmp( psz_value, p_xml_entities[i].psz_entity,
p_xml_entities[i].i_length );
if( cmp == 0 )
{
strncpy( p_pos, p_xml_entities[i].psz_char,
p_xml_entities[i].i_length );
p_pos += strlen( p_xml_entities[i].psz_char ) - 1;
psz_value += p_xml_entities[i].i_length;
break;
}
else if( cmp < 0 )
i -= step;
else
i += step;
}
if( cmp != 0 )
{
*p_pos = *psz_value;
psz_value++;
}
} }
} }
else else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment