Commit 91fcb101 authored by Rémi Denis-Courmont

* reimplement EnsureUTF8 without iconv

 * move Unicode stuff to a separate file
parent aed69fef
......@@ -384,6 +384,7 @@ SOURCES_libvlc_common = \
src/misc/block.c \
src/misc/modules.c \
src/misc/threads.c \
src/misc/unicode.c \
src/misc/cpu.c \
src/misc/configuration.c \
src/misc/image.c \
......
......@@ -1883,121 +1883,6 @@ static void SetLanguage ( char const *psz_lang )
#endif
}
/*****************************************************************************
* FromLocale: converts a locale string to UTF-8
*****************************************************************************/
/* FIXME FIXME: it really has to be made quicker */
char *FromLocale( const char *locale )
{
char *psz_charset;
if( locale == NULL )
return NULL;
if( !vlc_current_charset( &psz_charset ) )
{
char *iptr = (char *)locale, *output, *optr;
size_t inb, outb;
/* cannot fail (unless vlc_current_charset sucks) */
vlc_iconv_t hd = vlc_iconv_open( "UTF-8", psz_charset );
free( psz_charset );
/*
* We are not allowed to modify the locale pointer, even if we cast it to
* non-const.
*/
inb = strlen( locale );
outb = inb * 6 + 1;
/* FIXME: I'm not sure about the value for the multiplication
* (for western people, multiplication by 3 (Latin9) is sufficient) */
optr = output = calloc( outb , 1);
while( vlc_iconv( hd, &iptr, &inb, &optr, &outb ) == (size_t)-1 )
*iptr = '?'; /* should not happen, and yes, it sucks */
vlc_iconv_close( hd );
return realloc( output, strlen( output ) + 1 );
}
free( psz_charset );
return (char *)locale;
}
/*****************************************************************************
* ToLocale: converts an UTF-8 string to locale
*****************************************************************************/
/* FIXME FIXME: it really has to be made quicker */
char *ToLocale( const char *utf8 )
{
char *psz_charset;
if( utf8 == NULL )
return NULL;
if( !vlc_current_charset( &psz_charset ) )
{
char *iptr = (char *)utf8, *output, *optr;
size_t inb, outb;
/* cannot fail (unless vlc_current_charset sucks) */
vlc_iconv_t hd = vlc_iconv_open( psz_charset, "UTF-8" );
free( psz_charset );
/*
* We are not allowed to modify the locale pointer, even if we cast it to
* non-const.
*/
inb = strlen( utf8 );
/* FIXME: I'm not sure about the value for the multiplication
* (for western people, multiplication is not needed) */
outb = inb * 2 + 1;
optr = output = calloc( outb, 1 );
while( vlc_iconv( hd, &iptr, &inb, &optr, &outb ) == (size_t)-1 )
*iptr = '?'; /* should not happen, and yes, it sucks */
vlc_iconv_close( hd );
return realloc( output, strlen( output ) + 1 );
}
free( psz_charset );
return (char *)utf8;
}
/*
 * Releases a string returned by FromLocale() or ToLocale().
 *
 * str is freed only when vlc_current_charset() returns zero, which is the
 * condition under which those functions hand out a newly allocated buffer
 * rather than their own argument. A NULL str is ignored.
 */
void LocaleFree( const char *str )
{
    char *psz_name;

    if( str == NULL )
        return;

    /* FIXME: this deserves a prize for the most inefficient piece of code:
     * the charset is looked up again on every single call */
    if( vlc_current_charset( &psz_name ) == 0 )
        free( (char *)str );

    free( psz_name );
}
/* FIXME: don't use iconv at all */
/*
 * Runs the string through a UTF-8 to UTF-8 converter, in place, and
 * overwrites with '?' every byte the converter stops on.
 *
 * Returns str itself, or NULL when str is NULL.
 */
char *EnsureUTF8( char *str )
{
    vlc_iconv_t cd;
    char *src, *dst;
    size_t src_left, dst_left;

    if( str == NULL )
        return NULL;

    /* Source and destination are the same buffer */
    src = str;
    dst = str;
    src_left = strlen( str );
    dst_left = src_left;

    cd = vlc_iconv_open( "UTF-8", "UTF-8" );
    for( ;; )
    {
        if( vlc_iconv( cd, &src, &src_left, &dst, &dst_left ) != (size_t)-1 )
            break;
        /* Patch the byte the converter stopped on and try again */
        *src = '?';
    }
    vlc_iconv_close( cd );

    return str;
}
/*****************************************************************************
* GetFilenames: parse command line options which are not flags
*****************************************************************************
......
/*****************************************************************************
* unicode.c: UTF8 <-> locale functions
*****************************************************************************
* Copyright (C) 2005 the VideoLAN team
* $Id$
*
* Authors: Rémi Denis-Courmont <rem # videolan.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
/*****************************************************************************
* Preamble
*****************************************************************************/
#include <vlc/vlc.h>
#include "charset.h"
/*****************************************************************************
* FromLocale: converts a locale string to UTF-8
*****************************************************************************/
/* FIXME FIXME: it really has to be made quicker */
char *FromLocale( const char *locale )
{
char *psz_charset;
if( locale == NULL )
return NULL;
if( !vlc_current_charset( &psz_charset ) )
{
char *iptr = (char *)locale, *output, *optr;
size_t inb, outb;
/* cannot fail (unless vlc_current_charset sucks) */
vlc_iconv_t hd = vlc_iconv_open( "UTF-8", psz_charset );
free( psz_charset );
/*
* We are not allowed to modify the locale pointer, even if we cast it to
* non-const.
*/
inb = strlen( locale );
outb = inb * 6 + 1;
/* FIXME: I'm not sure about the value for the multiplication
* (for western people, multiplication by 3 (Latin9) is sufficient) */
optr = output = calloc( outb , 1);
while( vlc_iconv( hd, &iptr, &inb, &optr, &outb ) == (size_t)-1 )
*iptr = '?'; /* should not happen, and yes, it sucks */
vlc_iconv_close( hd );
return realloc( output, strlen( output ) + 1 );
}
free( psz_charset );
return (char *)locale;
}
/*****************************************************************************
* ToLocale: converts an UTF-8 string to locale
*****************************************************************************/
/* FIXME FIXME: it really has to be made quicker */
char *ToLocale( const char *utf8 )
{
char *psz_charset;
if( utf8 == NULL )
return NULL;
if( !vlc_current_charset( &psz_charset ) )
{
char *iptr = (char *)utf8, *output, *optr;
size_t inb, outb;
/* cannot fail (unless vlc_current_charset sucks) */
vlc_iconv_t hd = vlc_iconv_open( psz_charset, "UTF-8" );
free( psz_charset );
/*
* We are not allowed to modify the locale pointer, even if we cast it to
* non-const.
*/
inb = strlen( utf8 );
/* FIXME: I'm not sure about the value for the multiplication
* (for western people, multiplication is not needed) */
outb = inb * 2 + 1;
optr = output = calloc( outb, 1 );
while( vlc_iconv( hd, &iptr, &inb, &optr, &outb ) == (size_t)-1 )
*iptr = '?'; /* should not happen, and yes, it sucks */
vlc_iconv_close( hd );
return realloc( output, strlen( output ) + 1 );
}
free( psz_charset );
return (char *)utf8;
}
/*
 * Releases a string returned by FromLocale() or ToLocale().
 *
 * str is freed only when vlc_current_charset() returns zero, which is the
 * condition under which those functions hand out a newly allocated buffer
 * rather than their own argument. A NULL str is ignored.
 */
void LocaleFree( const char *str )
{
    char *psz_name;

    if( str == NULL )
        return;

    /* FIXME: this deserves a prize for the most inefficient piece of code:
     * the charset is looked up again on every single call */
    if( vlc_current_charset( &psz_name ) == 0 )
        free( (char *)str );

    free( psz_name );
}
/*****************************************************************************
 * EnsureUTF8: replaces invalid/overlong UTF-8 sequences with question marks
 *****************************************************************************
 * The string is checked and patched in place; its length never changes.
 * Every byte that does not start a well-formed sequence is overwritten with
 * '?' and checking resumes at the following byte. Control characters other
 * than TAB, LF and CR are replaced as well.
 *
 * Returns str itself, or NULL if str is NULL.
 *
 * Not a goal: converting Latin-1 to UTF-8 on the fly.
 * That is not possible in place, given that UTF-8 needs more space.
 *****************************************************************************/
#define isutf8cont( c ) (((c) >= 0x80) && ((c) <= 0xBF))

/*
 * Returns the length in bytes (1 to 4) of the well-formed sequence starting
 * at p, or 0 if p[0] does not start one. Bytes are examined strictly left to
 * right and checking stops at the first bad one, so the terminating NUL is
 * never stepped over.
 */
static unsigned utf8_sequence_length( const unsigned char *p )
{
    const unsigned char lead = p[0];
    /* Allowed range for the second byte; narrowed for some lead bytes */
    unsigned char lo = 0x80, hi = 0xBF;
    unsigned len, i;

    /* US-ASCII, 1 byte */
    if( ( ( lead >= 0x20 ) && ( lead <= 0x7F ) )
     || ( lead == 0x09 ) || ( lead == 0x0A ) || ( lead == 0x0D ) )
        return 1;

    if( ( lead >= 0xC2 ) && ( lead <= 0xDF ) )
        len = 2; /* 0xC0 and 0xC1 could only encode overlong forms */
    else
    if( ( lead >= 0xE0 ) && ( lead <= 0xEF ) )
    {
        len = 3;
        if( lead == 0xE0 )
            lo = 0xA0; /* anything lower would be overlong */
        else
        if( lead == 0xED )
            hi = 0x9F; /* anything higher would be a UTF-16 surrogate */
    }
    else
    if( ( lead >= 0xF0 ) && ( lead <= 0xF4 ) )
    {
        len = 4;
        if( lead == 0xF0 )
            lo = 0x90; /* anything lower would be overlong */
        else
        if( lead == 0xF4 )
            hi = 0x8F; /* anything higher would exceed U+10FFFF */
    }
    else
        return 0; /* stray continuation byte, control or invalid lead */

    if( ( p[1] < lo ) || ( p[1] > hi ) )
        return 0;

    for( i = 2; i < len; i++ )
        if( !isutf8cont( p[i] ) )
            return 0;

    return len;
}

char *EnsureUTF8( char *str )
{
    unsigned char *ptr;

    if( str == NULL )
        return NULL;

    ptr = (unsigned char *)str;
    while( *ptr != '\0' )
    {
        unsigned len = utf8_sequence_length( ptr );

        if( len > 0 )
            ptr += len; /* OK */
        else
            *ptr++ = '?'; /* invalid: patch this byte only */
    }
    return str;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment