Commit de3f0650 authored by Rémi Denis-Courmont

include:

 * export GetFallbackEncoding()
subsdec:
 * autodetect UTF-8 encoding,
 * use a better non-Unicode default encoding, derived from the locale language,
   instead of CP1252 when the local encoding is UTF-8; use the local encoding
   in all other cases (should improve subtitle decoding on POSIX a lot)
 * some memory fixes
 * some cosmetic fixes
parent c1591683
...@@ -62,6 +62,8 @@ static inline char *FromWide( const wchar_t *in ) ...@@ -62,6 +62,8 @@ static inline char *FromWide( const wchar_t *in )
VLC_EXPORT( char *, __vlc_fix_readdir_charset, ( vlc_object_t *, const char * ) ); VLC_EXPORT( char *, __vlc_fix_readdir_charset, ( vlc_object_t *, const char * ) );
#define vlc_fix_readdir_charset(a,b) __vlc_fix_readdir_charset(VLC_OBJECT(a),b) #define vlc_fix_readdir_charset(a,b) __vlc_fix_readdir_charset(VLC_OBJECT(a),b)
VLC_EXPORT( const char *, GetFallbackEncoding, ( void ) );
extern double i18n_strtod( const char *, char ** ); extern double i18n_strtod( const char *, char ** );
extern double i18n_atof( const char * ); extern double i18n_atof( const char * );
VLC_EXPORT( double, us_strtod, ( const char *, char ** ) ); VLC_EXPORT( double, us_strtod, ( const char *, char ** ) );
......
...@@ -486,6 +486,7 @@ struct module_symbols_t ...@@ -486,6 +486,7 @@ struct module_symbols_t
void (*resolve_xml_special_chars_inner) (char *psz_value); void (*resolve_xml_special_chars_inner) (char *psz_value);
char * (*FromUTF16_inner) (const uint16_t *); char * (*FromUTF16_inner) (const uint16_t *);
const char * (*IsUTF8_inner) (const char *); const char * (*IsUTF8_inner) (const char *);
const char * (*GetFallbackEncoding_inner) (void);
}; };
# if defined (__PLUGIN__) # if defined (__PLUGIN__)
# define aout_FiltersCreatePipeline (p_symbols)->aout_FiltersCreatePipeline_inner # define aout_FiltersCreatePipeline (p_symbols)->aout_FiltersCreatePipeline_inner
...@@ -952,6 +953,7 @@ struct module_symbols_t ...@@ -952,6 +953,7 @@ struct module_symbols_t
# define resolve_xml_special_chars (p_symbols)->resolve_xml_special_chars_inner # define resolve_xml_special_chars (p_symbols)->resolve_xml_special_chars_inner
# define FromUTF16 (p_symbols)->FromUTF16_inner # define FromUTF16 (p_symbols)->FromUTF16_inner
# define IsUTF8 (p_symbols)->IsUTF8_inner # define IsUTF8 (p_symbols)->IsUTF8_inner
# define GetFallbackEncoding (p_symbols)->GetFallbackEncoding_inner
# elif defined (HAVE_DYNAMIC_PLUGINS) && !defined (__BUILTIN__) # elif defined (HAVE_DYNAMIC_PLUGINS) && !defined (__BUILTIN__)
/****************************************************************** /******************************************************************
* STORE_SYMBOLS: store VLC APIs into p_symbols for plugin access. * STORE_SYMBOLS: store VLC APIs into p_symbols for plugin access.
...@@ -1421,6 +1423,7 @@ struct module_symbols_t ...@@ -1421,6 +1423,7 @@ struct module_symbols_t
((p_symbols)->resolve_xml_special_chars_inner) = resolve_xml_special_chars; \ ((p_symbols)->resolve_xml_special_chars_inner) = resolve_xml_special_chars; \
((p_symbols)->FromUTF16_inner) = FromUTF16; \ ((p_symbols)->FromUTF16_inner) = FromUTF16; \
((p_symbols)->IsUTF8_inner) = IsUTF8; \ ((p_symbols)->IsUTF8_inner) = IsUTF8; \
((p_symbols)->GetFallbackEncoding_inner) = GetFallbackEncoding; \
(p_symbols)->net_ConvertIPv4_deprecated = NULL; \ (p_symbols)->net_ConvertIPv4_deprecated = NULL; \
(p_symbols)->__stats_CounterGet_deprecated = NULL; \ (p_symbols)->__stats_CounterGet_deprecated = NULL; \
(p_symbols)->__stats_TimerDumpAll_deprecated = NULL; \ (p_symbols)->__stats_TimerDumpAll_deprecated = NULL; \
......
/***************************************************************************** /*****************************************************************************
* subsdec.c : text subtitles decoder * subsdec.c : text subtitles decoder
***************************************************************************** *****************************************************************************
* Copyright (C) 2000-2001 the VideoLAN team * Copyright (C) 2000-2006 the VideoLAN team
* $Id$ * $Id$
* *
* Authors: Gildas Bazin <gbazin@videolan.org> * Authors: Gildas Bazin <gbazin@videolan.org>
...@@ -53,6 +53,7 @@ struct decoder_sys_t ...@@ -53,6 +53,7 @@ struct decoder_sys_t
int i_original_width; int i_original_width;
int i_align; /* Subtitles alignment on the vout */ int i_align; /* Subtitles alignment on the vout */
vlc_iconv_t iconv_handle; /* handle to iconv instance */ vlc_iconv_t iconv_handle; /* handle to iconv instance */
vlc_bool_t b_autodetect_utf8;
ssa_style_t **pp_ssa_styles; ssa_style_t **pp_ssa_styles;
int i_ssa_styles; int i_ssa_styles;
...@@ -106,9 +107,12 @@ static char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")}; ...@@ -106,9 +107,12 @@ static char *ppsz_justification_text[] = {N_("Center"),N_("Left"),N_("Right")};
#define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles") #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
#define ALIGN_TEXT N_("Subtitles justification") #define ALIGN_TEXT N_("Subtitles justification")
#define ALIGN_LONGTEXT N_("Set the justification of subtitles") #define ALIGN_LONGTEXT N_("Set the justification of subtitles")
#define AUTODETECT_UTF8_TEXT N_("UTF-8 subtitles autodetection")
#define AUTODETECT_UTF8_LONGTEXT N_("This enables automatic detection of " \
"UTF-8 encoding within subtitles files.")
#define FORMAT_TEXT N_("Formatted Subtitles") #define FORMAT_TEXT N_("Formatted Subtitles")
#define FORMAT_LONGTEXT N_("Some subtitle formats allow for text formatting.\ #define FORMAT_LONGTEXT N_("Some subtitle formats allow for text formatting. " \
VLC partly implements this, but you can choose to disable all formatting.") "VLC partly implements this, but you can choose to disable all formatting.")
vlc_module_begin(); vlc_module_begin();
...@@ -125,6 +129,8 @@ vlc_module_begin(); ...@@ -125,6 +129,8 @@ vlc_module_begin();
add_string( "subsdec-encoding", DEFAULT_NAME, NULL, add_string( "subsdec-encoding", DEFAULT_NAME, NULL,
ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE ); ENCODING_TEXT, ENCODING_LONGTEXT, VLC_FALSE );
change_string_list( ppsz_encodings, 0, 0 ); change_string_list( ppsz_encodings, 0, 0 );
add_bool( "subsdec-autodetect-utf8", VLC_TRUE, NULL,
AUTODETECT_UTF8_TEXT, AUTODETECT_UTF8_LONGTEXT, VLC_FALSE );
add_bool( "subsdec-formatted", VLC_TRUE, NULL, FORMAT_TEXT, FORMAT_LONGTEXT, add_bool( "subsdec-formatted", VLC_TRUE, NULL, FORMAT_TEXT, FORMAT_LONGTEXT,
VLC_FALSE ); VLC_FALSE );
vlc_module_end(); vlc_module_end();
...@@ -160,6 +166,7 @@ static int OpenDecoder( vlc_object_t *p_this ) ...@@ -160,6 +166,7 @@ static int OpenDecoder( vlc_object_t *p_this )
/* init of p_sys */ /* init of p_sys */
p_sys->i_align = 0; p_sys->i_align = 0;
p_sys->iconv_handle = (vlc_iconv_t)-1; p_sys->iconv_handle = (vlc_iconv_t)-1;
p_sys->b_autodetect_utf8 = VLC_FALSE;
p_sys->b_ass = VLC_FALSE; p_sys->b_ass = VLC_FALSE;
p_sys->i_original_height = -1; p_sys->i_original_height = -1;
p_sys->i_original_width = -1; p_sys->i_original_width = -1;
...@@ -180,29 +187,13 @@ static int OpenDecoder( vlc_object_t *p_this ) ...@@ -180,29 +187,13 @@ static int OpenDecoder( vlc_object_t *p_this )
var_Get( p_dec, "subsdec-encoding", &val ); var_Get( p_dec, "subsdec-encoding", &val );
if( !strcmp( val.psz_string, DEFAULT_NAME ) ) if( !strcmp( val.psz_string, DEFAULT_NAME ) )
{ {
char *psz_charset; const char *psz_charset = GetFallbackEncoding();
if( vlc_current_charset( &psz_charset ) ) p_sys->b_autodetect_utf8 = var_CreateGetBool( p_dec,
{ "subsdec-autodetect-utf8" );
/*
* Most subtitles are not in UTF-8.
* FIXME: This is western-centric. We should use a fallback
* charset depending on the locale language instead.
*/
if( psz_charset != NULL)
free( psz_charset );
psz_charset = strdup( "CP1252" );
}
if( psz_charset == NULL )
{
free( p_sys );
return VLC_ENOMEM;
}
p_sys->iconv_handle = vlc_iconv_open( "UTF-8", psz_charset ); p_sys->iconv_handle = vlc_iconv_open( "UTF-8", psz_charset );
msg_Dbg( p_dec, "using default character encoding: %s", psz_charset ); msg_Dbg( p_dec, "using default character encoding: %s", psz_charset );
free( psz_charset );
} }
else if( !strcmp( val.psz_string, "UTF-8" ) ) else if( !strcmp( val.psz_string, "UTF-8" ) )
{ {
...@@ -309,35 +300,51 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block ) ...@@ -309,35 +300,51 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
/* Should be resilient against bad subtitles */ /* Should be resilient against bad subtitles */
psz_subtitle = strndup( (const char *)p_block->p_buffer, psz_subtitle = strndup( (const char *)p_block->p_buffer,
p_block->i_buffer ); p_block->i_buffer );
if( psz_subtitle == NULL )
return NULL;
if( p_sys->iconv_handle != (vlc_iconv_t)-1 ) if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
EnsureUTF8( psz_subtitle );
else
{ {
char *psz_new_subtitle;
char *psz_convert_buffer_out;
char *psz_convert_buffer_in;
size_t ret, inbytes_left, outbytes_left;
psz_new_subtitle = malloc( 6 * strlen( psz_subtitle ) );
psz_convert_buffer_out = psz_new_subtitle;
psz_convert_buffer_in = psz_subtitle;
inbytes_left = strlen( psz_subtitle );
outbytes_left = 6 * inbytes_left;
ret = vlc_iconv( p_sys->iconv_handle, &psz_convert_buffer_in,
&inbytes_left, &psz_convert_buffer_out,
&outbytes_left );
*psz_convert_buffer_out = '\0';
if( psz_subtitle ) free( psz_subtitle ); if( p_sys->b_autodetect_utf8 )
psz_subtitle = NULL; {
if( IsUTF8( psz_subtitle ) == NULL )
{
msg_Dbg( p_dec, "Invalid UTF-8 sequence: "
"disabling UTF-8 subtitles autodetection" );
p_sys->b_autodetect_utf8 = VLC_FALSE;
}
}
if( inbytes_left ) if( !p_sys->b_autodetect_utf8 )
{ {
size_t inbytes_left = strlen( psz_subtitle );
size_t outbytes_left = 6 * inbytes_left;
char *psz_new_subtitle = malloc( outbytes_left + 1 );
char *psz_convert_buffer_out = psz_new_subtitle;
const char *psz_convert_buffer_in = psz_subtitle;
size_t ret = vlc_iconv( p_sys->iconv_handle,
&psz_convert_buffer_in, &inbytes_left,
&psz_convert_buffer_out, &outbytes_left );
*psz_convert_buffer_out++ = '\0';
free( psz_subtitle );
if( ( ret == (size_t)(-1) ) || inbytes_left )
{
free( psz_new_subtitle );
msg_Err( p_dec, _("Failed to convert subtitle encoding.\n" msg_Err( p_dec, _("Failed to convert subtitle encoding.\n"
"Try manually setting a character-encoding " "Try manually setting a character-encoding "
"before you open the file.") ); "before you open the file.") );
return NULL; return NULL;
} }
psz_subtitle = psz_new_subtitle;
psz_subtitle = realloc( psz_new_subtitle,
psz_convert_buffer_out - psz_new_subtitle );
}
} }
/* Create the subpicture unit */ /* Create the subpicture unit */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment