Commit 7ec1ad38 authored by Rémi Denis-Courmont

input: remove BOM skipping hack

This should not be needed (the BOM is a zero-width character). If any
stream reader chokes on a BOM, that reader needs to be fixed itself.
The subtitle demuxer was fixed in the previous commit.

In the case of transparent conversion from UTF-16 to UTF-8, the BOM
is converted to UTF-8 like any other character. Thus the stream
reader can (correctly) detect a UTF-8 text stream.
parent 559a8d39
...@@ -1487,34 +1487,22 @@ char *stream_ReadLine( stream_t *s ) ...@@ -1487,34 +1487,22 @@ char *stream_ReadLine( stream_t *s )
i_pos = stream_Tell( s ); i_pos = stream_Tell( s );
if( i_pos == 0 && i_data >= 3 ) if( i_pos == 0 && i_data >= 3 )
{ {
int i_bom_size = 0;
const char *psz_encoding = NULL; const char *psz_encoding = NULL;
if( !memcmp( p_data, "\xEF\xBB\xBF", 3 ) ) if( !memcmp( p_data, "\xEF\xBB\xBF", 3 ) )
{ {
psz_encoding = "UTF-8"; psz_encoding = "UTF-8";
i_bom_size = 3;
} }
else if( !memcmp( p_data, "\xFF\xFE", 2 ) ) else if( !memcmp( p_data, "\xFF\xFE", 2 ) )
{ {
psz_encoding = "UTF-16LE"; psz_encoding = "UTF-16LE";
s->p_text->b_little_endian = true; s->p_text->b_little_endian = true;
s->p_text->i_char_width = 2; s->p_text->i_char_width = 2;
i_bom_size = 2;
} }
else if( !memcmp( p_data, "\xFE\xFF", 2 ) ) else if( !memcmp( p_data, "\xFE\xFF", 2 ) )
{ {
psz_encoding = "UTF-16BE"; psz_encoding = "UTF-16BE";
s->p_text->i_char_width = 2; s->p_text->i_char_width = 2;
i_bom_size = 2;
}
/* Seek past the BOM */
if( i_bom_size )
{
stream_Seek( s, i_bom_size );
p_data += i_bom_size;
i_data -= i_bom_size;
} }
/* Open the converter if we need it */ /* Open the converter if we need it */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment