Commit 67a62cc7 authored by Rémi Denis-Courmont

DVB: resync with TS demux

parent b298ce46
......@@ -2338,107 +2338,109 @@ void en50221_End( access_t * p_access )
* program. */
}
/**
 * Runs EnsureUTF8() on a string in place and hands the same pointer back,
 * so the call can wrap an allocation expression such as strndup().
 *
 * @param p string to fix up; may be NULL (e.g. a failed strndup()), in
 *          which case nothing is touched
 * @return p itself (NULL if p was NULL)
 */
static inline void *FixUTF8( char *p )
{
    /* Callers pass strndup() results straight in, so tolerate allocation
     * failure instead of handing NULL to EnsureUTF8(). */
    if( p != NULL )
        EnsureUTF8( p );
    return p;
}
/* FIXME: same as EITConvertToUTF8 from TS demux */
/**
 * Converts a DVB SI text field to UTF-8.
 *
 * The first byte of the field selects the character table: a byte of 0x20
 * or above means there is no selector and the whole field uses the default
 * table; 0x01-0x0b and 0x11-0x15 are one-byte selectors; 0x10 is a
 * three-byte selector (0x10, 0x00, n) naming ISO 8859-n. Anything else is
 * treated as invalid and the field is processed as UTF-8 from its first
 * byte.
 *
 * @param psz_instring raw field bytes (not necessarily NUL-terminated)
 * @param i_length     number of bytes in psz_instring
 * @return the converted string, or NULL if i_length is 0 or memory could
 *         not be allocated. NOTE(review): the result comes from
 *         FromCharset() or strndup(), so the caller presumably owns it and
 *         must free() it -- confirm against FromCharset().
 */
char *dvbsi_to_utf8( const char *psz_instring, size_t i_length )
{
    /* Inspect the selector bytes as unsigned values. Plain char may be
     * signed, in which case bytes >= 0x80 would compare as negative: a
     * high first byte would miss the ">= 0x20" branch, and a high third
     * byte would slip through the "> 15" range check below. */
    const unsigned char *p_in = (const unsigned char *)psz_instring;
    const char *psz_encoding;
    char psz_encbuf[sizeof( "ISO_8859-123" )];
    size_t offset = 1; /* most selectors are a single byte */

    if( i_length < 1 )
        return NULL;

    if( p_in[0] >= 0x20 )
    {
        /* No selector byte: default character table */
        psz_encoding = "ISO_6937";
        offset = 0;
    }
    else switch( p_in[0] )
    {
        case 0x01:
            psz_encoding = "ISO_8859-5";
            break;
        case 0x02:
            psz_encoding = "ISO_8859-6";
            break;
        case 0x03:
            psz_encoding = "ISO_8859-7";
            break;
        case 0x04:
            psz_encoding = "ISO_8859-8";
            break;
        case 0x05:
            psz_encoding = "ISO_8859-9";
            break;
        case 0x06:
            psz_encoding = "ISO_8859-10";
            break;
        case 0x07:
            psz_encoding = "ISO_8859-11";
            break;
        case 0x08:
            psz_encoding = "ISO_8859-12"; /* possibly reserved? */
            break;
        case 0x09:
            psz_encoding = "ISO_8859-13";
            break;
        case 0x0a:
            psz_encoding = "ISO_8859-14";
            break;
        case 0x0b:
            psz_encoding = "ISO_8859-15";
            break;
        case 0x10:
#warning Is Latin-10 (psz_instring[2] == 16) really illegal?
            if( i_length < 3 || p_in[1] != 0x00 || p_in[2] > 15
             || p_in[2] == 0 )
            {
                /* Malformed three-byte selector: keep every byte and
                 * treat the field as UTF-8 */
                psz_encoding = "UTF-8";
                offset = 0;
            }
            else
            {
                /* p_in[2] is within 1..15 here, so the name always fits;
                 * snprintf() keeps the write bounded regardless. */
                snprintf( psz_encbuf, sizeof( psz_encbuf ), "ISO_8859-%u",
                          (unsigned)p_in[2] );
                psz_encoding = psz_encbuf;
                offset = 3;
            }
            break;
        case 0x11:
#warning Is there a BOM or do we use a fixed endianess?
            psz_encoding = "UTF-16";
            break;
        case 0x12:
            psz_encoding = "KSC5601-1987";
            break;
        case 0x13:
            psz_encoding = "GB2312"; /* GB-2312-1980 */
            break;
        case 0x14:
            psz_encoding = "BIG-5";
            break;
        case 0x15:
            psz_encoding = "UTF-8";
            break;
        default:
            /* invalid */
            psz_encoding = "UTF-8";
            offset = 0;
    }

    /* Skip the selector byte(s); offset never exceeds i_length here */
    psz_instring += offset;
    i_length -= offset;

    char *psz = FromCharset( psz_encoding, psz_instring, i_length );
    if( psz == NULL )
    {   /* Invalid character set (e.g. ISO_8859-12) */
        psz = strndup( psz_instring, i_length );
        if( unlikely(psz == NULL) )
            return NULL;
        EnsureUTF8( psz );
    }

    /* Convert EIT-coded CR/LFs: each "\xc2\x8a" pair is overwritten in
     * place with a space and a newline, so the string length is unchanged
     * and the next search cannot match the same position again. */
    for( char *p = strstr( psz, "\xc2\x8a" ); p != NULL;
         p = strstr( p, "\xc2\x8a" ) )
    {
        p[0] = ' ';
        p[1] = '\n';
    }

    return psz;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment