Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc
Commits
fdfc6ad1
Commit
fdfc6ad1
authored
Sep 08, 2009
by
Marian Ďurkovič
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Perform charset detection and conversion to
UTF-8
also for SDT fields.
parent
dd537f5c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
136 additions
and
134 deletions
+136
-134
modules/demux/ts.c
modules/demux/ts.c
+136
-134
No files found.
modules/demux/ts.c
View file @
fdfc6ad1
...
...
@@ -2571,6 +2571,134 @@ static void ValidateDVBMeta( demux_t *p_demux, int i_pid )
#ifdef TS_USE_DVB_SI
/* FIXME same as dvbsi_to_utf8 from dvb access */
/*
 * Convert a DVB SI text field to a newly allocated, NUL-terminated UTF-8
 * string.
 *
 * The first byte of psz_instring selects the source character set:
 *  - >= 0x20       : no selector byte, the whole field is read as ISO_8859-1;
 *  - 0x01 .. 0x0b  : ISO_8859-5 .. ISO_8859-15, text starts at byte 1;
 *  - 0x10          : three-byte selector 0x10 0x00 N with N in 1..15 selects
 *                    ISO_8859-N and the text starts at byte 3; a malformed
 *                    selector makes the whole field be read as UTF-8;
 *  - 0x11 .. 0x15  : UTF-16, KSC5601-1987, GB2312, BIG-5, UTF-8, text starts
 *                    at byte 1;
 *  - anything else : invalid selector, the whole field is read as UTF-8.
 *
 * If the converter for the selected character set cannot be opened, the raw
 * bytes are copied and passed through EnsureUTF8() instead.
 *
 * psz_instring: raw field bytes (not necessarily NUL-terminated).
 * i_length:     number of bytes available in psz_instring.
 *
 * Returns a malloc()ed string that the caller must free(), or NULL when
 * i_length is 0 or the allocation fails.
 */
static char *EITConvertToUTF8( const unsigned char *psz_instring,
                               size_t i_length )
{
    const char *psz_encoding;
    char *psz_outstring;
    char psz_encbuf[sizeof( "ISO_8859-123" )];
    size_t i_in, i_out, offset = 1;
    vlc_iconv_t iconv_handle;

    if( i_length < 1 )
        return NULL;

    if( psz_instring[0] >= 0x20 )
    {
        /* According to the specification, this should be ISO6937,
         * but it seems Latin-1 is used instead. */
        psz_encoding = "ISO_8859-1";
        offset = 0;
    }
    else
    {
        switch( psz_instring[0] )
        {
        case 0x01:
            psz_encoding = "ISO_8859-5";
            break;
        case 0x02:
            psz_encoding = "ISO_8859-6";
            break;
        case 0x03:
            psz_encoding = "ISO_8859-7";
            break;
        case 0x04:
            psz_encoding = "ISO_8859-8";
            break;
        case 0x05:
            psz_encoding = "ISO_8859-9";
            break;
        case 0x06:
            psz_encoding = "ISO_8859-10";
            break;
        case 0x07:
            psz_encoding = "ISO_8859-11";
            break;
        case 0x08:
            psz_encoding = "ISO_8859-12";
            break;
        case 0x09:
            psz_encoding = "ISO_8859-13";
            break;
        case 0x0a:
            psz_encoding = "ISO_8859-14";
            break;
        case 0x0b:
            psz_encoding = "ISO_8859-15";
            break;
        case 0x10:
#warning Is Latin-10 (psz_instring[2] == 16) really illegal?
            if( i_length < 3 || psz_instring[1] != 0x00
             || psz_instring[2] > 15 || psz_instring[2] == 0 )
            {
                /* Malformed selector: keep every byte and treat as UTF-8. */
                psz_encoding = "UTF-8";
                offset = 0;
            }
            else
            {
                /* Bounded write; the value is 1..15 so it always fits. */
                snprintf( psz_encbuf, sizeof( psz_encbuf ), "ISO_8859-%u",
                          (unsigned)psz_instring[2] );
                psz_encoding = psz_encbuf;
                offset = 3;
            }
            break;
        case 0x11:
#warning Is there a BOM or do we use a fixed endianess?
            psz_encoding = "UTF-16";
            break;
        case 0x12:
            psz_encoding = "KSC5601-1987";
            break;
        case 0x13:
            psz_encoding = "GB2312"; /* GB-2312-1980 */
            break;
        case 0x14:
            psz_encoding = "BIG-5";
            break;
        case 0x15:
            psz_encoding = "UTF-8";
            break;
        default:
            /* invalid */
            psz_encoding = "UTF-8";
            offset = 0;
            break;
        }
    }

    /* offset never exceeds i_length here: it is 0, 1 (i_length >= 1) or
     * 3 (only when i_length >= 3). */
    i_in = i_length - offset;
    i_out = i_in * 6 + 1;

    psz_outstring = malloc( i_out );
    if( !psz_outstring )
        return NULL;

    /* The last allocated byte is reserved for the terminator, so it must
     * not be part of the space offered to the converter. */
    i_out--;

    iconv_handle = vlc_iconv_open( "UTF-8", psz_encoding );
    if( iconv_handle == (vlc_iconv_t)(-1) )
    {
        /* Invalid character set (e.g. ISO_8859-12) */
        memcpy( psz_outstring, &psz_instring[offset], i_in );
        psz_outstring[i_in] = '\0';
        EnsureUTF8( psz_outstring );
    }
    else
    {
        const char *psz_in = (const char *)&psz_instring[offset];
        char *psz_out = psz_outstring;

        while( vlc_iconv( iconv_handle, &psz_in, &i_in, &psz_out, &i_out )
               == (size_t)(-1) )
        {
            /* Nothing left to skip: stop instead of wrapping i_in around
             * and reading past the end of the field. */
            if( i_in == 0 )
                break;

            /* skip naughty byte. This may fail terribly for multibyte stuff,
             * but what can we do anyway? */
            psz_in++;
            i_in--;
            vlc_iconv( iconv_handle, NULL, NULL, NULL, NULL ); /* reset */
        }
        vlc_iconv_close( iconv_handle );

        *psz_out = '\0';
    }
    return psz_outstring;
}
static
void
SDTCallBack
(
demux_t
*
p_demux
,
dvbpsi_sdt_t
*
p_sdt
)
{
demux_sys_t
*
p_sys
=
p_demux
->
p_sys
;
...
...
@@ -2634,14 +2762,13 @@ static void SDTCallBack( demux_t *p_demux, dvbpsi_sdt_t *p_sdt )
"DVB MHP service"
};
dvbpsi_service_dr_t
*
pD
=
dvbpsi_DecodeServiceDr
(
p_dr
);
char
str1
[
257
]
;
char
str2
[
257
]
;
char
*
str1
=
NULL
;
char
*
str2
=
NULL
;
memcpy
(
str1
,
pD
->
i_service_provider_name
,
pD
->
i_service_provider_name_length
);
str1
[
pD
->
i_service_provider_name_length
]
=
'\0'
;
memcpy
(
str2
,
pD
->
i_service_name
,
pD
->
i_service_name_length
);
str2
[
pD
->
i_service_name_length
]
=
'\0'
;
str1
=
EITConvertToUTF8
(
pD
->
i_service_provider_name
,
pD
->
i_service_provider_name_length
);
str2
=
EITConvertToUTF8
(
pD
->
i_service_name
,
pD
->
i_service_name_length
);
msg_Dbg
(
p_demux
,
" - type=%d provider=%s name=%s"
,
pD
->
i_service_type
,
str1
,
str2
);
...
...
@@ -2650,6 +2777,8 @@ static void SDTCallBack( demux_t *p_demux, dvbpsi_sdt_t *p_sdt )
vlc_meta_SetPublisher
(
p_meta
,
str1
);
if
(
pD
->
i_service_type
>=
0x01
&&
pD
->
i_service_type
<=
0x10
)
psz_type
=
ppsz_type
[
pD
->
i_service_type
];
free
(
str1
);
free
(
str2
);
}
}
...
...
@@ -2739,133 +2868,6 @@ static int EITConvertDuration( uint32_t i_duration )
}
#undef CVT_FROM_BCD
/* FIXME same as dvbsi_to_utf8 from dvb access */
/*
 * Convert a DVB SI text field to a newly allocated, NUL-terminated UTF-8
 * string.
 *
 * The first byte of psz_instring selects the source character set:
 *  - >= 0x20       : no selector byte, the whole field is read as ISO_8859-1;
 *  - 0x01 .. 0x0b  : ISO_8859-5 .. ISO_8859-15, text starts at byte 1;
 *  - 0x10          : three-byte selector 0x10 0x00 N with N in 1..15 selects
 *                    ISO_8859-N and the text starts at byte 3; a malformed
 *                    selector makes the whole field be read as UTF-8;
 *  - 0x11 .. 0x15  : UTF-16, KSC5601-1987, GB2312, BIG-5, UTF-8, text starts
 *                    at byte 1;
 *  - anything else : invalid selector, the whole field is read as UTF-8.
 *
 * If the converter for the selected character set cannot be opened, the raw
 * bytes are copied and passed through EnsureUTF8() instead.
 *
 * psz_instring: raw field bytes (not necessarily NUL-terminated).
 * i_length:     number of bytes available in psz_instring.
 *
 * Returns a malloc()ed string that the caller must free(), or NULL when
 * i_length is 0 or the allocation fails.
 */
static char *EITConvertToUTF8( const unsigned char *psz_instring,
                               size_t i_length )
{
    const char *psz_encoding;
    char *psz_outstring;
    char psz_encbuf[sizeof( "ISO_8859-123" )];
    size_t i_in, i_out, offset = 1;
    vlc_iconv_t iconv_handle;

    if( i_length < 1 )
        return NULL;

    if( psz_instring[0] >= 0x20 )
    {
        /* According to the specification, this should be ISO6937,
         * but it seems Latin-1 is used instead. */
        psz_encoding = "ISO_8859-1";
        offset = 0;
    }
    else
    {
        switch( psz_instring[0] )
        {
        case 0x01:
            psz_encoding = "ISO_8859-5";
            break;
        case 0x02:
            psz_encoding = "ISO_8859-6";
            break;
        case 0x03:
            psz_encoding = "ISO_8859-7";
            break;
        case 0x04:
            psz_encoding = "ISO_8859-8";
            break;
        case 0x05:
            psz_encoding = "ISO_8859-9";
            break;
        case 0x06:
            psz_encoding = "ISO_8859-10";
            break;
        case 0x07:
            psz_encoding = "ISO_8859-11";
            break;
        case 0x08:
            psz_encoding = "ISO_8859-12";
            break;
        case 0x09:
            psz_encoding = "ISO_8859-13";
            break;
        case 0x0a:
            psz_encoding = "ISO_8859-14";
            break;
        case 0x0b:
            psz_encoding = "ISO_8859-15";
            break;
        case 0x10:
#warning Is Latin-10 (psz_instring[2] == 16) really illegal?
            if( i_length < 3 || psz_instring[1] != 0x00
             || psz_instring[2] > 15 || psz_instring[2] == 0 )
            {
                /* Malformed selector: keep every byte and treat as UTF-8. */
                psz_encoding = "UTF-8";
                offset = 0;
            }
            else
            {
                /* Bounded write; the value is 1..15 so it always fits. */
                snprintf( psz_encbuf, sizeof( psz_encbuf ), "ISO_8859-%u",
                          (unsigned)psz_instring[2] );
                psz_encoding = psz_encbuf;
                offset = 3;
            }
            break;
        case 0x11:
#warning Is there a BOM or do we use a fixed endianess?
            psz_encoding = "UTF-16";
            break;
        case 0x12:
            psz_encoding = "KSC5601-1987";
            break;
        case 0x13:
            psz_encoding = "GB2312"; /* GB-2312-1980 */
            break;
        case 0x14:
            psz_encoding = "BIG-5";
            break;
        case 0x15:
            psz_encoding = "UTF-8";
            break;
        default:
            /* invalid */
            psz_encoding = "UTF-8";
            offset = 0;
            break;
        }
    }

    /* offset never exceeds i_length here: it is 0, 1 (i_length >= 1) or
     * 3 (only when i_length >= 3). */
    i_in = i_length - offset;
    i_out = i_in * 6 + 1;

    psz_outstring = malloc( i_out );
    if( !psz_outstring )
        return NULL;

    /* The last allocated byte is reserved for the terminator, so it must
     * not be part of the space offered to the converter. */
    i_out--;

    iconv_handle = vlc_iconv_open( "UTF-8", psz_encoding );
    if( iconv_handle == (vlc_iconv_t)(-1) )
    {
        /* Invalid character set (e.g. ISO_8859-12) */
        memcpy( psz_outstring, &psz_instring[offset], i_in );
        psz_outstring[i_in] = '\0';
        EnsureUTF8( psz_outstring );
    }
    else
    {
        const char *psz_in = (const char *)&psz_instring[offset];
        char *psz_out = psz_outstring;

        while( vlc_iconv( iconv_handle, &psz_in, &i_in, &psz_out, &i_out )
               == (size_t)(-1) )
        {
            /* Nothing left to skip: stop instead of wrapping i_in around
             * and reading past the end of the field. */
            if( i_in == 0 )
                break;

            /* skip naughty byte. This may fail terribly for multibyte stuff,
             * but what can we do anyway? */
            psz_in++;
            i_in--;
            vlc_iconv( iconv_handle, NULL, NULL, NULL, NULL ); /* reset */
        }
        vlc_iconv_close( iconv_handle );

        *psz_out = '\0';
    }
    return psz_outstring;
}
static
void
EITCallBack
(
demux_t
*
p_demux
,
dvbpsi_eit_t
*
p_eit
,
bool
b_current_following
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment