Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc-2-2
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc-2-2
Commits
fdfc6ad1
Commit
fdfc6ad1
authored
Sep 08, 2009
by
Marian Ďurkovič
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Perform charset detection and conversion to
UTF-8
also for SDT fields.
parent
dd537f5c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
136 additions
and
134 deletions
+136
-134
modules/demux/ts.c
modules/demux/ts.c
+136
-134
No files found.
modules/demux/ts.c
View file @
fdfc6ad1
...
...
@@ -2571,6 +2571,134 @@ static void ValidateDVBMeta( demux_t *p_demux, int i_pid )
#ifdef TS_USE_DVB_SI
/* FIXME: same as dvbsi_to_utf8 from dvb access */
/**
 * Converts a DVB SI text field to a newly allocated UTF-8 string.
 *
 * The first byte of the field selects the source character set:
 *  - >= 0x20: no selector byte, the whole field is treated as Latin-1;
 *  - 0x01..0x0b: ISO 8859-5 .. ISO 8859-15, text starts at byte 1;
 *  - 0x10: three-byte selector (0x10, 0x00, n) meaning ISO 8859-n,
 *          accepted for n in 1..15, text starts at byte 3;
 *  - 0x11..0x15: UTF-16, KSC5601-1987, GB2312, BIG-5, UTF-8, text starts
 *          at byte 1;
 *  - anything else (or a malformed 0x10 selector): the whole field,
 *          selector included, is treated as UTF-8.
 *
 * If the character set is not known to vlc_iconv_open(), the raw bytes are
 * copied and handed to EnsureUTF8() instead of being converted.
 *
 * @param psz_instring raw field bytes; exactly i_length bytes are read,
 *                     no NUL terminator is expected
 * @param i_length     number of bytes available at psz_instring
 * @return a NUL-terminated string obtained from malloc() that the caller
 *         must free(), or NULL if i_length is 0, the output size would
 *         overflow, or the allocation fails
 */
static char *EITConvertToUTF8( const unsigned char *psz_instring,
                               size_t i_length )
{
    const char *psz_encoding;
    char *psz_outstring;
    char psz_encbuf[sizeof( "ISO_8859-123" )];
    size_t i_in, i_out, offset = 1;
    vlc_iconv_t iconv_handle;

    if( i_length < 1 )
        return NULL;

    if( psz_instring[0] >= 0x20 )
    {
        psz_encoding = "ISO_8859-1";
        /* According to the specification, this should be ISO6937,
         * but it seems Latin-1 is used instead. */
        offset = 0;
    }
    else switch( psz_instring[0] )
    {
    case 0x01:
        psz_encoding = "ISO_8859-5";
        break;
    case 0x02:
        psz_encoding = "ISO_8859-6";
        break;
    case 0x03:
        psz_encoding = "ISO_8859-7";
        break;
    case 0x04:
        psz_encoding = "ISO_8859-8";
        break;
    case 0x05:
        psz_encoding = "ISO_8859-9";
        break;
    case 0x06:
        psz_encoding = "ISO_8859-10";
        break;
    case 0x07:
        psz_encoding = "ISO_8859-11";
        break;
    case 0x08:
        psz_encoding = "ISO_8859-12";
        break;
    case 0x09:
        psz_encoding = "ISO_8859-13";
        break;
    case 0x0a:
        psz_encoding = "ISO_8859-14";
        break;
    case 0x0b:
        psz_encoding = "ISO_8859-15";
        break;
    case 0x10:
#warning Is Latin-10 (psz_instring[2] == 16) really illegal?
        if( i_length < 3 || psz_instring[1] != 0x00
         || psz_instring[2] > 15 || psz_instring[2] == 0 )
        {
            /* Malformed selector: keep every byte and treat as UTF-8 */
            psz_encoding = "UTF-8";
            offset = 0;
        }
        else
        {
            /* Bounded write; the cast makes the argument match %u */
            snprintf( psz_encbuf, sizeof( psz_encbuf ), "ISO_8859-%u",
                      (unsigned)psz_instring[2] );
            psz_encoding = psz_encbuf;
            offset = 3;
        }
        break;
    case 0x11:
#warning Is there a BOM or do we use a fixed endianess?
        psz_encoding = "UTF-16";
        break;
    case 0x12:
        psz_encoding = "KSC5601-1987";
        break;
    case 0x13:
        psz_encoding = "GB2312"; /* GB-2312-1980 */
        break;
    case 0x14:
        psz_encoding = "BIG-5";
        break;
    case 0x15:
        psz_encoding = "UTF-8";
        break;
    default:
        /* invalid */
        psz_encoding = "UTF-8";
        offset = 0;
    }

    /* offset never exceeds i_length here: it is 3 only when i_length >= 3,
     * and 0 or 1 otherwise with i_length >= 1. */
    i_in = i_length - offset;

    /* Refuse sizes for which i_in * 6 + 1 would wrap around */
    if( i_in > ( (size_t)-1 - 1 ) / 6 )
        return NULL;

    /* i_out is the space offered to the converter; one extra byte is
     * allocated on top of it so the terminator always fits. */
    i_out = i_in * 6;
    psz_outstring = malloc( i_out + 1 );
    if( !psz_outstring )
        return NULL;

    iconv_handle = vlc_iconv_open( "UTF-8", psz_encoding );
    if( iconv_handle == (vlc_iconv_t)(-1) )
    {
        /* Invalid character set (e.g. ISO_8859-12) */
        memcpy( psz_outstring, &psz_instring[offset], i_in );
        psz_outstring[i_in] = '\0';
        EnsureUTF8( psz_outstring );
    }
    else
    {
        const char *psz_in = (const char *)&psz_instring[offset];
        char *psz_out = psz_outstring;

        while( vlc_iconv( iconv_handle, &psz_in, &i_in,
                          &psz_out, &i_out ) == (size_t)(-1) )
        {
            /* Nothing left to skip: stop instead of letting i_in wrap
             * around and reading past the end of the input. */
            if( i_in == 0 )
                break;

            /* skip naughty byte. This may fail terribly for multibyte stuff,
             * but what can we do anyway? */
            psz_in++;
            i_in--;
            vlc_iconv( iconv_handle, NULL, NULL, NULL, NULL ); /* reset */
        }
        vlc_iconv_close( iconv_handle );

        *psz_out = '\0';
    }
    return psz_outstring;
}
static
void
SDTCallBack
(
demux_t
*
p_demux
,
dvbpsi_sdt_t
*
p_sdt
)
{
demux_sys_t
*
p_sys
=
p_demux
->
p_sys
;
...
...
@@ -2634,14 +2762,13 @@ static void SDTCallBack( demux_t *p_demux, dvbpsi_sdt_t *p_sdt )
"DVB MHP service"
};
dvbpsi_service_dr_t
*
pD
=
dvbpsi_DecodeServiceDr
(
p_dr
);
char
str1
[
257
]
;
char
str2
[
257
]
;
char
*
str1
=
NULL
;
char
*
str2
=
NULL
;
memcpy
(
str1
,
pD
->
i_service_provider_name
,
pD
->
i_service_provider_name_length
);
str1
[
pD
->
i_service_provider_name_length
]
=
'\0'
;
memcpy
(
str2
,
pD
->
i_service_name
,
pD
->
i_service_name_length
);
str2
[
pD
->
i_service_name_length
]
=
'\0'
;
str1
=
EITConvertToUTF8
(
pD
->
i_service_provider_name
,
pD
->
i_service_provider_name_length
);
str2
=
EITConvertToUTF8
(
pD
->
i_service_name
,
pD
->
i_service_name_length
);
msg_Dbg
(
p_demux
,
" - type=%d provider=%s name=%s"
,
pD
->
i_service_type
,
str1
,
str2
);
...
...
@@ -2650,6 +2777,8 @@ static void SDTCallBack( demux_t *p_demux, dvbpsi_sdt_t *p_sdt )
vlc_meta_SetPublisher
(
p_meta
,
str1
);
if
(
pD
->
i_service_type
>=
0x01
&&
pD
->
i_service_type
<=
0x10
)
psz_type
=
ppsz_type
[
pD
->
i_service_type
];
free
(
str1
);
free
(
str2
);
}
}
...
...
@@ -2739,133 +2868,6 @@ static int EITConvertDuration( uint32_t i_duration )
}
#undef CVT_FROM_BCD
/* FIXME: same as dvbsi_to_utf8 from dvb access */
/**
 * Converts a DVB SI text field to a newly allocated UTF-8 string.
 *
 * The first byte of the field selects the source character set:
 *  - >= 0x20: no selector byte, the whole field is treated as Latin-1;
 *  - 0x01..0x0b: ISO 8859-5 .. ISO 8859-15, text starts at byte 1;
 *  - 0x10: three-byte selector (0x10, 0x00, n) meaning ISO 8859-n,
 *          accepted for n in 1..15, text starts at byte 3;
 *  - 0x11..0x15: UTF-16, KSC5601-1987, GB2312, BIG-5, UTF-8, text starts
 *          at byte 1;
 *  - anything else (or a malformed 0x10 selector): the whole field,
 *          selector included, is treated as UTF-8.
 *
 * If the character set is not known to vlc_iconv_open(), the raw bytes are
 * copied and handed to EnsureUTF8() instead of being converted.
 *
 * @param psz_instring raw field bytes; exactly i_length bytes are read,
 *                     no NUL terminator is expected
 * @param i_length     number of bytes available at psz_instring
 * @return a NUL-terminated string obtained from malloc() that the caller
 *         must free(), or NULL if i_length is 0, the output size would
 *         overflow, or the allocation fails
 */
static char *EITConvertToUTF8( const unsigned char *psz_instring,
                               size_t i_length )
{
    const char *psz_encoding;
    char *psz_outstring;
    char psz_encbuf[sizeof( "ISO_8859-123" )];
    size_t i_in, i_out, offset = 1;
    vlc_iconv_t iconv_handle;

    if( i_length < 1 )
        return NULL;

    if( psz_instring[0] >= 0x20 )
    {
        psz_encoding = "ISO_8859-1";
        /* According to the specification, this should be ISO6937,
         * but it seems Latin-1 is used instead. */
        offset = 0;
    }
    else switch( psz_instring[0] )
    {
    case 0x01:
        psz_encoding = "ISO_8859-5";
        break;
    case 0x02:
        psz_encoding = "ISO_8859-6";
        break;
    case 0x03:
        psz_encoding = "ISO_8859-7";
        break;
    case 0x04:
        psz_encoding = "ISO_8859-8";
        break;
    case 0x05:
        psz_encoding = "ISO_8859-9";
        break;
    case 0x06:
        psz_encoding = "ISO_8859-10";
        break;
    case 0x07:
        psz_encoding = "ISO_8859-11";
        break;
    case 0x08:
        psz_encoding = "ISO_8859-12";
        break;
    case 0x09:
        psz_encoding = "ISO_8859-13";
        break;
    case 0x0a:
        psz_encoding = "ISO_8859-14";
        break;
    case 0x0b:
        psz_encoding = "ISO_8859-15";
        break;
    case 0x10:
#warning Is Latin-10 (psz_instring[2] == 16) really illegal?
        if( i_length < 3 || psz_instring[1] != 0x00
         || psz_instring[2] > 15 || psz_instring[2] == 0 )
        {
            /* Malformed selector: keep every byte and treat as UTF-8 */
            psz_encoding = "UTF-8";
            offset = 0;
        }
        else
        {
            /* Bounded write; the cast makes the argument match %u */
            snprintf( psz_encbuf, sizeof( psz_encbuf ), "ISO_8859-%u",
                      (unsigned)psz_instring[2] );
            psz_encoding = psz_encbuf;
            offset = 3;
        }
        break;
    case 0x11:
#warning Is there a BOM or do we use a fixed endianess?
        psz_encoding = "UTF-16";
        break;
    case 0x12:
        psz_encoding = "KSC5601-1987";
        break;
    case 0x13:
        psz_encoding = "GB2312"; /* GB-2312-1980 */
        break;
    case 0x14:
        psz_encoding = "BIG-5";
        break;
    case 0x15:
        psz_encoding = "UTF-8";
        break;
    default:
        /* invalid */
        psz_encoding = "UTF-8";
        offset = 0;
    }

    /* offset never exceeds i_length here: it is 3 only when i_length >= 3,
     * and 0 or 1 otherwise with i_length >= 1. */
    i_in = i_length - offset;

    /* Refuse sizes for which i_in * 6 + 1 would wrap around */
    if( i_in > ( (size_t)-1 - 1 ) / 6 )
        return NULL;

    /* i_out is the space offered to the converter; one extra byte is
     * allocated on top of it so the terminator always fits. */
    i_out = i_in * 6;
    psz_outstring = malloc( i_out + 1 );
    if( !psz_outstring )
        return NULL;

    iconv_handle = vlc_iconv_open( "UTF-8", psz_encoding );
    if( iconv_handle == (vlc_iconv_t)(-1) )
    {
        /* Invalid character set (e.g. ISO_8859-12) */
        memcpy( psz_outstring, &psz_instring[offset], i_in );
        psz_outstring[i_in] = '\0';
        EnsureUTF8( psz_outstring );
    }
    else
    {
        const char *psz_in = (const char *)&psz_instring[offset];
        char *psz_out = psz_outstring;

        while( vlc_iconv( iconv_handle, &psz_in, &i_in,
                          &psz_out, &i_out ) == (size_t)(-1) )
        {
            /* Nothing left to skip: stop instead of letting i_in wrap
             * around and reading past the end of the input. */
            if( i_in == 0 )
                break;

            /* skip naughty byte. This may fail terribly for multibyte stuff,
             * but what can we do anyway? */
            psz_in++;
            i_in--;
            vlc_iconv( iconv_handle, NULL, NULL, NULL, NULL ); /* reset */
        }
        vlc_iconv_close( iconv_handle );

        *psz_out = '\0';
    }
    return psz_outstring;
}
static
void
EITCallBack
(
demux_t
*
p_demux
,
dvbpsi_eit_t
*
p_eit
,
bool
b_current_following
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment