Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc-1.1
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc-1.1
Commits
7b017ca7
Commit
7b017ca7
authored
May 08, 2011
by
Rémi Denis-Courmont
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Import vlc_towc() function from VLC 1.2
parent
4eb1ffe9
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
84 additions
and
0 deletions
+84
-0
src/libvlc.h
src/libvlc.h
+2
-0
src/text/unicode.c
src/text/unicode.c
+82
-0
No files found.
src/libvlc.h
View file @
7b017ca7
...
...
@@ -38,6 +38,8 @@ extern const size_t libvlc_actions_count;
extern
int
vlc_InitActions
(
libvlc_int_t
*
);
extern
void
vlc_DeinitActions
(
libvlc_int_t
*
);
/* Decodes the first UTF-8 character of str into *pwc and returns its length
 * in bytes (0 for an empty string); defined in src/text/unicode.c. */
size_t
vlc_towc
(
const
char
*
str
,
uint32_t
*
restrict
pwc
);
/*
* OS-specific initialization
*/
...
...
src/text/unicode.c
View file @
7b017ca7
...
...
@@ -283,6 +283,88 @@ int utf8_fprintf( FILE *stream, const char *fmt, ... )
}
/**
 * Converts the first character from a UTF-8 sequence into a code point.
 *
 * Decoding is strict: overlong encodings, UTF-16 surrogates (U+D800..U+DFFF),
 * values above U+10FFFF, stray continuation bytes and truncated sequences
 * are all rejected.
 *
 * @param str a NUL-terminated UTF-8 byte sequence; must not be NULL.
 *            Continuation bytes are read without any length bound, so the
 *            terminating NUL is what stops decoding of a truncated sequence.
 * @param pwc address where the decoded code point is stored on success
 *            (0 is stored for an empty string); left untouched on error.
 * @return 0 if str points to an empty string, i.e. the first character is NUL;
 * number of bytes that the first character occupies (from 1 to 4) otherwise;
 * (size_t)-1 if the byte sequence was not a valid UTF-8 sequence.
 */
size_t vlc_towc (const char *str, uint32_t *restrict pwc)
{
    const uint8_t *ptr = (const uint8_t *)str;
    uint8_t c;
    uint32_t cp;

    assert (str != NULL);

    c = *ptr;
    /* Lead bytes 0xF5..0xFF never occur in valid UTF-8. Rejecting them here
     * also bounds the leading-ones count below to the range 0..4. */
    if (unlikely(c > 0xF4))
        return (size_t)-1;

    /* Number of leading 1 bits of the first byte = sequence length */
    int charlen = clz8 (c ^ 0xFF);
    switch (charlen)
    {
        case 0: // 7-bit ASCII character -> short cut
            *pwc = c;
            return c != '\0';

        case 1: // continuation byte -> error
            return (size_t)-1;

        case 2:
            if (unlikely(c < 0xC2)) // ASCII overlong
                return (size_t)-1;
            cp = (uint32_t)(c & 0x1F) << 6;
            break;

        case 3:
            cp = (uint32_t)(c & 0x0F) << 12;
            break;

        case 4:
            /* 3 payload bits sit above 3 x 6 continuation bits */
            cp = (uint32_t)(c & 0x07) << 18;
            break;

        default:
            assert (0);
            /* Not reachable; keeps cp from being used uninitialized when
             * assertions are compiled out. */
            return (size_t)-1;
    }

    /* Unrolled continuation bytes decoding */
    switch (charlen)
    {
        case 4:
            c = *++ptr;
            if (unlikely((c >> 6) != 2)) // not a continuation byte
                return (size_t)-1;
            cp |= (uint32_t)(c & 0x3f) << 12;

            if (unlikely(cp >= 0x110000)) // beyond Unicode range
                return (size_t)-1;
            /* fall through */
        case 3:
            c = *++ptr;
            if (unlikely((c >> 6) != 2)) // not a continuation byte
                return (size_t)-1;
            cp |= (uint32_t)(c & 0x3f) << 6;

            if (unlikely(cp >= 0xD800 && cp < 0xE000)) // UTF-16 surrogate
                return (size_t)-1;
            /* Minimum is 0x800 for 3 bytes and 0x10000 for 4 bytes. The low
             * 6 bits are not merged yet, but both limits are multiples of
             * 64 so the comparison is already exact. */
            if (unlikely(cp < (1u << (5 * charlen - 4)))) // non-ASCII overlong
                return (size_t)-1;
            /* fall through */
        case 2:
            c = *++ptr;
            if (unlikely((c >> 6) != 2)) // not a continuation byte
                return (size_t)-1;
            cp |= (c & 0x3f);
            break;
    }

    *pwc = cp;
    return charlen;
}
static
char
*
CheckUTF8
(
char
*
str
,
char
rep
)
{
uint8_t
*
ptr
=
(
uint8_t
*
)
str
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment