Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc
Commits
9a55e838
Commit
9a55e838
authored
Dec 15, 2006
by
Rémi Denis-Courmont
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Simpler
UTF-8
check functions + rudimentary unit test
parent
d2dff446
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
131 additions
and
126 deletions
+131
-126
src/Makefile.am
src/Makefile.am
+6
-2
src/test/utf8.c
src/test/utf8.c
+85
-0
src/text/unicode.c
src/text/unicode.c
+40
-124
No files found.
src/Makefile.am
View file @
9a55e838
...
...
@@ -363,8 +363,9 @@ endif
# Unit/regression test
###############################################################################
if
USE_LIBTOOL
check_PROGRAMS
=
test_i18n_atof test_url
check_PROGRAMS
=
test_i18n_atof test_url
test_utf8
TESTS
=
$(check_PROGRAMS)
endif
CFLAGS_tests
=
`
$(VLC_CONFIG)
--cflags
libvlc
`
...
...
@@ -375,6 +376,9 @@ test_i18n_atof_CFLAGS = $(CFLAGS_tests)
test_url_SOURCES
=
test
/url.c
test_url_LDADD
=
libvlc.la
test_url_CFLAGS
=
$(CFLAGS_tests)
endif
test_utf8_SOURCES
=
test
/utf8.c
test_utf8_LDADD
=
libvlc.la
test_utf8_CFLAGS
=
$(CFLAGS_tests)
FORCE
:
src/test/utf8.c
0 → 100644
View file @
9a55e838
/*****************************************************************************
* utf8.c: Test for UTF-8 encoding/decoding stuff
*****************************************************************************
* Copyright (C) 2006 Rémi Denis-Courmont
* $Id$
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#include <vlc/vlc.h>
#include "vlc_charset.h"
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
/**
 * Runs a single UTF-8 checking scenario; exits the process on any mismatch.
 *
 * The input is expected to be accepted as-is exactly when it compares equal
 * to the expected output; otherwise it is expected to be rewritten into it.
 *
 * @param in  string passed to IsUTF8() and, as a heap duplicate, to EnsureUTF8()
 * @param out string the duplicate must compare equal to after EnsureUTF8()
 *
 * Exit status on failure:
 *  1 - IsUTF8() verdict on the input differs from the expectation,
 *  2 - EnsureUTF8() verdict on the duplicate differs from the expectation,
 *  3 - the duplicate does not compare equal to the expected output,
 *  4 - a second EnsureUTF8() or IsUTF8() call on the duplicate returns NULL.
 * abort() is called if the duplicate cannot be allocated.
 */
static void test (const char *in, const char *out)
{
    const bool expect_valid = (strcmp (in, out) == 0);

    /* EnsureUTF8() is handed a heap duplicate rather than the caller's string */
    char *copy = strdup (in);
    if (copy == NULL)
        abort ();

    if (!expect_valid)
        printf ("\"%s\" should be rewritten as \"%s\"...\n", in, out);
    else
        printf ("\"%s\" should be accepted...\n", in);

    /* Step 1: the checker's verdict on the untouched input */
    const bool accepted = (IsUTF8 (in) != NULL);
    if (accepted != expect_valid)
    {
        printf (" ERROR: IsUTF8 (%s) failed\n", in);
        exit (1);
    }

    /* Step 2: the fixer's verdict on the duplicate */
    const bool untouched = (EnsureUTF8 (copy) != NULL);
    if (untouched != expect_valid)
    {
        printf (" ERROR: EnsureUTF8 (%s) failed\n", in);
        exit (2);
    }

    /* Step 3: the duplicate must now match the expected output */
    if (strcmp (copy, out) != 0)
    {
        printf (" ERROR: got \"%s\"\n", copy);
        exit (3);
    }

    /* Step 4: a second pass over the result must not return NULL
     * (IsUTF8() is only consulted when EnsureUTF8() returned non-NULL) */
    if (EnsureUTF8 (copy) == NULL || IsUTF8 (copy) == NULL)
    {
        printf (" ERROR: EnsureUTF8 (%s) is not UTF-8\n", in);
        exit (4);
    }

    free (copy);
}
/**
 * Test driver: feeds a fixed list of input/expected-output pairs to test().
 *
 * stdout is switched to unbuffered mode first. test() exits the process
 * itself on failure, so reaching the end means every pair passed.
 *
 * @return 0 once all pairs have been run
 */
int main (void)
{
    static const struct
    {
        const char *in;  /* string handed to test() as input */
        const char *out; /* string handed to test() as expected output */
    } cases[] = {
        { "", "" },
        { "this_should_not_be_modified_1234",
          "this_should_not_be_modified_1234" },
        /* invalid byte */
        { "\xFF", "?" },
        /* BOM */
        { "\xEF\xBB\xBFHello", "\xEF\xBB\xBFHello" },
        /* literal starts with a NUL byte: no conversion past end of string */
        { "\x00\xE9", "" },
        { "T\xC3\xA9l\xC3\xA9vision \xE2\x82\xAC", "Télévision €" },
        { "T\xE9l\xE9vision", "T?l?vision" },
        /* overlong */
        { "\xC1\x94\xC3\xa9l\xC3\xA9vision", "??élévision" },
        /* more overlong */
        { "Hel\xF0\x83\x85\x87lo", "Hel????lo" },
    };

    (void)setvbuf (stdout, NULL, _IONBF, 0);

    for (size_t i = 0; i < sizeof (cases) / sizeof (cases[0]); i++)
        test (cases[i].in, cases[i].out);

    return 0;
}
src/text/unicode.c
View file @
9a55e838
...
...
@@ -639,147 +639,63 @@ int utf8_fprintf( FILE *stream, const char *fmt, ... )
static
char
*
CheckUTF8
(
char
*
str
,
char
rep
)
#define isutf8cont( c ) (((c) >= 0x80) && ((c) <= 0xBF))
{
unsigned
char
*
ptr
,
c
;
uint8_t
*
ptr
=
(
uint8_t
*
)
str
;
assert
(
str
!=
NULL
);
ptr
=
(
unsigned
char
*
)
str
;
while
(
(
c
=
*
ptr
)
!=
'\0'
)
for
(;;)
{
/* US-ASCII, 1 byte */
if
(
c
<=
0x7F
)
ptr
++
;
/* OK */
else
/* 2 bytes */
if
(
(
c
>=
0xC2
)
&&
(
c
<=
0xDF
)
)
{
c
=
ptr
[
1
];
if
(
isutf8cont
(
c
)
)
ptr
+=
2
;
/* OK */
else
goto
error
;
}
else
/* 3 bytes */
if
(
c
==
0xE0
)
{
c
=
ptr
[
1
];
if
(
(
c
>=
0xA0
)
&&
(
c
<=
0xBF
)
)
{
c
=
ptr
[
2
];
if
(
isutf8cont
(
c
)
)
ptr
+=
3
;
/* OK */
else
goto
error
;
}
else
goto
error
;
}
else
if
(
(
(
c
>=
0xE1
)
&&
(
c
<=
0xEC
)
)
||
(
c
==
0xEC
)
||
(
c
==
0xEE
)
||
(
c
==
0xEF
)
)
{
c
=
ptr
[
1
];
if
(
isutf8cont
(
c
)
)
{
c
=
ptr
[
2
];
if
(
isutf8cont
(
c
)
)
ptr
+=
3
;
/* OK */
else
goto
error
;
}
else
goto
error
;
}
else
if
(
c
==
0xED
)
{
c
=
ptr
[
1
];
if
(
(
c
>=
0x80
)
&&
(
c
<=
0x9F
)
)
{
c
=
ptr
[
2
];
if
(
isutf8cont
(
c
)
)
ptr
+=
3
;
/* OK */
else
goto
error
;
}
else
goto
error
;
}
else
/* 4 bytes */
if
(
c
==
0xF0
)
{
c
=
ptr
[
1
];
if
(
(
c
>=
0x90
)
&&
(
c
<=
0xBF
)
)
uint8_t
c
=
ptr
[
0
];
int
charlen
=
-
1
;
if
(
c
==
'\0'
)
break
;
for
(
int
i
=
0
;
i
<
7
;
i
++
)
if
((
c
>>
(
7
-
i
))
==
((
0xff
>>
(
7
-
i
))
^
1
))
{
c
=
ptr
[
2
];
if
(
isutf8cont
(
c
)
)
{
c
=
ptr
[
3
];
if
(
isutf8cont
(
c
)
)
ptr
+=
4
;
/* OK */
else
goto
error
;
}
else
goto
error
;
charlen
=
i
;
break
;
}
else
goto
error
;
}
else
if
(
(
c
>=
0xF1
)
&&
(
c
<=
0xF3
)
)
switch
(
charlen
)
{
c
=
ptr
[
1
];
if
(
isutf8cont
(
c
)
)
{
c
=
ptr
[
2
];
if
(
isutf8cont
(
c
)
)
{
c
=
ptr
[
3
];
if
(
isutf8cont
(
c
)
)
ptr
+=
4
;
/* OK */
goto
error
;
}
else
goto
error
;
}
else
case
0
:
// 7-bit ASCII character -> OK
ptr
++
;
continue
;
case
-
1
:
// 1111111x -> error
case
1
:
// continuation byte -> error
goto
error
;
}
else
if
(
c
==
0xF4
)
assert
(
charlen
>=
2
);
uint32_t
cp
=
c
&
~
((
0xff
>>
(
7
-
charlen
))
<<
(
7
-
charlen
));
for
(
int
i
=
1
;
i
<
charlen
;
i
++
)
{
c
=
ptr
[
1
];
if
(
(
c
>=
0x80
)
&&
(
c
<=
0x8F
)
)
{
c
=
ptr
[
2
];
if
(
isutf8cont
(
c
)
)
{
c
=
ptr
[
3
];
if
(
isutf8cont
(
c
)
)
ptr
+=
4
;
/* OK */
else
goto
error
;
}
else
goto
error
;
}
else
assert
(
cp
<
(
1
<<
26
));
c
=
ptr
[
i
];
if
((
c
==
'\0'
)
// unexpected end of string
||
((
c
>>
6
)
!=
2
))
// not a continuation byte
goto
error
;
cp
=
(
cp
<<
6
)
|
(
ptr
[
i
]
&
0x3f
);
}
else
if
(
cp
<
128
)
// overlong (special case for ASCII)
goto
error
;
if
(
cp
<
(
1u
<<
(
5
*
charlen
-
3
)))
// overlong
goto
error
;
ptr
+=
charlen
;
continue
;
error:
if
(
rep
==
0
)
error:
if
(
rep
==
0
)
return
NULL
;
*
ptr
++
=
'?'
;
*
ptr
++
=
rep
;
str
=
NULL
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment