Commit d4328b40 authored by Adam Tlalka, committed by Linus Torvalds

[PATCH] console utf-8 mode fixes

Fix utf-8 mode so that alternate charset modes always work according to the
control sequences interpreted in the do_con_trol function, preserving backward
compatibility with US-ASCII and VT100 semigraphics.

Malformed utf-8 sequences are represented as sequences of replacement
glyphs, or as the original codes or '?' as a last resort if the replacement
glyph is undefined.

unicode-xterm, gnome-terminal, kconsole and other terminal emulators in
utf-8 mode respect the acsc, enacs and rmacs sequences.  Also, I found that
some important system programs (from the Debian distro) use acsc in utf-8
mode - dselect, aptitude and w3m, for example.
Signed-off-by: Adam Tlalka <atlka@pg.gda.pl>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 08c67d2a
...@@ -63,6 +63,13 @@ ...@@ -63,6 +63,13 @@
* *
* Removed console_lock, enabled interrupts across all console operations * Removed console_lock, enabled interrupts across all console operations
* 13 March 2001, Andrew Morton * 13 March 2001, Andrew Morton
*
* Fixed UTF-8 mode so alternate charset modes always work according
* to control sequences interpreted in do_con_trol function
* preserving backward VT100 semigraphics compatibility,
* malformed UTF sequences represented as sequences of replacement glyphs,
* original codes or '?' as a last resort if replacement glyph is undefined
* by Adam Tla/lka <atlka@pg.gda.pl>, Aug 2006
*/ */
#include <linux/module.h> #include <linux/module.h>
...@@ -2005,17 +2012,23 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co ...@@ -2005,17 +2012,23 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
/* Do no translation at all in control states */ /* Do no translation at all in control states */
if (vc->vc_state != ESnormal) { if (vc->vc_state != ESnormal) {
tc = c; tc = c;
} else if (vc->vc_utf) { } else if (vc->vc_utf && !vc->vc_disp_ctrl) {
/* Combine UTF-8 into Unicode */ /* Combine UTF-8 into Unicode */
/* Incomplete characters silently ignored */ /* Malformed sequences as sequences of replacement glyphs */
rescan_last_byte:
if(c > 0x7f) { if(c > 0x7f) {
if (vc->vc_utf_count > 0 && (c & 0xc0) == 0x80) { if (vc->vc_utf_count) {
if ((c & 0xc0) == 0x80) {
vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f); vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f);
vc->vc_utf_count--; if (--vc->vc_utf_count) {
if (vc->vc_utf_count == 0) vc->vc_npar++;
continue;
}
tc = c = vc->vc_utf_char; tc = c = vc->vc_utf_char;
else continue; } else
goto replacement_glyph;
} else { } else {
vc->vc_npar = 0;
if ((c & 0xe0) == 0xc0) { if ((c & 0xe0) == 0xc0) {
vc->vc_utf_count = 1; vc->vc_utf_count = 1;
vc->vc_utf_char = (c & 0x1f); vc->vc_utf_char = (c & 0x1f);
...@@ -2032,14 +2045,15 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co ...@@ -2032,14 +2045,15 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
vc->vc_utf_count = 5; vc->vc_utf_count = 5;
vc->vc_utf_char = (c & 0x01); vc->vc_utf_char = (c & 0x01);
} else } else
vc->vc_utf_count = 0; goto replacement_glyph;
continue; continue;
} }
} else { } else {
if (vc->vc_utf_count)
goto replacement_glyph;
tc = c; tc = c;
vc->vc_utf_count = 0;
} }
} else { /* no utf */ } else { /* no utf or alternate charset mode */
tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c]; tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c];
} }
...@@ -2054,31 +2068,33 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co ...@@ -2054,31 +2068,33 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
* direct-to-font zone in UTF-8 mode. * direct-to-font zone in UTF-8 mode.
*/ */
ok = tc && (c >= 32 || ok = tc && (c >= 32 ||
(!vc->vc_utf && !(((vc->vc_disp_ctrl ? CTRL_ALWAYS !(vc->vc_disp_ctrl ? (CTRL_ALWAYS >> c) & 1 :
: CTRL_ACTION) >> c) & 1))) vc->vc_utf || ((CTRL_ACTION >> c) & 1)))
&& (c != 127 || vc->vc_disp_ctrl) && (c != 127 || vc->vc_disp_ctrl)
&& (c != 128+27); && (c != 128+27);
if (vc->vc_state == ESnormal && ok) { if (vc->vc_state == ESnormal && ok) {
/* Now try to find out how to display it */ /* Now try to find out how to display it */
tc = conv_uni_to_pc(vc, tc); tc = conv_uni_to_pc(vc, tc);
if (tc & ~charmask) {
if ( tc == -4 ) { if ( tc == -4 ) {
/* If we got -4 (not found) then see if we have /* If we got -4 (not found) then see if we have
defined a replacement character (U+FFFD) */ defined a replacement character (U+FFFD) */
replacement_glyph:
tc = conv_uni_to_pc(vc, 0xfffd); tc = conv_uni_to_pc(vc, 0xfffd);
if (!(tc & ~charmask))
/* One reason for the -4 can be that we just goto display_glyph;
did a clear_unimap(); } else if ( tc != -3 )
try at least to show something. */ continue; /* nothing to display */
if (tc == -4) /* no hash table or no replacement --
tc = c; * hope for the best */
} else if ( tc == -3 ) { if ( c & ~charmask )
/* Bad hash table -- hope for the best */ tc = '?';
else
tc = c; tc = c;
} }
if (tc & ~charmask)
continue; /* Conversion failed */
display_glyph:
if (vc->vc_need_wrap || vc->vc_decim) if (vc->vc_need_wrap || vc->vc_decim)
FLUSH FLUSH
if (vc->vc_need_wrap) { if (vc->vc_need_wrap) {
...@@ -2102,6 +2118,15 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co ...@@ -2102,6 +2118,15 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
vc->vc_x++; vc->vc_x++;
draw_to = (vc->vc_pos += 2); draw_to = (vc->vc_pos += 2);
} }
if (vc->vc_utf_count) {
if (vc->vc_npar) {
vc->vc_npar--;
goto display_glyph;
}
vc->vc_utf_count = 0;
c = orig;
goto rescan_last_byte;
}
continue; continue;
} }
FLUSH FLUSH
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment