summaryrefslogtreecommitdiff
path: root/src/mbyte.c
diff options
context:
space:
mode:
authorBram Moolenaar <Bram@vim.org>2004-10-07 21:02:47 +0000
committerBram Moolenaar <Bram@vim.org>2004-10-07 21:02:47 +0000
commit3fdfa4a9a52ab3d1a790262ee872a49853ad4626 (patch)
tree76f57a06a5f3b9e0abc15446b38722658fde7e1e /src/mbyte.c
parente5f258eb4c4b87ea1d6f61c1a0a9deecbb5d9726 (diff)
downloadvim-3fdfa4a9a52ab3d1a790262ee872a49853ad4626.zip
updated for version 7.0017
Diffstat (limited to 'src/mbyte.c')
-rw-r--r--src/mbyte.c100
1 files changed, 95 insertions, 5 deletions
diff --git a/src/mbyte.c b/src/mbyte.c
index a71fb5174..071096653 100644
--- a/src/mbyte.c
+++ b/src/mbyte.c
@@ -216,7 +216,7 @@ enc_canon_table[] =
#define IDX_ISO_14 13
{"iso-8859-14", ENC_8BIT, 0},
#define IDX_ISO_15 14
- {"iso-8859-15", ENC_8BIT, 0},
+ {"iso-8859-15", ENC_8BIT + ENC_LATIN9, 0},
#define IDX_KOI8_R 15
{"koi8-r", ENC_8BIT, 0},
#define IDX_KOI8_U 16
@@ -534,6 +534,7 @@ codepage_invalid:
#ifdef WIN3264
enc_codepage = encname2codepage(p_enc);
+ enc_latin9 = (STRCMP(p_enc, "iso-8859-15") == 0);
#endif
/*
@@ -2486,6 +2487,36 @@ mb_tail_off(base, p)
return 1 - dbcs_head_off(base, p);
}
+#if defined(HAVE_GTK2) || defined(PROTO)
+/*
+ * Return TRUE if string "s" is a valid utf-8 string.
+ * When "end" is NULL stop at the first NUL.
+ * When "end" is positive stop there.
+ */
+ int
+utf_valid_string(s, end)
+ char_u *s;
+ char_u *end;
+{
+ int l;
+ char_u *p = s;
+
+ while (end == NULL ? *p != NUL : p < end)
+ {
+ if ((*p & 0xc0) == 0x80)
+ return FALSE; /* invalid lead byte */
+ l = utf8len_tab[*p];
+ if (end != NULL && p + l > end)
+ return FALSE; /* incomplete byte sequence */
+ ++p;
+ while (--l > 0)
+ if ((*p++ & 0xc0) != 0x80)
+ return FALSE; /* invalid trail byte */
+ }
+ return TRUE;
+}
+#endif
+
#if defined(FEAT_GUI) || defined(PROTO)
/*
* Special version of mb_tail_off() for use in ScreenLines[].
@@ -5453,11 +5484,22 @@ convert_setup(vcp, from, to)
vcp->vc_type = CONV_TO_UTF8;
vcp->vc_factor = 2; /* up to twice as long */
}
+ else if ((from_prop & ENC_LATIN9) && (to_prop & ENC_UNICODE))
+ {
+ /* Internal latin9 -> utf-8 conversion. */
+ vcp->vc_type = CONV_9_TO_UTF8;
+ vcp->vc_factor = 3; /* up to three as long (euro sign) */
+ }
else if ((from_prop & ENC_UNICODE) && (to_prop & ENC_LATIN1))
{
/* Internal utf-8 -> latin1 conversion. */
vcp->vc_type = CONV_TO_LATIN1;
}
+ else if ((from_prop & ENC_UNICODE) && (to_prop & ENC_LATIN9))
+ {
+ /* Internal utf-8 -> latin9 conversion. */
+ vcp->vc_type = CONV_TO_LATIN9;
+ }
#ifdef WIN3264
/* Win32-specific codepage <-> codepage conversion without iconv. */
else if (((from_prop & ENC_UNICODE) || encname2codepage(from) > 0)
@@ -5622,13 +5664,40 @@ string_convert_ext(vcp, ptr, lenp, unconvlenp)
d = retval;
for (i = 0; i < len; ++i)
{
- if (ptr[i] < 0x80)
- *d++ = ptr[i];
+ c = ptr[i];
+ if (c < 0x80)
+ *d++ = c;
else
{
- *d++ = 0xc0 + ((unsigned)ptr[i] >> 6);
- *d++ = 0x80 + (ptr[i] & 0x3f);
+ *d++ = 0xc0 + ((unsigned)c >> 6);
+ *d++ = 0x80 + (c & 0x3f);
+ }
+ }
+ *d = NUL;
+ if (lenp != NULL)
+ *lenp = (int)(d - retval);
+ break;
+
+ case CONV_9_TO_UTF8: /* latin9 to utf-8 conversion */
+ retval = alloc(len * 3 + 1);
+ if (retval == NULL)
+ break;
+ d = retval;
+ for (i = 0; i < len; ++i)
+ {
+ c = ptr[i];
+ switch (c)
+ {
+ case 0xa4: c = 0x20ac; break; /* euro */
+ case 0xa6: c = 0x0160; break; /* S hat */
+ case 0xa8: c = 0x0161; break; /* S -hat */
+ case 0xb4: c = 0x017d; break; /* Z hat */
+ case 0xb8: c = 0x017e; break; /* Z -hat */
+ case 0xbc: c = 0x0152; break; /* OE */
+ case 0xbd: c = 0x0153; break; /* oe */
+ case 0xbe: c = 0x0178; break; /* Y */
}
+ d += utf_char2bytes(c, d);
}
*d = NUL;
if (lenp != NULL)
@@ -5636,6 +5705,7 @@ string_convert_ext(vcp, ptr, lenp, unconvlenp)
break;
case CONV_TO_LATIN1: /* utf-8 to latin1 conversion */
+ case CONV_TO_LATIN9: /* utf-8 to latin9 conversion */
retval = alloc(len + 1);
if (retval == NULL)
break;
@@ -5658,6 +5728,26 @@ string_convert_ext(vcp, ptr, lenp, unconvlenp)
else
{
c = utf_ptr2char(ptr + i);
+ if (vcp->vc_type == CONV_TO_LATIN9)
+ switch (c)
+ {
+ case 0x20ac: c = 0xa4; break; /* euro */
+ case 0x0160: c = 0xa6; break; /* S hat */
+ case 0x0161: c = 0xa8; break; /* S -hat */
+ case 0x017d: c = 0xb4; break; /* Z hat */
+ case 0x017e: c = 0xb8; break; /* Z -hat */
+ case 0x0152: c = 0xbc; break; /* OE */
+ case 0x0153: c = 0xbd; break; /* oe */
+ case 0x0178: c = 0xbe; break; /* Y */
+ case 0xa4:
+ case 0xa6:
+ case 0xa8:
+ case 0xb4:
+ case 0xb8:
+ case 0xbc:
+ case 0xbd:
+ case 0xbe: c = 0x100; break; /* not in latin9 */
+ }
if (!utf_iscomposing(c)) /* skip composing chars */
{
if (c < 0x100)