summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Bram Moolenaar <Bram@vim.org> 2017-01-28 16:39:34 +0100
committer Bram Moolenaar <Bram@vim.org> 2017-01-28 16:39:34 +0100
commit 4019cf90b8657d4ab1c39744db63550f44f405a2 (patch)
tree dd19804e007e7748d0d2ca5a0e4aaa5dd0a19217
parent f42dd3c3901ea0ba38e67a616aea9953cae81b8d (diff)
download vim-4019cf90b8657d4ab1c39744db63550f44f405a2.zip
patch 8.0.0252: not properly recognizing word characters between 128 and 255
Problem: Characters below 256 that are not one byte are not always recognized as word characters.
Solution: Make vim_iswordc() and vim_iswordp() work the same way. Add a test for this. (Ozaki Kiichi)
-rw-r--r-- .gitignore 1
-rw-r--r-- src/Makefile 39
-rw-r--r-- src/charset.c 23
-rw-r--r-- src/kword_test.c 85
-rw-r--r-- src/mbyte.c 10
-rw-r--r-- src/proto/mbyte.pro 1
-rw-r--r-- src/version.c 2
7 files changed, 143 insertions, 18 deletions
diff --git a/.gitignore b/.gitignore
index 212d0e748..ac3a86367 100644
--- a/.gitignore
+++ b/.gitignore
@@ -81,3 +81,4 @@ src/testdir/viminfo
src/memfile_test
src/json_test
src/message_test
+src/kword_test
diff --git a/src/Makefile b/src/Makefile
index c675c06a5..6844443e9 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1584,14 +1584,16 @@ EXTRA_SRC = hangulin.c if_lua.c if_mzsch.c auto/if_perl.c if_perlsfio.c \
# Unittest files
JSON_TEST_SRC = json_test.c
JSON_TEST_TARGET = json_test$(EXEEXT)
+KWORD_TEST_SRC = kword_test.c
+KWORD_TEST_TARGET = kword_test$(EXEEXT)
MEMFILE_TEST_SRC = memfile_test.c
MEMFILE_TEST_TARGET = memfile_test$(EXEEXT)
MESSAGE_TEST_SRC = message_test.c
MESSAGE_TEST_TARGET = message_test$(EXEEXT)
-UNITTEST_SRC = $(JSON_TEST_SRC) $(MEMFILE_TEST_SRC) $(MESSAGE_TEST_SRC)
-UNITTEST_TARGETS = $(JSON_TEST_TARGET) $(MEMFILE_TEST_TARGET) $(MESSAGE_TEST_TARGET)
-RUN_UNITTESTS = run_json_test run_memfile_test run_message_test
+UNITTEST_SRC = $(JSON_TEST_SRC) $(KWORD_TEST_SRC) $(MEMFILE_TEST_SRC) $(MESSAGE_TEST_SRC)
+UNITTEST_TARGETS = $(JSON_TEST_TARGET) $(KWORD_TEST_TARGET) $(MEMFILE_TEST_TARGET) $(MESSAGE_TEST_TARGET)
+RUN_UNITTESTS = run_json_test run_kword_test run_memfile_test run_message_test
# All sources, also the ones that are not configured
ALL_SRC = $(BASIC_SRC) $(ALL_GUI_SRC) $(UNITTEST_SRC) $(EXTRA_SRC)
@@ -1611,7 +1613,6 @@ OBJ_COMMON = \
objects/arabic.o \
objects/buffer.o \
objects/blowfish.o \
- objects/charset.o \
objects/crypt.o \
objects/crypt_zip.o \
objects/dict.o \
@@ -1679,6 +1680,7 @@ OBJ_COMMON = \
# The files included by tests are not in OBJ_COMMON.
OBJ_MAIN = \
+ objects/charset.o \
objects/json.o \
objects/main.o \
objects/memfile.o \
@@ -1687,13 +1689,23 @@ OBJ_MAIN = \
OBJ = $(OBJ_COMMON) $(OBJ_MAIN)
OBJ_JSON_TEST = \
+ objects/charset.o \
objects/memfile.o \
objects/message.o \
objects/json_test.o
JSON_TEST_OBJ = $(OBJ_COMMON) $(OBJ_JSON_TEST)
+OBJ_KWORD_TEST = \
+ objects/json.o \
+ objects/memfile.o \
+ objects/message.o \
+ objects/kword_test.o
+
+KWORD_TEST_OBJ = $(OBJ_COMMON) $(OBJ_KWORD_TEST)
+
OBJ_MEMFILE_TEST = \
+ objects/charset.o \
objects/json.o \
objects/message.o \
objects/memfile_test.o
@@ -1701,6 +1713,7 @@ OBJ_MEMFILE_TEST = \
MEMFILE_TEST_OBJ = $(OBJ_COMMON) $(OBJ_MEMFILE_TEST)
OBJ_MESSAGE_TEST = \
+ objects/charset.o \
objects/json.o \
objects/memfile.o \
objects/message_test.o
@@ -1710,6 +1723,7 @@ MESSAGE_TEST_OBJ = $(OBJ_COMMON) $(OBJ_MESSAGE_TEST)
ALL_OBJ = $(OBJ_COMMON) \
$(OBJ_MAIN) \
$(OBJ_JSON_TEST) \
+ $(OBJ_KWORD_TEST) \
$(OBJ_MEMFILE_TEST) \
$(OBJ_MESSAGE_TEST)
@@ -2036,6 +2050,9 @@ unittest unittests: $(RUN_UNITTESTS)
run_json_test: $(JSON_TEST_TARGET)
$(VALGRIND) ./$(JSON_TEST_TARGET) || exit 1; echo $* passed;
+run_kword_test: $(KWORD_TEST_TARGET)
+ $(VALGRIND) ./$(KWORD_TEST_TARGET) || exit 1; echo $* passed;
+
run_memfile_test: $(MEMFILE_TEST_TARGET)
$(VALGRIND) ./$(MEMFILE_TEST_TARGET) || exit 1; echo $* passed;
@@ -2222,6 +2239,13 @@ $(JSON_TEST_TARGET): auto/config.mk objects $(JSON_TEST_OBJ)
MAKE="$(MAKE)" LINK_AS_NEEDED=$(LINK_AS_NEEDED) \
sh $(srcdir)/link.sh
+$(KWORD_TEST_TARGET): auto/config.mk objects $(KWORD_TEST_OBJ)
+ $(CCC) version.c -o objects/version.o
+ @LINK="$(PURIFY) $(SHRPENV) $(CClink) $(ALL_LIB_DIRS) $(LDFLAGS) \
+ -o $(KWORD_TEST_TARGET) $(KWORD_TEST_OBJ) $(ALL_LIBS)" \
+ MAKE="$(MAKE)" LINK_AS_NEEDED=$(LINK_AS_NEEDED) \
+ sh $(srcdir)/link.sh
+
$(MEMFILE_TEST_TARGET): auto/config.mk objects $(MEMFILE_TEST_OBJ)
$(CCC) version.c -o objects/version.o
@LINK="$(PURIFY) $(SHRPENV) $(CClink) $(ALL_LIB_DIRS) $(LDFLAGS) \
@@ -3058,6 +3082,9 @@ objects/json.o: json.c
objects/json_test.o: json_test.c
$(CCC) -o $@ json_test.c
+objects/kword_test.o: kword_test.c
+ $(CCC) -o $@ kword_test.c
+
objects/list.o: list.c
$(CCC) -o $@ list.c
@@ -3597,6 +3624,10 @@ objects/json_test.o: json_test.c main.c vim.h auto/config.h feature.h os_unix.h
auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \
regexp.h gui.h gui_beval.h proto/gui_beval.pro alloc.h ex_cmds.h spell.h \
proto.h globals.h farsi.h arabic.h json.c
+objects/kword_test.o: kword_test.c main.c vim.h auto/config.h feature.h os_unix.h \
+ auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \
+ regexp.h gui.h gui_beval.h proto/gui_beval.pro alloc.h ex_cmds.h spell.h \
+ proto.h globals.h farsi.h arabic.h charset.c mbyte.c
objects/memfile_test.o: memfile_test.c main.c vim.h auto/config.h feature.h \
os_unix.h auto/osdef.h ascii.h keymap.h term.h macros.h option.h \
structs.h regexp.h gui.h gui_beval.h proto/gui_beval.pro alloc.h \
diff --git a/src/charset.c b/src/charset.c
index e766fa5f6..eb8baa946 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -899,16 +899,17 @@ vim_iswordc(int c)
int
vim_iswordc_buf(int c, buf_T *buf)
{
-#ifdef FEAT_MBYTE
if (c >= 0x100)
{
+#ifdef FEAT_MBYTE
if (enc_dbcs != 0)
return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
if (enc_utf8)
- return utf_class(c) >= 2;
- }
+ return utf_class_buf(c, buf) >= 2;
#endif
- return (c > 0 && c < 0x100 && GET_CHARTAB(buf, c) != 0);
+ return FALSE;
+ }
+ return (c > 0 && GET_CHARTAB(buf, c) != 0);
}
/*
@@ -917,21 +918,19 @@ vim_iswordc_buf(int c, buf_T *buf)
int
vim_iswordp(char_u *p)
{
-#ifdef FEAT_MBYTE
- if (has_mbyte && MB_BYTE2LEN(*p) > 1)
- return mb_get_class(p) >= 2;
-#endif
- return GET_CHARTAB(curbuf, *p) != 0;
+ return vim_iswordp_buf(p, curbuf);
}
int
vim_iswordp_buf(char_u *p, buf_T *buf)
{
+ int c = *p;
+
#ifdef FEAT_MBYTE
- if (has_mbyte && MB_BYTE2LEN(*p) > 1)
- return mb_get_class(p) >= 2;
+ if (has_mbyte && MB_BYTE2LEN(c) > 1)
+ c = (*mb_ptr2char)(p);
#endif
- return (GET_CHARTAB(buf, *p) != 0);
+ return vim_iswordc_buf(c, buf);
}
/*
diff --git a/src/kword_test.c b/src/kword_test.c
new file mode 100644
index 000000000..2e6640b8b
--- /dev/null
+++ b/src/kword_test.c
@@ -0,0 +1,85 @@
+/* vi:set ts=8 sts=4 sw=4 noet:
+ *
+ * VIM - Vi IMproved by Bram Moolenaar
+ *
+ * Do ":help uganda" in Vim to read copying and usage conditions.
+ * Do ":help credits" in Vim to see a list of people who contributed.
+ * See README.txt for an overview of the Vim source code.
+ */
+
+/*
+ * kword_test.c: Unittests for vim_iswordc() and vim_iswordp().
+ */
+
+#undef NDEBUG
+#include <assert.h>
+
+/* Must include main.c because it contains much more than just main() */
+#define NO_VIM_MAIN
+#include "main.c"
+
+/* This file has to be included because the tested functions are static */
+#include "charset.c"
+
+#ifdef FEAT_MBYTE
+/*
+ * Test the results of vim_iswordc() and vim_iswordp() are matched.
+ */
+ static void
+test_isword_funcs_utf8(void)
+{
+ buf_T buf;
+ int c;
+
+ vim_memset(&buf, 0, sizeof(buf));
+ p_enc = (char_u *)"utf-8";
+ p_isi = (char_u *)"";
+ p_isp = (char_u *)"";
+ p_isf = (char_u *)"";
+ buf.b_p_isk = (char_u *)"@,48-57,_,128-167,224-235";
+
+ curbuf = &buf;
+ mb_init(); /* calls init_chartab() */
+
+ for (c = 0; c < 0x10000; ++c)
+ {
+ char_u p[4] = {0};
+ int c1;
+ int retc;
+ int retp;
+
+ utf_char2bytes(c, p);
+ c1 = utf_ptr2char(p);
+ if (c != c1)
+ {
+ fprintf(stderr, "Failed: ");
+ fprintf(stderr,
+ "[c = %#04x, p = {%#02x, %#02x, %#02x}] ",
+ c, p[0], p[1], p[2]);
+ fprintf(stderr, "c != utf_ptr2char(p) (=%#04x)\n", c1);
+ abort();
+ }
+ retc = vim_iswordc_buf(c, &buf);
+ retp = vim_iswordp_buf(p, &buf);
+ if (retc != retp)
+ {
+ fprintf(stderr, "Failed: ");
+ fprintf(stderr,
+ "[c = %#04x, p = {%#02x, %#02x, %#02x}] ",
+ c, p[0], p[1], p[2]);
+ fprintf(stderr, "vim_iswordc(c) (=%d) != vim_iswordp(p) (=%d)\n",
+ retc, retp);
+ abort();
+ }
+ }
+}
+#endif
+
+ int
+main(void)
+{
+#ifdef FEAT_MBYTE
+ test_isword_funcs_utf8();
+#endif
+ return 0;
+}
diff --git a/src/mbyte.c b/src/mbyte.c
index 321bff58d..11dc0fb35 100644
--- a/src/mbyte.c
+++ b/src/mbyte.c
@@ -895,7 +895,7 @@ mb_get_class_buf(char_u *p, buf_T *buf)
if (enc_dbcs != 0 && p[0] != NUL && p[1] != NUL)
return dbcs_class(p[0], p[1]);
if (enc_utf8)
- return utf_class(utf_ptr2char(p));
+ return utf_class_buf(utf_ptr2char(p), buf);
return 0;
}
@@ -2694,6 +2694,12 @@ static struct interval emoji_all[] =
int
utf_class(int c)
{
+ return utf_class_buf(c, curbuf);
+}
+
+ int
+utf_class_buf(int c, buf_T *buf)
+{
/* sorted list of non-overlapping intervals */
static struct clinterval
{
@@ -2780,7 +2786,7 @@ utf_class(int c)
{
if (c == ' ' || c == '\t' || c == NUL || c == 0xa0)
return 0; /* blank */
- if (vim_iswordc(c))
+ if (vim_iswordc_buf(c, buf))
return 2; /* word character */
return 1; /* punctuation */
}
diff --git a/src/proto/mbyte.pro b/src/proto/mbyte.pro
index 806a6c698..83bcadc69 100644
--- a/src/proto/mbyte.pro
+++ b/src/proto/mbyte.pro
@@ -40,6 +40,7 @@ int utf_char2bytes(int c, char_u *buf);
int utf_iscomposing(int c);
int utf_printable(int c);
int utf_class(int c);
+int utf_class_buf(int c, buf_T *buf);
int utf_ambiguous_width(int c);
int utf_fold(int a);
int utf_toupper(int a);
diff --git a/src/version.c b/src/version.c
index 381864758..8645713b8 100644
--- a/src/version.c
+++ b/src/version.c
@@ -765,6 +765,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
+ 252,
+/**/
251,
/**/
250,