patch 8.0.0252: not properly recognizing word characters between 128 and 255

Problem: Characters below 256 that are not one byte are not always recognized as word characters. Solution: Make vim_iswordc() and vim_iswordp() work the same way. Add a test for this. (Ozaki Kiichi)
author: Bram Moolenaar <Bram@vim.org> 2017-01-28 16:39:34 +0100
committer: Bram Moolenaar <Bram@vim.org> 2017-01-28 16:39:34 +0100
commit: 4019cf90b8657d4ab1c39744db63550f44f405a2 (patch)
tree: dd19804e007e7748d0d2ca5a0e4aaa5dd0a19217
parent: f42dd3c3901ea0ba38e67a616aea9953cae81b8d (diff)
download: vim-4019cf90b8657d4ab1c39744db63550f44f405a2.zip
7 files changed, 143 insertions, 18 deletions
diff --git a/.gitignore b/.gitignore
index 212d0e748..ac3a86367 100644
--- a/.gitignore
+++ b/.gitignore
@@ -81,3 +81,4 @@ src/testdir/viminfo
 src/memfile_test
 src/json_test
 src/message_test
+src/kword_test
diff --git a/src/Makefile b/src/Makefile
index c675c06a5..6844443e9 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1584,14 +1584,16 @@ EXTRA_SRC = hangulin.c if_lua.c if_mzsch.c auto/if_perl.c if_perlsfio.c \
 # Unittest files
 JSON_TEST_SRC = json_test.c
 JSON_TEST_TARGET = json_test$(EXEEXT)
+KWORD_TEST_SRC = kword_test.c
+KWORD_TEST_TARGET = kword_test$(EXEEXT)
 MEMFILE_TEST_SRC = memfile_test.c
 MEMFILE_TEST_TARGET = memfile_test$(EXEEXT)
 MESSAGE_TEST_SRC = message_test.c
 MESSAGE_TEST_TARGET = message_test$(EXEEXT)
 
-UNITTEST_SRC = $(JSON_TEST_SRC) $(MEMFILE_TEST_SRC) $(MESSAGE_TEST_SRC)
-UNITTEST_TARGETS = $(JSON_TEST_TARGET) $(MEMFILE_TEST_TARGET) $(MESSAGE_TEST_TARGET)
-RUN_UNITTESTS = run_json_test run_memfile_test run_message_test
+UNITTEST_SRC = $(JSON_TEST_SRC) $(KWORD_TEST_SRC) $(MEMFILE_TEST_SRC) $(MESSAGE_TEST_SRC)
+UNITTEST_TARGETS = $(JSON_TEST_TARGET) $(KWORD_TEST_TARGET) $(MEMFILE_TEST_TARGET) $(MESSAGE_TEST_TARGET)
+RUN_UNITTESTS = run_json_test run_kword_test run_memfile_test run_message_test
 
 # All sources, also the ones that are not configured
 ALL_SRC = $(BASIC_SRC) $(ALL_GUI_SRC) $(UNITTEST_SRC) $(EXTRA_SRC)
@@ -1611,7 +1613,6 @@ OBJ_COMMON = \
 	objects/arabic.o \
 	objects/buffer.o \
 	objects/blowfish.o \
-	objects/charset.o \
 	objects/crypt.o \
 	objects/crypt_zip.o \
 	objects/dict.o \
@@ -1679,6 +1680,7 @@ OBJ_COMMON = \
 
 # The files included by tests are not in OBJ_COMMON.
 OBJ_MAIN = \
+	objects/charset.o \
 	objects/json.o \
 	objects/main.o \
 	objects/memfile.o \
@@ -1687,13 +1689,23 @@ OBJ_MAIN = \
 OBJ = $(OBJ_COMMON) $(OBJ_MAIN)
 
 OBJ_JSON_TEST = \
+	objects/charset.o \
 	objects/memfile.o \
 	objects/message.o \
 	objects/json_test.o
 
 JSON_TEST_OBJ = $(OBJ_COMMON) $(OBJ_JSON_TEST)
 
+OBJ_KWORD_TEST = \
+	objects/json.o \
+	objects/memfile.o \
+	objects/message.o \
+	objects/kword_test.o
+
+KWORD_TEST_OBJ = $(OBJ_COMMON) $(OBJ_KWORD_TEST)
+
 OBJ_MEMFILE_TEST = \
+	objects/charset.o \
 	objects/json.o \
 	objects/message.o \
 	objects/memfile_test.o
@@ -1701,6 +1713,7 @@ OBJ_MEMFILE_TEST = \
 MEMFILE_TEST_OBJ = $(OBJ_COMMON) $(OBJ_MEMFILE_TEST)
 
 OBJ_MESSAGE_TEST = \
+	objects/charset.o \
 	objects/json.o \
 	objects/memfile.o \
 	objects/message_test.o
@@ -1710,6 +1723,7 @@ MESSAGE_TEST_OBJ = $(OBJ_COMMON) $(OBJ_MESSAGE_TEST)
 ALL_OBJ = $(OBJ_COMMON) \
 	  $(OBJ_MAIN) \
 	  $(OBJ_JSON_TEST) \
+	  $(OBJ_KWORD_TEST) \
 	  $(OBJ_MEMFILE_TEST) \
 	  $(OBJ_MESSAGE_TEST)
 
@@ -2036,6 +2050,9 @@ unittest unittests: $(RUN_UNITTESTS)
 run_json_test: $(JSON_TEST_TARGET)
 	$(VALGRIND) ./$(JSON_TEST_TARGET) || exit 1; echo $* passed;
 
+run_kword_test: $(KWORD_TEST_TARGET)	# run the kword unit test; $@ names the target ($* is empty in explicit rules under GNU make)
+	$(VALGRIND) ./$(KWORD_TEST_TARGET) || exit 1; echo $@ passed;
+
 run_memfile_test: $(MEMFILE_TEST_TARGET)
 	$(VALGRIND) ./$(MEMFILE_TEST_TARGET) || exit 1; echo $* passed;
 
@@ -2222,6 +2239,13 @@ $(JSON_TEST_TARGET): auto/config.mk objects $(JSON_TEST_OBJ)
 		MAKE="$(MAKE)" LINK_AS_NEEDED=$(LINK_AS_NEEDED) \
 		sh $(srcdir)/link.sh
 
+$(KWORD_TEST_TARGET): auto/config.mk objects $(KWORD_TEST_OBJ)	# rebuild objects/version.o, then link $(KWORD_TEST_OBJ) through link.sh
+	$(CCC) version.c -o objects/version.o
+	@LINK="$(PURIFY) $(SHRPENV) $(CClink) $(ALL_LIB_DIRS) $(LDFLAGS) \
+		-o $(KWORD_TEST_TARGET) $(KWORD_TEST_OBJ) $(ALL_LIBS)" \
+		MAKE="$(MAKE)" LINK_AS_NEEDED=$(LINK_AS_NEEDED) \
+		sh $(srcdir)/link.sh
+
 $(MEMFILE_TEST_TARGET): auto/config.mk objects $(MEMFILE_TEST_OBJ)
 	$(CCC) version.c -o objects/version.o
 	@LINK="$(PURIFY) $(SHRPENV) $(CClink) $(ALL_LIB_DIRS) $(LDFLAGS) \
@@ -3058,6 +3082,9 @@ objects/json.o: json.c
 objects/json_test.o: json_test.c
 	$(CCC) -o $@ json_test.c
 
+objects/kword_test.o: kword_test.c
+	$(CCC) -o $@ kword_test.c
+
 objects/list.o: list.c
 	$(CCC) -o $@ list.c
 
@@ -3597,6 +3624,10 @@ objects/json_test.o: json_test.c main.c vim.h auto/config.h feature.h os_unix.h
  auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \
  regexp.h gui.h gui_beval.h proto/gui_beval.pro alloc.h ex_cmds.h spell.h \
  proto.h globals.h farsi.h arabic.h json.c
+objects/kword_test.o: kword_test.c main.c vim.h auto/config.h feature.h os_unix.h \
+ auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \
+ regexp.h gui.h gui_beval.h proto/gui_beval.pro alloc.h ex_cmds.h spell.h \
+ proto.h globals.h farsi.h arabic.h charset.c mbyte.c
 objects/memfile_test.o: memfile_test.c main.c vim.h auto/config.h feature.h \
  os_unix.h auto/osdef.h ascii.h keymap.h term.h macros.h option.h \
  structs.h regexp.h gui.h gui_beval.h proto/gui_beval.pro alloc.h \
diff --git a/src/charset.c b/src/charset.c
index e766fa5f6..eb8baa946 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -899,16 +899,17 @@ vim_iswordc(int c)
     int
 vim_iswordc_buf(int c, buf_T *buf)
 {
-#ifdef FEAT_MBYTE
     if (c >= 0x100)
     {
+#ifdef FEAT_MBYTE
 	if (enc_dbcs != 0)
 	    return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
 	if (enc_utf8)
-	    return utf_class(c) >= 2;
-    }
+	    return utf_class_buf(c, buf) >= 2;
 #endif
-    return (c > 0 && c < 0x100 && GET_CHARTAB(buf, c) != 0);
+	return FALSE;
+    }
+    return (c > 0 && GET_CHARTAB(buf, c) != 0);
 }
 
 /*
@@ -917,21 +918,19 @@ vim_iswordc_buf(int c, buf_T *buf)
     int
 vim_iswordp(char_u *p)
 {
-#ifdef FEAT_MBYTE
-    if (has_mbyte && MB_BYTE2LEN(*p) > 1)
-	return mb_get_class(p) >= 2;
-#endif
-    return GET_CHARTAB(curbuf, *p) != 0;
+    return vim_iswordp_buf(p, curbuf);
 }
 
     int
 vim_iswordp_buf(char_u *p, buf_T *buf)
 {
+    int	c = *p;
+
 #ifdef FEAT_MBYTE
-    if (has_mbyte && MB_BYTE2LEN(*p) > 1)
-	return mb_get_class(p) >= 2;
+    if (has_mbyte && MB_BYTE2LEN(c) > 1)
+	c = (*mb_ptr2char)(p);
 #endif
-    return (GET_CHARTAB(buf, *p) != 0);
+    return vim_iswordc_buf(c, buf);
 }
 
 /*
diff --git a/src/kword_test.c b/src/kword_test.c
new file mode 100644
index 000000000..2e6640b8b
--- /dev/null
+++ b/src/kword_test.c
@@ -0,0 +1,85 @@
+/* vi:set ts=8 sts=4 sw=4 noet:
+ *
+ * VIM - Vi IMproved	by Bram Moolenaar
+ *
+ * Do ":help uganda"  in Vim to read copying and usage conditions.
+ * Do ":help credits" in Vim to see a list of people who contributed.
+ * See README.txt for an overview of the Vim source code.
+ */
+
+/*
+ * kword_test.c: Unittests for vim_iswordc() and vim_iswordp().
+ */
+
+#undef NDEBUG
+#include <assert.h>
+
+/* Must include main.c because it contains much more than just main() */
+#define NO_VIM_MAIN
+#include "main.c"
+
+/* Include charset.c directly: charset.o is not linked into this test. */
+#include "charset.c"
+
+#ifdef FEAT_MBYTE
+/*
+ * Check that vim_iswordc_buf(c) and vim_iswordp_buf(p) agree for c < 0x10000.
+ */
+    static void
+test_isword_funcs_utf8(void)
+{
+    buf_T buf;		/* dummy buffer; only b_p_isk is set explicitly below */
+    int c;		/* character value under test */
+
+    vim_memset(&buf, 0, sizeof(buf));
+    p_enc = (char_u *)"utf-8";
+    p_isi = (char_u *)"";
+    p_isp = (char_u *)"";
+    p_isf = (char_u *)"";
+    buf.b_p_isk = (char_u *)"@,48-57,_,128-167,224-235";	/* includes ranges above 127 */
+
+    curbuf = &buf;
+    mb_init(); /* calls init_chartab() */
+
+    for (c = 0; c < 0x10000; ++c)
+    {
+	char_u p[4] = {0};	/* zero-filled before utf_char2bytes() writes into it */
+	int c1;			/* c decoded back from p */
+	int retc;		/* result of the character-based check */
+	int retp;		/* result of the pointer-based check */
+
+	utf_char2bytes(c, p);
+	c1 = utf_ptr2char(p);
+	if (c != c1)		/* sanity check: encoding then decoding must give c back */
+	{
+	    fprintf(stderr, "Failed: ");
+	    fprintf(stderr,
+		    "[c = %#04x, p = {%#02x, %#02x, %#02x}] ",
+		    c, p[0], p[1], p[2]);
+	    fprintf(stderr, "c != utf_ptr2char(p) (=%#04x)\n", c1);
+	    abort();		/* stop at the first failure */
+	}
+	retc = vim_iswordc_buf(c, &buf);
+	retp = vim_iswordp_buf(p, &buf);
+	if (retc != retp)	/* the two functions disagree for this character */
+	{
+	    fprintf(stderr, "Failed: ");
+	    fprintf(stderr,
+		    "[c = %#04x, p = {%#02x, %#02x, %#02x}] ",
+		    c, p[0], p[1], p[2]);
+	    fprintf(stderr, "vim_iswordc(c) (=%d) != vim_iswordp(p) (=%d)\n",
+		    retc, retp);
+	    abort();		/* stop at the first failure */
+	}
+    }
+}
+#endif
+
+    int
+main(void)
+{
+#ifdef FEAT_MBYTE
+    test_isword_funcs_utf8();	/* calls abort() on failure, so returning means it passed */
+#endif
+    return 0;			/* also reached when FEAT_MBYTE is off and nothing was tested */
+}
diff --git a/src/mbyte.c b/src/mbyte.c
index 321bff58d..11dc0fb35 100644
--- a/src/mbyte.c
+++ b/src/mbyte.c
@@ -895,7 +895,7 @@ mb_get_class_buf(char_u *p, buf_T *buf)
     if (enc_dbcs != 0 && p[0] != NUL && p[1] != NUL)
 	return dbcs_class(p[0], p[1]);
     if (enc_utf8)
-	return utf_class(utf_ptr2char(p));
+	return utf_class_buf(utf_ptr2char(p), buf);
     return 0;
 }
 
@@ -2694,6 +2694,12 @@ static struct interval emoji_all[] =
     int
 utf_class(int c)
 {
+    return utf_class_buf(c, curbuf);
+}
+
+    int
+utf_class_buf(int c, buf_T *buf)
+{
     /* sorted list of non-overlapping intervals */
     static struct clinterval
     {
@@ -2780,7 +2786,7 @@ utf_class(int c)
     {
 	if (c == ' ' || c == '\t' || c == NUL || c == 0xa0)
 	    return 0;	    /* blank */
-	if (vim_iswordc(c))
+	if (vim_iswordc_buf(c, buf))
 	    return 2;	    /* word character */
 	return 1;	    /* punctuation */
     }
diff --git a/src/proto/mbyte.pro b/src/proto/mbyte.pro
index 806a6c698..83bcadc69 100644
--- a/src/proto/mbyte.pro
+++ b/src/proto/mbyte.pro
@@ -40,6 +40,7 @@ int utf_char2bytes(int c, char_u *buf);
 int utf_iscomposing(int c);
 int utf_printable(int c);
 int utf_class(int c);
+int utf_class_buf(int c, buf_T *buf);
 int utf_ambiguous_width(int c);
 int utf_fold(int a);
 int utf_toupper(int a);
diff --git a/src/version.c b/src/version.c
index 381864758..8645713b8 100644
--- a/src/version.c
+++ b/src/version.c
@@ -765,6 +765,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    252,
+/**/
     251,
 /**/
     250,
author	Bram Moolenaar <Bram@vim.org>	2017-01-28 16:39:34 +0100
committer	Bram Moolenaar <Bram@vim.org>	2017-01-28 16:39:34 +0100
commit	4019cf90b8657d4ab1c39744db63550f44f405a2 (patch)
tree	dd19804e007e7748d0d2ca5a0e4aaa5dd0a19217
parent	f42dd3c3901ea0ba38e67a616aea9953cae81b8d (diff)
download	vim-4019cf90b8657d4ab1c39744db63550f44f405a2.zip