summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--runtime/doc/Makefile20
-rw-r--r--runtime/doc/vim.12
-rw-r--r--src/Make_bc5.mak1
-rw-r--r--src/Make_cyg.mak3
-rw-r--r--src/normal.c2
-rw-r--r--src/proto/charset.pro2
-rw-r--r--src/spell.c901
7 files changed, 659 insertions, 272 deletions
diff --git a/runtime/doc/Makefile b/runtime/doc/Makefile
index 6eca937cb..1c2959de5 100644
--- a/runtime/doc/Makefile
+++ b/runtime/doc/Makefile
@@ -381,31 +381,31 @@ os_win32.txt:
touch os_win32.txt
vim-it.UTF-8.1: vim-it.1
- iconv -f latin1 -t utf-8 $< >$@
+ iconv -f latin1 -t utf-8 $> >$@
evim-it.UTF-8.1: evim-it.1
- iconv -f latin1 -t utf-8 $< >$@
+ iconv -f latin1 -t utf-8 $> >$@
vimdiff-it.UTF-8.1: vimdiff-it.1
- iconv -f latin1 -t utf-8 $< >$@
+ iconv -f latin1 -t utf-8 $> >$@
vimtutor-it.UTF-8.1: vimtutor-it.1
- iconv -f latin1 -t utf-8 $< >$@
+ iconv -f latin1 -t utf-8 $> >$@
xxd-it.UTF-8.1: xxd-it.1
- iconv -f latin1 -t utf-8 $< >$@
+ iconv -f latin1 -t utf-8 $> >$@
vim-ru.UTF-8.1: vim-ru.1
- iconv -f KOI8-R -t utf-8 $< >$@
+ iconv -f KOI8-R -t utf-8 $> >$@
evim-ru.UTF-8.1: evim-ru.1
- iconv -f KOI8-R -t utf-8 $< >$@
+ iconv -f KOI8-R -t utf-8 $> >$@
vimdiff-ru.UTF-8.1: vimdiff-ru.1
- iconv -f KOI8-R -t utf-8 $< >$@
+ iconv -f KOI8-R -t utf-8 $> >$@
vimtutor-ru.UTF-8.1: vimtutor-ru.1
- iconv -f KOI8-R -t utf-8 $< >$@
+ iconv -f KOI8-R -t utf-8 $> >$@
xxd-ru.UTF-8.1: xxd-ru.1
- iconv -f KOI8-R -t utf-8 $< >$@
+ iconv -f KOI8-R -t utf-8 $> >$@
diff --git a/runtime/doc/vim.1 b/runtime/doc/vim.1
index b98a96e2c..d78bf2efa 100644
--- a/runtime/doc/vim.1
+++ b/runtime/doc/vim.1
@@ -465,7 +465,7 @@ Type ":help" in
to get started.
Type ":help subject" to get help on a specific subject.
For example: ":help ZZ" to get help for the "ZZ" command.
-Use <Tab> and CTRL-D to complete subjects (":help cmdline\-completion").
+Use <Tab> and CTRL\-D to complete subjects (":help cmdline\-completion").
Tags are present to jump from one place to another (sort of hypertext links,
see ":help").
All documentation files can be viewed in this way, for example
diff --git a/src/Make_bc5.mak b/src/Make_bc5.mak
index 25145d94d..195d79f9c 100644
--- a/src/Make_bc5.mak
+++ b/src/Make_bc5.mak
@@ -564,6 +564,7 @@ vimobj = \
$(OBJDIR)\regexp.obj \
$(OBJDIR)\screen.obj \
$(OBJDIR)\search.obj \
+ $(OBJDIR)\spell.obj \
$(OBJDIR)\syntax.obj \
$(OBJDIR)\tag.obj \
$(OBJDIR)\term.obj \
diff --git a/src/Make_cyg.mak b/src/Make_cyg.mak
index 24a52ce8a..51e36189a 100644
--- a/src/Make_cyg.mak
+++ b/src/Make_cyg.mak
@@ -1,6 +1,6 @@
#
# Makefile for VIM on Win32, using Cygnus gcc
-# Last updated by Dan Sharp. Last Change: 2005 Jan 29
+# Last updated by Dan Sharp. Last Change: 2005 Mar 21
#
# Also read INSTALLpc.txt!
#
@@ -424,6 +424,7 @@ OBJ = \
$(OUTDIR)/regexp.o \
$(OUTDIR)/screen.o \
$(OUTDIR)/search.o \
+ $(OUTDIR)/spell.o \
$(OUTDIR)/syntax.o \
$(OUTDIR)/tag.o \
$(OUTDIR)/term.o \
diff --git a/src/normal.c b/src/normal.c
index 458b8155e..8780b954d 100644
--- a/src/normal.c
+++ b/src/normal.c
@@ -3874,6 +3874,7 @@ check_scrollbind(topline_diff, leftcol_diff)
nv_ignore(cap)
cmdarg_T *cap;
{
+ cap->retval |= CA_COMMAND_BUSY; /* don't call edit() now */
}
/*
@@ -8675,6 +8676,7 @@ nv_cursorhold(cap)
{
apply_autocmds(EVENT_CURSORHOLD, NULL, NULL, FALSE, curbuf);
did_cursorhold = TRUE;
+ cap->retval |= CA_COMMAND_BUSY; /* don't call edit() now */
}
#endif
diff --git a/src/proto/charset.pro b/src/proto/charset.pro
index afc8562ce..a3d4c145d 100644
--- a/src/proto/charset.pro
+++ b/src/proto/charset.pro
@@ -20,6 +20,8 @@ int vim_isIDc __ARGS((int c));
int vim_iswordc __ARGS((int c));
int vim_iswordp __ARGS((char_u *p));
int vim_iswordc_buf __ARGS((char_u *p, buf_T *buf));
+void init_spell_chartab __ARGS((void));
+int spell_iswordc __ARGS((char_u *p));
int vim_isfilec __ARGS((int c));
int vim_isprintc __ARGS((int c));
int vim_isprintc_strict __ARGS((int c));
diff --git a/src/spell.c b/src/spell.c
index a167ad1f1..c92eb65bf 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -9,6 +9,11 @@
/*
* spell.c: code for spell checking
+ *
+ * Terminology:
+ * "dword" is a dictionary word, made out of letters and digits.
+ * "nword" is a word with a character that's not a letter or digit.
+ * "word" is either a "dword" or an "nword".
*/
#if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
@@ -23,6 +28,8 @@
# include <fcntl.h>
#endif
+#define MAXWLEN 100 /* assume max. word len is this many bytes */
+
/*
* Structure that is used to store the text from the language file. This
* avoids the need to allocate each individual word and copying it. It's
@@ -36,24 +43,67 @@ struct sblock_S
char_u sb_data[1]; /* data, actually longer */
};
+/* Structure to store words and additions. Used twice: once for case-folded
+ * words and once for keep-case words. */
+typedef struct winfo_S
+{
+ hashtab_T wi_ht; /* hashtable with all words, both dword_T and
+ nword_T (check flags for DW_NWORD) */
+ garray_T wi_add; /* table with pointers to additions in a
+ dword_T */
+ int wi_addlen; /* longest addition length */
+} winfo_T;
+
/*
* Structure used to store words and other info for one language.
*/
typedef struct slang_S slang_T;
-
struct slang_S
{
slang_T *sl_next; /* next language */
char_u sl_name[2]; /* language name "en", "nl", etc. */
- hashtab_T sl_ht; /* hashtable with all words */
- garray_T sl_match; /* table with pointers to matches */
- garray_T sl_add; /* table with pointers to additions */
- char_u sl_regions[13]; /* table with up to 6 region names */
+ winfo_T sl_fwords; /* case-folded words and additions */
+ winfo_T sl_kwords; /* keep-case words and additions */
+ char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
sblock_T *sl_block; /* list with allocated memory blocks */
};
static slang_T *first_lang = NULL;
+/* Entry for a dword in "wi_ht". Also used for part of an nword, starting with
+ * the first non-word character. And used for additions in wi_add. */
+typedef struct dword_S
+{
+ char_u dw_region; /* one bit per region where it's valid */
+ char_u dw_flags; /* WF_ flags */
+ char_u dw_word[1]; /* actually longer, NUL terminated */
+} dword_T;
+
+#define REGION_ALL 0xff
+
+#define HI2DWORD(hi) ((dword_T *)((hi)->hi_key - 2)) /* key is dw_word; 2 bytes (dw_region, dw_flags) precede it */
+
+/* Entry for an nword in "wi_ht". Note that the last three items must be
+ * identical to dword_T, so that they can be in the same hashtable. */
+typedef struct nword_S
+{
+ garray_T nw_ga; /* table with pointers to dword_T for part
+ starting with non-word character */
+ int nw_maxlen; /* longest nword length (after the dword) */
+ char_u nw_region; /* one bit per region where it's valid */
+ char_u nw_flags; /* WF_ flags */
+ char_u nw_word[1]; /* actually longer, NUL terminated */
+} nword_T;
+
+/* Get nword_T pointer from hashitem that uses nw_word */
+static nword_T dumnw;
+#define HI2NWORD(hi) ((nword_T *)((hi)->hi_key - (dumnw.nw_word - (char_u *)&dumnw)))
+
+#define DW_CAP 0x01 /* word must start with capital */
+#define DW_RARE 0x02 /* rare word */
+#define DW_NWORD 0x04 /* this is an nword_T */
+#define DW_DWORD 0x08 /* (also) use as dword without nword */
+
/*
* Structure used in "b_langp", filled from 'spelllang'.
*/
@@ -64,25 +114,15 @@ typedef struct langp_S
} langp_T;
#define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))
-#define MATCH_ENTRY(gap, i) *(((char_u **)(gap)->ga_data) + i)
-
-/*
- * The byte before a word in the hashtable indicates the type of word.
- * Also used for the byte just before a match.
- * The top two bits are used to indicate rare and case-sensitive words.
- * The lower bits are used to indicate the region in which the word is valid.
- * Words valid in all regions use REGION_ALL.
- */
-#define REGION_MASK 0x3f
-#define REGION_ALL 0x3f
-#define CASE_MASK 0x40
-#define RARE_MASK 0x80
+#define DWORD_ENTRY(gap, i) (*(((dword_T **)(gap)->ga_data) + (i))) /* i-th dword_T pointer stored in growarray "gap" */
#define SP_OK 0
#define SP_BAD 1
#define SP_RARE 2
#define SP_LOCAL 3
+static char *e_invchar2 = N_("E753: Invalid character in \"%s\"");
+
static slang_T *spell_load_lang __ARGS((char_u *lang));
static void spell_load_file __ARGS((char_u *fname));
static int find_region __ARGS((char_u *rp, char_u *region));
@@ -102,19 +142,34 @@ spell_check(wp, ptr, attrp)
char_u *ptr;
int *attrp;
{
- char_u *e;
+ char_u *e; /* end of word */
+ char_u *ne; /* new end of word */
+ char_u *me; /* max. end of match */
langp_T *lp;
int result;
int len = 0;
- hash_T hash;
hashitem_T *hi;
- int c;
-#define MAXWLEN 80 /* assume max. word len is 80 */
- char_u word[MAXWLEN + 1];
+ int round;
+ char_u kword[MAXWLEN + 1]; /* word copy */
+ char_u fword[MAXWLEN + 1]; /* word with case folded */
+ char_u match[MAXWLEN + 1]; /* fword with additional chars */
+ char_u kwordclen[MAXWLEN + 1]; /* len of orig chars after kword[] */
+ char_u fwordclen[MAXWLEN + 1]; /* len of chars after fword[] */
+ char_u *clen;
+ int cidx = 0; /* char index in xwordclen[] */
+ hash_T fhash; /* hash for fword */
+ hash_T khash; /* hash for kword */
+ int match_len = 0; /* length of match[] */
+ int fmatch_len = 0; /* length of nword match in chars */
garray_T *gap;
- int l, h, t;
- char_u *p;
+ int l, t;
+ char_u *p, *tp;
int n;
+ dword_T *dw;
+ dword_T *tdw;
+ winfo_T *wi;
+ nword_T *nw;
+ int w_isupper;
/* Find the end of the word. We already know that *ptr is a word char. */
e = ptr;
@@ -122,119 +177,265 @@ spell_check(wp, ptr, attrp)
{
mb_ptr_adv(e);
++len;
- } while (*e != NUL && vim_iswordc_buf(e, wp->w_buffer));
+ } while (*e != NUL && spell_iswordc(e));
+
+ /* A word starting with a number is always OK. */
+ if (*ptr >= '0' && *ptr <= '9')
+ return (int)(e - ptr);
+
+#ifdef FEAT_MBYTE
+ w_isupper = MB_ISUPPER(mb_ptr2char(ptr));
+#else
+ w_isupper = MB_ISUPPER(*ptr);
+#endif
+
+ /* Make a copy of the word so that it can be NUL terminated.
+ * Compute hash value. */
+ mch_memmove(kword, ptr, e - ptr);
+ kword[e - ptr] = NUL;
+ khash = hash_hash(kword);
+
+ /* Make case-folded copy of the word. Compute its hash value. */
+ (void)str_foldcase(ptr, e - ptr, fword, MAXWLEN + 1);
+ fhash = hash_hash(fword);
+
+ /* Further case-folded characters to check for an nword match go in
+ * match[]. */
+ me = e;
+
+ /* "ne" is the end for the longest match */
+ ne = e;
/* The word is bad unless we find it in the dictionary. */
result = SP_BAD;
- /* Words are always stored with folded case. */
- (void)str_foldcase(ptr, e - ptr, word, MAXWLEN + 1);
- hash = hash_hash(word);
-
/*
* Loop over the languages specified in 'spelllang'.
- * We check them all, because a match may find a longer word.
+ * We check them all, because a matching nword may be longer than an
+ * already found dword or nword.
*/
- for (lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); lp->lp_slang != NULL;
- ++lp)
+ for (lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); lp->lp_slang != NULL; ++lp)
{
- /* Check words when it wasn't recognized as a good word yet. */
- if (result != SP_OK)
+ /*
+ * Check for a matching word in the hashtable.
+ * Check both the keep-case word and the fold-case word.
+ */
+ for (round = 0; round <= 1; ++round)
{
- /* Word lookup. Using a hash table is fast. */
- hi = hash_lookup(&lp->lp_slang->sl_ht, word, hash);
+ if (round == 0)
+ {
+ wi = &lp->lp_slang->sl_kwords;
+ hi = hash_lookup(&wi->wi_ht, kword, khash);
+ }
+ else
+ {
+ wi = &lp->lp_slang->sl_fwords;
+ hi = hash_lookup(&wi->wi_ht, fword, fhash);
+ }
if (!HASHITEM_EMPTY(hi))
{
- /* The character before the key indicates the type of word. */
- c = hi->hi_key[-1];
- if ((c & CASE_MASK) != 0)
+ /*
+ * If this is an nword entry, check for match with remainder.
+ */
+ dw = HI2DWORD(hi);
+ if (dw->dw_flags & DW_NWORD)
{
- /* Need to check first letter is uppercase. If it is,
- * check region. If it isn't it may be a rare word. */
- if (
+ /* If the word is not defined as a dword we must find an
+ * nword. */
+ if ((dw->dw_flags & DW_DWORD) == 0)
+ dw = NULL;
+
+ /* Fold more characters when needed for the nword. Need
+ * to do one extra to check for a non-word character after
+ * the nword. Also keep the byte-size of each character,
+ * both before and after folding case. */
+ nw = HI2NWORD(hi);
+ while ((round == 0
+ ? me - e <= nw->nw_maxlen
+ : match_len <= nw->nw_maxlen)
+ && *me != NUL)
+ {
#ifdef FEAT_MBYTE
- MB_ISUPPER(mb_ptr2char(ptr))
+ l = mb_ptr2len_check(me);
#else
- MB_ISUPPER(*ptr)
+ l = 1;
#endif
- )
+ (void)str_foldcase(me, l, match + match_len,
+ MAXWLEN - match_len + 1);
+ me += l;
+ kwordclen[cidx] = l;
+ fwordclen[cidx] = STRLEN(match + match_len);
+ match_len += fwordclen[cidx];
+ ++cidx;
+ }
+
+ if (round == 0)
{
- if ((c & lp->lp_region) == 0)
- result = SP_LOCAL;
- else
- result = SP_OK;
+ clen = kwordclen;
+ tp = e;
}
- else if (c & RARE_MASK)
- result = SP_RARE;
- }
- else
- {
- if ((c & lp->lp_region) == 0)
- result = SP_LOCAL;
- else if (c & RARE_MASK)
- result = SP_RARE;
else
- result = SP_OK;
+ {
+ clen = fwordclen;
+ tp = match;
+ }
+
+ /* Match with each item. The longest match wins:
+ * "you've" is longer than "you". */
+ gap = &nw->nw_ga;
+ for (t = 0; t < gap->ga_len; ++t)
+ {
+ /* Skip entries with wrong case for first char.
+ * Still consider it if it's a rare word without a capital. */
+ tdw = DWORD_ENTRY(gap, t);
+ if ((tdw->dw_flags & (DW_CAP | DW_RARE)) == DW_CAP
+ && !w_isupper)
+ continue;
+
+ p = tdw->dw_word;
+ l = 0;
+ for (n = 0; p[n] != 0; n += clen[l++])
+ if (vim_memcmp(p + n, tp + n, clen[l]) != 0)
+ break;
+
+ /* Use a match if it's longer than previous matches
+ * and the next character is not a word character. */
+ if (p[n] == 0 && l > fmatch_len && (tp[n] == 0
+ || !spell_iswordc(tp + n)))
+ {
+ dw = tdw;
+ fmatch_len = l;
+ if (round == 0)
+ ne = tp + n;
+ else
+ {
+ /* Need to use the length of the original
+ * chars, not the fold-case ones. */
+ ne = e;
+ for (l = 0; l < fmatch_len; ++l)
+ ne += kwordclen[l];
+ }
+ if ((lp->lp_region & tdw->dw_region) == 0)
+ result = SP_LOCAL;
+ else if ((tdw->dw_flags & DW_CAP) && !w_isupper)
+ result = SP_RARE;
+ else
+ result = SP_OK;
+ }
+ }
+
}
- }
- }
- /* Match lookup. Uses a binary search. If there is a match adjust
- * "e" to the end. This is also done when a word matched, because
- * "you've" is longer than "you". */
- gap = &lp->lp_slang->sl_match;
- l = 0; /* low index */
- h = gap->ga_len - 1; /* high index */
- /* keep searching, the match must be between "l" and "h" (inclusive) */
- while (h >= l)
- {
- t = (h + l) / 2;
- p = MATCH_ENTRY(gap, t) + 1;
- for (n = 0; p[n] != 0 && p[n] == ptr[n]; ++n)
- ;
- if (p[n] == 0)
- {
- if ((ptr[n] == 0 || !vim_iswordc_buf(ptr + n, wp->w_buffer)))
+ if (dw != NULL)
{
- /* match! */
- e = ptr + n;
- if (result != SP_OK)
+ if (dw->dw_flags & DW_CAP)
{
- if ((lp->lp_region & p[-1]) == 0)
+ /* Need to check that the first letter is uppercase. If it
+ * is, check the region. If it isn't, it may be a rare word.
+ */
+ if (w_isupper)
+ {
+ if ((dw->dw_region & lp->lp_region) == 0)
+ result = SP_LOCAL;
+ else
+ result = SP_OK;
+ }
+ else if (dw->dw_flags & DW_RARE)
+ result = SP_RARE;
+ }
+ else
+ {
+ if ((dw->dw_region & lp->lp_region) == 0)
result = SP_LOCAL;
+ else if (dw->dw_flags & DW_RARE)
+ result = SP_RARE;
else
result = SP_OK;
}
- break;
}
- /* match is too short, next item is new low index */
- l = t + 1;
}
- else if (p[n] < ptr[n])
- /* match is before word, next item is new low index */
- l = t + 1;
- else
- /* match is after word, previous item is new high index */
- h = t - 1;
}
- /* Addition lookup. Uses a linear search, there should be very few.
- * If there is a match adjust "e" to the end. This doesn't change
- * whether a word was good or bad, only the length. */
- gap = &lp->lp_slang->sl_add;
- for (t = 0; t < gap->ga_len; ++t)
- {
- p = MATCH_ENTRY(gap, t) + 1;
- for (n = 0; p[n] != 0 && p[n] == e[n]; ++n)
- ;
- if (p[n] == 0
- && (e[n] == 0 || !vim_iswordc_buf(e + n, wp->w_buffer)))
+ /*
+ * Check for an addition.
+ * Only after a dword, not after an nword.
+ * Check both the keep-case word and the fold-case word.
+ */
+ if (fmatch_len == 0)
+ for (round = 0; round <= 1; ++round)
{
- /* match */
- e += n;
- break;
+ if (round == 0)
+ wi = &lp->lp_slang->sl_kwords;
+ else
+ wi = &lp->lp_slang->sl_fwords;
+ gap = &wi->wi_add;
+ if (gap->ga_len == 0) /* no additions, skip quickly */
+ continue;
+
+ /* Fold characters when needed for the addition. Need to do one
+ * extra to check for a word character after the addition. */
+ while ((round == 0
+ ? me - e <= wi->wi_addlen
+ : match_len <= wi->wi_addlen)
+ && *me != NUL)
+ {
+#ifdef FEAT_MBYTE
+ l = mb_ptr2len_check(me);
+#else
+ l = 1;
+#endif
+ (void)str_foldcase(me, l, match + match_len,
+ MAXWLEN - match_len + 1);
+ me += l;
+ kwordclen[cidx] = l;
+ fwordclen[cidx] = STRLEN(match + match_len);
+ match_len += fwordclen[cidx];
+ ++cidx;
+ }
+
+ if (round == 0)
+ {
+ clen = kwordclen;
+ tp = e;
+ }
+ else
+ {
+ clen = fwordclen;
+ tp = match;
+ }
+
+ /* Addition lookup. Uses a linear search, there should be
+ * very few. If there is a match adjust "ne" to the end.
+ * This doesn't change whether a word was good or bad, only
+ * the length. */
+ for (t = 0; t < gap->ga_len; ++t)
+ {
+ tdw = DWORD_ENTRY(gap, t);
+ p = tdw->dw_word;
+ l = 0;
+ for (n = 0; p[n] != 0; n += clen[l++])
+ if (vim_memcmp(p + n, tp + n, clen[l]) != 0)
+ break;
+
+ /* Use a match if it's longer than previous matches
+ * and the next character is not a word character. */
+ if (p[n] == 0 && l > fmatch_len
+ && (tp[n] == 0 || !spell_iswordc(tp + n)))
+ {
+ fmatch_len = l;
+ if (round == 0)
+ ne = tp + n;
+ else
+ {
+ /* Need to use the length of the original
+ * chars, not the fold-case ones. */
+ ne = e;
+ for (l = 0; l < fmatch_len; ++l)
+ ne += kwordclen[l];
+ }
+ }
+ }
}
- }
}
if (result != SP_OK)
@@ -247,7 +448,7 @@ spell_check(wp, ptr, attrp)
*attrp = highlight_attr[HLF_SPL];
}
- return (int)(e - ptr);
+ return (int)(ne - ptr);
}
static slang_T *load_lp; /* passed from spell_load_lang() to
@@ -264,15 +465,19 @@ spell_load_lang(lang)
char_u fname_enc[80];
char_u fname_ascii[20];
char_u *p;
+ int r;
lp = (slang_T *)alloc(sizeof(slang_T));
if (lp != NULL)
{
lp->sl_name[0] = lang[0];
lp->sl_name[1] = lang[1];
- hash_init(&lp->sl_ht);
- ga_init2(&lp->sl_match, sizeof(char_u *), 20);
- ga_init2(&lp->sl_add, sizeof(char_u *), 4);
+ hash_init(&lp->sl_fwords.wi_ht);
+ ga_init2(&lp->sl_fwords.wi_add, sizeof(dword_T *), 4);
+ lp->sl_fwords.wi_addlen = 0;
+ hash_init(&lp->sl_kwords.wi_ht);
+ ga_init2(&lp->sl_kwords.wi_add, sizeof(dword_T *), 4);
+ lp->sl_kwords.wi_addlen = 0;
lp->sl_regions[0] = NUL;
lp->sl_block = NULL;
@@ -286,17 +491,20 @@ spell_load_lang(lang)
p = (char_u *)"latin1";
load_lp = lp;
sprintf((char *)fname_enc, "spell/%c%c.%s.spl", lang[0], lang[1], p);
- if (do_in_runtimepath(fname_enc, TRUE, spell_load_file) == FAIL)
+ r = do_in_runtimepath(fname_enc, TRUE, spell_load_file);
+ if (r == FAIL)
{
/* Try again to find an ASCII spell file. */
sprintf((char *)fname_ascii, "spell/%c%c.spl", lang[0], lang[1]);
- if (do_in_runtimepath(fname_ascii, TRUE, spell_load_file) == FAIL)
- {
- vim_free(lp);
- lp = NULL;
- smsg((char_u *)_("Warning: Cannot find dictionary \"%s\""),
+ r = do_in_runtimepath(fname_ascii, TRUE, spell_load_file);
+ }
+
+ if (r == FAIL)
+ {
+ vim_free(lp);
+ lp = NULL;
+ smsg((char_u *)_("Warning: Cannot find dictionary \"%s\""),
fname_enc + 6);
- }
}
else
{
@@ -319,229 +527,398 @@ spell_load_file(fname)
int fd;
size_t len;
size_t l;
- size_t rest = 0;
char_u *p = NULL, *np;
- sblock_T *bl;
+ sblock_T *bl = NULL;
+ int bl_used = 0;
+ size_t rest = 0;
+ char_u *rbuf; /* read buffer */
+ char_u *rbuf_end; /* past last valid char in "rbuf" */
hash_T hash;
hashitem_T *hi;
int c;
+ int cc;
int region = REGION_ALL;
- char_u word[MAXWLEN + 1];
- int n;
+ int wlen;
+ winfo_T *wi;
+ dword_T *dw, *edw;
+ nword_T *nw = NULL;
+ int flags;
+ char_u *save_sourcing_name = sourcing_name;
+ linenr_T save_sourcing_lnum = sourcing_lnum;
+
+ rbuf = alloc((unsigned)(SBLOCKSIZE + MAXWLEN + 1));
+ if (rbuf == NULL)
+ return;
fd = mch_open((char *)fname, O_RDONLY | O_EXTRA, 0);
if (fd < 0)
{
EMSG2(_(e_notopen), fname);
- return;
+ goto theend;
}
+ sourcing_name = fname;
+ sourcing_lnum = 0;
+
/* Get the length of the whole file. */
len = lseek(fd, (off_t)0, SEEK_END);
lseek(fd, (off_t)0, SEEK_SET);
- /* Loop, reading the file one block at a time.
+ /*
+ * Read the file one block at a time.
* "rest" is the length of an incomplete line at the previous block.
- * "p" points to the remainder. */
+ * "p" points to the remainder.
+ */
while (len > 0)
{
- /* Allocate a block of memory to store the info in. This is not freed
- * until spell_reload() is called. */
+ /* Read a block from the file. Prepend the remainder of the previous
+ * block, if any. */
+ if (rest > 0)
+ {
+ if (rest > MAXWLEN) /* truncate long line (should be comment) */
+ rest = MAXWLEN;
+ mch_memmove(rbuf, p, rest);
+ --sourcing_lnum;
+ }
if (len > SBLOCKSIZE)
l = SBLOCKSIZE;
else
l = len;
len -= l;
- bl = (sblock_T *)alloc((unsigned)(sizeof(sblock_T) - 1 + l + rest));
- if (bl == NULL)
- break;
- bl->sb_next = load_lp->sl_block;
- load_lp->sl_block = bl;
-
- /* Read a block from the file. Prepend the remainder of the previous
- * block. */
- if (rest > 0)
- mch_memmove(bl->sb_data, p, rest);
- if (read(fd, bl->sb_data + rest, l) != l)
+ if (read(fd, rbuf + rest, l) != l)
{
EMSG2(_(e_notread), fname);
break;
}
- l += rest;
+ rbuf_end = rbuf + l + rest;
rest = 0;
/* Deal with each line that was read until we finish the block. */
- for (p = bl->sb_data; l > 0; p = np)
+ for (p = rbuf; p < rbuf_end; p = np)
{
- /* "np" points to the char after the line (CR or NL). */
- for (np = p; l > 0 && *np >= ' '; ++np)
- --l;
- if (l == 0)
+ ++sourcing_lnum;
+
+ /* "np" points to the first char after the line (CR, NL or white
+ * space). */
+ for (np = p; np < rbuf_end && *np >= ' '; mb_ptr_adv(np))
+ ;
+ if (np >= rbuf_end)
{
- /* Incomplete line (or end of file). */
+ /* Incomplete line or end of file. */
rest = np - p;
if (len == 0)
- EMSG2(_("E751: Truncated spell file: %s"), fname);
+ EMSG(_("E751: Truncated spell file"));
break;
}
*np = NUL; /* terminate the line with a NUL */
- /* Skip comment and empty lines. */
- c = *p;
- if (c != '#' && np > p)
+ if (*p == '-')
{
- if (c == '=' || c == '+')
+ /*
+ * Region marker: ---, -xx, -xx-yy, etc.
+ */
+ ++p;
+ if (*p == '-')
{
- garray_T *gap;
-
- /* Match or Add item. */
- if (c == '=')
- gap = &load_lp->sl_match;
- else
- gap = &load_lp->sl_add;
-
- if (ga_grow(gap, 1) == OK)
+ if (p[1] != '-' || p[2] != NUL)
{
- for (n = 0; n < gap->ga_len; ++n)
- if ((c = STRCMP(p + 1,
- MATCH_ENTRY(gap, n) + 1)) < 0)
- break;
- if (c == 0)
- {
- if (p_verbose > 0)
- smsg((char_u *)_("Warning: duplicate match \"%s\" in %s"),
- p + 1, fname);
- }
- else
- {
- mch_memmove((char_u **)gap->ga_data + n + 1,
- (char_u **)gap->ga_data + n,
- (gap->ga_len - n) * sizeof(char_u *));
- *(((char_u **)gap->ga_data) + n) = p;
- *p = region;
- ++gap->ga_len;
- }
+ EMSG2(_(e_invchar2), p - 1);
+ len = 0;
+ break;
}
+ region = REGION_ALL;
}
- else if (c == '-')
+ else
{
- /* region item */
- ++p;
- if (*p == '-')
- /* end of a region */
- region = REGION_ALL;
- else
- {
- char_u *rp = load_lp->sl_regions;
- int r;
+ char_u *rp = load_lp->sl_regions;
+ int r;
- /* The region may be repeated: "-ca-uk". Fill
- * "region" with the bit mask for the ones we find. */
- region = 0;
- for (;;)
+ /* Start of a region. The region may be repeated:
+ * "-ca-uk". Fill "region" with the bit mask for the
+ * ones we find. */
+ region = 0;
+ for (;;)
+ {
+ r = find_region(rp, p);
+ if (r == REGION_ALL)
{
- /* start of a region */
- r = find_region(rp, p);
- if (r == REGION_ALL)
+ /* new region, add it to sl_regions[] */
+ r = STRLEN(rp);
+ if (r >= 16)
{
- /* new region, add it */
- r = STRLEN(rp);
- if (r >= 12)
- {
- EMSG2(_("E752: Too many regions in %s"),
- fname);
- r = REGION_ALL;
- }
- else
- {
- rp[r] = p[0];
- rp[r + 1] = p[1];
- rp[r + 2] = NUL;
- r = 1 << (r / 2);
- }
+ EMSG2(_("E752: Too many regions: %s"), p);
+ len = 0;
+ break;
}
else
- r = 1 << r;
+ {
+ rp[r] = p[0];
+ rp[r + 1] = p[1];
+ rp[r + 2] = NUL;
+ r = 1 << (r / 2);
+ }
+ }
+ else
+ r = 1 << r;
- region |= r;
- if (p[2] != '-')
+ region |= r;
+ if (p[2] != '-')
+ {
+ if (p[2] > ' ')
{
- if (p[2] != NUL)
- EMSG2(_("E753: Invalid character in \"%s\""),
- p - 1);
- break;
+ EMSG2(_(e_invchar2), p - 1);
+ len = 0;
}
- p += 3;
+ break;
}
+ p += 3;
}
}
+ }
+ else if (*p != '#' && *p != NUL)
+ {
+ /*
+ * Not an empty line or comment.
+ */
+ if (*p == '!')
+ {
+ wi = &load_lp->sl_kwords; /* keep case */
+ ++p;
+ }
else
+ wi = &load_lp->sl_fwords; /* fold case */
+
+ flags = 0;
+ c = *p;
+ if (c == '>') /* rare word */
{
- /* add the word */
- if (c == '>')
- c = region | RARE_MASK;
- else
+ flags = DW_RARE;
+ ++p;
+ }
+ else if (*p == '+') /* addition */
+ ++p;
+
+ if (c != '+' && !spell_iswordc(p))
+ {
+ EMSG2(_(e_invchar2), p);
+ len = 0;
+ break;
+ }
+
+ /* Make sure there is room for the word. Folding case may
+ * double the size. */
+ wlen = np - p;
+ if (bl == NULL || bl_used + sizeof(dword_T) + wlen
+#ifdef FEAT_MBYTE
+ * (has_mbyte ? 2 : 1)
+#endif
+ >= SBLOCKSIZE)
+ {
+ /* Allocate a block of memory to store the dword_T in.
+ * This is not freed until spell_reload() is called. */
+ bl = (sblock_T *)alloc((unsigned)(sizeof(sblock_T)
+ + SBLOCKSIZE));
+ if (bl == NULL)
{
- if (c != ' ')
- EMSG2(_("E753: Invalid character in \"%s\""), p);
- c = region;
+ len = 0;
+ break;
}
+ bl->sb_next = load_lp->sl_block;
+ load_lp->sl_block = bl;
+ bl_used = 0;
+ }
+ dw = (dword_T *)(bl->sb_data + bl_used);
+
+ /* For fold-case words fold the case and check for start
+ * with uppercase letter. */
+ if (wi == &load_lp->sl_fwords)
+ {
#ifdef FEAT_MBYTE
- if (MB_ISUPPER(mb_ptr2char(p + 1)))
+ if (MB_ISUPPER(mb_ptr2char(p)))
#else
- if (MB_ISUPPER(p[1]))
+ if (MB_ISUPPER(*p))
#endif
- c |= CASE_MASK;
- *p++ = c;
- (void)str_foldcase(p, np - p, word, MAXWLEN + 1);
- n = STRLEN(word);
- if (n > np - p)
- {
- sblock_T *s;
+ flags |= DW_CAP;
+
+ /* Fold case. */
+ (void)str_foldcase(p, np - p, dw->dw_word, wlen
+#ifdef FEAT_MBYTE
+ * (has_mbyte ? 2 : 1)
+#endif
+ + 1);
+#ifdef FEAT_MBYTE
+ /* case folding may change length of word */
+ wlen = STRLEN(dw->dw_word);
+#endif
+ }
+ else
+ {
+ /* Keep case: copy the word as-is. */
+ mch_memmove(dw->dw_word, p, wlen + 1);
+ }
- /* Folding case made word longer! We need to allocate
- * memory for it. */
- s = (sblock_T *)alloc((unsigned)sizeof(sblock_T)
- + n + 1);
- if (s != NULL)
+ if (c == '+')
+ {
+ garray_T *gap = &wi->wi_add;
+
+ /* Addition. TODO: search for matching entry? */
+ if (wi->wi_addlen < wlen)
+ wi->wi_addlen = wlen;
+ if (ga_grow(gap, 1) == FAIL)
+ {
+ len = 0;
+ break;
+ }
+ *(((dword_T **)gap->ga_data) + gap->ga_len) = dw;
+ ++gap->ga_len;
+ dw->dw_region = region;
+ dw->dw_flags = flags;
+ bl_used += sizeof(dword_T) + wlen;
+ }
+ else
+ {
+ /*
+ * Check for a non-word character. If found it's
+ * going to be an nword.
+ * For an nword we split in two: the leading dword and
+ * the remainder. The dword goes in the hashtable
+ * with an nword_T, the remainder is put in the
+ * dword_T (starting with the first non-word
+ * character).
+ */
+ cc = NUL;
+ for (p = dw->dw_word; *p != NUL; mb_ptr_adv(p))
+ if (!spell_iswordc(p))
{
- s->sb_next = load_lp->sl_block;
- load_lp->sl_block = s;
- s->sb_data[0] = p[-1];
- p = s->sb_data + 1;
+ cc = *p;
+ *p = NUL;
+ break;
}
- }
- mch_memmove(p, word, n + 1);
- hash = hash_hash(p);
- hi = hash_lookup(&load_lp->sl_ht, p, hash);
+ /* check if we already have this dword */
+ hash = hash_hash(dw->dw_word);
+ hi = hash_lookup(&wi->wi_ht, dw->dw_word, hash);
if (!HASHITEM_EMPTY(hi))
{
- c = hi->hi_key[-1];
- if ((c & (CASE_MASK | RARE_MASK))
- == (p[-1] & (CASE_MASK | RARE_MASK)))
+ /* Existing entry. */
+ edw = HI2DWORD(hi);
+ if ((edw->dw_flags & (DW_CAP | DW_RARE))
+ == (dw->dw_flags & (DW_CAP | DW_RARE)))
{
if (p_verbose > 0)
smsg((char_u *)_("Warning: duplicate word \"%s\" in %s"),
- p, fname);
+ dw->dw_word, fname);
+ }
+ }
+
+ if (cc != NUL) /* nword */
+ {
+ if (HASHITEM_EMPTY(hi)
+ || (edw->dw_flags & DW_NWORD) == 0)
+ {
+ sblock_T *sb;
+
+ /* Need to allocate a new nword_T. Put it in an
+ * sblock_T, so that we can free it later. */
+ sb = (sblock_T *)alloc(
+ (unsigned)(sizeof(sblock_T)
+ + sizeof(nword_T) + wlen));
+ if (sb == NULL)
+ {
+ len = 0;
+ break;
+ }
+ sb->sb_next = load_lp->sl_block;
+ load_lp->sl_block = sb;
+ nw = (nword_T *)sb->sb_data;
+
+ ga_init2(&nw->nw_ga, sizeof(dword_T *), 4);
+ nw->nw_maxlen = 0;
+ STRCPY(nw->nw_word, dw->dw_word);
+ if (!HASHITEM_EMPTY(hi))
+ {
+ /* Note: the nw_region and nw_flags are for
+ * the dword that matches with the start
+ * of this nword, not for the nword
+ * itself! */
+ nw->nw_region = edw->dw_region;
+ nw->nw_flags = edw->dw_flags | DW_NWORD;
+
+ /* Remove the dword item so that we can
+ * add it as an nword. */
+ hash_remove(&wi->wi_ht, hi);
+ hi = hash_lookup(&wi->wi_ht,
+ nw->nw_word, hash);
+ }
+ else
+ {
+ nw->nw_region = 0;
+ nw->nw_flags = DW_NWORD;
+ }
}
else
- hi->hi_key[-1] |= (p[-1] & (CASE_MASK | RARE_MASK));
+ nw = HI2NWORD(hi);
+ }
+
+ if (HASHITEM_EMPTY(hi))
+ {
+ /* Add new dword or nword entry. */
+ hash_add_item(&wi->wi_ht, hi, cc == NUL
+ ? dw->dw_word : nw->nw_word, hash);
+ if (cc == NUL)
+ {
+ /* New dword: init the values and count the
+ * used space. */
+ dw->dw_flags = DW_DWORD | flags;
+ dw->dw_region = region;
+ bl_used += sizeof(dword_T) + wlen;
+ }
+ }
+ else if (cc == NUL)
+ {
+ /* existing dword: add the region and flags */
+ dw = edw;
+ dw->dw_region |= region;
+ dw->dw_flags |= DW_DWORD | flags;
+ }
+
+ if (cc != NUL)
+ {
+ /* Use the dword for the non-word character and
+ * following characters. */
+ dw->dw_region = region;
+ dw->dw_flags = flags;
+ STRCPY(dw->dw_word + 1, p + 1);
+ dw->dw_word[0] = cc;
+ l = wlen - (p - dw->dw_word);
+ bl_used += sizeof(dword_T) + l;
+ if (nw->nw_maxlen < l)
+ nw->nw_maxlen = l;
+
+ /* Add the dword to the growarray in the nword. */
+ if (ga_grow(&nw->nw_ga, 1) == FAIL)
+ {
+ len = 0;
+ break;
+ }
+ *((dword_T **)nw->nw_ga.ga_data + nw->nw_ga.ga_len)
+ = dw;
+ ++nw->nw_ga.ga_len;
}
- else
- hash_add_item(&load_lp->sl_ht, hi, p, hash);
}
}
- while (l > 0 && *np < ' ')
- {
+ /* Skip over CR and NL characters and trailing white space. */
+ while (np < rbuf_end && *np <= ' ')
++np;
- --l;
- }
}
}
close(fd);
+theend:
+ sourcing_name = save_sourcing_name;
+ sourcing_lnum = save_sourcing_lnum;
+ vim_free(rbuf);
}
/*
@@ -672,15 +1049,19 @@ spell_reload()
slang_T *lp;
sblock_T *sp;
+ /* Initialize the table for spell_iswordc(). */
+ init_spell_chartab();
+
/* Unload all allocated memory. */
while (first_lang != NULL)
{
lp = first_lang;
first_lang = lp->sl_next;
- hash_clear(&lp->sl_ht);
- ga_clear(&lp->sl_match);
- ga_clear(&lp->sl_add);
+ hash_clear(&lp->sl_fwords.wi_ht);
+ ga_clear(&lp->sl_fwords.wi_add);
+ hash_clear(&lp->sl_kwords.wi_ht);
+ ga_clear(&lp->sl_kwords.wi_add);
while (lp->sl_block != NULL)
{
sp = lp->sl_block;