summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBram Moolenaar <Bram@vim.org>2005-06-16 21:51:00 +0000
committerBram Moolenaar <Bram@vim.org>2005-06-16 21:51:00 +0000
commitea424166e2a53649eea8d8899fc9294ca023964c (patch)
treec35c0b8daf356f341979d346c440336bcd13c899
parent78599adb5219f98053673ca27683f922a9ca338b (diff)
downloadvim-ea424166e2a53649eea8d8899fc9294ca023964c.zip
updated for version 7.0086
-rw-r--r--src/ex_docmd.c2
-rw-r--r--src/message.c37
-rw-r--r--src/os_vms.c11
-rw-r--r--src/spell.c704
4 files changed, 563 insertions, 191 deletions
diff --git a/src/ex_docmd.c b/src/ex_docmd.c
index fbb0f5a78..20c58f1d1 100644
--- a/src/ex_docmd.c
+++ b/src/ex_docmd.c
@@ -4146,7 +4146,7 @@ expand_filename(eap, cmdlinep, errormsgp)
/* For a shell command a '!' must be escaped. */
if ((eap->usefilter || eap->cmdidx == CMD_bang)
- && vim_strpbrk(repl, "!&;()") != NULL)
+ && vim_strpbrk(repl, (char_u *)"!&;()") != NULL)
{
char_u *l;
diff --git a/src/message.c b/src/message.c
index ff7964989..79dbc2e17 100644
--- a/src/message.c
+++ b/src/message.c
@@ -657,40 +657,7 @@ emsg2(s, a1)
return emsg3(s, a1, NULL);
}
-/*
- * Print an error message with one or two "%s" and one or two string arguments.
- */
- int
-emsg3(s, a1, a2)
- char_u *s, *a1, *a2;
-{
- if ((emsg_off > 0 && vim_strchr(p_debug, 'm') == NULL)
-#ifdef FEAT_EVAL
- || emsg_skip > 0
-#endif
- )
- return TRUE; /* no error messages at the moment */
- vim_snprintf((char *)IObuff, IOSIZE, (char *)s, (char *)a1, (char *)a2);
- return emsg(IObuff);
-}
-
-/*
- * Print an error message with one "%ld" and one long int argument.
- */
- int
-emsgn(s, n)
- char_u *s;
- long n;
-{
- if ((emsg_off > 0 && vim_strchr(p_debug, 'm') == NULL)
-#ifdef FEAT_EVAL
- || emsg_skip > 0
-#endif
- )
- return TRUE; /* no error messages at the moment */
- vim_snprintf((char *)IObuff, IOSIZE, (char *)s, n);
- return emsg(IObuff);
-}
+/* emsg3() and emsgn() are in misc2.c to avoid warnings for the prototypes. */
void
emsg_invreg(name)
@@ -3965,7 +3932,7 @@ vim_snprintf(str, str_m, fmt, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10)
/* zero padding as requested by the precision or by the minimal
* field width for numeric conversions required? */
- if (number_of_zeros_to_pad <= 0)
+ if (number_of_zeros_to_pad == 0)
{
/* will not copy first part of numeric right now, *
* force it to be copied later in its entirety */
diff --git a/src/os_vms.c b/src/os_vms.c
index c26937d8a..6fd05d503 100644
--- a/src/os_vms.c
+++ b/src/os_vms.c
@@ -300,7 +300,7 @@ vms_read(char *inbuf, size_t nbytes)
{
int status, function, len;
TT_MODE tt_mode;
- ITEM itmlst[2];
+ ITEM itmlst[3];
static long trm_mask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
/* whatever happened earlier we need an iochan here */
@@ -308,10 +308,11 @@ vms_read(char *inbuf, size_t nbytes)
tt_mode = get_tty();
vul_item(&itmlst[0], 0, TRM$_MODIFIERS,
- (char *)( TRM$M_TM_ESCAPE | TRM$M_TM_TIMED | TRM$M_TM_NOECHO |
- TRM$M_TM_NOEDIT | TRM$M_TM_NOFILTR |
- TRM$M_TM_NORECALL| TRM$M_TM_TRMNOECHO), 0);
- vul_item(&itmlst[1], sizeof(trm_mask), TRM$_TERM, (char *)&trm_mask, 0);
+ (char *)( TRM$M_TM_ESCAPE | TRM$M_TM_TIMED | TRM$M_TM_NOECHO |
+ TRM$M_TM_NOEDIT | TRM$M_TM_NOFILTR |
+ TRM$M_TM_NORECALL| TRM$M_TM_TRMNOECHO), 0);
+ vul_item(&itmlst[1], 0, TRM$_TIMEOUT, (char *) 1, 0 );
+ vul_item(&itmlst[2], sizeof(trm_mask), TRM$_TERM, (char *)&trm_mask, 0);
function = (IO$_READLBLK | IO$M_EXTEND);
memset(inbuf, 0, nbytes);
diff --git a/src/spell.c b/src/spell.c
index 54ca1036b..9c00a269f 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -189,9 +189,6 @@ typedef long idx_T;
#define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP)
-#define WF_USED 0x10000 /* Word was found in text. Must be in separate
- byte before region and flags. */
-
#define BY_NOFLAGS 0 /* end of word without flags or region */
#define BY_FLAGS 1 /* end of word, flag byte follows */
#define BY_INDEX 2 /* child is shared, index follows */
@@ -243,7 +240,13 @@ struct slang_S
int sl_followup; /* SAL followup */
int sl_collapse; /* SAL collapse_result */
int sl_rem_accents; /* SAL remove_accents */
- char_u *sl_map; /* string with similar chars from MAP lines */
+ int sl_has_map; /* TRUE if there is a MAP line */
+#ifdef FEAT_MBYTE
+ hashtab_T sl_map_hash; /* MAP for multi-byte chars */
+ int sl_map_array[256]; /* MAP for first 256 chars */
+#else
+ char_u sl_map_array[256]; /* MAP for first 256 chars */
+#endif
};
/* First language that is loaded, start of the linked list of loaded
@@ -329,7 +332,6 @@ typedef struct suggest_S
#define SCORE_ALLCAP 120 /* need all-cap case */
#define SCORE_REGION 70 /* word is for different region */
#define SCORE_RARE 180 /* rare word */
-#define SCORE_NOTUSED 11 /* word not found in text yet */
/* score for edit distance */
#define SCORE_SWAP 90 /* swap two characters */
@@ -402,22 +404,59 @@ static int set_spell_finish __ARGS((spelltab_T *new_st));
#endif
/*
+ * For finding suggestion: At each node in the tree these states are tried:
+ */
+typedef enum
+{
+ STATE_START = 0, /* At start of node, check if word may end or
+ * split word. */
+ STATE_SPLITUNDO, /* Undo word split. */
+ STATE_ENDNUL, /* Past NUL bytes at start of the node. */
+ STATE_PLAIN, /* Use each byte of the node. */
+ STATE_DEL, /* Delete a byte from the bad word. */
+ STATE_INS, /* Insert a byte in the bad word. */
+ STATE_SWAP, /* Swap two bytes. */
+ STATE_UNSWAP, /* Undo swap two bytes. */
+ STATE_SWAP3, /* Swap two bytes over three. */
+ STATE_UNSWAP3, /* Undo Swap two bytes over three. */
+ STATE_ROT3L, /* Rotate three bytes left */
+ STATE_UNROT3L, /* Undo rotate three bytes left */
+ STATE_ROT3R, /* Rotate three bytes right */
+ STATE_UNROT3R, /* Undo rotate three bytes right */
+ STATE_REP_INI, /* Prepare for using REP items. */
+ STATE_REP, /* Use matching REP items from the .aff file. */
+ STATE_REP_UNDO, /* Undo a REP item replacement. */
+ STATE_FINAL /* End of this node. */
+} state_T;
+
+/*
* Struct to keep the state at each level in spell_try_change().
*/
typedef struct trystate_S
{
- int ts_state; /* state at this level, STATE_ */
+ state_T ts_state; /* state at this level, STATE_ */
int ts_score; /* score */
- int ts_curi; /* index in list of child nodes */
- int ts_fidx; /* index in fword[], case-folded bad word */
- int ts_fidxtry; /* ts_fidx at which bytes may be changed */
- int ts_twordlen; /* valid length of tword[] */
+ short ts_curi; /* index in list of child nodes */
+ char_u ts_fidx; /* index in fword[], case-folded bad word */
+ char_u ts_fidxtry; /* ts_fidx at which bytes may be changed */
+ char_u ts_twordlen; /* valid length of tword[] */
+#ifdef FEAT_MBYTE
+ char_u ts_tcharlen; /* number of bytes in tword character */
+ char_u ts_tcharidx; /* current byte index in tword character */
+ char_u ts_isdiff; /* DIFF_ values */
+ char_u ts_fcharstart; /* index in fword where badword char started */
+#endif
idx_T ts_arridx; /* index in tree array, start of node */
char_u ts_save_prewordlen; /* saved "prewordlen" */
- int ts_save_splitoff; /* su_splitoff saved here */
- int ts_save_badflags; /* badflags saved here */
+ char_u ts_save_splitoff; /* su_splitoff saved here */
+ char_u ts_save_badflags; /* badflags saved here */
} trystate_T;
+/* values for ts_isdiff */
+#define DIFF_NONE 0 /* no different byte (yet) */
+#define DIFF_YES 1 /* different byte found */
+#define DIFF_INSERT 2 /* inserting character */
+
static slang_T *slang_alloc __ARGS((char_u *lang));
static void slang_free __ARGS((slang_T *lp));
static void slang_clear __ARGS((slang_T *lp));
@@ -441,9 +480,8 @@ static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int s
static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword));
static void spell_try_soundalike __ARGS((suginfo_T *su));
static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags));
-#if 0
+static void set_map_str __ARGS((slang_T *lp, char_u *map));
static int similar_chars __ARGS((slang_T *slang, int c1, int c2));
-#endif
#ifdef RESCORE
static void add_suggestion __ARGS((suginfo_T *su, char_u *goodword, int use_score, int had_bonus));
#else
@@ -792,10 +830,6 @@ find_word(mip, keepcap)
{
flags = idxs[arridx];
- /* Set a flag for words that were used. The region and case
- * doesn't matter here, it's only used to rate the suggestions. */
- idxs[arridx] = flags | WF_USED;
-
if (keepcap)
{
/* For "keepcap" tree the case is always right. */
@@ -1128,8 +1162,20 @@ slang_clear(lp)
ga_clear(gap);
}
- vim_free(lp->sl_map);
- lp->sl_map = NULL;
+#ifdef FEAT_MBYTE
+ {
+ int todo = lp->sl_map_hash.ht_used;
+ hashitem_T *hi;
+
+ for (hi = lp->sl_map_hash.ht_array; todo > 0; ++hi)
+ if (!HASHITEM_EMPTY(hi))
+ {
+ --todo;
+ vim_free(hi->hi_key);
+ }
+ }
+ hash_clear(&lp->sl_map_hash);
+#endif
}
/*
@@ -1369,7 +1415,8 @@ formerr:
for (i = 0; i < cnt; ++i)
p[i] = getc(fd); /* <mapstr> */
p[i] = NUL;
- lp->sl_map = p;
+ set_map_str(lp, p);
+ vim_free(p);
/* round 1: <LWORDTREE>
@@ -4414,6 +4461,12 @@ allcap_copy(word, wcopy)
/*
* Try finding suggestions by adding/removing/swapping letters.
+ *
+ * This uses a state machine. At each node in the tree we try various
+ * operations. When trying if an operation work "depth" is increased and the
+ * stack[] is used to store info. This allows combinations, thus insert one
+ * character, replace one and delete another. The number of changes is
+ * limited by su->su_maxscore, checked in try_deeper().
*/
static void
spell_try_change(su)
@@ -4432,8 +4485,8 @@ spell_try_change(su)
char_u *byts;
idx_T *idxs;
int depth;
- int c;
- int n;
+ int c, c2, c3;
+ int n = 0;
int flags;
int badflags;
garray_T *gap;
@@ -4441,7 +4494,7 @@ spell_try_change(su)
int len;
char_u *p;
fromto_T *ftp;
- int fl, tl;
+ int fl = 0, tl;
/* get caps flags for bad word */
badflags = captype(su->su_badptr, su->su_badptr + su->su_badlen);
@@ -4450,26 +4503,6 @@ spell_try_change(su)
* to find matches (esp. REP items). */
STRCPY(fword, su->su_fbadword);
- /*
- * At each node in the tree these states are tried:
- */
-#define STATE_START 0 /* At start of node, check if word may end or
- * split word. */
-#define STATE_SPLITUNDO 1 /* Undo word split. */
-#define STATE_ENDNUL 2 /* Past NUL bytes at start of the node. */
-#define STATE_PLAIN 3 /* Use each byte of the node. */
-#define STATE_DEL 4 /* Delete a byte from the bad word. */
-#define STATE_INS 5 /* Insert a byte in the bad word. */
-#define STATE_SWAP 6 /* Swap two bytes. */
-#define STATE_SWAP3A 7 /* Swap two bytes over three. */
-#define STATE_ROT3L 8 /* Rotate three bytes left */
-#define STATE_ROT3R 9 /* Rotate three bytes right */
-#define STATE_ROT_UNDO 10 /* undo rotating */
-#define STATE_REP_INI 11 /* Prepare for using REP items. */
-#define STATE_REP 12 /* Use matching REP items from the .aff file. */
-#define STATE_REP_UNDO 13 /* Undo a REP item replacement. */
-#define STATE_FINAL 99 /* End of this node. */
-
for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
lp->lp_slang != NULL; ++lp)
@@ -4498,7 +4531,17 @@ spell_try_change(su)
stack[0].ts_fidxtry = 0;
stack[0].ts_twordlen = 0;
stack[0].ts_arridx = 0;
+#ifdef FEAT_MBYTE
+ stack[0].ts_tcharlen = 0;
+#endif
+ /*
+ * Loop to find all suggestions. At each round we either:
+ * - For the current state try one operation, advance "ts_curi",
+ * increase "depth".
+ * - When a state is done go to the next, set "ts_state".
+ * - When all states are tried decrease "depth".
+ */
while (depth >= 0 && !got_int)
{
sp = &stack[depth];
@@ -4559,10 +4602,6 @@ spell_try_change(su)
if (flags & WF_RARE)
newscore += SCORE_RARE;
- /* Words that were not found in the text get a penalty. */
- if ((flags & WF_USED) == 0)
- newscore += SCORE_NOTUSED;
-
if (!spell_valid_case(badflags,
captype(preword + prewordlen, NULL)))
newscore += SCORE_ICASE;
@@ -4576,7 +4615,12 @@ spell_try_change(su)
#endif
);
}
- else if (sp->ts_fidx >= sp->ts_fidxtry)
+ else if (sp->ts_fidx >= sp->ts_fidxtry
+#ifdef FEAT_MBYTE
+ /* Don't split halfway a character. */
+ && (!has_mbyte || sp->ts_tcharlen == 0)
+#endif
+ )
{
/* The word in the tree ends but the badword
* continues: try inserting a space and check that a valid
@@ -4663,165 +4707,420 @@ spell_try_change(su)
/* Normal byte, go one level deeper. If it's not equal to
* the byte in the bad word adjust the score. But don't
* even try when the byte was already changed. */
- if (c == fword[sp->ts_fidx])
- newscore = 0;
-
- /* TODO: this is too slow and comparing bytes isn't right
- * for multi-byte characters. */
-#if 0
- else if (lp->lp_slang->sl_map != NULL
- && similar_chars(lp->lp_slang,
- c, fword[sp->ts_fidx]))
- newscore = SCORE_SIMILAR;
+ if (c == fword[sp->ts_fidx]
+#ifdef FEAT_MBYTE
+ || (sp->ts_tcharlen > 0
+ && sp->ts_isdiff != DIFF_NONE)
#endif
+ )
+ newscore = 0;
else
newscore = SCORE_SUBST;
if ((newscore == 0 || sp->ts_fidx >= sp->ts_fidxtry)
&& try_deeper(su, stack, depth, newscore))
{
++depth;
- ++stack[depth].ts_fidx;
- tword[stack[depth].ts_twordlen++] = c;
- stack[depth].ts_arridx = idxs[arridx];
+ sp = &stack[depth];
+ ++sp->ts_fidx;
+ tword[sp->ts_twordlen++] = c;
+ sp->ts_arridx = idxs[arridx];
+#ifdef FEAT_MBYTE
+ if (newscore == SCORE_SUBST)
+ sp->ts_isdiff = DIFF_YES;
+ if (has_mbyte)
+ {
+ /* Multi-byte characters are a bit complicated to
+ * handle: They differ when any of the bytes
+ * differ and then their length may also differ. */
+ if (sp->ts_tcharlen == 0)
+ {
+ /* First byte. */
+ sp->ts_tcharidx = 0;
+ sp->ts_tcharlen = MB_BYTE2LEN(c);
+ sp->ts_fcharstart = sp->ts_fidx - 1;
+ sp->ts_isdiff = (newscore != 0)
+ ? DIFF_YES : DIFF_NONE;
+ }
+ else if (sp->ts_isdiff == DIFF_INSERT)
+ /* When inserting trail bytes don't advance in
+ * the bad word. */
+ --sp->ts_fidx;
+ if (++sp->ts_tcharidx == sp->ts_tcharlen)
+ {
+ /* Last byte of character. */
+ if (sp->ts_isdiff == DIFF_YES)
+ {
+ /* Correct ts_fidx for the byte length of
+ * the character (we didn't check that
+ * before). */
+ sp->ts_fidx = sp->ts_fcharstart
+ + MB_BYTE2LEN(
+ fword[sp->ts_fcharstart]);
+
+ /* For a similar character adjust score
+ * from SCORE_SUBST to SCORE_SIMILAR. */
+ if (lp->lp_slang->sl_has_map
+ && similar_chars(lp->lp_slang,
+ mb_ptr2char(tword
+ + sp->ts_twordlen
+ - sp->ts_tcharlen),
+ mb_ptr2char(fword
+ + sp->ts_fcharstart)))
+ sp->ts_score -=
+ SCORE_SUBST - SCORE_SIMILAR;
+ }
+
+ /* Starting a new char, reset the length. */
+ sp->ts_tcharlen = 0;
+ }
+ }
+ else
+#endif
+ {
+ /* If we found a similar char adjust the score.
+ * We do this after calling try_deeper() because
+ * it's slow. */
+ if (newscore != 0
+ && lp->lp_slang->sl_has_map
+ && similar_chars(lp->lp_slang,
+ c, fword[sp->ts_fidx - 1]))
+ sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
+ }
}
}
break;
case STATE_DEL:
- /* Try skipping one byte in the bad word (delete it). */
+#ifdef FEAT_MBYTE
+ /* When past the first byte of a multi-byte char don't try
+ * delete/insert/swap a character. */
+ if (has_mbyte && sp->ts_tcharlen > 0)
+ {
+ sp->ts_state = STATE_FINAL;
+ break;
+ }
+#endif
+ /*
+ * Try skipping one character in the bad word (delete it).
+ */
sp->ts_state = STATE_INS;
sp->ts_curi = 1;
if (fword[sp->ts_fidx] != NUL
&& try_deeper(su, stack, depth, SCORE_DEL))
{
++depth;
- ++stack[depth].ts_fidx;
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
+ stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]);
+ else
+#endif
+ ++stack[depth].ts_fidx;
break;
}
/*FALLTHROUGH*/
case STATE_INS:
- /* Insert one byte. Do this for each possible bytes at this
+ /* Insert one byte. Do this for each possible byte at this
* node. */
n = sp->ts_arridx;
if (sp->ts_curi > byts[n])
{
/* Done all bytes at this node, do next state. */
sp->ts_state = STATE_SWAP;
- sp->ts_curi = 1;
}
else
{
- /* Do one more byte at this node. */
+ /* Do one more byte at this node. Skip NUL bytes. */
n += sp->ts_curi++;
c = byts[n];
if (c != 0 && try_deeper(su, stack, depth, SCORE_INS))
{
++depth;
- tword[stack[depth].ts_twordlen++] = c;
- stack[depth].ts_arridx = idxs[n];
+ sp = &stack[depth];
+ tword[sp->ts_twordlen++] = c;
+ sp->ts_arridx = idxs[n];
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
+ {
+ fl = MB_BYTE2LEN(c);
+ if (fl > 1)
+ {
+ /* There are following bytes for the same
+ * character. We must find all bytes before
+ * trying delete/insert/swap/etc. */
+ sp->ts_tcharlen = fl;
+ sp->ts_tcharidx = 1;
+ sp->ts_isdiff = DIFF_INSERT;
+ }
+ }
+#endif
}
}
break;
case STATE_SWAP:
- /* Swap two bytes: "12" -> "21". This means looking for the
- * following byte at the current node and the current byte at
- * its child node. We change "fword" here, it's changed back
- * afterwards. TODO: should swap characters instead of bytes.
- * */
- c = fword[sp->ts_fidx];
- if (c != NUL && fword[sp->ts_fidx + 1] != NUL
- && try_deeper(su, stack, depth, SCORE_SWAP))
+ /*
+ * Swap two bytes in the bad word: "12" -> "21".
+ * We change "fword" here, it's changed back afterwards.
+ */
+ p = fword + sp->ts_fidx;
+ c = *p;
+ if (c == NUL)
+ {
+ /* End of word, can't swap or replace. */
+ sp->ts_state = STATE_FINAL;
+ break;
+ }
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
{
- sp->ts_state = STATE_SWAP3A;
+ n = mb_ptr2len_check(p);
+ c = mb_ptr2char(p);
+ c2 = mb_ptr2char(p + n);
+ }
+ else
+#endif
+ c2 = p[1];
+ if (c == c2)
+ {
+ /* Characters are identical, swap won't do anything. */
+ sp->ts_state = STATE_SWAP3;
+ break;
+ }
+ if (c2 != NUL && try_deeper(su, stack, depth, SCORE_SWAP))
+ {
+ sp->ts_state = STATE_UNSWAP;
++depth;
- fword[sp->ts_fidx] = fword[sp->ts_fidx + 1];
- fword[sp->ts_fidx + 1] = c;
- stack[depth].ts_fidxtry = sp->ts_fidx + 2;
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
+ {
+ fl = mb_char2len(c2);
+ mch_memmove(p, p + n, fl);
+ mb_char2bytes(c, p + fl);
+ stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
+ }
+ else
+#endif
+ {
+ p[0] = c2;
+ p[1] = c;
+ stack[depth].ts_fidxtry = sp->ts_fidx + 2;
+ }
}
else
/* If this swap doesn't work then SWAP3 won't either. */
sp->ts_state = STATE_REP_INI;
break;
- case STATE_SWAP3A:
- /* First undo the STATE_SWAP swap: "21" -> "12". */
- c = fword[sp->ts_fidx];
- fword[sp->ts_fidx] = fword[sp->ts_fidx + 1];
- fword[sp->ts_fidx + 1] = c;
+ case STATE_UNSWAP:
+ /* Undo the STATE_SWAP swap: "21" -> "12". */
+ p = fword + sp->ts_fidx;
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
+ {
+ n = MB_BYTE2LEN(*p);
+ c = mb_ptr2char(p + n);
+ mch_memmove(p + MB_BYTE2LEN(p[n]), p, n);
+ mb_char2bytes(c, p);
+ }
+ else
+#endif
+ {
+ c = *p;
+ *p = p[1];
+ p[1] = c;
+ }
+ /*FALLTHROUGH*/
+ case STATE_SWAP3:
/* Swap two bytes, skipping one: "123" -> "321". We change
- * "fword" here, it's changed back afterwards. TODO: should
- * swap characters instead of bytes. */
- c = fword[sp->ts_fidx];
- if (c != NUL && fword[sp->ts_fidx + 1] != NUL
- && fword[sp->ts_fidx + 2] != NUL
- && try_deeper(su, stack, depth, SCORE_SWAP3))
+ * "fword" here, it's changed back afterwards. */
+ p = fword + sp->ts_fidx;
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
{
- sp->ts_state = STATE_ROT3L;
+ n = mb_ptr2len_check(p);
+ c = mb_ptr2char(p);
+ fl = mb_ptr2len_check(p + n);
+ c2 = mb_ptr2char(p + n);
+ c3 = mb_ptr2char(p + n + fl);
+ }
+ else
+#endif
+ {
+ c = *p;
+ c2 = p[1];
+ c3 = p[2];
+ }
+
+ /* When characters are identical: "121" then SWAP3 result is
+ * identical, ROT3L result is same as SWAP: "211", ROT3L
+ * result is same as SWAP on next char: "112". Thus skip all
+ * swapping. Also skip when c3 is NUL. */
+ if (c == c3 || c3 == NUL)
+ {
+ sp->ts_state = STATE_REP_INI;
+ break;
+ }
+ if (try_deeper(su, stack, depth, SCORE_SWAP3))
+ {
+ sp->ts_state = STATE_UNSWAP3;
++depth;
- fword[sp->ts_fidx] = fword[sp->ts_fidx + 2];
- fword[sp->ts_fidx + 2] = c;
- stack[depth].ts_fidxtry = sp->ts_fidx + 3;
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
+ {
+ tl = mb_char2len(c3);
+ mch_memmove(p, p + n + fl, tl);
+ mb_char2bytes(c2, p + tl);
+ mb_char2bytes(c, p + fl + tl);
+ stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl;
+ }
+ else
+#endif
+ {
+ p[0] = p[2];
+ p[2] = c;
+ stack[depth].ts_fidxtry = sp->ts_fidx + 3;
+ }
}
else
sp->ts_state = STATE_REP_INI;
break;
+ case STATE_UNSWAP3:
+ /* Undo STATE_SWAP3: "321" -> "123" */
+ p = fword + sp->ts_fidx;
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
+ {
+ n = MB_BYTE2LEN(*p);
+ c2 = mb_ptr2char(p + n);
+ fl = MB_BYTE2LEN(p[n]);
+ c = mb_ptr2char(p + n + fl);
+ tl = MB_BYTE2LEN(p[n + fl]);
+ mch_memmove(p + fl + tl, p, n);
+ mb_char2bytes(c, p);
+ mb_char2bytes(c2, p + tl);
+ }
+ else
+#endif
+ {
+ c = *p;
+ *p = p[2];
+ p[2] = c;
+ }
+ /*FALLTHROUGH*/
+
case STATE_ROT3L:
- /* First undo STATE_SWAP3A: "321" -> "123" */
- c = fword[sp->ts_fidx];
- fword[sp->ts_fidx] = fword[sp->ts_fidx + 2];
- fword[sp->ts_fidx + 2] = c;
-
- /* Rotate three bytes left: "123" -> "231". We change
- * "fword" here, it's changed back afterwards. TODO: should
- * swap characters instead of bytes. */
+ /* Rotate three characters left: "123" -> "231". We change
+ * "fword" here, it's changed back afterwards. */
if (try_deeper(su, stack, depth, SCORE_SWAP3))
{
- sp->ts_state = STATE_ROT3R;
+ sp->ts_state = STATE_UNROT3L;
++depth;
- c = fword[sp->ts_fidx];
- fword[sp->ts_fidx] = fword[sp->ts_fidx + 1];
- fword[sp->ts_fidx + 1] = fword[sp->ts_fidx + 2];
- fword[sp->ts_fidx + 2] = c;
- stack[depth].ts_fidxtry = sp->ts_fidx + 3;
+ p = fword + sp->ts_fidx;
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
+ {
+ n = mb_ptr2len_check(p);
+ c = mb_ptr2char(p);
+ fl = mb_ptr2len_check(p + n);
+ fl += mb_ptr2len_check(p + n + fl);
+ mch_memmove(p, p + n, fl);
+ mb_char2bytes(c, p + fl);
+ stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
+ }
+ else
+#endif
+ {
+ c = *p;
+ *p = p[1];
+ p[1] = p[2];
+ p[2] = c;
+ stack[depth].ts_fidxtry = sp->ts_fidx + 3;
+ }
}
else
sp->ts_state = STATE_REP_INI;
break;
- case STATE_ROT3R:
- /* First undo STATE_ROT3L: "231" -> "123" */
- c = fword[sp->ts_fidx + 2];
- fword[sp->ts_fidx + 2] = fword[sp->ts_fidx + 1];
- fword[sp->ts_fidx + 1] = fword[sp->ts_fidx];
- fword[sp->ts_fidx] = c;
+ case STATE_UNROT3L:
+ /* Undo STATE_ROT3L: "231" -> "123" */
+ p = fword + sp->ts_fidx;
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
+ {
+ n = MB_BYTE2LEN(*p);
+ n += MB_BYTE2LEN(p[n]);
+ c = mb_ptr2char(p + n);
+ tl = MB_BYTE2LEN(p[n]);
+ mch_memmove(p + tl, p, n);
+ mb_char2bytes(c, p);
+ }
+ else
+#endif
+ {
+ c = p[2];
+ p[2] = p[1];
+ p[1] = *p;
+ *p = c;
+ }
+ /*FALLTHROUGH*/
+ case STATE_ROT3R:
/* Rotate three bytes right: "123" -> "312". We change
- * "fword" here, it's changed back afterwards. TODO: should
- * swap characters instead of bytes. */
+ * "fword" here, it's changed back afterwards. */
if (try_deeper(su, stack, depth, SCORE_SWAP3))
{
- sp->ts_state = STATE_ROT_UNDO;
+ sp->ts_state = STATE_UNROT3R;
++depth;
- c = fword[sp->ts_fidx + 2];
- fword[sp->ts_fidx + 2] = fword[sp->ts_fidx + 1];
- fword[sp->ts_fidx + 1] = fword[sp->ts_fidx];
- fword[sp->ts_fidx] = c;
- stack[depth].ts_fidxtry = sp->ts_fidx + 3;
+ p = fword + sp->ts_fidx;
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
+ {
+ n = mb_ptr2len_check(p);
+ n += mb_ptr2len_check(p + n);
+ c = mb_ptr2char(p + n);
+ tl = mb_ptr2len_check(p + n);
+ mch_memmove(p + tl, p, n);
+ mb_char2bytes(c, p);
+ stack[depth].ts_fidxtry = sp->ts_fidx + n + tl;
+ }
+ else
+#endif
+ {
+ c = p[2];
+ p[2] = p[1];
+ p[1] = *p;
+ *p = c;
+ stack[depth].ts_fidxtry = sp->ts_fidx + 3;
+ }
}
else
sp->ts_state = STATE_REP_INI;
break;
- case STATE_ROT_UNDO:
+ case STATE_UNROT3R:
/* Undo STATE_ROT3R: "312" -> "123" */
- c = fword[sp->ts_fidx];
- fword[sp->ts_fidx] = fword[sp->ts_fidx + 1];
- fword[sp->ts_fidx + 1] = fword[sp->ts_fidx + 2];
- fword[sp->ts_fidx + 2] = c;
+ p = fword + sp->ts_fidx;
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
+ {
+ c = mb_ptr2char(p);
+ tl = MB_BYTE2LEN(*p);
+ n = MB_BYTE2LEN(p[tl]);
+ n += MB_BYTE2LEN(p[tl + n]);
+ mch_memmove(p, p + tl, n);
+ mb_char2bytes(c, p + n);
+ }
+ else
+#endif
+ {
+ c = *p;
+ *p = p[1];
+ p[1] = p[2];
+ p[2] = c;
+ }
/*FALLTHROUGH*/
case STATE_REP_INI:
@@ -4837,7 +5136,7 @@ spell_try_change(su)
}
/* Use the first byte to quickly find the first entry that
- * matches. If the index is -1 there is none. */
+ * may match. If the index is -1 there is none. */
sp->ts_curi = lp->lp_slang->sl_rep_first[fword[sp->ts_fidx]];
if (sp->ts_curi < 0)
{
@@ -4850,8 +5149,8 @@ spell_try_change(su)
case STATE_REP:
/* Try matching with REP items from the .aff file. For each
- * match replace the charactes and check if the resulting word
- * is valid. */
+ * match replace the characters and check if the resulting
+ * word is valid. */
p = fword + sp->ts_fidx;
gap = &lp->lp_slang->sl_rep;
@@ -4878,6 +5177,9 @@ spell_try_change(su)
mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1);
mch_memmove(p, ftp->ft_to, tl);
stack[depth].ts_fidxtry = sp->ts_fidx + tl;
+#ifdef FEAT_MBYTE
+ stack[depth].ts_tcharlen = 0;
+#endif
break;
}
}
@@ -4928,13 +5230,10 @@ try_deeper(su, stack, depth, score_add)
if (newscore >= su->su_maxscore)
return FALSE;
+ stack[depth + 1] = stack[depth];
stack[depth + 1].ts_state = STATE_START;
stack[depth + 1].ts_score = newscore;
stack[depth + 1].ts_curi = 1; /* start just after length byte */
- stack[depth + 1].ts_fidx = stack[depth].ts_fidx;
- stack[depth + 1].ts_fidxtry = stack[depth].ts_fidxtry;
- stack[depth + 1].ts_twordlen = stack[depth].ts_twordlen;
- stack[depth + 1].ts_arridx = stack[depth].ts_arridx;
return TRUE;
}
@@ -5286,7 +5585,90 @@ make_case_word(fword, cword, flags)
STRCPY(cword, fword);
}
-#if 0
+/*
+ * Use map string "map" for languages "lp".
+ */
+ static void
+set_map_str(lp, map)
+ slang_T *lp;
+ char_u *map;
+{
+ char_u *p;
+ int headc = 0;
+ int c;
+ int i;
+
+ if (*map == NUL)
+ {
+ lp->sl_has_map = FALSE;
+ return;
+ }
+ lp->sl_has_map = TRUE;
+
+ /* Init the array and hash table empty. */
+ for (i = 0; i < 256; ++i)
+ lp->sl_map_array[i] = 0;
+#ifdef FEAT_MBYTE
+ hash_init(&lp->sl_map_hash);
+#endif
+
+ /*
+ * The similar characters are stored separated with slashes:
+ * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and
+ * before the same slash. For characters above 255 sl_map_hash is used.
+ */
+ for (p = map; *p != NUL; )
+ {
+#ifdef FEAT_MBYTE
+ c = mb_ptr2char_adv(&p);
+#else
+ c = *p++;
+#endif
+ if (c == '/')
+ headc = 0;
+ else
+ {
+ if (headc == 0)
+ headc = c;
+
+#ifdef FEAT_MBYTE
+ /* Characters above 255 don't fit in sl_map_array[], put them in
+ * the hash table. Each entry is the char, a NUL the headchar and
+ * a NUL. */
+ if (c >= 256)
+ {
+ int cl = mb_char2len(c);
+ int headcl = mb_char2len(headc);
+ char_u *b;
+ hash_T hash;
+ hashitem_T *hi;
+
+ b = alloc((unsigned)(cl + headcl + 2));
+ if (b == NULL)
+ return;
+ mb_char2bytes(c, b);
+ b[cl] = NUL;
+ mb_char2bytes(headc, b + cl + 1);
+ b[cl + 1 + headcl] = NUL;
+ hash = hash_hash(b);
+ hi = hash_lookup(&lp->sl_map_hash, b, hash);
+ if (HASHITEM_EMPTY(hi))
+ hash_add_item(&lp->sl_map_hash, hi, b, hash);
+ else
+ {
+ /* This should have been checked when generating the .spl
+ * file. */
+ EMSG(_("E999: duplicate char in MAP entry"));
+ vim_free(b);
+ }
+ }
+ else
+#endif
+ lp->sl_map_array[c] = headc;
+ }
+ }
+}
+
/*
* Return TRUE if "c1" and "c2" are similar characters according to the MAP
* lines in the .aff file.
@@ -5297,21 +5679,43 @@ similar_chars(slang, c1, c2)
int c1;
int c2;
{
- char_u *p1;
- char_u *p2;
-
- /* The similar characters are stored separated with slashes:
- * "aaa/bbb/ccc/". Search for each character and if the next slash is the
- * same one they are in the same MAP entry. */
- p1 = vim_strchr(slang->sl_map, c1);
- if (p1 == NULL)
- return FALSE;
- p2 = vim_strchr(slang->sl_map, c2);
- if (p2 == NULL)
+ int m1, m2;
+#ifdef FEAT_MBYTE
+ char_u buf[MB_MAXBYTES];
+ hashitem_T *hi;
+
+ if (c1 >= 256)
+ {
+ buf[mb_char2bytes(c1, buf)] = 0;
+ hi = hash_find(&slang->sl_map_hash, buf);
+ if (HASHITEM_EMPTY(hi))
+ m1 = 0;
+ else
+ m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
+ }
+ else
+#endif
+ m1 = slang->sl_map_array[c1];
+ if (m1 == 0)
return FALSE;
- return vim_strchr(p1, '/') == vim_strchr(p2, '/');
-}
+
+
+#ifdef FEAT_MBYTE
+ if (c2 >= 256)
+ {
+ buf[mb_char2bytes(c2, buf)] = 0;
+ hi = hash_find(&slang->sl_map_hash, buf);
+ if (HASHITEM_EMPTY(hi))
+ m2 = 0;
+ else
+ m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
+ }
+ else
#endif
+ m2 = slang->sl_map_array[c2];
+
+ return m1 == m2;
+}
/*
* Add a suggestion to the list of suggestions.