summaryrefslogtreecommitdiff
path: root/src/spell.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/spell.c')
-rw-r--r--src/spell.c717
1 files changed, 511 insertions, 206 deletions
diff --git a/src/spell.c b/src/spell.c
index 6251b9353..4ff413a71 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -379,7 +379,7 @@ typedef struct suggest_S
/* Number of suggestions kept when cleaning up. When rescore_suggestions() is
* called the score may change, thus we need to keep more than what is
* displayed. */
-#define SUG_CLEAN_COUNT(su) ((su)->su_maxcount < 25 ? 25 : (su)->su_maxcount)
+#define SUG_CLEAN_COUNT(su) ((su)->su_maxcount < 50 ? 50 : (su)->su_maxcount)
/* Threshold for sorting and cleaning up suggestions. Don't want to keep lots
* of suggestions that are not going to be displayed. */
@@ -530,9 +530,11 @@ static slang_T *slang_alloc __ARGS((char_u *lang));
static void slang_free __ARGS((slang_T *lp));
static void slang_clear __ARGS((slang_T *lp));
static void find_word __ARGS((matchinf_T *mip, int mode));
+static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int prefid, char_u *word, slang_T *slang));
static void find_prefix __ARGS((matchinf_T *mip));
static int fold_more __ARGS((matchinf_T *mip));
static int spell_valid_case __ARGS((int origflags, int treeflags));
+static int no_spell_checking __ARGS((void));
static void spell_load_lang __ARGS((char_u *lang));
static char_u *spell_enc __ARGS((void));
static void spell_load_cb __ARGS((char_u *fname, void *cookie));
@@ -555,20 +557,22 @@ static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int s
static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword));
static void score_comp_sal __ARGS((suginfo_T *su));
static void score_combine __ARGS((suginfo_T *su));
+static int stp_sal_score __ARGS((suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound));
static void suggest_try_soundalike __ARGS((suginfo_T *su));
static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags));
static void set_map_str __ARGS((slang_T *lp, char_u *map));
static int similar_chars __ARGS((slang_T *slang, int c1, int c2));
-static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int use_score, int had_bonus));
+static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus));
static void add_banned __ARGS((suginfo_T *su, char_u *word));
static int was_banned __ARGS((suginfo_T *su, char_u *word));
static void free_banned __ARGS((suginfo_T *su));
static void rescore_suggestions __ARGS((suginfo_T *su));
static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep));
static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, char_u *res));
-static int spell_sound_score __ARGS((slang_T *slang, char_u *goodword, char_u *badsound));
static int soundalike_score __ARGS((char_u *goodsound, char_u *badsound));
static int spell_edit_score __ARGS((char_u *badword, char_u *goodword));
+static void dump_word __ARGS((char_u *word, int round, int flags, linenr_T lnum));
+static linenr_T apply_prefixes __ARGS((slang_T *slang, char_u *word, int round, int flags, linenr_T startlnum));
/*
* Use our own character-case definitions, because the current locale may
@@ -770,15 +774,10 @@ find_word(mip, mode)
char_u *p;
#endif
int res = SP_BAD;
- int valid;
slang_T *slang = mip->mi_lp->lp_slang;
unsigned flags;
char_u *byts;
idx_T *idxs;
- int prefcnt;
- int pidx;
- regmatch_T regmatch;
- regprog_T *rp;
int prefid;
if (mode == FIND_KEEPWORD)
@@ -964,35 +963,9 @@ find_word(mip, mode)
{
/* The prefix ID is stored two bytes above the flags. */
prefid = (unsigned)flags >> 16;
-
- valid = FALSE;
- for (prefcnt = mip->mi_prefcnt - 1; prefcnt >= 0; --prefcnt)
- {
- pidx = slang->sl_pidxs[mip->mi_prefarridx + prefcnt];
-
- /* Check the prefix ID. */
- if (prefid != (pidx & 0xff))
- continue;
-
- /* Check the condition, if there is one. The
- * condition index is stored above the prefix ID byte.
- */
- rp = slang->sl_prefprog[(unsigned)pidx >> 8];
- if (rp != NULL)
- {
- regmatch.regprog = rp;
- regmatch.rm_ic = FALSE;
- if (!vim_regexec(&regmatch,
- mip->mi_fword + mip->mi_prefixlen, 0))
- continue;
- }
-
- /* It's a match, use it. */
- valid = TRUE;
- break;
- }
-
- if (!valid)
+ if (!valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
+ prefid, mip->mi_fword + mip->mi_prefixlen,
+ slang))
continue;
}
@@ -1017,8 +990,7 @@ find_word(mip, mode)
mip->mi_result = res;
mip->mi_end = mip->mi_word + wlen;
}
- else if (mip->mi_result == res
- && mip->mi_end < mip->mi_word + wlen)
+ else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
mip->mi_end = mip->mi_word + wlen;
if (res == SP_OK)
@@ -1031,6 +1003,48 @@ find_word(mip, mode)
}
/*
+ * Return TRUE if the prefix indicated by "mip->mi_prefarridx" matches with
+ * the prefix ID "prefid" for the word "word".
+ */
+ static int
+valid_word_prefix(totprefcnt, arridx, prefid, word, slang)
+ int totprefcnt; /* nr of prefix IDs */
+ int arridx; /* idx in sl_pidxs[] */
+ int prefid;
+ char_u *word;
+ slang_T *slang;
+{
+ int prefcnt;
+ int pidx;
+ regprog_T *rp;
+ regmatch_T regmatch;
+
+ for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt)
+ {
+ pidx = slang->sl_pidxs[arridx + prefcnt];
+
+ /* Check the prefix ID. */
+ if (prefid != (pidx & 0xff))
+ continue;
+
+ /* Check the condition, if there is one. The condition index is
+ * stored above the prefix ID byte. */
+ rp = slang->sl_prefprog[(unsigned)pidx >> 8];
+ if (rp != NULL)
+ {
+ regmatch.regprog = rp;
+ regmatch.rm_ic = FALSE;
+ if (!vim_regexec(&regmatch, word, 0))
+ continue;
+ }
+
+ /* It's a match! */
+ return TRUE;
+ }
+ return FALSE;
+}
+
+/*
* Check if the word at "mip->mi_word" has a matching prefix.
* If it does, then check the following word.
*
@@ -1178,6 +1192,19 @@ spell_valid_case(origflags, treeflags)
&& ((treeflags & WF_ONECAP) == 0 || origflags == WF_ONECAP)));
}
+/*
+ * Return TRUE if spell checking is not enabled.
+ */
+ static int
+no_spell_checking()
+{
+ if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
+ {
+ EMSG(_("E756: Spell checking is not enabled"));
+ return TRUE;
+ }
+ return FALSE;
+}
/*
* Move to next spell error.
@@ -1204,11 +1231,8 @@ spell_move_to(dir, allwords, curline)
int buflen = 0;
int skip = 0;
- if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
- {
- EMSG(_("E756: Spell checking not enabled"));
+ if (no_spell_checking())
return FAIL;
- }
/*
* Start looking for bad word at the start of the line, because we can't
@@ -1679,8 +1703,10 @@ formerr:
i = set_spell_charflags(p, cnt, fol);
vim_free(p);
vim_free(fol);
+#if 0 /* tolerate the differences */
if (i == FAIL)
goto formerr;
+#endif
}
else
{
@@ -2063,69 +2089,67 @@ read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr)
/*
* Parse 'spelllang' and set buf->b_langp accordingly.
- * Returns an error message or NULL.
+ * Returns NULL if it's OK, an error message otherwise.
*/
char_u *
did_set_spelllang(buf)
buf_T *buf;
{
garray_T ga;
- char_u *lang;
- char_u *e;
+ char_u *splp;
char_u *region;
int region_mask;
slang_T *lp;
int c;
- char_u lbuf[MAXWLEN + 1];
+ char_u lang[MAXWLEN + 1];
char_u spf_name[MAXPATHL];
- int did_spf = FALSE;
+ int load_spf;
+ int len;
+ char_u *p;
ga_init2(&ga, sizeof(langp_T), 2);
- /* Get the name of the .spl file associated with 'spellfile'. */
+ /* Make the name of the .spl file associated with 'spellfile'. */
if (*buf->b_p_spf == NUL)
- did_spf = TRUE;
+ load_spf = FALSE;
else
+ {
vim_snprintf((char *)spf_name, sizeof(spf_name), "%s.spl",
buf->b_p_spf);
+ load_spf = TRUE;
+ }
- /* loop over comma separated languages. */
- for (lang = buf->b_p_spl; *lang != NUL; lang = e)
+ /* loop over comma separated language names. */
+ for (splp = buf->b_p_spl; *splp != NUL; )
{
- e = vim_strchr(lang, ',');
- if (e == NULL)
- e = lang + STRLEN(lang);
+ /* Get one language name. */
+ copy_option_part(&splp, lang, MAXWLEN, ",");
+
+ /* If there is a region name let "region" point to it and remove it
+ * from the name. */
region = NULL;
- if (e > lang + 2)
+ len = STRLEN(lang);
+ if (len > 3 && lang[len - 3] == '_')
{
- if (e - lang >= MAXWLEN)
- {
- ga_clear(&ga);
- return e_invarg;
- }
- if (lang[2] == '_')
- region = lang + 3;
+ region = lang + len - 2;
+ len -= 3;
+ lang[len] = NUL;
}
/* Check if we loaded this language before. */
for (lp = first_lang; lp != NULL; lp = lp->sl_next)
- if (STRNICMP(lp->sl_name, lang, 2) == 0)
+ if (STRICMP(lp->sl_name, lang) == 0)
break;
+ /* If not found try loading the language now. */
if (lp == NULL)
- {
- /* Not found, load the language. */
- vim_strncpy(lbuf, lang, e - lang);
- if (region != NULL)
- mch_memmove(lbuf + 2, lbuf + 5, e - lang - 4);
- spell_load_lang(lbuf);
- }
+ spell_load_lang(lang);
/*
- * Loop over the languages, there can be several files for each.
+ * Loop over the languages, there can be several files for "lang".
*/
for (lp = first_lang; lp != NULL; lp = lp->sl_next)
- if (STRNICMP(lp->sl_name, lang, 2) == 0)
+ if (STRICMP(lp->sl_name, lang) == 0)
{
region_mask = REGION_ALL;
if (region != NULL)
@@ -2135,13 +2159,9 @@ did_set_spelllang(buf)
if (c == REGION_ALL)
{
if (!lp->sl_add)
- {
- c = *e;
- *e = NUL;
- smsg((char_u *)_("Warning: region %s not supported"),
- lang);
- *e = c;
- }
+ smsg((char_u *)
+ _("Warning: region %s not supported"),
+ region);
}
else
region_mask = 1 << c;
@@ -2156,28 +2176,32 @@ did_set_spelllang(buf)
LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
++ga.ga_len;
- /* Check if this is the 'spellfile' spell file. */
- if (fullpathcmp(spf_name, lp->sl_fname, FALSE) == FPC_SAME)
- did_spf = TRUE;
+ /* Check if this is the spell file related to 'spellfile'. */
+ if (load_spf && fullpathcmp(spf_name, lp->sl_fname, FALSE)
+ == FPC_SAME)
+ load_spf = FALSE;
}
-
- if (*e == ',')
- ++e;
}
/*
* Make sure the 'spellfile' file is loaded. It may be in 'runtimepath',
* then it's probably loaded above already. Otherwise load it here.
*/
- if (!did_spf)
+ if (load_spf)
{
+ /* Check if it was loaded already. */
for (lp = first_lang; lp != NULL; lp = lp->sl_next)
if (fullpathcmp(spf_name, lp->sl_fname, FALSE) == FPC_SAME)
break;
if (lp == NULL)
{
- vim_strncpy(lbuf, gettail(spf_name), 2);
- lp = spell_load_file(spf_name, lbuf, NULL, TRUE);
+ /* Not loaded, try loading it now. The language name includes the
+ * region name, the region is ignored otherwise. */
+ vim_strncpy(lang, gettail(buf->b_p_spf), MAXWLEN);
+ p = vim_strchr(lang, '.');
+ if (p != NULL)
+ *p = NUL; /* truncate at ".encoding.add" */
+ lp = spell_load_file(spf_name, lang, NULL, TRUE);
}
if (lp != NULL && ga_grow(&ga, 1) == OK)
{
@@ -2457,6 +2481,7 @@ typedef struct spellinfo_S
sblock_T *si_blocks; /* memory blocks used */
int si_ascii; /* handling only ASCII words */
int si_add; /* addition file */
+ int si_clear_chartab; /* when TRUE clear char tables */
int si_region; /* region mask */
vimconv_T si_conv; /* for conversion to 'encoding' */
int si_memtot; /* runtime memory used */
@@ -2909,6 +2934,14 @@ spell_read_aff(fname, spin)
if (fol != NULL || low != NULL || upp != NULL)
{
+ if (spin->si_clear_chartab)
+ {
+ /* Clear the char type tables, don't want to use any of the
+ * currently used spell properties. */
+ init_spell_chartab();
+ spin->si_clear_chartab = FALSE;
+ }
+
/*
* Don't write a word table for an ASCII file, so that we don't check
* for conflicts with a word table that matches 'encoding'.
@@ -3107,6 +3140,8 @@ spell_read_dic(fname, spin, affile)
{
line_breakcheck();
++lnum;
+ if (line[0] == '#')
+ continue; /* comment line */
/* Remove CR, LF and white space from the end. White space halfway
* the word is kept to allow e.g., "et al.". */
@@ -4395,6 +4430,8 @@ mkspell(fcount, fnames, ascii, overwrite, added_word)
if (incount <= 0)
EMSG(_(e_invarg)); /* need at least output and input names */
+ else if (vim_strchr(gettail(wfname), '_') != NULL)
+ EMSG(_("E751: Output file name must not have region name"));
else if (incount > 8)
EMSG(_("E754: Only up to 8 regions supported"));
else
@@ -4436,11 +4473,6 @@ mkspell(fcount, fnames, ascii, overwrite, added_word)
}
spin.si_region_count = incount;
- if (!spin.si_add)
- /* Clear the char type tables, don't want to use any of the
- * currently used spell properties. */
- init_spell_chartab();
-
spin.si_foldroot = wordtree_alloc(&spin.si_blocks);
spin.si_keeproot = wordtree_alloc(&spin.si_blocks);
spin.si_prefroot = wordtree_alloc(&spin.si_blocks);
@@ -4452,6 +4484,14 @@ mkspell(fcount, fnames, ascii, overwrite, added_word)
return;
}
+ /* When not producing a .add.spl file clear the character table when
+ * we encounter one in the .aff file. This means we dump the current
+ * one in the .spl file if the .aff file doesn't define one. That's
+ * better than guessing the contents, the table will match a
+ * previously loaded spell file. */
+ if (!spin.si_add)
+ spin.si_clear_chartab = TRUE;
+
/*
* Read all the .aff and .dic files.
* Text is converted to 'encoding'.
@@ -4591,9 +4631,16 @@ spell_add_word(word, len, bad)
{
FILE *fd;
buf_T *buf;
+ int new_spf = FALSE;
+ struct stat st;
+ /* If 'spellfile' isn't set figure out a good default value. */
if (*curbuf->b_p_spf == NUL)
+ {
init_spellfile();
+ new_spf = TRUE;
+ }
+
if (*curbuf->b_p_spf == NUL)
EMSG(_("E764: 'spellfile' is not set"));
else
@@ -4607,6 +4654,23 @@ spell_add_word(word, len, bad)
else
{
fd = mch_fopen((char *)curbuf->b_p_spf, "a");
+ if (fd == NULL && new_spf)
+ {
+ /* We just initialized the 'spellfile' option and can't open
+ * the file. We may need to create the "spell" directory
+ * first. We already checked the runtime directory is
+ * writable in init_spellfile(). */
+ STRCPY(NameBuff, curbuf->b_p_spf);
+ *gettail_sep(NameBuff) = NUL;
+ if (mch_stat((char *)NameBuff, &st) < 0)
+ {
+ /* The directory doesn't exist. Try creating it and
+ * opening the file again. */
+ vim_mkdir(NameBuff, 0755);
+ fd = mch_fopen((char *)curbuf->b_p_spf, "a");
+ }
+ }
+
if (fd == NULL)
EMSG2(_(e_notopen), curbuf->b_p_spf);
else
@@ -4640,10 +4704,17 @@ init_spellfile()
int l;
slang_T *sl;
char_u *rtp;
+ char_u *lend;
if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0)
{
- /* Loop over all entries in 'runtimepath'. */
+ /* Find the end of the language name. Exclude the region. */
+ for (lend = curbuf->b_p_spl; *lend != NUL
+ && vim_strchr((char_u *)",._", *lend) == NULL; ++lend)
+ ;
+
+ /* Loop over all entries in 'runtimepath'. Use the first one where we
+ * are allowed to write. */
rtp = p_rtp;
while (*rtp != NUL)
{
@@ -4657,7 +4728,7 @@ init_spellfile()
l = STRLEN(buf);
vim_snprintf((char *)buf + l, MAXPATHL - l,
"/spell/%.*s.%s.add",
- 2, curbuf->b_p_spl,
+ (int)(lend - curbuf->b_p_spl), curbuf->b_p_spl,
strstr((char *)gettail(sl->sl_fname), ".ascii.") != NULL
? (char_u *)"ascii" : spell_enc());
set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
@@ -5113,7 +5184,7 @@ spell_suggest()
if (p_verbose > 0)
{
/* Add the score. */
- if (sps_flags & SPS_DOUBLE)
+ if (sps_flags & (SPS_DOUBLE | SPS_BEST))
vim_snprintf((char *)IObuff, IOSIZE, _(" (%s%d - %d)"),
stp->st_salscore ? "s " : "",
stp->st_score, stp->st_altscore);
@@ -5421,7 +5492,7 @@ suggest_try_special(su)
su->su_fbadword[len] = NUL;
make_case_word(su->su_fbadword, word, su->su_badflags);
su->su_fbadword[len] = c;
- add_suggestion(su, &su->su_ga, word, su->su_badlen, SCORE_DEL, TRUE);
+ add_suggestion(su, &su->su_ga, word, su->su_badlen, SCORE_DEL, 0, TRUE);
}
}
@@ -5584,7 +5655,7 @@ suggest_try_change(su)
/* The badword also ends: add suggestions, */
add_suggestion(su, &su->su_ga, preword,
sp->ts_fidx - repextra,
- sp->ts_score + newscore, FALSE);
+ sp->ts_score + newscore, 0, FALSE);
}
else if (sp->ts_fidx >= sp->ts_fidxtry
#ifdef FEAT_MBYTE
@@ -6386,8 +6457,6 @@ score_comp_sal(su)
int i;
suggest_T *stp;
suggest_T *sstp;
- char_u fword[MAXWLEN];
- char_u goodsound[MAXWLEN];
int score;
if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL)
@@ -6405,11 +6474,9 @@ score_comp_sal(su)
{
stp = &SUG(su->su_ga, i);
- /* Case-fold the suggested word and sound-fold it. */
- (void)spell_casefold(stp->st_word, STRLEN(stp->st_word),
- fword, MAXWLEN);
- spell_soundfold(lp->lp_slang, fword, goodsound);
- score = soundalike_score(goodsound, badsound);
+ /* Case-fold the suggested word, sound-fold it and compute the
+ * sound-a-like score. */
+ score = stp_sal_score(stp, su, lp->lp_slang, badsound);
if (score < SCORE_MAXMAX)
{
/* Add the suggestion. */
@@ -6444,9 +6511,6 @@ score_combine(su)
suggest_T *stp;
char_u *p;
char_u badsound[MAXWLEN];
- char_u badsound2[MAXWLEN];
- char_u goodsound[MAXWLEN];
- char_u fword[MAXWLEN];
int round;
/* Add the alternate score to su_ga. */
@@ -6461,25 +6525,8 @@ score_combine(su)
for (i = 0; i < su->su_ga.ga_len; ++i)
{
stp = &SUG(su->su_ga, i);
-
- if (stp->st_orglen <= su->su_badlen)
- p = badsound;
- else
- {
- /* soundfold the bad word with a different length */
- (void)spell_casefold(su->su_badptr, stp->st_orglen,
- fword, MAXWLEN);
- spell_soundfold(lp->lp_slang, fword, badsound2);
- p = badsound2;
- }
-
- /* Case-fold the word, sound-fold the word and compute the
- * score for the difference. */
- (void)spell_casefold(stp->st_word, STRLEN(stp->st_word),
- fword, MAXWLEN);
- spell_soundfold(lp->lp_slang, fword, goodsound);
-
- stp->st_altscore = soundalike_score(goodsound, p);
+ stp->st_altscore = stp_sal_score(stp, su, lp->lp_slang,
+ badsound);
if (stp->st_altscore == SCORE_MAXMAX)
stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4;
else
@@ -6549,6 +6596,50 @@ score_combine(su)
}
/*
+ * For the goodword in "stp" compute the soundalike score compared to the
+ * badword.
+ */
+ static int
+stp_sal_score(stp, su, slang, badsound)
+ suggest_T *stp;
+ suginfo_T *su;
+ slang_T *slang;
+ char_u *badsound; /* sound-folded badword */
+{
+ char_u *p;
+ char_u badsound2[MAXWLEN];
+ char_u fword[MAXWLEN];
+ char_u goodsound[MAXWLEN];
+
+ if (stp->st_orglen <= su->su_badlen)
+ p = badsound;
+ else
+ {
+ /* soundfold the bad word with more characters following */
+ (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN);
+
+ /* When joining two words the sound often changes a lot. E.g., "t he"
+ * sounds like "t h" while "the" sounds like "@". Avoid that by
+ * removing the space. Don't do it when the good word also contains a
+ * space. */
+ if (vim_iswhite(su->su_badptr[su->su_badlen])
+ && *skiptowhite(stp->st_word) == NUL)
+ for (p = fword; *(p = skiptowhite(p)) != NUL; )
+ mch_memmove(p, p + 1, STRLEN(p));
+
+ spell_soundfold(slang, fword, badsound2);
+ p = badsound2;
+ }
+
+ /* Case-fold the word, sound-fold the word and compute the score for the
+ * difference. */
+ (void)spell_casefold(stp->st_word, STRLEN(stp->st_word), fword, MAXWLEN);
+ spell_soundfold(slang, fword, goodsound);
+
+ return soundalike_score(goodsound, p);
+}
+
+/*
* Find suggestions by comparing the word in a sound-a-like form.
*/
static void
@@ -6604,8 +6695,11 @@ suggest_try_soundalike(su)
while (depth >= 0 && !got_int)
{
if (curi[depth] > byts[arridx[depth]])
+ {
/* Done all bytes at this node, go up one level. */
--depth;
+ line_breakcheck();
+ }
else
{
/* Do one more byte at this node. */
@@ -6642,7 +6736,7 @@ suggest_try_soundalike(su)
char_u *p;
int score;
- if (round == 1 && flags != 0)
+ if (round == 1 && (flags & WF_CAPMASK) != 0)
{
/* Need to fix case according to
* "flags". */
@@ -6655,7 +6749,7 @@ suggest_try_soundalike(su)
if (sps_flags & SPS_DOUBLE)
add_suggestion(su, &su->su_sga, p,
su->su_badlen,
- sound_score, FALSE);
+ sound_score, 0, FALSE);
else
{
/* Compute the score. */
@@ -6668,11 +6762,11 @@ suggest_try_soundalike(su)
add_suggestion(su, &su->su_ga, p,
su->su_badlen,
RESCORE(score, sound_score),
- TRUE);
+ sound_score, TRUE);
else
add_suggestion(su, &su->su_ga, p,
su->su_badlen,
- score + sound_score, FALSE);
+ score + sound_score, 0, FALSE);
}
}
}
@@ -6692,8 +6786,6 @@ suggest_try_soundalike(su)
curi[depth] = 1;
}
}
-
- line_breakcheck();
}
}
}
@@ -6859,12 +6951,13 @@ similar_chars(slang, c1, c2)
* with spell_edit_score().
*/
static void
-add_suggestion(su, gap, goodword, badlen, score, had_bonus)
+add_suggestion(su, gap, goodword, badlen, score, altscore, had_bonus)
suginfo_T *su;
garray_T *gap;
char_u *goodword;
int badlen; /* length of bad word used */
int score;
+ int altscore;
int had_bonus; /* value for st_had_bonus */
{
suggest_T *stp;
@@ -6918,7 +7011,7 @@ add_suggestion(su, gap, goodword, badlen, score, had_bonus)
if (stp->st_word != NULL)
{
stp->st_score = score;
- stp->st_altscore = 0;
+ stp->st_altscore = altscore;
stp->st_had_bonus = had_bonus;
stp->st_orglen = badlen;
++gap->ga_len;
@@ -7003,10 +7096,6 @@ rescore_suggestions(su)
langp_T *lp;
suggest_T *stp;
char_u sal_badword[MAXWLEN];
- char_u tword[MAXWLEN];
- char_u salword[MAXWLEN];
- char_u *p;
- int score;
int i;
for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
@@ -7022,18 +7111,11 @@ rescore_suggestions(su)
stp = &SUG(su->su_ga, i);
if (!stp->st_had_bonus)
{
- if (stp->st_orglen <= su->su_badlen)
- p = sal_badword;
- else
- {
- /* soundfold the bad word with a different length */
- (void)spell_casefold(su->su_badptr, stp->st_orglen,
- tword, MAXWLEN);
- spell_soundfold(lp->lp_slang, tword, salword);
- p = salword;
- }
- score = spell_sound_score(lp->lp_slang, stp->st_word, p);
- stp->st_score = RESCORE(stp->st_score, score);
+ stp->st_altscore = stp_sal_score(stp, su,
+ lp->lp_slang, sal_badword);
+ if (stp->st_altscore == SCORE_MAXMAX)
+ stp->st_altscore = SCORE_BIG;
+ stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
}
}
break;
@@ -7424,53 +7506,38 @@ spell_soundfold(slang, inword, res)
}
/*
- * Return the score for how much words sound different.
- */
- static int
-spell_sound_score(slang, goodword, badsound)
- slang_T *slang;
- char_u *goodword; /* good word */
- char_u *badsound; /* sound-folded bad word */
-{
- char_u fword[MAXWLEN];
- char_u goodsound[MAXWLEN];
- int score;
-
- /* Case-fold the goodword, needed for sound folding. */
- (void)spell_casefold(goodword, STRLEN(goodword), fword, MAXWLEN);
-
- /* sound-fold the goodword */
- spell_soundfold(slang, fword, goodsound);
-
- /* Compute the edit distance-score of the sounds. This is slow but we
- * only do it for a small number of words. */
- score = spell_edit_score(badsound, goodsound);
-
- /* Correction: adding/inserting "*" at the start (word starts with vowel)
- * shouldn't be counted so much, vowels halfway the word aren't counted at
- * all. */
- if (*badsound != *goodsound && (*badsound == '*' || *goodsound == '*'))
- score -= SCORE_DEL / 2;
-
- return score;
-}
-
-/*
* Compute a score for two sound-a-like words.
* This permits up to two inserts/deletes/swaps/etc. to keep things fast.
* Instead of a generic loop we write out the code. That keeps it fast by
* avoiding checks that will not be possible.
*/
static int
-soundalike_score(goodsound, badsound)
- char_u *goodsound; /* sound-folded good word */
- char_u *badsound; /* sound-folded bad word */
+soundalike_score(goodstart, badstart)
+ char_u *goodstart; /* sound-folded good word */
+ char_u *badstart; /* sound-folded bad word */
{
- int goodlen = STRLEN(goodsound);
- int badlen = STRLEN(badsound);
+ char_u *goodsound = goodstart;
+ char_u *badsound = badstart;
+ int goodlen;
+ int badlen;
int n;
char_u *pl, *ps;
char_u *pl2, *ps2;
+ int score = 0;
+
+ /* adding/inserting "*" at the start (word starts with vowel) shouldn't be
+ * counted so much, vowels halfway the word aren't counted at all. */
+ if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
+ {
+ score = SCORE_DEL / 2;
+ if (*badsound == '*')
+ ++badsound;
+ else
+ ++goodsound;
+ }
+
+ goodlen = STRLEN(goodsound);
+ badlen = STRLEN(badsound);
/* Return quickly if the lenghts are too different to be fixed by two
* changes. */
@@ -7480,12 +7547,12 @@ soundalike_score(goodsound, badsound)
if (n > 0)
{
- pl = goodsound; /* longest */
+ pl = goodsound; /* goodsound is longest */
ps = badsound;
}
else
{
- pl = badsound; /* longest */
+ pl = badsound; /* badsound is longest */
ps = goodsound;
}
@@ -7511,7 +7578,7 @@ soundalike_score(goodsound, badsound)
}
/* strings must be equal after second delete */
if (STRCMP(pl + 1, ps) == 0)
- return SCORE_DEL * 2;
+ return score + SCORE_DEL * 2;
/* Failed to compare. */
break;
@@ -7528,7 +7595,7 @@ soundalike_score(goodsound, badsound)
while (*pl2 == *ps2)
{
if (*pl2 == NUL) /* reached the end */
- return SCORE_DEL;
+ return score + SCORE_DEL;
++pl2;
++ps2;
}
@@ -7536,11 +7603,11 @@ soundalike_score(goodsound, badsound)
/* 2: delete then swap, then rest must be equal */
if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
&& STRCMP(pl2 + 2, ps2 + 2) == 0)
- return SCORE_DEL + SCORE_SWAP;
+ return score + SCORE_DEL + SCORE_SWAP;
/* 3: delete then substitute, then the rest must be equal */
if (STRCMP(pl2 + 1, ps2 + 1) == 0)
- return SCORE_DEL + SCORE_SUBST;
+ return score + SCORE_DEL + SCORE_SUBST;
/* 4: first swap then delete */
if (pl[0] == ps[1] && pl[1] == ps[0])
@@ -7554,7 +7621,7 @@ soundalike_score(goodsound, badsound)
}
/* delete a char and then strings must be equal */
if (STRCMP(pl2 + 1, ps2) == 0)
- return SCORE_SWAP + SCORE_DEL;
+ return score + SCORE_SWAP + SCORE_DEL;
}
/* 5: first substitute then delete */
@@ -7567,7 +7634,7 @@ soundalike_score(goodsound, badsound)
}
/* delete a char and then strings must be equal */
if (STRCMP(pl2 + 1, ps2) == 0)
- return SCORE_SUBST + SCORE_DEL;
+ return score + SCORE_SUBST + SCORE_DEL;
/* Failed to compare. */
break;
@@ -7579,7 +7646,7 @@ soundalike_score(goodsound, badsound)
* 1: check if for identical strings
*/
if (*pl == NUL)
- return 0;
+ return score;
/* 2: swap */
if (pl[0] == ps[1] && pl[1] == ps[0])
@@ -7589,18 +7656,18 @@ soundalike_score(goodsound, badsound)
while (*pl2 == *ps2)
{
if (*pl2 == NUL) /* reached the end */
- return SCORE_SWAP;
+ return score + SCORE_SWAP;
++pl2;
++ps2;
}
/* 3: swap and swap again */
if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
&& STRCMP(pl2 + 2, ps2 + 2) == 0)
- return SCORE_SWAP + SCORE_SWAP;
+ return score + SCORE_SWAP + SCORE_SWAP;
/* 4: swap and substitute */
if (STRCMP(pl2 + 1, ps2 + 1) == 0)
- return SCORE_SWAP + SCORE_SUBST;
+ return score + SCORE_SWAP + SCORE_SUBST;
}
/* 5: substitute */
@@ -7609,7 +7676,7 @@ soundalike_score(goodsound, badsound)
while (*pl2 == *ps2)
{
if (*pl2 == NUL) /* reached the end */
- return SCORE_SUBST;
+ return score + SCORE_SUBST;
++pl2;
++ps2;
}
@@ -7617,11 +7684,11 @@ soundalike_score(goodsound, badsound)
/* 6: substitute and swap */
if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
&& STRCMP(pl2 + 2, ps2 + 2) == 0)
- return SCORE_SUBST + SCORE_SWAP;
+ return score + SCORE_SUBST + SCORE_SWAP;
/* 7: substitute and substitute */
if (STRCMP(pl2 + 1, ps2 + 1) == 0)
- return SCORE_SUBST + SCORE_SUBST;
+ return score + SCORE_SUBST + SCORE_SUBST;
/* 8: insert then delete */
pl2 = pl;
@@ -7632,7 +7699,7 @@ soundalike_score(goodsound, badsound)
++ps2;
}
if (STRCMP(pl2 + 1, ps2) == 0)
- return SCORE_INS + SCORE_DEL;
+ return score + SCORE_INS + SCORE_DEL;
/* 9: delete then insert */
pl2 = pl + 1;
@@ -7643,7 +7710,7 @@ soundalike_score(goodsound, badsound)
++ps2;
}
if (STRCMP(pl2, ps2 + 1) == 0)
- return SCORE_INS + SCORE_DEL;
+ return score + SCORE_INS + SCORE_DEL;
/* Failed to compare. */
break;
@@ -7768,4 +7835,242 @@ spell_edit_score(badword, goodword)
return i;
}
+/*
+ * ":spelldump"
+ */
+/*ARGSUSED*/
+ void
+ex_spelldump(eap)
+ exarg_T *eap;
+{
+ buf_T *buf = curbuf;
+ langp_T *lp;
+ slang_T *slang;
+ idx_T arridx[MAXWLEN];
+ int curi[MAXWLEN];
+ char_u word[MAXWLEN];
+ int c;
+ char_u *byts;
+ idx_T *idxs;
+ linenr_T lnum = 0;
+ int round;
+ int depth;
+ int n;
+ int flags;
+
+ if (no_spell_checking())
+ return;
+
+ /* Create a new empty buffer by splitting the window. */
+ do_cmdline_cmd((char_u *)"new");
+ if (!bufempty() || !buf_valid(buf))
+ return;
+
+ for (lp = LANGP_ENTRY(buf->b_langp, 0); lp->lp_slang != NULL; ++lp)
+ {
+ slang = lp->lp_slang;
+
+ vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
+ ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
+
+ /* round 1: case-folded tree
+ * round 2: keep-case tree */
+ for (round = 1; round <= 2; ++round)
+ {
+ if (round == 1)
+ {
+ byts = slang->sl_fbyts;
+ idxs = slang->sl_fidxs;
+ }
+ else
+ {
+ byts = slang->sl_kbyts;
+ idxs = slang->sl_kidxs;
+ }
+ if (byts == NULL)
+ continue; /* array is empty */
+
+ depth = 0;
+ arridx[0] = 0;
+ curi[0] = 1;
+ while (depth >= 0 && !got_int)
+ {
+ if (curi[depth] > byts[arridx[depth]])
+ {
+ /* Done all bytes at this node, go up one level. */
+ --depth;
+ line_breakcheck();
+ }
+ else
+ {
+ /* Do one more byte at this node. */
+ n = arridx[depth] + curi[depth];
+ ++curi[depth];
+ c = byts[n];
+ if (c == 0)
+ {
+ /* End of word, deal with the word.
+ * Don't use keep-case words in the fold-case tree,
+ * they will appear in the keep-case tree.
+ * Only use the word when the region matches. */
+ flags = (int)idxs[n];
+ if ((round == 2 || (flags & WF_KEEPCAP) == 0)
+ && ((flags & WF_REGION) == 0
+ || (((unsigned)flags >> 8)
+ & lp->lp_region) != 0))
+ {
+ word[depth] = NUL;
+ dump_word(word, round, flags, lnum++);
+
+ /* Apply the prefix, if there is one. */
+ if ((unsigned)flags >> 16 != 0)
+ lnum = apply_prefixes(slang, word, round,
+ flags, lnum);
+ }
+ }
+ else
+ {
+ /* Normal char, go one level deeper. */
+ word[depth++] = c;
+ arridx[depth] = idxs[n];
+ curi[depth] = 1;
+ }
+ }
+ }
+ }
+ }
+
+ /* Delete the empty line that we started with. */
+ if (curbuf->b_ml.ml_line_count > 1)
+ ml_delete(curbuf->b_ml.ml_line_count, FALSE);
+
+ redraw_later(NOT_VALID);
+}
+
+/*
+ * Dump one word: apply case modifications and append a line to the buffer.
+ */
+ static void
+dump_word(word, round, flags, lnum)
+ char_u *word;
+ int round;
+ int flags;
+ linenr_T lnum;
+{
+ int keepcap = FALSE;
+ char_u *p;
+ char_u cword[MAXWLEN];
+ char_u badword[MAXWLEN + 3];
+
+ if (round == 1 && (flags & WF_CAPMASK) != 0)
+ {
+ /* Need to fix case according to "flags". */
+ make_case_word(word, cword, flags);
+ p = cword;
+ }
+ else
+ {
+ p = word;
+ if (round == 2 && (captype(word, NULL) & WF_KEEPCAP) == 0)
+ keepcap = TRUE;
+ }
+
+ /* Bad word is preceded by "/!" and some other
+ * flags. */
+ if ((flags & (WF_BANNED | WF_RARE)) || keepcap)
+ {
+ STRCPY(badword, "/");
+ if (keepcap)
+ STRCAT(badword, "=");
+ if (flags & WF_BANNED)
+ STRCAT(badword, "!");
+ else if (flags & WF_RARE)
+ STRCAT(badword, "?");
+ STRCAT(badword, p);
+ p = badword;
+ }
+
+ ml_append(lnum, p, (colnr_T)0, FALSE);
+}
+
+/*
+ * Find matching prefixes for "word". Prepend each to "word" and append
+ * a line to the buffer.
+ * Return the updated line number.
+ */
+ static linenr_T
+apply_prefixes(slang, word, round, flags, startlnum)
+ slang_T *slang;
+ char_u *word; /* case-folded word */
+ int round;
+ int flags; /* flags with prefix ID */
+ linenr_T startlnum;
+{
+ idx_T arridx[MAXWLEN];
+ int curi[MAXWLEN];
+ char_u prefix[MAXWLEN];
+ int c;
+ char_u *byts;
+ idx_T *idxs;
+ linenr_T lnum = startlnum;
+ int depth;
+ int n;
+ int len;
+ int prefid = (unsigned)flags >> 16;
+ int i;
+
+ byts = slang->sl_pbyts;
+ idxs = slang->sl_pidxs;
+ if (byts != NULL) /* array not is empty */
+ {
+ /*
+ * Loop over all prefixes, building them byte-by-byte in prefix[].
+ * When at the end of a prefix check that it supports "prefid".
+ */
+ depth = 0;
+ arridx[0] = 0;
+ curi[0] = 1;
+ while (depth >= 0 && !got_int)
+ {
+ len = arridx[depth];
+ if (curi[depth] > byts[len])
+ {
+ /* Done all bytes at this node, go up one level. */
+ --depth;
+ line_breakcheck();
+ }
+ else
+ {
+ /* Do one more byte at this node. */
+ n = len + curi[depth];
+ ++curi[depth];
+ c = byts[n];
+ if (c == 0)
+ {
+ /* End of prefix, find out how many IDs there are. */
+ for (i = 1; i < len; ++i)
+ if (byts[n + i] != 0)
+ break;
+ curi[depth] += i - 1;
+
+ if (valid_word_prefix(i, n, prefid, word, slang))
+ {
+ vim_strncpy(prefix + depth, word, MAXWLEN - depth);
+ dump_word(prefix, round, flags, lnum++);
+ }
+ }
+ else
+ {
+ /* Normal char, go one level deeper. */
+ prefix[depth++] = c;
+ arridx[depth] = idxs[n];
+ curi[depth] = 1;
+ }
+ }
+ }
+ }
+
+ return lnum;
+}
+
#endif /* FEAT_SYN_HL */