diff options
-rw-r--r-- | src/spell.c | 513 |
1 files changed, 396 insertions, 117 deletions
diff --git a/src/spell.c b/src/spell.c index eab725a11..6251b9353 100644 --- a/src/spell.c +++ b/src/spell.c @@ -357,6 +357,7 @@ typedef struct suginfo_S garray_T su_sga; /* like su_ga, sound-folded scoring */ char_u *su_badptr; /* start of bad word in line */ int su_badlen; /* length of detected bad word in line */ + int su_badflags; /* caps flags for bad word */ char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */ char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */ hashtab_T su_banned; /* table with banned words */ @@ -484,9 +485,7 @@ typedef enum STATE_UNSWAP, /* Undo swap two characters. */ STATE_SWAP3, /* Swap two characters over three. */ STATE_UNSWAP3, /* Undo Swap two characters over three. */ - STATE_ROT3L, /* Rotate three characters left */ STATE_UNROT3L, /* Undo rotate three characters left */ - STATE_ROT3R, /* Rotate three characters right */ STATE_UNROT3R, /* Undo rotate three characters right */ STATE_REP_INI, /* Prepare for using REP items. */ STATE_REP, /* Use matching REP items from the .aff file. */ @@ -495,7 +494,7 @@ typedef enum } state_T; /* - * Struct to keep the state at each level in spell_try_change(). + * Struct to keep the state at each level in suggest_try_change(). */ typedef struct trystate_S { @@ -514,7 +513,7 @@ typedef struct trystate_S #endif char_u ts_save_prewordlen; /* saved "prewordlen" */ char_u ts_save_splitoff; /* su_splitoff saved here */ - char_u ts_save_badflags; /* badflags saved here */ + char_u ts_save_badflags; /* su_badflags saved here */ } trystate_T; /* values for ts_isdiff */ @@ -550,16 +549,17 @@ static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcou static void spell_find_cleanup __ARGS((suginfo_T *su)); static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper)); static void allcap_copy __ARGS((char_u *word, char_u *wcopy)); -static void spell_try_change __ARGS((suginfo_T *su)); +static void suggest_try_special __ARGS((suginfo_T *su)); +static void suggest_try_change __ARGS((suginfo_T *su)); static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add)); static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword)); static void score_comp_sal __ARGS((suginfo_T *su)); static void score_combine __ARGS((suginfo_T *su)); -static void spell_try_soundalike __ARGS((suginfo_T *su)); +static void suggest_try_soundalike __ARGS((suginfo_T *su)); static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags)); static void set_map_str __ARGS((slang_T *lp, char_u *map)); static int similar_chars __ARGS((slang_T *slang, int c1, int c2)); -static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int use_score, int had_bonus)); +static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int use_score, int had_bonus)); static void add_banned __ARGS((suginfo_T *su, char_u *word)); static int was_banned __ARGS((suginfo_T *su, char_u *word)); static void free_banned __ARGS((suginfo_T *su)); @@ -641,7 +641,8 @@ spell_check(wp, ptr, attrp) return 1; /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and - * 0X99FF. But when a word character follows do check spelling. */ + * 0X99FF. But when a word character follows do check spelling to find + * "3GPP". */ if (*ptr >= '0' && *ptr <= '9') { if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) @@ -653,16 +654,17 @@ spell_check(wp, ptr, attrp) } if (!SPELL_ISWORDP(mi.mi_end)) return (int)(mi.mi_end - ptr); + + /* Try including the digits in the word. */ + mi.mi_fend = ptr + nrlen; } + else + mi.mi_fend = ptr; - /* Find the end of the word. */ + /* Find the normal end of the word (until the next non-word character). */ mi.mi_word = ptr; - mi.mi_fend = ptr; - if (SPELL_ISWORDP(mi.mi_fend)) { - /* Make case-folded copy of the characters until the next non-word - * character. */ do { mb_ptr_adv(mi.mi_fend); @@ -709,13 +711,17 @@ spell_check(wp, ptr, attrp) if (mi.mi_result != SP_OK) { - /* If we found a number skip over it. Allows for "42nd". */ + /* If we found a number skip over it. Allows for "42nd". Do flag + * rare and local words, e.g., "3GPP". */ if (nrlen > 0) - return nrlen; + { + if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) + return nrlen; + } /* When we are at a non-word character there is no error, just * skip over the character (try looking for a word after it). */ - if (!SPELL_ISWORDP(ptr)) + else if (!SPELL_ISWORDP(ptr)) { #ifdef FEAT_MBYTE if (has_mbyte) @@ -810,7 +816,7 @@ find_word(mip, mode) */ for (;;) { - if (flen == 0 && *mip->mi_fend != NUL) + if (flen <= 0 && *mip->mi_fend != NUL) flen = fold_more(mip); len = byts[arridx++]; @@ -846,6 +852,8 @@ find_word(mip, mode) /* Perform a binary search in the list of accepted bytes. */ c = ptr[wlen]; + if (c == TAB) /* <Tab> is handled like <Space> */ + c = ' '; lo = arridx; hi = arridx + len - 1; while (lo < hi) @@ -870,6 +878,21 @@ find_word(mip, mode) arridx = idxs[lo]; ++wlen; --flen; + + /* One space in the good word may stand for several spaces in the + * checked word. */ + if (c == ' ') + { + for (;;) + { + if (flen <= 0 && *mip->mi_fend != NUL) + flen = fold_more(mip); + if (ptr[wlen] != ' ' && ptr[wlen] != TAB) + break; + ++wlen; + --flen; + } + } } /* @@ -929,7 +952,8 @@ find_word(mip, mode) mip->mi_capflags = captype(mip->mi_word, mip->mi_cend); } - if (!spell_valid_case(mip->mi_capflags, flags)) + if (mip->mi_capflags == WF_KEEPCAP + || !spell_valid_case(mip->mi_capflags, flags)) continue; } @@ -1170,11 +1194,15 @@ spell_move_to(dir, allwords, curline) pos_T found_pos; char_u *line; char_u *p; - int attr = 0; + char_u *endp; + int attr; int len; int has_syntax = syntax_present(curbuf); int col; int can_spell; + char_u *buf = NULL; + int buflen = 0; + int skip = 0; if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL) { @@ -1184,10 +1212,14 @@ spell_move_to(dir, allwords, curline) /* * Start looking for bad word at the start of the line, because we can't - * start halfway a word, we don't know where it starts or ends. + * start halfway a word, we don't know where the it starts or ends. * * When searching backwards, we continue in the line to find the last * bad word (in the cursor line: before the cursor). + * + * We concatenate the start of the next line, so that wrapped words work + * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards + * though... */ lnum = curwin->w_cursor.lnum; found_pos.lnum = 0; @@ -1195,17 +1227,35 @@ spell_move_to(dir, allwords, curline) while (!got_int) { line = ml_get(lnum); - p = line; - while (*p != NUL) + len = STRLEN(line); + if (buflen < len + MAXWLEN + 2) + { + vim_free(buf); + buflen = len + MAXWLEN + 2; + buf = alloc(buflen); + if (buf == NULL) + break; + } + + /* Copy the line into "buf" and append the start of the next line if + * possible. */ + STRCPY(buf, line); + if (lnum < curbuf->b_ml.ml_line_count) + spell_cat_line(buf + STRLEN(buf), ml_get(lnum + 1), MAXWLEN); + + p = buf + skip; + endp = buf + len; + while (p < endp) { /* When searching backward don't search after the cursor. */ if (dir == BACKWARD && lnum == curwin->w_cursor.lnum - && (colnr_T)(p - line) >= curwin->w_cursor.col) + && (colnr_T)(p - buf) >= curwin->w_cursor.col) break; /* start of word */ + attr = 0; len = spell_check(curwin, p, &attr); if (attr != 0) @@ -1218,20 +1268,15 @@ spell_move_to(dir, allwords, curline) if (dir == BACKWARD || lnum > curwin->w_cursor.lnum || (lnum == curwin->w_cursor.lnum - && (colnr_T)(curline ? p - line + len - : p - line) + && (colnr_T)(curline ? p - buf + len + : p - buf) > curwin->w_cursor.col)) { if (has_syntax) { - col = p - line; + col = p - buf; (void)syn_get_id(lnum, (colnr_T)col, FALSE, &can_spell); - - /* have to get the line again, a multi-line - * regexp may make it invalid */ - line = ml_get(lnum); - p = line + col; } else can_spell = TRUE; @@ -1239,7 +1284,7 @@ spell_move_to(dir, allwords, curline) if (can_spell) { found_pos.lnum = lnum; - found_pos.col = p - line; + found_pos.col = p - buf; #ifdef FEAT_VIRTUALEDIT found_pos.coladd = 0; #endif @@ -1247,22 +1292,20 @@ spell_move_to(dir, allwords, curline) { /* No need to search further. */ curwin->w_cursor = found_pos; + vim_free(buf); return OK; } } } } - attr = 0; } /* advance to character after the word */ p += len; - if (*p == NUL) - break; } if (curline) - return FAIL; /* only check cursor line */ + break; /* only check cursor line */ /* Advance to next line. */ if (dir == BACKWARD) @@ -1271,23 +1314,60 @@ spell_move_to(dir, allwords, curline) { /* Use the last match in the line. */ curwin->w_cursor = found_pos; + vim_free(buf); return OK; } if (lnum == 1) - return FAIL; + break; --lnum; } else { if (lnum == curbuf->b_ml.ml_line_count) - return FAIL; + break; ++lnum; + + /* Skip the characters at the start of the next line that were + * included in a match crossing line boundaries. */ + if (attr == 0) + skip = p - endp; + else + skip = 0; } line_breakcheck(); } - return FAIL; /* interrupted */ + vim_free(buf); + return FAIL; +} + +/* + * For spell checking: concatenate the start of the following line "line" into + * "buf", blanking-out special characters. Copy less then "maxlen" bytes. + */ + void +spell_cat_line(buf, line, maxlen) + char_u *buf; + char_u *line; + int maxlen; +{ + char_u *p; + int n; + + p = skipwhite(line); + while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL) + p = skipwhite(p + 1); + + if (*p != NUL) + { + *buf = ' '; + vim_strncpy(buf + 1, line, maxlen - 1); + n = p - line; + if (n >= maxlen) + n = maxlen - 1; + vim_memset(buf + 1, ' ', n); + } } /* @@ -1874,7 +1954,7 @@ endOK: * Read one row of siblings from the spell file and store it in the byte array * "byts" and index array "idxs". Recursively read the children. * - * NOTE: The code here must match put_tree(). + * NOTE: The code here must match put_node(). * * Returns the index follosing the siblings. * Returns -1 if the file is shorter than expected. @@ -2293,6 +2373,7 @@ typedef struct afffile_S char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */ int af_rar; /* RAR ID for rare word */ int af_kep; /* KEP ID for keep-case word */ + int af_bad; /* BAD ID for banned word */ int af_pfxpostpone; /* postpone prefixes without chop string */ hashtab_T af_pref; /* hashtable for prefixes, affheader_T */ hashtab_T af_suff; /* hashtable for suffixes, affheader_T */ @@ -2340,14 +2421,20 @@ struct sblock_S typedef struct wordnode_S wordnode_T; struct wordnode_S { - char_u wn_hashkey[6]; /* room for the hash key */ - wordnode_T *wn_next; /* next node with same hash key */ + union /* shared to save space */ + { + char_u hashkey[6]; /* room for the hash key */ + int index; /* index in written nodes (valid after first + round) */ + } wn_u1; + union /* shared to save space */ + { + wordnode_T *next; /* next node with same hash key */ + wordnode_T *wnode; /* parent node that will write this node */ + } wn_u2; wordnode_T *wn_child; /* child (next byte in word) */ wordnode_T *wn_sibling; /* next sibling (alternate byte in word, always sorted) */ - wordnode_T *wn_wnode; /* parent node that will write this node */ - int wn_index; /* index in written nodes (valid after first - round) */ char_u wn_byte; /* Byte for this node. NUL for word end */ char_u wn_flags; /* when wn_byte is NUL: WF_ flags */ short wn_region; /* when wn_byte is NUL: region mask; for @@ -2409,7 +2496,8 @@ static void wordtree_compress __ARGS((wordnode_T *root, spellinfo_T *spin)); static int node_compress __ARGS((wordnode_T *node, hashtab_T *ht, int *tot)); static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2)); static void write_vim_spell __ARGS((char_u *fname, spellinfo_T *spin)); -static int put_tree __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree)); +static void clear_node __ARGS((wordnode_T *node)); +static int put_node __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree)); static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word)); static void init_spellfile __ARGS((void)); @@ -2575,6 +2663,13 @@ spell_read_aff(fname, spin) if (items[1][1] != NUL) smsg((char_u *)_(e_affname), fname, lnum, items[1]); } + else if (STRCMP(items[0], "BAD") == 0 && itemcnt == 2 + && aff->af_bad == 0) + { + aff->af_bad = items[1][0]; + if (items[1][1] != NUL) + smsg((char_u *)_(e_affname), fname, lnum, items[1]); + } else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1) { aff->af_pfxpostpone = TRUE; @@ -2763,6 +2858,24 @@ spell_read_aff(fname, spin) } else if (do_map) { + int c; + + /* Check that every character appears only once. */ + for (p = items[1]; *p != NUL; ) + { +#ifdef FEAT_MBYTE + c = mb_ptr2char_adv(&p); +#else + c = *p++; +#endif + if ((spin->si_map.ga_len > 0 + && vim_strchr(spin->si_map.ga_data, c) + != NULL) + || vim_strchr(p, c) != NULL) + smsg((char_u *)_("Duplicate character in MAP in %s line %d"), + fname, lnum); + } + /* We simply concatenate all the MAP strings, separated by * slashes. */ ga_concat(&spin->si_map, items[1]); @@ -3078,6 +3191,9 @@ spell_read_dic(fname, spin, affile) if (affile->af_rar != NUL && vim_strchr(afflist, affile->af_rar) != NULL) flags |= WF_RARE; + if (affile->af_bad != NUL + && vim_strchr(afflist, affile->af_bad) != NULL) + flags |= WF_BANNED; if (affile->af_pfxpostpone) /* Need to store the list of prefix IDs with the word. */ @@ -3755,18 +3871,18 @@ node_compress(node, ht, tot) ++len; if ((child = np->wn_child) != NULL) { - /* Compress the child. This fills wn_hashkey. */ + /* Compress the child. This fills hashkey. */ compressed += node_compress(child, ht, tot); /* Try to find an identical child. */ - hash = hash_hash(child->wn_hashkey); - hi = hash_lookup(ht, child->wn_hashkey, hash); + hash = hash_hash(child->wn_u1.hashkey); + hi = hash_lookup(ht, child->wn_u1.hashkey, hash); tp = NULL; if (!HASHITEM_EMPTY(hi)) { /* There are children with an identical hash value. Now check * if there is one that is really identical. */ - for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_next) + for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next) if (node_equal(child, tp)) { /* Found one! Now use that child in place of the @@ -3782,14 +3898,14 @@ node_compress(node, ht, tot) * the node, add it to the linked list after the first * item. */ tp = HI2WN(hi); - child->wn_next = tp->wn_next; - tp->wn_next = child; + child->wn_u2.next = tp->wn_u2.next; + tp->wn_u2.next = child; } } else /* No other child has this hash value, add it to the * hashtable. */ - hash_add_item(ht, hi, child->wn_hashkey, hash); + hash_add_item(ht, hi, child->wn_u1.hashkey, hash); } } *tot += len; @@ -3799,7 +3915,7 @@ node_compress(node, ht, tot) * find a lookalike node. This must be done after compressing the sibling * list, otherwise the hash key would become invalid by the compression. */ - node->wn_hashkey[0] = len; + node->wn_u1.hashkey[0] = len; nr = 0; for (np = node; np != NULL; np = np->wn_sibling) { @@ -3814,14 +3930,14 @@ node_compress(node, ht, tot) /* Avoid NUL bytes, it terminates the hash key. */ n = nr & 0xff; - node->wn_hashkey[1] = n == 0 ? 1 : n; + node->wn_u1.hashkey[1] = n == 0 ? 1 : n; n = (nr >> 8) & 0xff; - node->wn_hashkey[2] = n == 0 ? 1 : n; + node->wn_u1.hashkey[2] = n == 0 ? 1 : n; n = (nr >> 16) & 0xff; - node->wn_hashkey[3] = n == 0 ? 1 : n; + node->wn_u1.hashkey[3] = n == 0 ? 1 : n; n = (nr >> 24) & 0xff; - node->wn_hashkey[4] = n == 0 ? 1 : n; - node->wn_hashkey[5] = NUL; + node->wn_u1.hashkey[4] = n == 0 ? 1 : n; + node->wn_u1.hashkey[5] = NUL; return compressed; } @@ -4017,23 +4133,49 @@ write_vim_spell(fname, spin) else tree = spin->si_prefroot; + /* Clear the index and wnode fields in the tree. */ + clear_node(tree); + /* Count the number of nodes. Needed to be able to allocate the - * memory when reading the nodes. Also fills in the index for shared + * memory when reading the nodes. Also fills in index for shared * nodes. */ - nodecount = put_tree(NULL, tree, 0, regionmask, round == 3); + nodecount = put_node(NULL, tree, 0, regionmask, round == 3); /* number of nodes in 4 bytes */ put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ spin->si_memtot += nodecount + nodecount * sizeof(int); /* Write the nodes. */ - (void)put_tree(fd, tree, 0, regionmask, round == 3); + (void)put_node(fd, tree, 0, regionmask, round == 3); } fclose(fd); } /* + * Clear the index and wnode fields of "node", it siblings and its + * children. This is needed because they are a union with other items to save + * space. + */ + static void +clear_node(node) + wordnode_T *node; +{ + wordnode_T *np; + + if (node != NULL) + for (np = node; np != NULL; np = np->wn_sibling) + { + np->wn_u1.index = 0; + np->wn_u2.wnode = NULL; + + if (np->wn_byte != NUL) + clear_node(np->wn_child); + } +} + + +/* * Dump a word tree at node "node". * * This first writes the list of possible bytes (siblings). Then for each @@ -4046,7 +4188,7 @@ write_vim_spell(fname, spin) * Returns the number of nodes used. */ static int -put_tree(fd, node, index, regionmask, prefixtree) +put_node(fd, node, index, regionmask, prefixtree) FILE *fd; /* NULL when only counting */ wordnode_T *node; int index; @@ -4063,7 +4205,7 @@ put_tree(fd, node, index, regionmask, prefixtree) return 0; /* Store the index where this node is written. */ - node->wn_index = index; + node->wn_u1.index = index; /* Count the number of siblings. */ for (np = node; np != NULL; np = np->wn_sibling) @@ -4116,19 +4258,20 @@ put_tree(fd, node, index, regionmask, prefixtree) } else { - if (np->wn_child->wn_index != 0 && np->wn_child->wn_wnode != node) + if (np->wn_child->wn_u1.index != 0 + && np->wn_child->wn_u2.wnode != node) { /* The child is written elsewhere, write the reference. */ if (fd != NULL) { putc(BY_INDEX, fd); /* <byte> */ /* <nodeidx> */ - put_bytes(fd, (long_u)np->wn_child->wn_index, 3); + put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3); } } - else if (np->wn_child->wn_wnode == NULL) + else if (np->wn_child->wn_u2.wnode == NULL) /* We will write the child below and give it an index. */ - np->wn_child->wn_wnode = node; + np->wn_child->wn_u2.wnode = node; if (fd != NULL) if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */ @@ -4145,8 +4288,8 @@ put_tree(fd, node, index, regionmask, prefixtree) /* Recursively dump the children of each sibling. */ for (np = node; np != NULL; np = np->wn_sibling) - if (np->wn_byte != 0 && np->wn_child->wn_wnode == node) - newindex = put_tree(fd, np->wn_child, newindex, regionmask, + if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node) + newindex = put_node(fd, np->wn_child, newindex, regionmask, prefixtree); return newindex; @@ -4900,10 +5043,30 @@ spell_suggest() suggest_T *stp; /* Find the start of the badly spelled word. */ - if (spell_move_to(FORWARD, TRUE, TRUE) == FAIL) + if (spell_move_to(FORWARD, TRUE, TRUE) == FAIL + || curwin->w_cursor.col > prev_cursor.col) { - beep_flush(); - return; + if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL) + return; + + /* No bad word or it starts after the cursor: use the word under the + * cursor. */ + curwin->w_cursor = prev_cursor; + line = ml_get_curline(); + p = line + curwin->w_cursor.col; + /* Backup to before start of word. */ + while (p > line && SPELL_ISWORDP(p)) + mb_ptr_back(line, p); + /* Forward to start of word. */ + while (!SPELL_ISWORDP(p)) + mb_ptr_adv(p); + + if (!SPELL_ISWORDP(p)) /* No word found. */ + { + beep_flush(); + return; + } + curwin->w_cursor.col = p - line; } /* Get the word and its length. */ @@ -4923,6 +5086,7 @@ spell_suggest() msg_puts(IObuff); msg_clr_eos(); msg_putchar('\n'); + msg_scroll = TRUE; for (i = 0; i < sug.su_ga.ga_len; ++i) { @@ -4935,22 +5099,30 @@ spell_suggest() vim_strncpy(wcopy + STRLEN(wcopy), sug.su_badptr + stp->st_orglen, sug.su_badlen - stp->st_orglen); + vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\""), i + 1, wcopy); + msg_puts(IObuff); + + /* The word may replace more than "su_badlen". */ + if (sug.su_badlen < stp->st_orglen) + { + vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""), + stp->st_orglen, sug.su_badptr); + msg_puts(IObuff); + } + if (p_verbose > 0) { + /* Add the score. */ if (sps_flags & SPS_DOUBLE) - vim_snprintf((char *)IObuff, IOSIZE, - _("%2d \"%s\" (%s%d - %d)"), - i + 1, wcopy, + vim_snprintf((char *)IObuff, IOSIZE, _(" (%s%d - %d)"), stp->st_salscore ? "s " : "", stp->st_score, stp->st_altscore); else - vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\" (%d)"), - i + 1, wcopy, stp->st_score); + vim_snprintf((char *)IObuff, IOSIZE, _(" (%d)"), + stp->st_score); + msg_advance(30); + msg_puts(IObuff); } - else - vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\""), - i + 1, wcopy); - msg_puts(IObuff); lines_left = 3; /* avoid more prompt */ msg_putchar('\n'); } @@ -5058,26 +5230,33 @@ spell_find_suggest(badptr, su, maxcount) vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen); (void)spell_casefold(su->su_badptr, su->su_badlen, su->su_fbadword, MAXWLEN); + /* get caps flags for bad word */ + su->su_badflags = captype(su->su_badptr, su->su_badptr + su->su_badlen); /* Ban the bad word itself. It may appear in another region. */ add_banned(su, su->su_badword); /* - * 1. Try inserting/deleting/swapping/changing a letter, use REP entries - * from the .aff file and inserting a space (split the word). + * 1. Try special cases, such as repeating a word: "the the" -> "the". * * Set a maximum score to limit the combination of operations that is * tried. */ su->su_maxscore = SCORE_MAXINIT; - spell_try_change(su); + suggest_try_special(su); + + /* + * 2. Try inserting/deleting/swapping/changing a letter, use REP entries + * from the .aff file and inserting a space (split the word). + */ + suggest_try_change(su); /* For the resulting top-scorers compute the sound-a-like score. */ if (sps_flags & SPS_DOUBLE) score_comp_sal(su); /* - * 2. Try finding sound-a-like words. + * 3. Try finding sound-a-like words. * * Only do this when we don't have a lot of suggestions yet, because it's * very slow and often doesn't find new suggestions. @@ -5088,7 +5267,7 @@ spell_find_suggest(badptr, su, maxcount) { /* Allow a higher score now. */ su->su_maxscore = SCORE_MAXMAX; - spell_try_soundalike(su); + suggest_try_soundalike(su); } /* When CTRL-C was hit while searching do show the results. */ @@ -5217,6 +5396,36 @@ allcap_copy(word, wcopy) } /* + * Try finding suggestions by recognizing specific situations. + */ + static void +suggest_try_special(su) + suginfo_T *su; +{ + char_u *p; + int len; + int c; + char_u word[MAXWLEN]; + + /* + * Recognize a word that is repeated: "the the". + */ + p = skiptowhite(su->su_fbadword); + len = p - su->su_fbadword; + p = skipwhite(p); + if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0) + { + /* Include badflags: if the badword is onecap or allcap + * use that for the goodword too: "The the" -> "The". */ + c = su->su_fbadword[len]; + su->su_fbadword[len] = NUL; + make_case_word(su->su_fbadword, word, su->su_badflags); + su->su_fbadword[len] = c; + add_suggestion(su, &su->su_ga, word, su->su_badlen, SCORE_DEL, TRUE); + } +} + +/* * Try finding suggestions by adding/removing/swapping letters. * * This uses a state machine. At each node in the tree we try various @@ -5226,7 +5435,7 @@ allcap_copy(word, wcopy) * limited by su->su_maxscore, checked in try_deeper(). */ static void -spell_try_change(su) +suggest_try_change(su) suginfo_T *su; { char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ @@ -5245,21 +5454,21 @@ spell_try_change(su) int c, c2, c3; int n = 0; int flags; - int badflags; garray_T *gap; idx_T arridx; int len; char_u *p; fromto_T *ftp; int fl = 0, tl; - - /* get caps flags for bad word */ - badflags = captype(su->su_badptr, su->su_badptr + su->su_badlen); + int repextra = 0; /* extra bytes in fword[] from REP item */ /* We make a copy of the case-folded bad word, so that we can modify it - * to find matches (esp. REP items). */ + * to find matches (esp. REP items). Append some more text, changing + * chars after the bad word may help. */ STRCPY(fword, su->su_fbadword); - + n = STRLEN(fword); + p = su->su_badptr + su->su_badlen; + (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n); for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0); lp->lp_slang != NULL; ++lp) @@ -5306,7 +5515,7 @@ spell_try_change(su) len = byts[arridx]; /* bytes in this node */ arridx += sp->ts_curi; /* index of current byte */ - if (sp->ts_curi > len || (c = byts[arridx]) != 0) + if (sp->ts_curi > len || byts[arridx] != 0) { /* Past bytes in node and/or past NUL bytes. */ sp->ts_state = STATE_ENDNUL; @@ -5330,10 +5539,22 @@ spell_try_change(su) find_keepcap_word(lp->lp_slang, tword + splitoff, preword + prewordlen); else + { /* Include badflags: if the badword is onecap or allcap - * use that for the goodword too. */ + * use that for the goodword too. But if the badword is + * allcap and it's only one char long use onecap. */ + c = su->su_badflags; + if ((c & WF_ALLCAP) +#ifdef FEAT_MBYTE + && su->su_badlen == mb_ptr2len_check(su->su_badptr) +#else + && su->su_badlen == 1 +#endif + ) + c = WF_ONECAP; make_case_word(tword + splitoff, - preword + prewordlen, flags | badflags); + preword + prewordlen, flags | c); + } /* Don't use a banned word. It may appear again as a good * word, thus remember it. */ @@ -5352,14 +5573,17 @@ spell_try_change(su) if (flags & WF_RARE) newscore += SCORE_RARE; - if (!spell_valid_case(badflags, + if (!spell_valid_case(su->su_badflags, captype(preword + prewordlen, NULL))) newscore += SCORE_ICASE; - if (fword[sp->ts_fidx] == 0) + if ((fword[sp->ts_fidx] == NUL + || !SPELL_ISWORDP(fword + sp->ts_fidx)) + && sp->ts_fidx >= sp->ts_fidxtry) { /* The badword also ends: add suggestions, */ add_suggestion(su, &su->su_ga, preword, + sp->ts_fidx - repextra, sp->ts_score + newscore, FALSE); } else if (sp->ts_fidx >= sp->ts_fidxtry @@ -5376,7 +5600,7 @@ spell_try_change(su) { /* Save things to be restored at STATE_SPLITUNDO. */ sp->ts_save_prewordlen = prewordlen; - sp->ts_save_badflags = badflags; + sp->ts_save_badflags = su->su_badflags; sp->ts_save_splitoff = splitoff; /* Append a space to preword. */ @@ -5400,7 +5624,8 @@ spell_try_change(su) else #endif p = su->su_badptr + sp->ts_fidx; - badflags = captype(p, su->su_badptr + su->su_badlen); + su->su_badflags = captype(p, su->su_badptr + + su->su_badlen); sp->ts_state = STATE_SPLITUNDO; ++depth; @@ -5411,8 +5636,8 @@ spell_try_change(su) break; case STATE_SPLITUNDO: - /* Fixup the changes done for word split. */ - badflags = sp->ts_save_badflags; + /* Undo the changes done for word split. */ + su->su_badflags = sp->ts_save_badflags; splitoff = sp->ts_save_splitoff; prewordlen = sp->ts_save_prewordlen; @@ -5422,7 +5647,7 @@ spell_try_change(su) case STATE_ENDNUL: /* Past the NUL bytes in the node. */ - if (fword[sp->ts_fidx] == 0) + if (fword[sp->ts_fidx] == NUL) { /* The badword ends, can't use the bytes in this node. */ sp->ts_state = STATE_DEL; @@ -5756,9 +5981,7 @@ spell_try_change(su) *p = p[2]; p[2] = c; } - /*FALLTHROUGH*/ - case STATE_ROT3L: /* Rotate three characters left: "123" -> "231". We change * "fword" here, it's changed back afterwards. */ if (try_deeper(su, stack, depth, SCORE_SWAP3)) @@ -5792,7 +6015,7 @@ spell_try_change(su) break; case STATE_UNROT3L: - /* Undo STATE_ROT3L: "231" -> "123" */ + /* Undo ROT3L: "231" -> "123" */ p = fword + sp->ts_fidx; #ifdef FEAT_MBYTE if (has_mbyte) @@ -5812,9 +6035,7 @@ spell_try_change(su) p[1] = *p; *p = c; } - /*FALLTHROUGH*/ - case STATE_ROT3R: /* Rotate three bytes right: "123" -> "312". We change * "fword" here, it's changed back afterwards. */ if (try_deeper(su, stack, depth, SCORE_SWAP3)) @@ -5848,7 +6069,7 @@ spell_try_change(su) break; case STATE_UNROT3R: - /* Undo STATE_ROT3R: "312" -> "123" */ + /* Undo ROT3R: "312" -> "123" */ p = fword + sp->ts_fidx; #ifdef FEAT_MBYTE if (has_mbyte) @@ -5921,7 +6142,10 @@ spell_try_change(su) fl = STRLEN(ftp->ft_from); tl = STRLEN(ftp->ft_to); if (fl != tl) + { mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1); + repextra += tl - fl; + } mch_memmove(p, ftp->ft_to, tl); stack[depth].ts_fidxtry = sp->ts_fidx + tl; #ifdef FEAT_MBYTE @@ -5945,7 +6169,10 @@ spell_try_change(su) tl = STRLEN(ftp->ft_to); p = fword + sp->ts_fidx; if (fl != tl) + { mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1); + repextra -= tl - fl; + } mch_memmove(p, ftp->ft_from, fl); sp->ts_state = STATE_REP; break; @@ -6217,6 +6444,7 @@ score_combine(su) suggest_T *stp; char_u *p; char_u badsound[MAXWLEN]; + char_u badsound2[MAXWLEN]; char_u goodsound[MAXWLEN]; char_u fword[MAXWLEN]; int round; @@ -6234,12 +6462,24 @@ score_combine(su) { stp = &SUG(su->su_ga, i); + if (stp->st_orglen <= su->su_badlen) + p = badsound; + else + { + /* soundfold the bad word with a different length */ + (void)spell_casefold(su->su_badptr, stp->st_orglen, + fword, MAXWLEN); + spell_soundfold(lp->lp_slang, fword, badsound2); + p = badsound2; + } + /* Case-fold the word, sound-fold the word and compute the * score for the difference. */ (void)spell_casefold(stp->st_word, STRLEN(stp->st_word), - fword, MAXWLEN); + fword, MAXWLEN); spell_soundfold(lp->lp_slang, fword, goodsound); - stp->st_altscore = soundalike_score(goodsound, badsound); + + stp->st_altscore = soundalike_score(goodsound, p); if (stp->st_altscore == SCORE_MAXMAX) stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4; else @@ -6312,7 +6552,7 @@ score_combine(su) * Find suggestions by comparing the word in a sound-a-like form. */ static void -spell_try_soundalike(su) +suggest_try_soundalike(su) suginfo_T *su; { char_u salword[MAXWLEN]; @@ -6414,6 +6654,7 @@ spell_try_soundalike(su) if (sps_flags & SPS_DOUBLE) add_suggestion(su, &su->su_sga, p, + su->su_badlen, sound_score, FALSE); else { @@ -6425,10 +6666,12 @@ spell_try_soundalike(su) * sounding the same as the bad * word */ add_suggestion(su, &su->su_ga, p, + su->su_badlen, RESCORE(score, sound_score), TRUE); else add_suggestion(su, &su->su_ga, p, + su->su_badlen, score + sound_score, FALSE); } } @@ -6616,20 +6859,41 @@ similar_chars(slang, c1, c2) * with spell_edit_score(). */ static void -add_suggestion(su, gap, goodword, score, had_bonus) +add_suggestion(su, gap, goodword, badlen, score, had_bonus) suginfo_T *su; garray_T *gap; char_u *goodword; + int badlen; /* length of bad word used */ int score; int had_bonus; /* value for st_had_bonus */ { suggest_T *stp; int i; + char_u *p = NULL; + int c = 0; /* Check that the word wasn't banned. */ if (was_banned(su, goodword)) return; + /* If past "su_badlen" and the rest is identical stop at "su_badlen". + * Remove the common part from "goodword". */ + i = badlen - su->su_badlen; + if (i > 0) + { + /* This assumes there was no case folding or it didn't change the + * length... */ + p = goodword + STRLEN(goodword) - i; + if (p > goodword && STRNICMP(su->su_badptr + su->su_badlen, p, i) == 0) + { + badlen = su->su_badlen; + c = *p; + *p = NUL; + } + else + p = NULL; + } + if (score <= su->su_maxscore) { /* Check if the word is already there. */ @@ -6656,7 +6920,7 @@ add_suggestion(su, gap, goodword, score, had_bonus) stp->st_score = score; stp->st_altscore = 0; stp->st_had_bonus = had_bonus; - stp->st_orglen = su->su_badlen; + stp->st_orglen = badlen; ++gap->ga_len; /* If we have too many suggestions now, sort the list and keep @@ -6667,6 +6931,9 @@ add_suggestion(su, gap, goodword, score, had_bonus) } } } + + if (p != NULL) + *p = c; /* restore "goodword" */ } /* @@ -6736,6 +7003,9 @@ rescore_suggestions(su) langp_T *lp; suggest_T *stp; char_u sal_badword[MAXWLEN]; + char_u tword[MAXWLEN]; + char_u salword[MAXWLEN]; + char_u *p; int score; int i; @@ -6752,8 +7022,17 @@ rescore_suggestions(su) stp = &SUG(su->su_ga, i); if (!stp->st_had_bonus) { - score = spell_sound_score(lp->lp_slang, stp->st_word, - sal_badword); + if (stp->st_orglen <= su->su_badlen) + p = sal_badword; + else + { + /* soundfold the bad word with a different length */ + (void)spell_casefold(su->su_badptr, stp->st_orglen, + tword, MAXWLEN); + spell_soundfold(lp->lp_slang, tword, salword); + p = salword; + } + score = spell_sound_score(lp->lp_slang, stp->st_word, p); stp->st_score = RESCORE(stp->st_score, score); } } |