diff options
Diffstat (limited to 'src/regexp.c')
-rw-r--r-- | src/regexp.c | 445 |
1 files changed, 346 insertions, 99 deletions
diff --git a/src/regexp.c b/src/regexp.c index e456b5d5f..a1f71ab97 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -38,9 +38,20 @@ * Named character class support added by Walter Briscoe (1998 Jul 01) */ +/* Uncomment the first if you do not want to see debugging logs or files + * related to regular expressions, even when compiling with -DDEBUG. + * Uncomment the second to get the regexp debugging. */ +/* #undef DEBUG */ +/* #define DEBUG */ + #include "vim.h" -#undef DEBUG +#ifdef DEBUG +/* show/save debugging data when BT engine is used */ +# define BT_REGEXP_DUMP +/* save the debugging data to a file instead of displaying it */ +# define BT_REGEXP_LOG +#endif /* * The "internal use only" fields in regexp.h are present to pass info from @@ -326,9 +337,10 @@ toggle_Magic(x) /* Used for an error (down from) vim_regcomp(): give the error message, set * rc_did_emsg and return NULL */ #define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL) -#define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL) #define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL) -#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL) +#define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL) +#define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL) +#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL) #define MAX_LIMIT (32767L << 16L) @@ -336,11 +348,18 @@ static int re_multi_type __ARGS((int)); static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n)); static char_u *cstrchr __ARGS((char_u *, int)); +#ifdef BT_REGEXP_DUMP +static void regdump __ARGS((char_u *, bt_regprog_T *)); +#endif #ifdef DEBUG -static void regdump __ARGS((char_u *, regprog_T *)); static char_u *regprop __ARGS((char_u *)); #endif +static char_u e_missingbracket[] = N_("E769: Missing ] after %s["); +static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%("); +static char_u e_unmatchedp[] = N_("E54: Unmatched %s("); +static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)"); + #define NOT_MULTI 0 #define MULTI_ONE 1 #define MULTI_MULT 2 @@ -630,7 +649,13 @@ static char_u META_flags[] = { }; #endif -static int curchr; +static int curchr; /* currently parsed character */ +/* Previous character. Note: prevchr is sometimes -1 when we are not at the + * start, eg in /[ ^I]^ the pattern was never found even if it existed, + * because ^ was taken to be magic -- webb */ +static int prevchr; +static int prevprevchr; /* previous-previous character */ +static int nextchr; /* used for ungetchr() */ /* arguments for reg() */ #define REG_NOPAREN 0 /* toplevel reg() */ @@ -680,6 +705,9 @@ static int read_limits __ARGS((long *, long *)); static void regtail __ARGS((char_u *, char_u *)); static void regoptail __ARGS((char_u *, char_u *)); +static regengine_T bt_regengine; +static regengine_T nfa_regengine; + /* * Return TRUE if compiled regular expression "prog" can match a line break. */ @@ -762,6 +790,7 @@ char *EQUIVAL_CLASS_C[16] = { /* * Produce the bytes for equivalence class "c". * Currently only handles latin1, latin9 and utf-8. + * NOTE: When changing this function, also change nfa_emit_equi_class() */ static void reg_equi_class(c) @@ -1239,8 +1268,11 @@ skip_regexp(startp, dirc, magic, newp) return p; } +static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags)); + /* - * vim_regcomp() - compile a regular expression into internal code + * bt_regcomp() - compile a regular expression into internal code for the + * traditional back track matcher. * Returns the program in allocated space. Returns NULL for an error. * * We can't allocate space until we know how big the compiled form will be, @@ -1259,12 +1291,12 @@ skip_regexp(startp, dirc, magic, newp) * of the structure of the compiled regexp. * "re_flags": RE_MAGIC and/or RE_STRING. */ - regprog_T * -vim_regcomp(expr, re_flags) + static regprog_T * +bt_regcomp(expr, re_flags) char_u *expr; int re_flags; { - regprog_T *r; + bt_regprog_T *r; char_u *scan; char_u *longest; int len; @@ -1291,7 +1323,7 @@ vim_regcomp(expr, re_flags) #endif /* Allocate space. */ - r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE); + r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE); if (r == NULL) return NULL; @@ -1386,10 +1418,11 @@ vim_regcomp(expr, re_flags) r->regmlen = len; } } -#ifdef DEBUG +#ifdef BT_REGEXP_DUMP regdump(expr, r); #endif - return r; + r->engine = &bt_regengine; + return (regprog_T *)r; } /* @@ -1436,7 +1469,7 @@ vim_regcomp_had_eol() #endif /* - * reg - regular expression, i.e. main body or parenthesized thing + * Parse regular expression, i.e. main body or parenthesized thing. * * Caller must absorb opening parenthesis. * @@ -1473,7 +1506,7 @@ reg(paren, flagp) { /* Make a MOPEN node. */ if (regnpar >= NSUBEXP) - EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL); + EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL); parno = regnpar; ++regnpar; ret = regnode(MOPEN + parno); @@ -1534,14 +1567,14 @@ reg(paren, flagp) else #endif if (paren == REG_NPAREN) - EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL); + EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL); else - EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL); + EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL); } else if (paren == REG_NOPAREN && peekchr() != NUL) { if (curchr == Magic(')')) - EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL); + EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL); else EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */ /* NOTREACHED */ @@ -1556,7 +1589,7 @@ reg(paren, flagp) } /* - * Handle one alternative of an | operator. + * Parse one alternative of an | operator. * Implements the & operator. */ static char_u * @@ -1599,7 +1632,7 @@ regbranch(flagp) } /* - * Handle one alternative of an | or & operator. + * Parse one alternative of an | or & operator. * Implements the concatenation operator. */ static char_u * @@ -1679,7 +1712,7 @@ regconcat(flagp) } /* - * regpiece - something followed by possible [*+=] + * Parse something followed by possible [*+=]. * * Note that the branching code sequences used for = and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as @@ -1759,7 +1792,7 @@ regpiece(flagp) } } if (lop == END) - EMSG_M_RET_NULL(_("E59: invalid character after %s@"), + EMSG2_RET_NULL(_("E59: invalid character after %s@"), reg_magic == MAGIC_ALL); /* Look behind must match with behind_pos. */ if (lop == BEHIND || lop == NOBEHIND) @@ -1793,7 +1826,7 @@ regpiece(flagp) else { if (num_complex_braces >= 10) - EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"), + EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"), reg_magic == MAGIC_ALL); reginsert(BRACE_COMPLEX + num_complex_braces, ret); regoptail(ret, regnode(BACK)); @@ -1820,8 +1853,20 @@ regpiece(flagp) return ret; } +/* When making changes to classchars also change nfa_classcodes. */ +static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; +static int classcodes[] = { + ANY, IDENT, SIDENT, KWORD, SKWORD, + FNAME, SFNAME, PRINT, SPRINT, + WHITE, NWHITE, DIGIT, NDIGIT, + HEX, NHEX, OCTAL, NOCTAL, + WORD, NWORD, HEAD, NHEAD, + ALPHA, NALPHA, LOWER, NLOWER, + UPPER, NUPPER +}; + /* - * regatom - the lowest level + * Parse the lowest level. * * Optimization: gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and @@ -1836,15 +1881,6 @@ regatom(flagp) int cpo_lit; /* 'cpoptions' contains 'l' flag */ int cpo_bsl; /* 'cpoptions' contains '\' flag */ int c; - static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; - static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD, - FNAME, SFNAME, PRINT, SPRINT, - WHITE, NWHITE, DIGIT, NDIGIT, - HEX, NHEX, OCTAL, NOCTAL, - WORD, NWORD, HEAD, NHEAD, - ALPHA, NALPHA, LOWER, NLOWER, - UPPER, NUPPER - }; char_u *p; int extra = 0; @@ -2140,7 +2176,7 @@ regatom(flagp) while ((c = getchr()) != ']') { if (c == NUL) - EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["), + EMSG2_RET_NULL(_("E69: Missing ] after %s%%["), reg_magic == MAGIC_ALL); br = regnode(BRANCH); if (ret == NULL) @@ -2156,7 +2192,7 @@ regatom(flagp) return NULL; } if (ret == NULL) - EMSG_M_RET_NULL(_("E70: Empty %s%%[]"), + EMSG2_RET_NULL(_("E70: Empty %s%%[]"), reg_magic == MAGIC_ALL); lastbranch = regnode(BRANCH); br = regnode(NOTHING); @@ -2200,7 +2236,7 @@ regatom(flagp) } if (i < 0) - EMSG_M_RET_NULL( + EMSG2_RET_NULL( _("E678: Invalid character after %s%%[dxouU]"), reg_magic == MAGIC_ALL); #ifdef FEAT_MBYTE @@ -2272,7 +2308,7 @@ regatom(flagp) } } - EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"), + EMSG2_RET_NULL(_("E71: Invalid character after %s%%"), reg_magic == MAGIC_ALL); } } @@ -2567,8 +2603,7 @@ collection: break; } else if (reg_strict) - EMSG_M_RET_NULL(_("E769: Missing ] after %s["), - reg_magic > MAGIC_OFF); + EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF); } /* FALLTHROUGH */ @@ -2659,7 +2694,7 @@ use_multibytecode(c) #endif /* - * emit a node + * Emit a node. * Return pointer to generated code. */ static char_u * @@ -2711,7 +2746,7 @@ regmbc(c) #endif /* - * reginsert - insert an operator in front of already-emitted operand + * Insert an operator in front of already-emitted operand * * Means relocating the operand. */ @@ -2742,7 +2777,7 @@ reginsert(op, opnd) } /* - * reginsert_limits - insert an operator in front of already-emitted operand. + * Insert an operator in front of already-emitted operand. * The operator has the given limit values as operands. Also set next pointer. * * Means relocating the operand. @@ -2794,7 +2829,7 @@ re_put_long(p, val) } /* - * regtail - set the next-pointer at the end of a node chain + * Set the next-pointer at the end of a node chain. */ static void regtail(p, val) @@ -2835,7 +2870,7 @@ regtail(p, val) } /* - * regoptail - regtail on item after a BRANCH; nop if none + * Like regtail, on item after a BRANCH; nop if none. */ static void regoptail(p, val) @@ -2851,22 +2886,15 @@ regoptail(p, val) } /* - * getchr() - get the next character from the pattern. We know about - * magic and such, so therefore we need a lexical analyzer. + * Functions for getting characters from the regexp input. */ -/* static int curchr; */ -static int prevprevchr; -static int prevchr; -static int nextchr; /* used for ungetchr() */ -/* - * Note: prevchr is sometimes -1 when we are not at the start, - * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was - * taken to be magic -- webb - */ static int at_start; /* True when on the first character */ static int prev_at_start; /* True when on the second character */ +/* + * Start parsing at "str". + */ static void initchr(str) char_u *str; @@ -2878,6 +2906,9 @@ initchr(str) prev_at_start = FALSE; } +/* + * Get the next character without advancing. + */ static int peekchr() { @@ -3086,6 +3117,10 @@ skipchr_keepstart() prevprevchr = prpr; } +/* + * Get the next character from the pattern. We know about magic and such, so + * therefore we need a lexical analyzer. + */ static int getchr() { @@ -3340,8 +3375,8 @@ typedef struct regbehind_S } regbehind_T; static char_u *reg_getline __ARGS((linenr_T lnum)); -static long vim_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm)); -static long regtry __ARGS((regprog_T *prog, colnr_T col)); +static long bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm)); +static long regtry __ARGS((bt_regprog_T *prog, colnr_T col)); static void cleanup_subexpr __ARGS((void)); #ifdef FEAT_SYN_HL static void cleanup_zsubexpr __ARGS((void)); @@ -3398,7 +3433,7 @@ static colnr_T ireg_maxcol; /* * Sometimes need to save a copy of a line. Since alloc()/free() is very * slow, we keep one allocated piece of memory and only re-allocate it when - * it's too small. It's freed in vim_regexec_both() when finished. + * it's too small. It's freed in bt_regexec_both() when finished. */ static char_u *reg_tofree = NULL; static unsigned reg_tofreelen; @@ -3556,6 +3591,8 @@ static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */ /* TRUE if using multi-line regexp. */ #define REG_MULTI (reg_match == NULL) +static int bt_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); + /* * Match a regexp against a string. * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). @@ -3563,8 +3600,8 @@ static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */ * * Return TRUE if there is a match, FALSE if not. */ - int -vim_regexec(rmp, line, col) + static int +bt_regexec(rmp, line, col) regmatch_T *rmp; char_u *line; /* string to match against */ colnr_T col; /* column to start looking for match */ @@ -3580,16 +3617,19 @@ vim_regexec(rmp, line, col) ireg_icombine = FALSE; #endif ireg_maxcol = 0; - return (vim_regexec_both(line, col, NULL) != 0); + return (bt_regexec_both(line, col, NULL) != 0); } #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ || defined(FIND_REPLACE_DIALOG) || defined(PROTO) + +static int bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); + /* * Like vim_regexec(), but consider a "\n" in "line" to be a line break. */ - int -vim_regexec_nl(rmp, line, col) + static int +bt_regexec_nl(rmp, line, col) regmatch_T *rmp; char_u *line; /* string to match against */ colnr_T col; /* column to start looking for match */ @@ -3605,10 +3645,12 @@ vim_regexec_nl(rmp, line, col) ireg_icombine = FALSE; #endif ireg_maxcol = 0; - return (vim_regexec_both(line, col, NULL) != 0); + return (bt_regexec_both(line, col, NULL) != 0); } #endif +static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm)); + /* * Match a regexp against multiple lines. * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). @@ -3617,8 +3659,8 @@ vim_regexec_nl(rmp, line, col) * Return zero if there is no match. Return number of lines contained in the * match otherwise. */ - long -vim_regexec_multi(rmp, win, buf, lnum, col, tm) + static long +bt_regexec_multi(rmp, win, buf, lnum, col, tm) regmmatch_T *rmp; win_T *win; /* window in which to search or NULL */ buf_T *buf; /* buffer in which to search */ @@ -3641,7 +3683,7 @@ vim_regexec_multi(rmp, win, buf, lnum, col, tm) #endif ireg_maxcol = rmp->rmm_maxcol; - r = vim_regexec_both(NULL, col, tm); + r = bt_regexec_both(NULL, col, tm); return r; } @@ -3651,12 +3693,12 @@ vim_regexec_multi(rmp, win, buf, lnum, col, tm) * lines ("line" is NULL, use reg_getline()). */ static long -vim_regexec_both(line, col, tm) +bt_regexec_both(line, col, tm) char_u *line; colnr_T col; /* column to start looking for match */ proftime_T *tm UNUSED; /* timeout limit or NULL */ { - regprog_T *prog; + bt_regprog_T *prog; char_u *s; long retval = 0L; @@ -3682,14 +3724,14 @@ vim_regexec_both(line, col, tm) if (REG_MULTI) { - prog = reg_mmatch->regprog; + prog = (bt_regprog_T *)reg_mmatch->regprog; line = reg_getline((linenr_T)0); reg_startpos = reg_mmatch->startpos; reg_endpos = reg_mmatch->endpos; } else { - prog = reg_match->regprog; + prog = (bt_regprog_T *)reg_match->regprog; reg_startp = reg_match->startp; reg_endp = reg_match->endp; } @@ -3931,7 +3973,7 @@ unref_extmatch(em) */ static long regtry(prog, col) - regprog_T *prog; + bt_regprog_T *prog; colnr_T col; { reginput = regline + col; @@ -4063,7 +4105,7 @@ regmatch(scan) #define RA_NOMATCH 5 /* didn't match */ /* Make "regstack" and "backpos" empty. They are allocated and freed in - * vim_regexec_both() to reduce malloc()/free() calls. */ + * bt_regexec_both() to reduce malloc()/free() calls. */ regstack.ga_len = 0; backpos.ga_len = 0; @@ -4072,14 +4114,14 @@ regmatch(scan) */ for (;;) { - /* Some patterns my cause a long time to match, even though they are not + /* Some patterns may cause a long time to match, even though they are not * illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */ fast_breakcheck(); #ifdef DEBUG if (scan != NULL && regnarrate) { - mch_errmsg(regprop(scan)); + mch_errmsg((char *)regprop(scan)); mch_errmsg("(\n"); } #endif @@ -4100,7 +4142,7 @@ regmatch(scan) #ifdef DEBUG if (regnarrate) { - mch_errmsg(regprop(scan)); + mch_errmsg((char *)regprop(scan)); mch_errmsg("...\n"); # ifdef FEAT_SYN_HL if (re_extmatch_in != NULL) @@ -4112,7 +4154,7 @@ regmatch(scan) { mch_errmsg(" \""); if (re_extmatch_in->matches[i] != NULL) - mch_errmsg(re_extmatch_in->matches[i]); + mch_errmsg((char *)re_extmatch_in->matches[i]); mch_errmsg("\"\n"); } } @@ -6091,9 +6133,14 @@ regnext(p) static int prog_magic_wrong() { - if (UCHARAT(REG_MULTI - ? reg_mmatch->regprog->program - : reg_match->regprog->program) != REGMAGIC) + regprog_T *prog; + + prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog; + if (prog->engine == &nfa_regengine) + /* For NFA matcher we don't check the magic */ + return FALSE; + + if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC) { EMSG(_(e_re_corr)); return TRUE; @@ -6318,7 +6365,7 @@ re_num_cmp(val, scan) } -#ifdef DEBUG +#ifdef BT_REGEXP_DUMP /* * regdump - dump a regexp onto stdout in vaguely comprehensible form @@ -6326,14 +6373,22 @@ re_num_cmp(val, scan) static void regdump(pattern, r) char_u *pattern; - regprog_T *r; + bt_regprog_T *r; { char_u *s; int op = EXACTLY; /* Arbitrary non-END op. */ char_u *next; char_u *end = NULL; + FILE *f; - printf("\r\nregcomp(%s):\r\n", pattern); +#ifdef BT_REGEXP_LOG + f = fopen("bt_regexp_log.log", "a"); +#else + f = stdout; +#endif + if (f == NULL) + return; + fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern); s = r->program + 1; /* @@ -6343,18 +6398,18 @@ regdump(pattern, r) while (op != END || s <= end) { op = OP(s); - printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */ + fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */ next = regnext(s); if (next == NULL) /* Next ptr. */ - printf("(0)"); + fprintf(f, "(0)"); else - printf("(%d)", (int)((s - r->program) + (next - s))); + fprintf(f, "(%d)", (int)((s - r->program) + (next - s))); if (end < next) end = next; if (op == BRACE_LIMITS) { /* Two short ints */ - printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s)); + fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s)); s += 8; } s += 3; @@ -6363,25 +6418,33 @@ regdump(pattern, r) || op == EXACTLY) { /* Literal string, where present. */ + fprintf(f, "\nxxxxxxxxx\n"); while (*s != NUL) - printf("%c", *s++); + fprintf(f, "%c", *s++); + fprintf(f, "\nxxxxxxxxx\n"); s++; } - printf("\r\n"); + fprintf(f, "\r\n"); } /* Header fields of interest. */ if (r->regstart != NUL) - printf("start `%s' 0x%x; ", r->regstart < 256 + fprintf(f, "start `%s' 0x%x; ", r->regstart < 256 ? (char *)transchar(r->regstart) : "multibyte", r->regstart); if (r->reganch) - printf("anchored; "); + fprintf(f, "anchored; "); if (r->regmust != NULL) - printf("must have \"%s\"", r->regmust); - printf("\r\n"); + fprintf(f, "must have \"%s\"", r->regmust); + fprintf(f, "\r\n"); + +#ifdef BT_REGEXP_LOG + fclose(f); +#endif } +#endif /* BT_REGEXP_DUMP */ +#ifdef DEBUG /* * regprop - printable representation of opcode */ @@ -6389,12 +6452,12 @@ regdump(pattern, r) regprop(op) char_u *op; { - char_u *p; - static char_u buf[50]; + char *p; + static char buf[50]; - (void) strcpy(buf, ":"); + STRCPY(buf, ":"); - switch (OP(op)) + switch ((int) OP(op)) { case BOL: p = "BOL"; @@ -6761,10 +6824,10 @@ regprop(op) break; } if (p != NULL) - (void) strcat(buf, p); - return buf; + STRCAT(buf, p); + return (char_u *)buf; } -#endif +#endif /* DEBUG */ #ifdef FEAT_MBYTE static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3)); @@ -7667,3 +7730,187 @@ reg_submatch(no) return retval; } #endif + +static regengine_T bt_regengine = +{ + bt_regcomp, + bt_regexec, +#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ + || defined(FIND_REPLACE_DIALOG) || defined(PROTO) + bt_regexec_nl, +#endif + bt_regexec_multi +#ifdef DEBUG + ,(char_u *)"" +#endif +}; + + +#include "regexp_nfa.c" + +static regengine_T nfa_regengine = +{ + nfa_regcomp, + nfa_regexec, +#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ + || defined(FIND_REPLACE_DIALOG) || defined(PROTO) + nfa_regexec_nl, +#endif + nfa_regexec_multi +#ifdef DEBUG + ,(char_u *)"" +#endif +}; + +/* Which regexp engine to use? Needed for vim_regcomp(). + * Must match with 'regexpengine'. */ +static int regexp_engine = 0; +#define AUTOMATIC_ENGINE 0 +#define BACKTRACKING_ENGINE 1 +#define NFA_ENGINE 2 +#ifdef DEBUG +static char_u regname[][30] = { + "AUTOMATIC Regexp Engine", + "BACKTACKING Regexp Engine", + "NFA Regexp Engine" + }; +#endif + +/* + * Compile a regular expression into internal code. + * Returns the program in allocated memory. Returns NULL for an error. + */ + regprog_T * +vim_regcomp(expr_arg, re_flags) + char_u *expr_arg; + int re_flags; +{ + regprog_T *prog = NULL; + char_u *expr = expr_arg; + + syntax_error = FALSE; + regexp_engine = p_re; + + /* Check for prefix "\%#=", that sets the regexp engine */ + if (STRNCMP(expr, "\\%#=", 4) == 0) + { + int newengine = expr[4] - '0'; + + if (newengine == AUTOMATIC_ENGINE + || newengine == BACKTRACKING_ENGINE + || newengine == NFA_ENGINE) + { + regexp_engine = expr[4] - '0'; + expr += 5; +#ifdef DEBUG + EMSG3("New regexp mode selected (%d): %s", regexp_engine, + regname[newengine]); +#endif + } + else + { + EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used ")); + regexp_engine = AUTOMATIC_ENGINE; + } + } +#ifdef DEBUG + bt_regengine.expr = expr; + nfa_regengine.expr = expr; +#endif + + /* + * First try the NFA engine, unless backtracking was requested. + */ + if (regexp_engine != BACKTRACKING_ENGINE) + prog = nfa_regengine.regcomp(expr, re_flags); + else + prog = bt_regengine.regcomp(expr, re_flags); + + if (prog == NULL) /* error compiling regexp with initial engine */ + { +#ifdef DEBUG + if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */ + { + FILE *f; + f = fopen("debug.log", "a"); + if (f) + { + if (!syntax_error) + fprintf(f, "NFA engine could not handle \"%s\"\n", expr); + else + fprintf(f, "Syntax error in \"%s\"\n", expr); + fclose(f); + } + else + EMSG("(NFA) Could not open \"debug.log\" to write !!!"); + /* + if (syntax_error) + EMSG("NFA Regexp: Syntax Error !"); + */ + } +#endif + /* + * If NFA engine failed, then revert to the backtracking engine. + * Except when there was a syntax error, which was properly handled by + * NFA engine. + */ + if (regexp_engine == AUTOMATIC_ENGINE) + if (!syntax_error) + prog = bt_regengine.regcomp(expr, re_flags); + + } /* endif prog==NULL */ + + + return prog; +} + +/* + * Match a regexp against a string. + * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). + * Uses curbuf for line count and 'iskeyword'. + * + * Return TRUE if there is a match, FALSE if not. + */ + int +vim_regexec(rmp, line, col) + regmatch_T *rmp; + char_u *line; /* string to match against */ + colnr_T col; /* column to start looking for match */ +{ + return rmp->regprog->engine->regexec(rmp, line, col); +} + +#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ + || defined(FIND_REPLACE_DIALOG) || defined(PROTO) +/* + * Like vim_regexec(), but consider a "\n" in "line" to be a line break. + */ + int +vim_regexec_nl(rmp, line, col) + regmatch_T *rmp; + char_u *line; + colnr_T col; +{ + return rmp->regprog->engine->regexec_nl(rmp, line, col); +} +#endif + +/* + * Match a regexp against multiple lines. + * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). + * Uses curbuf for line count and 'iskeyword'. + * + * Return zero if there is no match. Return number of lines contained in the + * match otherwise. + */ + long +vim_regexec_multi(rmp, win, buf, lnum, col, tm) + regmmatch_T *rmp; + win_T *win; /* window in which to search or NULL */ + buf_T *buf; /* buffer in which to search */ + linenr_T lnum; /* nr of line to start looking for match */ + colnr_T col; /* column to start looking for match */ + proftime_T *tm; /* timeout limit or NULL */ +{ + return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm); +} |