summaryrefslogtreecommitdiff
path: root/src/regexp.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regexp.c')
-rw-r--r--src/regexp.c445
1 files changed, 346 insertions, 99 deletions
diff --git a/src/regexp.c b/src/regexp.c
index e456b5d5f..a1f71ab97 100644
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -38,9 +38,20 @@
* Named character class support added by Walter Briscoe (1998 Jul 01)
*/
+/* Uncomment the first if you do not want to see debugging logs or files
+ * related to regular expressions, even when compiling with -DDEBUG.
+ * Uncomment the second to get the regexp debugging. */
+/* #undef DEBUG */
+/* #define DEBUG */
+
#include "vim.h"
-#undef DEBUG
+#ifdef DEBUG
+/* show/save debugging data when BT engine is used */
+# define BT_REGEXP_DUMP
+/* save the debugging data to a file instead of displaying it */
+# define BT_REGEXP_LOG
+#endif
/*
* The "internal use only" fields in regexp.h are present to pass info from
@@ -326,9 +337,10 @@ toggle_Magic(x)
/* Used for an error (down from) vim_regcomp(): give the error message, set
* rc_did_emsg and return NULL */
#define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
-#define EMSG_M_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
#define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
-#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
+#define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
+#define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
+#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
#define MAX_LIMIT (32767L << 16L)
@@ -336,11 +348,18 @@ static int re_multi_type __ARGS((int));
static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
static char_u *cstrchr __ARGS((char_u *, int));
+#ifdef BT_REGEXP_DUMP
+static void regdump __ARGS((char_u *, bt_regprog_T *));
+#endif
#ifdef DEBUG
-static void regdump __ARGS((char_u *, regprog_T *));
static char_u *regprop __ARGS((char_u *));
#endif
+static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
+static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
+static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
+static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
+
#define NOT_MULTI 0
#define MULTI_ONE 1
#define MULTI_MULT 2
@@ -630,7 +649,13 @@ static char_u META_flags[] = {
};
#endif
-static int curchr;
+static int curchr; /* currently parsed character */
+/* Previous character. Note: prevchr is sometimes -1 when we are not at the
+ * start, eg in /[ ^I]^ the pattern was never found even if it existed,
+ * because ^ was taken to be magic -- webb */
+static int prevchr;
+static int prevprevchr; /* previous-previous character */
+static int nextchr; /* used for ungetchr() */
/* arguments for reg() */
#define REG_NOPAREN 0 /* toplevel reg() */
@@ -680,6 +705,9 @@ static int read_limits __ARGS((long *, long *));
static void regtail __ARGS((char_u *, char_u *));
static void regoptail __ARGS((char_u *, char_u *));
+static regengine_T bt_regengine;
+static regengine_T nfa_regengine;
+
/*
* Return TRUE if compiled regular expression "prog" can match a line break.
*/
@@ -762,6 +790,7 @@ char *EQUIVAL_CLASS_C[16] = {
/*
* Produce the bytes for equivalence class "c".
* Currently only handles latin1, latin9 and utf-8.
+ * NOTE: When changing this function, also change nfa_emit_equi_class()
*/
static void
reg_equi_class(c)
@@ -1239,8 +1268,11 @@ skip_regexp(startp, dirc, magic, newp)
return p;
}
+static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags));
+
/*
- * vim_regcomp() - compile a regular expression into internal code
+ * bt_regcomp() - compile a regular expression into internal code for the
+ * traditional back track matcher.
* Returns the program in allocated space. Returns NULL for an error.
*
* We can't allocate space until we know how big the compiled form will be,
@@ -1259,12 +1291,12 @@ skip_regexp(startp, dirc, magic, newp)
* of the structure of the compiled regexp.
* "re_flags": RE_MAGIC and/or RE_STRING.
*/
- regprog_T *
-vim_regcomp(expr, re_flags)
+ static regprog_T *
+bt_regcomp(expr, re_flags)
char_u *expr;
int re_flags;
{
- regprog_T *r;
+ bt_regprog_T *r;
char_u *scan;
char_u *longest;
int len;
@@ -1291,7 +1323,7 @@ vim_regcomp(expr, re_flags)
#endif
/* Allocate space. */
- r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
+ r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
if (r == NULL)
return NULL;
@@ -1386,10 +1418,11 @@ vim_regcomp(expr, re_flags)
r->regmlen = len;
}
}
-#ifdef DEBUG
+#ifdef BT_REGEXP_DUMP
regdump(expr, r);
#endif
- return r;
+ r->engine = &bt_regengine;
+ return (regprog_T *)r;
}
/*
@@ -1436,7 +1469,7 @@ vim_regcomp_had_eol()
#endif
/*
- * reg - regular expression, i.e. main body or parenthesized thing
+ * Parse regular expression, i.e. main body or parenthesized thing.
*
* Caller must absorb opening parenthesis.
*
@@ -1473,7 +1506,7 @@ reg(paren, flagp)
{
/* Make a MOPEN node. */
if (regnpar >= NSUBEXP)
- EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
+ EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
parno = regnpar;
++regnpar;
ret = regnode(MOPEN + parno);
@@ -1534,14 +1567,14 @@ reg(paren, flagp)
else
#endif
if (paren == REG_NPAREN)
- EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL);
+ EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
else
- EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL);
+ EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
}
else if (paren == REG_NOPAREN && peekchr() != NUL)
{
if (curchr == Magic(')'))
- EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL);
+ EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
else
EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */
/* NOTREACHED */
@@ -1556,7 +1589,7 @@ reg(paren, flagp)
}
/*
- * Handle one alternative of an | operator.
+ * Parse one alternative of an | operator.
* Implements the & operator.
*/
static char_u *
@@ -1599,7 +1632,7 @@ regbranch(flagp)
}
/*
- * Handle one alternative of an | or & operator.
+ * Parse one alternative of an | or & operator.
* Implements the concatenation operator.
*/
static char_u *
@@ -1679,7 +1712,7 @@ regconcat(flagp)
}
/*
- * regpiece - something followed by possible [*+=]
+ * Parse something followed by possible [*+=].
*
* Note that the branching code sequences used for = and the general cases
* of * and + are somewhat optimized: they use the same NOTHING node as
@@ -1759,7 +1792,7 @@ regpiece(flagp)
}
}
if (lop == END)
- EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
+ EMSG2_RET_NULL(_("E59: invalid character after %s@"),
reg_magic == MAGIC_ALL);
/* Look behind must match with behind_pos. */
if (lop == BEHIND || lop == NOBEHIND)
@@ -1793,7 +1826,7 @@ regpiece(flagp)
else
{
if (num_complex_braces >= 10)
- EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
+ EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
reg_magic == MAGIC_ALL);
reginsert(BRACE_COMPLEX + num_complex_braces, ret);
regoptail(ret, regnode(BACK));
@@ -1820,8 +1853,20 @@ regpiece(flagp)
return ret;
}
+/* When making changes to classchars also change nfa_classcodes. */
+static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
+static int classcodes[] = {
+ ANY, IDENT, SIDENT, KWORD, SKWORD,
+ FNAME, SFNAME, PRINT, SPRINT,
+ WHITE, NWHITE, DIGIT, NDIGIT,
+ HEX, NHEX, OCTAL, NOCTAL,
+ WORD, NWORD, HEAD, NHEAD,
+ ALPHA, NALPHA, LOWER, NLOWER,
+ UPPER, NUPPER
+};
+
/*
- * regatom - the lowest level
+ * Parse the lowest level.
*
* Optimization: gobbles an entire sequence of ordinary characters so that
* it can turn them into a single node, which is smaller to store and
@@ -1836,15 +1881,6 @@ regatom(flagp)
int cpo_lit; /* 'cpoptions' contains 'l' flag */
int cpo_bsl; /* 'cpoptions' contains '\' flag */
int c;
- static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
- static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
- FNAME, SFNAME, PRINT, SPRINT,
- WHITE, NWHITE, DIGIT, NDIGIT,
- HEX, NHEX, OCTAL, NOCTAL,
- WORD, NWORD, HEAD, NHEAD,
- ALPHA, NALPHA, LOWER, NLOWER,
- UPPER, NUPPER
- };
char_u *p;
int extra = 0;
@@ -2140,7 +2176,7 @@ regatom(flagp)
while ((c = getchr()) != ']')
{
if (c == NUL)
- EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
+ EMSG2_RET_NULL(_("E69: Missing ] after %s%%["),
reg_magic == MAGIC_ALL);
br = regnode(BRANCH);
if (ret == NULL)
@@ -2156,7 +2192,7 @@ regatom(flagp)
return NULL;
}
if (ret == NULL)
- EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
+ EMSG2_RET_NULL(_("E70: Empty %s%%[]"),
reg_magic == MAGIC_ALL);
lastbranch = regnode(BRANCH);
br = regnode(NOTHING);
@@ -2200,7 +2236,7 @@ regatom(flagp)
}
if (i < 0)
- EMSG_M_RET_NULL(
+ EMSG2_RET_NULL(
_("E678: Invalid character after %s%%[dxouU]"),
reg_magic == MAGIC_ALL);
#ifdef FEAT_MBYTE
@@ -2272,7 +2308,7 @@ regatom(flagp)
}
}
- EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
+ EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
reg_magic == MAGIC_ALL);
}
}
@@ -2567,8 +2603,7 @@ collection:
break;
}
else if (reg_strict)
- EMSG_M_RET_NULL(_("E769: Missing ] after %s["),
- reg_magic > MAGIC_OFF);
+ EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
}
/* FALLTHROUGH */
@@ -2659,7 +2694,7 @@ use_multibytecode(c)
#endif
/*
- * emit a node
+ * Emit a node.
* Return pointer to generated code.
*/
static char_u *
@@ -2711,7 +2746,7 @@ regmbc(c)
#endif
/*
- * reginsert - insert an operator in front of already-emitted operand
+ * Insert an operator in front of already-emitted operand
*
* Means relocating the operand.
*/
@@ -2742,7 +2777,7 @@ reginsert(op, opnd)
}
/*
- * reginsert_limits - insert an operator in front of already-emitted operand.
+ * Insert an operator in front of already-emitted operand.
* The operator has the given limit values as operands. Also set next pointer.
*
* Means relocating the operand.
@@ -2794,7 +2829,7 @@ re_put_long(p, val)
}
/*
- * regtail - set the next-pointer at the end of a node chain
+ * Set the next-pointer at the end of a node chain.
*/
static void
regtail(p, val)
@@ -2835,7 +2870,7 @@ regtail(p, val)
}
/*
- * regoptail - regtail on item after a BRANCH; nop if none
+ * Like regtail, on item after a BRANCH; nop if none.
*/
static void
regoptail(p, val)
@@ -2851,22 +2886,15 @@ regoptail(p, val)
}
/*
- * getchr() - get the next character from the pattern. We know about
- * magic and such, so therefore we need a lexical analyzer.
+ * Functions for getting characters from the regexp input.
*/
-/* static int curchr; */
-static int prevprevchr;
-static int prevchr;
-static int nextchr; /* used for ungetchr() */
-/*
- * Note: prevchr is sometimes -1 when we are not at the start,
- * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
- * taken to be magic -- webb
- */
static int at_start; /* True when on the first character */
static int prev_at_start; /* True when on the second character */
+/*
+ * Start parsing at "str".
+ */
static void
initchr(str)
char_u *str;
@@ -2878,6 +2906,9 @@ initchr(str)
prev_at_start = FALSE;
}
+/*
+ * Get the next character without advancing.
+ */
static int
peekchr()
{
@@ -3086,6 +3117,10 @@ skipchr_keepstart()
prevprevchr = prpr;
}
+/*
+ * Get the next character from the pattern. We know about magic and such, so
+ * therefore we need a lexical analyzer.
+ */
static int
getchr()
{
@@ -3340,8 +3375,8 @@ typedef struct regbehind_S
} regbehind_T;
static char_u *reg_getline __ARGS((linenr_T lnum));
-static long vim_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
-static long regtry __ARGS((regprog_T *prog, colnr_T col));
+static long bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
+static long regtry __ARGS((bt_regprog_T *prog, colnr_T col));
static void cleanup_subexpr __ARGS((void));
#ifdef FEAT_SYN_HL
static void cleanup_zsubexpr __ARGS((void));
@@ -3398,7 +3433,7 @@ static colnr_T ireg_maxcol;
/*
* Sometimes need to save a copy of a line. Since alloc()/free() is very
* slow, we keep one allocated piece of memory and only re-allocate it when
- * it's too small. It's freed in vim_regexec_both() when finished.
+ * it's too small. It's freed in bt_regexec_both() when finished.
*/
static char_u *reg_tofree = NULL;
static unsigned reg_tofreelen;
@@ -3556,6 +3591,8 @@ static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
/* TRUE if using multi-line regexp. */
#define REG_MULTI (reg_match == NULL)
+static int bt_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
+
/*
* Match a regexp against a string.
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
@@ -3563,8 +3600,8 @@ static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
*
* Return TRUE if there is a match, FALSE if not.
*/
- int
-vim_regexec(rmp, line, col)
+ static int
+bt_regexec(rmp, line, col)
regmatch_T *rmp;
char_u *line; /* string to match against */
colnr_T col; /* column to start looking for match */
@@ -3580,16 +3617,19 @@ vim_regexec(rmp, line, col)
ireg_icombine = FALSE;
#endif
ireg_maxcol = 0;
- return (vim_regexec_both(line, col, NULL) != 0);
+ return (bt_regexec_both(line, col, NULL) != 0);
}
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
+
+static int bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
+
/*
* Like vim_regexec(), but consider a "\n" in "line" to be a line break.
*/
- int
-vim_regexec_nl(rmp, line, col)
+ static int
+bt_regexec_nl(rmp, line, col)
regmatch_T *rmp;
char_u *line; /* string to match against */
colnr_T col; /* column to start looking for match */
@@ -3605,10 +3645,12 @@ vim_regexec_nl(rmp, line, col)
ireg_icombine = FALSE;
#endif
ireg_maxcol = 0;
- return (vim_regexec_both(line, col, NULL) != 0);
+ return (bt_regexec_both(line, col, NULL) != 0);
}
#endif
+static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm));
+
/*
* Match a regexp against multiple lines.
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
@@ -3617,8 +3659,8 @@ vim_regexec_nl(rmp, line, col)
* Return zero if there is no match. Return number of lines contained in the
* match otherwise.
*/
- long
-vim_regexec_multi(rmp, win, buf, lnum, col, tm)
+ static long
+bt_regexec_multi(rmp, win, buf, lnum, col, tm)
regmmatch_T *rmp;
win_T *win; /* window in which to search or NULL */
buf_T *buf; /* buffer in which to search */
@@ -3641,7 +3683,7 @@ vim_regexec_multi(rmp, win, buf, lnum, col, tm)
#endif
ireg_maxcol = rmp->rmm_maxcol;
- r = vim_regexec_both(NULL, col, tm);
+ r = bt_regexec_both(NULL, col, tm);
return r;
}
@@ -3651,12 +3693,12 @@ vim_regexec_multi(rmp, win, buf, lnum, col, tm)
* lines ("line" is NULL, use reg_getline()).
*/
static long
-vim_regexec_both(line, col, tm)
+bt_regexec_both(line, col, tm)
char_u *line;
colnr_T col; /* column to start looking for match */
proftime_T *tm UNUSED; /* timeout limit or NULL */
{
- regprog_T *prog;
+ bt_regprog_T *prog;
char_u *s;
long retval = 0L;
@@ -3682,14 +3724,14 @@ vim_regexec_both(line, col, tm)
if (REG_MULTI)
{
- prog = reg_mmatch->regprog;
+ prog = (bt_regprog_T *)reg_mmatch->regprog;
line = reg_getline((linenr_T)0);
reg_startpos = reg_mmatch->startpos;
reg_endpos = reg_mmatch->endpos;
}
else
{
- prog = reg_match->regprog;
+ prog = (bt_regprog_T *)reg_match->regprog;
reg_startp = reg_match->startp;
reg_endp = reg_match->endp;
}
@@ -3931,7 +3973,7 @@ unref_extmatch(em)
*/
static long
regtry(prog, col)
- regprog_T *prog;
+ bt_regprog_T *prog;
colnr_T col;
{
reginput = regline + col;
@@ -4063,7 +4105,7 @@ regmatch(scan)
#define RA_NOMATCH 5 /* didn't match */
/* Make "regstack" and "backpos" empty. They are allocated and freed in
- * vim_regexec_both() to reduce malloc()/free() calls. */
+ * bt_regexec_both() to reduce malloc()/free() calls. */
regstack.ga_len = 0;
backpos.ga_len = 0;
@@ -4072,14 +4114,14 @@ regmatch(scan)
*/
for (;;)
{
- /* Some patterns my cause a long time to match, even though they are not
+ /* Some patterns may cause a long time to match, even though they are not
* illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */
fast_breakcheck();
#ifdef DEBUG
if (scan != NULL && regnarrate)
{
- mch_errmsg(regprop(scan));
+ mch_errmsg((char *)regprop(scan));
mch_errmsg("(\n");
}
#endif
@@ -4100,7 +4142,7 @@ regmatch(scan)
#ifdef DEBUG
if (regnarrate)
{
- mch_errmsg(regprop(scan));
+ mch_errmsg((char *)regprop(scan));
mch_errmsg("...\n");
# ifdef FEAT_SYN_HL
if (re_extmatch_in != NULL)
@@ -4112,7 +4154,7 @@ regmatch(scan)
{
mch_errmsg(" \"");
if (re_extmatch_in->matches[i] != NULL)
- mch_errmsg(re_extmatch_in->matches[i]);
+ mch_errmsg((char *)re_extmatch_in->matches[i]);
mch_errmsg("\"\n");
}
}
@@ -6091,9 +6133,14 @@ regnext(p)
static int
prog_magic_wrong()
{
- if (UCHARAT(REG_MULTI
- ? reg_mmatch->regprog->program
- : reg_match->regprog->program) != REGMAGIC)
+ regprog_T *prog;
+
+ prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog;
+ if (prog->engine == &nfa_regengine)
+ /* For NFA matcher we don't check the magic */
+ return FALSE;
+
+ if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
{
EMSG(_(e_re_corr));
return TRUE;
@@ -6318,7 +6365,7 @@ re_num_cmp(val, scan)
}
-#ifdef DEBUG
+#ifdef BT_REGEXP_DUMP
/*
* regdump - dump a regexp onto stdout in vaguely comprehensible form
@@ -6326,14 +6373,22 @@ re_num_cmp(val, scan)
static void
regdump(pattern, r)
char_u *pattern;
- regprog_T *r;
+ bt_regprog_T *r;
{
char_u *s;
int op = EXACTLY; /* Arbitrary non-END op. */
char_u *next;
char_u *end = NULL;
+ FILE *f;
- printf("\r\nregcomp(%s):\r\n", pattern);
+#ifdef BT_REGEXP_LOG
+ f = fopen("bt_regexp_log.log", "a");
+#else
+ f = stdout;
+#endif
+ if (f == NULL)
+ return;
+ fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);
s = r->program + 1;
/*
@@ -6343,18 +6398,18 @@ regdump(pattern, r)
while (op != END || s <= end)
{
op = OP(s);
- printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
+ fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
next = regnext(s);
if (next == NULL) /* Next ptr. */
- printf("(0)");
+ fprintf(f, "(0)");
else
- printf("(%d)", (int)((s - r->program) + (next - s)));
+ fprintf(f, "(%d)", (int)((s - r->program) + (next - s)));
if (end < next)
end = next;
if (op == BRACE_LIMITS)
{
/* Two short ints */
- printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
+ fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
s += 8;
}
s += 3;
@@ -6363,25 +6418,33 @@ regdump(pattern, r)
|| op == EXACTLY)
{
/* Literal string, where present. */
+ fprintf(f, "\nxxxxxxxxx\n");
while (*s != NUL)
- printf("%c", *s++);
+ fprintf(f, "%c", *s++);
+ fprintf(f, "\nxxxxxxxxx\n");
s++;
}
- printf("\r\n");
+ fprintf(f, "\r\n");
}
/* Header fields of interest. */
if (r->regstart != NUL)
- printf("start `%s' 0x%x; ", r->regstart < 256
+ fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
? (char *)transchar(r->regstart)
: "multibyte", r->regstart);
if (r->reganch)
- printf("anchored; ");
+ fprintf(f, "anchored; ");
if (r->regmust != NULL)
- printf("must have \"%s\"", r->regmust);
- printf("\r\n");
+ fprintf(f, "must have \"%s\"", r->regmust);
+ fprintf(f, "\r\n");
+
+#ifdef BT_REGEXP_LOG
+ fclose(f);
+#endif
}
+#endif /* BT_REGEXP_DUMP */
+#ifdef DEBUG
/*
* regprop - printable representation of opcode
*/
@@ -6389,12 +6452,12 @@ regdump(pattern, r)
regprop(op)
char_u *op;
{
- char_u *p;
- static char_u buf[50];
+ char *p;
+ static char buf[50];
- (void) strcpy(buf, ":");
+ STRCPY(buf, ":");
- switch (OP(op))
+ switch ((int) OP(op))
{
case BOL:
p = "BOL";
@@ -6761,10 +6824,10 @@ regprop(op)
break;
}
if (p != NULL)
- (void) strcat(buf, p);
- return buf;
+ STRCAT(buf, p);
+ return (char_u *)buf;
}
-#endif
+#endif /* DEBUG */
#ifdef FEAT_MBYTE
static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));
@@ -7667,3 +7730,187 @@ reg_submatch(no)
return retval;
}
#endif
+
+static regengine_T bt_regengine =
+{
+ bt_regcomp,
+ bt_regexec,
+#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
+ || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
+ bt_regexec_nl,
+#endif
+ bt_regexec_multi
+#ifdef DEBUG
+ ,(char_u *)""
+#endif
+};
+
+
+#include "regexp_nfa.c"
+
+static regengine_T nfa_regengine =
+{
+ nfa_regcomp,
+ nfa_regexec,
+#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
+ || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
+ nfa_regexec_nl,
+#endif
+ nfa_regexec_multi
+#ifdef DEBUG
+ ,(char_u *)""
+#endif
+};
+
+/* Which regexp engine to use? Needed for vim_regcomp().
+ * Must match with 'regexpengine'. */
+static int regexp_engine = 0;
+#define AUTOMATIC_ENGINE 0
+#define BACKTRACKING_ENGINE 1
+#define NFA_ENGINE 2
+#ifdef DEBUG
+static char_u regname[][30] = {
+ "AUTOMATIC Regexp Engine",
+ "BACKTACKING Regexp Engine",
+ "NFA Regexp Engine"
+ };
+#endif
+
+/*
+ * Compile a regular expression into internal code.
+ * Returns the program in allocated memory. Returns NULL for an error.
+ */
+ regprog_T *
+vim_regcomp(expr_arg, re_flags)
+ char_u *expr_arg;
+ int re_flags;
+{
+ regprog_T *prog = NULL;
+ char_u *expr = expr_arg;
+
+ syntax_error = FALSE;
+ regexp_engine = p_re;
+
+ /* Check for prefix "\%#=", that sets the regexp engine */
+ if (STRNCMP(expr, "\\%#=", 4) == 0)
+ {
+ int newengine = expr[4] - '0';
+
+ if (newengine == AUTOMATIC_ENGINE
+ || newengine == BACKTRACKING_ENGINE
+ || newengine == NFA_ENGINE)
+ {
+ regexp_engine = expr[4] - '0';
+ expr += 5;
+#ifdef DEBUG
+ EMSG3("New regexp mode selected (%d): %s", regexp_engine,
+ regname[newengine]);
+#endif
+ }
+ else
+ {
+ EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
+ regexp_engine = AUTOMATIC_ENGINE;
+ }
+ }
+#ifdef DEBUG
+ bt_regengine.expr = expr;
+ nfa_regengine.expr = expr;
+#endif
+
+ /*
+ * First try the NFA engine, unless backtracking was requested.
+ */
+ if (regexp_engine != BACKTRACKING_ENGINE)
+ prog = nfa_regengine.regcomp(expr, re_flags);
+ else
+ prog = bt_regengine.regcomp(expr, re_flags);
+
+ if (prog == NULL) /* error compiling regexp with initial engine */
+ {
+#ifdef DEBUG
+ if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
+ {
+ FILE *f;
+ f = fopen("debug.log", "a");
+ if (f)
+ {
+ if (!syntax_error)
+ fprintf(f, "NFA engine could not handle \"%s\"\n", expr);
+ else
+ fprintf(f, "Syntax error in \"%s\"\n", expr);
+ fclose(f);
+ }
+ else
+ EMSG("(NFA) Could not open \"debug.log\" to write !!!");
+ /*
+ if (syntax_error)
+ EMSG("NFA Regexp: Syntax Error !");
+ */
+ }
+#endif
+ /*
+ * If NFA engine failed, then revert to the backtracking engine.
+ * Except when there was a syntax error, which was properly handled by
+ * NFA engine.
+ */
+ if (regexp_engine == AUTOMATIC_ENGINE)
+ if (!syntax_error)
+ prog = bt_regengine.regcomp(expr, re_flags);
+
+ } /* endif prog==NULL */
+
+
+ return prog;
+}
+
+/*
+ * Match a regexp against a string.
+ * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
+ * Uses curbuf for line count and 'iskeyword'.
+ *
+ * Return TRUE if there is a match, FALSE if not.
+ */
+ int
+vim_regexec(rmp, line, col)
+ regmatch_T *rmp;
+ char_u *line; /* string to match against */
+ colnr_T col; /* column to start looking for match */
+{
+ return rmp->regprog->engine->regexec(rmp, line, col);
+}
+
+#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
+ || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
+/*
+ * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
+ */
+ int
+vim_regexec_nl(rmp, line, col)
+ regmatch_T *rmp;
+ char_u *line;
+ colnr_T col;
+{
+ return rmp->regprog->engine->regexec_nl(rmp, line, col);
+}
+#endif
+
+/*
+ * Match a regexp against multiple lines.
+ * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
+ * Uses curbuf for line count and 'iskeyword'.
+ *
+ * Return zero if there is no match. Return number of lines contained in the
+ * match otherwise.
+ */
+ long
+vim_regexec_multi(rmp, win, buf, lnum, col, tm)
+ regmmatch_T *rmp;
+ win_T *win; /* window in which to search or NULL */
+ buf_T *buf; /* buffer in which to search */
+ linenr_T lnum; /* nr of line to start looking for match */
+ colnr_T col; /* column to start looking for match */
+ proftime_T *tm; /* timeout limit or NULL */
+{
+ return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
+}