From bcf9442307075bac40d44328c8bf7ea21857b138 Mon Sep 17 00:00:00 2001 From: Bram Moolenaar Date: Sat, 23 Jun 2018 14:21:42 +0200 Subject: patch 8.1.0098: segfault when pattern with \z() is very slow Problem: Segfault when pattern with \z() is very slow. Solution: Check for NULL regprog. Add "nfa_fail" to test_override() to be able to test this. Fix that 'searchhl' resets called_emsg. --- runtime/doc/eval.txt | 2 ++ src/evalfunc.c | 3 +++ src/globals.h | 1 + src/regexp.c | 15 ++++++++++----- src/regexp_nfa.c | 7 ++++--- src/screen.c | 4 ++++ src/syntax.c | 6 ++++++ src/testdir/test_syntax.vim | 12 ++++++++++++ src/version.c | 2 ++ src/vim.h | 1 + 10 files changed, 45 insertions(+), 8 deletions(-) diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt index d4c58e81f..5ae0f6bca 100644 --- a/runtime/doc/eval.txt +++ b/runtime/doc/eval.txt @@ -8694,6 +8694,8 @@ test_override({name}, {val}) *test_override()* redraw disable the redrawing() function char_avail disable the char_avail() function starting reset the "starting" variable, see below + nfa_fail makes the NFA regexp engine fail to force a + fallback to the old engine ALL clear all overrides ({val} is not used) "starting" is to be used when a test should behave like diff --git a/src/evalfunc.c b/src/evalfunc.c index 24c31945d..7325f5dfc 100644 --- a/src/evalfunc.c +++ b/src/evalfunc.c @@ -13090,10 +13090,13 @@ f_test_override(typval_T *argvars, typval_T *rettv UNUSED) save_starting = -1; } } + else if (STRCMP(name, (char_u *)"nfa_fail") == 0) + nfa_fail_for_testing = val; else if (STRCMP(name, (char_u *)"ALL") == 0) { disable_char_avail_for_testing = FALSE; disable_redraw_for_testing = FALSE; + nfa_fail_for_testing = FALSE; if (save_starting >= 0) { starting = save_starting; diff --git a/src/globals.h b/src/globals.h index db3a73f2e..875085959 100644 --- a/src/globals.h +++ b/src/globals.h @@ -1634,6 +1634,7 @@ EXTERN int alloc_fail_repeat INIT(= 0); /* flags set by test_override() */ EXTERN int disable_char_avail_for_testing INIT(= 0); EXTERN int disable_redraw_for_testing INIT(= 0); +EXTERN int nfa_fail_for_testing INIT(= 0); EXTERN int in_free_unref_items INIT(= FALSE); #endif diff --git a/src/regexp.c b/src/regexp.c index b1cad99c3..9ac712eeb 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -367,7 +367,7 @@ static char_u e_unmatchedp[] = N_("E54: Unmatched %s("); static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)"); #ifdef FEAT_SYN_HL static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here"); -static char_u e_z1_not_allowed[] = N_("E67: \\z1 et al. not allowed here"); +static char_u e_z1_not_allowed[] = N_("E67: \\z1 - \\z9 not allowed here"); #endif static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%["); static char_u e_empty_sb[] = N_("E70: Empty %s%%[]"); @@ -2139,7 +2139,7 @@ regatom(int *flagp) switch (c) { #ifdef FEAT_SYN_HL - case '(': if (reg_do_extmatch != REX_SET) + case '(': if ((reg_do_extmatch & REX_SET) == 0) EMSG_RET_NULL(_(e_z_not_allowed)); if (one_exactly) EMSG_ONE_RET_NULL; @@ -2158,7 +2158,7 @@ regatom(int *flagp) case '6': case '7': case '8': - case '9': if (reg_do_extmatch != REX_USE) + case '9': if ((reg_do_extmatch & REX_USE) == 0) EMSG_RET_NULL(_(e_z1_not_allowed)); ret = regnode(ZREF + c - '0'); re_has_z = REX_USE; @@ -8332,8 +8332,8 @@ vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col) /* * Match a regexp against multiple lines. - * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). - * Note: "rmp->regprog" may be freed and changed. + * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp(). + * Note: "rmp->regprog" may be freed and changed, even set to NULL. * Uses curbuf for line count and 'iskeyword'. * * Return zero if there is no match. Return number of lines contained in the @@ -8376,7 +8376,12 @@ vim_regexec_multi( #ifdef FEAT_EVAL report_re_switch(pat); #endif + // checking for \z misuse was already done when compiling for NFA, + // allow all here + reg_do_extmatch = REX_ALL; rmp->regprog = vim_regcomp(pat, re_flags); + reg_do_extmatch = 0; + if (rmp->regprog != NULL) result = rmp->regprog->engine->regexec_multi( rmp, win, buf, lnum, col, tm, timed_out); diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c index 0ead03af0..e6296993d 100644 --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -1482,7 +1482,7 @@ nfa_regatom(void) case '8': case '9': /* \z1...\z9 */ - if (reg_do_extmatch != REX_USE) + if ((reg_do_extmatch & REX_USE) == 0) EMSG_RET_FAIL(_(e_z1_not_allowed)); EMIT(NFA_ZREF1 + (no_Magic(c) - '1')); /* No need to set nfa_has_backref, the sub-matches don't @@ -1491,7 +1491,7 @@ nfa_regatom(void) break; case '(': /* \z( */ - if (reg_do_extmatch != REX_SET) + if ((reg_do_extmatch & REX_SET) == 0) EMSG_RET_FAIL(_(e_z_not_allowed)); if (nfa_reg(REG_ZPAREN) == FAIL) return FAIL; /* cascaded error */ @@ -5692,7 +5692,8 @@ nfa_regmatch( nextlist->n = 0; /* clear nextlist */ nextlist->has_pim = FALSE; ++nfa_listid; - if (prog->re_engine == AUTOMATIC_ENGINE && nfa_listid >= NFA_MAX_STATES) + if (prog->re_engine == AUTOMATIC_ENGINE + && (nfa_listid >= NFA_MAX_STATES || nfa_fail_for_testing)) { /* too many states, retry with old engine */ nfa_match = NFA_TOO_EXPENSIVE; diff --git a/src/screen.c b/src/screen.c index 3dde1b7a1..b7b3e6714 100644 --- a/src/screen.c +++ b/src/screen.c @@ -7868,6 +7868,7 @@ next_search_hl( linenr_T l; colnr_T matchcol; long nmatched; + int save_called_emsg = called_emsg; if (shl->lnum != 0) { @@ -7986,6 +7987,9 @@ next_search_hl( break; /* useful match found */ } } + + // Restore called_emsg for assert_fails(). + called_emsg = save_called_emsg; } /* diff --git a/src/syntax.c b/src/syntax.c index 5feaac6f5..7db69183e 100644 --- a/src/syntax.c +++ b/src/syntax.c @@ -3327,6 +3327,12 @@ syn_regexec( profile_start(&pt); #endif + if (rmp->regprog == NULL) + // This can happen if a previous call to vim_regexec_multi() tried to + // use the NFA engine, which resulted in NFA_TOO_EXPENSIVE, and + // compiling the pattern with the other engine fails. + return FALSE; + rmp->rmm_maxcol = syn_buf->b_p_smc; r = vim_regexec_multi(rmp, syn_win, syn_buf, lnum, col, #ifdef FEAT_RELTIME diff --git a/src/testdir/test_syntax.vim b/src/testdir/test_syntax.vim index e49c8ed89..ffa8e9022 100644 --- a/src/testdir/test_syntax.vim +++ b/src/testdir/test_syntax.vim @@ -562,3 +562,15 @@ func Test_syntax_c() let $COLORFGBG = '' call delete('Xtest.c') endfun + +" Using \z() in a region with NFA failing should not crash. +func Test_syn_wrong_z_one() + new + call setline(1, ['just some text', 'with foo and bar to match with']) + syn region FooBar start="foo\z(.*\)bar" end="\z1" + call test_override("nfa_fail", 1) + redraw! + redraw! + call test_override("ALL", 0) + bwipe! +endfunc diff --git a/src/version.c b/src/version.c index f440ff953..5cf6882bf 100644 --- a/src/version.c +++ b/src/version.c @@ -761,6 +761,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 98, /**/ 97, /**/ diff --git a/src/vim.h b/src/vim.h index 0a4a49018..fb476c529 100644 --- a/src/vim.h +++ b/src/vim.h @@ -1013,6 +1013,7 @@ extern int (*dyn_libintl_putenv)(const char *envstring); /* values for reg_do_extmatch */ # define REX_SET 1 /* to allow \z\(...\), */ # define REX_USE 2 /* to allow \z\1 et al. */ +# define REX_ALL (REX_SET | REX_USE) #endif /* Return values for fullpathcmp() */ -- cgit v1.2.3