diff options
author | Bram Moolenaar <Bram@vim.org> | 2013-06-05 18:52:40 +0200 |
---|---|---|
committer | Bram Moolenaar <Bram@vim.org> | 2013-06-05 18:52:40 +0200 |
commit | 8795374bd31a98e488e1ce293604f17eec33067b (patch) | |
tree | 6752dba4356911f1611a24098798d14380b4b0ed | |
parent | 67604aed755c71f51114c0125eb49ba7bcfcf8c7 (diff) | |
download | vim-8795374bd31a98e488e1ce293604f17eec33067b.zip |
updated for version 7.3.1122
Problem: New regexp engine: \@> not supported.
Solution: Implement \@>.
-rw-r--r-- | src/regexp_nfa.c | 140 | ||||
-rw-r--r-- | src/testdir/test64.in | 8 | ||||
-rw-r--r-- | src/testdir/test64.ok | 8 | ||||
-rw-r--r-- | src/version.c | 2 |
4 files changed, 126 insertions, 32 deletions
diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c index c7747eac3..e7db49930 100644 --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -57,7 +57,9 @@ enum NFA_NCLOSE, /* End of subexpr. marked with \%( ... \) */ NFA_START_INVISIBLE, NFA_START_INVISIBLE_BEFORE, + NFA_START_PATTERN, NFA_END_INVISIBLE, + NFA_END_PATTERN, NFA_COMPOSING, /* Next nodes in NFA are part of the composing multibyte char */ NFA_END_COMPOSING, /* End of a composing char in the NFA */ @@ -1505,9 +1507,9 @@ nfa_regpiece() i = NFA_PREV_ATOM_JUST_BEFORE_NEG; break; case '>': - /* \@> Not supported yet */ - /* i = NFA_PREV_ATOM_LIKE_PATTERN; */ - return FAIL; + /* \@> */ + i = NFA_PREV_ATOM_LIKE_PATTERN; + break; } if (i == 0) { @@ -1885,12 +1887,17 @@ nfa_set_code(c) STRCPY(code, "NFA_PREV_ATOM_JUST_BEFORE"); break; case NFA_PREV_ATOM_JUST_BEFORE_NEG: STRCPY(code, "NFA_PREV_ATOM_JUST_BEFORE_NEG"); break; + case NFA_PREV_ATOM_LIKE_PATTERN: + STRCPY(code, "NFA_PREV_ATOM_LIKE_PATTERN"); break; + case NFA_NOPEN: STRCPY(code, "NFA_NOPEN"); break; case NFA_NCLOSE: STRCPY(code, "NFA_NCLOSE"); break; case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break; case NFA_START_INVISIBLE_BEFORE: STRCPY(code, "NFA_START_INVISIBLE_BEFORE"); break; + case NFA_START_PATTERN: STRCPY(code, "NFA_START_PATTERN"); break; case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break; + case NFA_END_PATTERN: STRCPY(code, "NFA_END_PATTERN"); break; case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break; case NFA_END_COMPOSING: STRCPY(code, "NFA_END_COMPOSING"); break; @@ -2601,12 +2608,26 @@ post2nfa(postfix, end, nfa_calc_size) case NFA_PREV_ATOM_NO_WIDTH_NEG: case NFA_PREV_ATOM_JUST_BEFORE: case NFA_PREV_ATOM_JUST_BEFORE_NEG: + case NFA_PREV_ATOM_LIKE_PATTERN: { int neg = (*p == NFA_PREV_ATOM_NO_WIDTH_NEG || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); int before = (*p == NFA_PREV_ATOM_JUST_BEFORE || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); - int n; + int pattern = (*p == NFA_PREV_ATOM_LIKE_PATTERN); + int 
start_state = NFA_START_INVISIBLE; + int end_state = NFA_END_INVISIBLE; + int n = 0; + nfa_state_T *zend; + nfa_state_T *skip; + + if (before) + start_state = NFA_START_INVISIBLE_BEFORE; + else if (pattern) + { + start_state = NFA_START_PATTERN; + end_state = NFA_END_PATTERN; + } if (before) n = *++p; /* get the count */ @@ -2620,16 +2641,15 @@ post2nfa(postfix, end, nfa_calc_size) if (nfa_calc_size == TRUE) { - nstate += 2; + nstate += pattern ? 4 : 2; break; } e = POP(); - s1 = alloc_state(NFA_END_INVISIBLE, NULL, NULL); + s1 = alloc_state(end_state, NULL, NULL); if (s1 == NULL) goto theend; - patch(e.out, s1); - s = alloc_state(NFA_START_INVISIBLE, e.start, s1); + s = alloc_state(start_state, e.start, s1); if (s == NULL) goto theend; if (neg) @@ -2638,12 +2658,21 @@ post2nfa(postfix, end, nfa_calc_size) s1->negated = TRUE; } if (before) - { s->val = n; /* store the count */ - ++s->c; /* NFA_START_INVISIBLE -> NFA_START_INVISIBLE_BEFORE */ + if (pattern) + { + /* NFA_ZEND -> NFA_END_PATTERN -> NFA_SKIP -> what follows. */ + skip = alloc_state(NFA_SKIP, NULL, NULL); + zend = alloc_state(NFA_ZEND, s1, NULL); + s1->out= skip; + patch(e.out, zend); + PUSH(frag(s, list1(&skip->out))); + } + else + { + patch(e.out, s1); + PUSH(frag(s, list1(&s1->out))); } - - PUSH(frag(s, list1(&s1->out))); break; } @@ -2953,7 +2982,7 @@ log_subexpr(sub) for (j = 0; j < sub->in_use; j++) if (REG_MULTI) - fprintf(log_fd, "\n *** group %d, start: c=%d, l=%d, end: c=%d, l=%d", + fprintf(log_fd, "*** group %d, start: c=%d, l=%d, end: c=%d, l=%d\n", j, sub->list.multi[j].start.col, (int)sub->list.multi[j].start.lnum, @@ -2964,12 +2993,11 @@ log_subexpr(sub) char *s = (char *)sub->list.line[j].start; char *e = (char *)sub->list.line[j].end; - fprintf(log_fd, "\n *** group %d, start: \"%s\", end: \"%s\"", + fprintf(log_fd, "*** group %d, start: \"%s\", end: \"%s\"\n", j, s == NULL ? "NULL" : s, e == NULL ? 
"NULL" : e); } - fprintf(log_fd, "\n"); } #endif @@ -4317,6 +4345,7 @@ nfa_regmatch(prog, start, submatch, m) } case NFA_END_INVISIBLE: + case NFA_END_PATTERN: /* * This is only encountered after a NFA_START_INVISIBLE or * NFA_START_INVISIBLE_BEFORE node. @@ -4343,7 +4372,8 @@ nfa_regmatch(prog, start, submatch, m) (int)(nfa_endp->se_u.ptr - reginput)); } #endif - /* It's only a match if it ends at "nfa_endp" */ + /* If "nfa_endp" is set it's only a match if it ends at + * "nfa_endp" */ if (nfa_endp != NULL && (REG_MULTI ? (reglnum != nfa_endp->se_u.pos.lnum || (int)(reginput - regline) @@ -4360,6 +4390,10 @@ nfa_regmatch(prog, start, submatch, m) copy_sub(&m->synt, &t->subs.synt); #endif } +#ifdef ENABLE_LOG + fprintf(log_fd, "Match found:\n"); + log_subsexpr(m); +#endif nfa_match = TRUE; break; @@ -4435,6 +4469,63 @@ nfa_regmatch(prog, start, submatch, m) } break; + case NFA_START_PATTERN: + /* First try matching the pattern. */ + result = recursive_regmatch(t->state, prog, + submatch, m, &listids); + if (result) + { + int bytelen; + +#ifdef ENABLE_LOG + fprintf(log_fd, "NFA_START_PATTERN matches:\n"); + log_subsexpr(m); +#endif + /* Copy submatch info from the recursive call */ + copy_sub_off(&t->subs.norm, &m->norm); +#ifdef FEAT_SYN_HL + copy_sub_off(&t->subs.synt, &m->synt); +#endif + /* Now we need to skip over the matched text and then + * continue with what follows. 
*/ + if (REG_MULTI) + /* TODO: multi-line match */ + bytelen = m->norm.list.multi[0].end.col + - (int)(reginput - regline); + else + bytelen = (int)(m->norm.list.line[0].end - reginput); + +#ifdef ENABLE_LOG + fprintf(log_fd, "NFA_START_PATTERN length: %d\n", bytelen); +#endif + if (bytelen == 0) + { + /* empty match, output of corresponding + * NFA_END_PATTERN/NFA_SKIP to be used at current + * position */ + addstate_here(thislist, t->state->out1->out->out, + &t->subs, t->pim, &listidx); + } + else if (bytelen <= clen) + { + /* match current character, output of corresponding + * NFA_END_PATTERN to be used at next position. */ + ll = nextlist; + add_state = t->state->out1->out->out; + add_off = clen; + } + else + { + /* skip over the matched characters, set character + * count in NFA_SKIP */ + ll = nextlist; + add_state = t->state->out1->out; + add_off = bytelen; + add_count = bytelen - clen; + } + } + break; + case NFA_BOL: if (reginput == regline) addstate_here(thislist, t->state->out, &t->subs, @@ -4846,9 +4937,6 @@ nfa_regmatch(prog, start, submatch, m) ll = nextlist; add_state = t->state->out->out; add_off = clen; -#ifdef ENABLE_LOG - log_subsexpr(&nextlist->t[nextlist->n - 1].subs); -#endif } else { @@ -4858,9 +4946,6 @@ nfa_regmatch(prog, start, submatch, m) add_state = t->state->out; add_off = bytelen; add_count = bytelen - clen; -#ifdef ENABLE_LOG - log_subsexpr(&nextlist->t[nextlist->n - 1].subs); -#endif } } break; @@ -4873,9 +4958,6 @@ nfa_regmatch(prog, start, submatch, m) ll = nextlist; add_state = t->state->out; add_off = clen; -#ifdef ENABLE_LOG - log_subsexpr(&nextlist->t[nextlist->n - 1].subs); -#endif } else { @@ -4884,9 +4966,6 @@ nfa_regmatch(prog, start, submatch, m) add_state = t->state; add_off = 0; add_count = t->count - clen; -#ifdef ENABLE_LOG - log_subsexpr(&nextlist->t[nextlist->n - 1].subs); -#endif } break; @@ -5158,13 +5237,12 @@ nfa_regtry(prog, col) f = fopen(NFA_REGEXP_RUN_LOG, "a"); if (f != NULL) { - fprintf(f, 
"\n\n\n\n\n\n\t\t=======================================================\n"); - fprintf(f, " =======================================================\n"); + fprintf(f, "\n\n\t=======================================================\n"); #ifdef DEBUG fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr); #endif fprintf(f, "\tInput text is \"%s\" \n", reginput); - fprintf(f, " =======================================================\n\n"); + fprintf(f, "\t=======================================================\n\n"); nfa_print_state(f, start); fprintf(f, "\n\n"); fclose(f); diff --git a/src/testdir/test64.in b/src/testdir/test64.in index 062d9baa8..0ba833fed 100644 --- a/src/testdir/test64.in +++ b/src/testdir/test64.in @@ -385,6 +385,12 @@ STARTTEST :call add(tl, [2, '\(<<\)\@2<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<']) :call add(tl, [2, '\(foo\)\@<!bar.', 'xx foobar1 xbar2 xx', 'bar2']) :" +:""""" \@> +:call add(tl, [2, '\(a*\)\@>a', 'aaaa']) +:call add(tl, [2, '\(a*\)\@>b', 'aaab', 'aaab', 'aaa']) +:" TODO: BT engine does not restore submatch after failure +:call add(tl, [1, '\(a*\)\@>a\|a\+', 'aaaa', 'aaaa']) +:" :"""" "\_" prepended negated collection matches EOL :call add(tl, [2, '\_[^8-9]\+', "asfi\n9888", "asfi\n"]) :call add(tl, [2, '\_[^a]\+', "asfi\n9888", "sfi\n9888"]) @@ -401,7 +407,7 @@ STARTTEST : let text = t[2] : let matchidx = 3 : for engine in [0, 1, 2] -: if engine == 2 && !re +: if engine == 2 && re == 0 || engine == 1 && re ==1 : continue : endif : let &regexpengine = engine diff --git a/src/testdir/test64.ok b/src/testdir/test64.ok index 49ecac631..49a570ae2 100644 --- a/src/testdir/test64.ok +++ b/src/testdir/test64.ok @@ -872,6 +872,14 @@ OK 2 - \(<<\)\@2<=span. OK 0 - \(foo\)\@<!bar. OK 1 - \(foo\)\@<!bar. OK 2 - \(foo\)\@<!bar. 
+OK 0 - \(a*\)\@>a +OK 1 - \(a*\)\@>a +OK 2 - \(a*\)\@>a +OK 0 - \(a*\)\@>b +OK 1 - \(a*\)\@>b +OK 2 - \(a*\)\@>b +OK 0 - \(a*\)\@>a\|a\+ +OK 2 - \(a*\)\@>a\|a\+ OK 0 - \_[^8-9]\+ OK 1 - \_[^8-9]\+ OK 2 - \_[^8-9]\+ diff --git a/src/version.c b/src/version.c index e74e271da..edc78bc34 100644 --- a/src/version.c +++ b/src/version.c @@ -729,6 +729,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 1122, +/**/ 1121, /**/ 1120, |