1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
|
/* vi:set ts=8 sts=4 sw=4:
*
* NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
*
* This is NOT the original regular expression code as written by Henry
* Spencer. This code has been modified specifically for use with Vim, and
* should not be used apart from compiling Vim. If you want a good regular
* expression library, get the original code.
*
* NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
*/
#ifndef _REGEXP_H
#define _REGEXP_H
/*
* The number of sub-matches is limited to 10.
* The first one (index 0) is the whole match, referenced with "\0".
* The second one (index 1) is the first sub-match, referenced with "\1".
* This goes up to the tenth (index 9), referenced with "\9".
*
* NSUBEXP is the array length of startp[]/endp[] in regmatch_T,
* startpos[]/endpos[] in regmmatch_T and matches[] in reg_extmatch_T.
*/
#define NSUBEXP 10
/*
* In the NFA engine: how many braces are allowed.
* NOTE(review): "braces" presumably refers to brace items in a pattern;
* confirm against the NFA matcher source.
* TODO(RE): Use dynamic memory allocation instead of a static limit like
* this one.
*/
#define NFA_MAX_BRACES 20
/*
* Forward declaration: the program structures below hold a pointer to the
* engine, while struct regengine itself (defined at the end of this file)
* refers back to regprog_T and the match structures.
*/
typedef struct regengine regengine_T;
/*
* Structure returned by vim_regcomp() to pass on to vim_regexec().
* This is the general structure. For the actual matcher, two specific
* structures are used: bt_regprog_T and nfa_regprog_T, see below. Both of
* them start with the same two members as this structure, in the same order.
*/
typedef struct regprog
{
regengine_T *engine; /* the struct regengine that belongs to this program */
unsigned regflags; /* flag bits; their values are not defined in this file */
} regprog_T;
/*
* Structure used by the back track matcher.
* These fields are only to be used in regexp.c!
* See regexp.c for an explanation.
*
* The leading "engine" and "regflags" members repeat the members of
* regprog_T, in the same order.
*/
typedef struct
{
/* These two members implement regprog_T */
regengine_T *engine;
unsigned regflags;
/*
* The members below are explained in regexp.c.
* NOTE(review): "regstart" and "reganch" presumably have the same meaning
* as the members with the same name in nfa_regprog_T (char at start of
* pattern / pattern starts with ^) -- confirm in regexp.c.
*/
int regstart;
char_u reganch;
char_u *regmust;
int regmlen;
#ifdef FEAT_SYN_HL
char_u reghasz; /* only present when FEAT_SYN_HL is defined */
#endif
char_u program[1]; /* declared with one element, actually longer.. */
} bt_regprog_T;
/*
* Structure representing a NFA state.
* A NFA state may have no outgoing edge, when it is a NFA_MATCH state.
*
* States refer to each other through the "out" and "out1" pointers.
*/
typedef struct nfa_state nfa_state_T;
struct nfa_state
{
int c; /* NOTE(review): presumably the state type or character
	* (e.g. NFA_MATCH) -- confirm in the NFA matcher */
nfa_state_T *out; /* link to another state; presumably an outgoing edge */
nfa_state_T *out1; /* second link to another state */
int id; /* NOTE(review): presumably a number identifying the
	* state -- confirm in the NFA matcher */
int lastlist[2]; /* 0: normal, 1: recursive */
int val; /* meaning not visible in this file */
};
/*
* Structure used by the NFA matcher.
*
* The leading "engine" and "regflags" members repeat the members of
* regprog_T, in the same order. The states are stored at the end of the
* structure, in "state[]".
*/
typedef struct
{
/* These two members implement regprog_T */
regengine_T *engine;
unsigned regflags;
nfa_state_T *start; /* points into state[] */
int reganch; /* pattern starts with ^ */
int regstart; /* char at start of pattern */
char_u *match_text; /* plain text to match with */
int has_zend; /* pattern contains \ze */
int has_backref; /* pattern contains \1 .. \9 */
#ifdef FEAT_SYN_HL
int reghasz; /* only present when FEAT_SYN_HL is defined */
#endif
#ifdef DEBUG
char_u *pattern; /* only present when DEBUG is defined */
#endif
int nsubexp; /* number of () */
int nstate; /* NOTE(review): presumably the number of entries
	* in state[] -- confirm */
nfa_state_T state[1]; /* declared with one element, actually longer.. */
} nfa_regprog_T;
/*
* Structure to be used for single-line matching.
* Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
* When there is no match, the pointer is NULL.
* Both arrays have NSUBEXP entries.
*/
typedef struct
{
regprog_T *regprog; /* the program this match structure is used with */
char_u *startp[NSUBEXP]; /* start of each sub-match, NULL when no match */
char_u *endp[NSUBEXP]; /* just after the end of each sub-match */
int rm_ic; /* NOTE(review): presumably "ignore case" -- confirm */
} regmatch_T;
/*
* Structure to be used for multi-line matching.
* Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col"
* and ends in line "endpos[no].lnum" just before column "endpos[no].col".
* The line numbers are relative to the first line, thus startpos[0].lnum is
* always 0.
* When there is no match, the line number is -1.
* Both arrays have NSUBEXP entries.
*/
typedef struct
{
regprog_T *regprog; /* the program this match structure is used with */
lpos_T startpos[NSUBEXP]; /* start position of each sub-match */
lpos_T endpos[NSUBEXP]; /* position just after each sub-match */
int rmm_ic; /* NOTE(review): presumably "ignore case" -- confirm */
colnr_T rmm_maxcol; /* when not zero: maximum column */
} regmmatch_T;
/*
* Structure used to store external references: "\z\(\)" to "\z\1".
* Use a reference count to avoid the need to copy this around. When it goes
* from 1 to zero the matches need to be freed.
*/
typedef struct
{
short refcnt; /* reference count; free the matches when it drops to zero */
char_u *matches[NSUBEXP]; /* the stored matches, NSUBEXP entries */
} reg_extmatch_T;
/*
* The functions that make up one regexp engine, plus a DEBUG-only field.
* regprog_T, bt_regprog_T and nfa_regprog_T refer to this structure through
* their "engine" member.
*
* NOTE(review): the parameters are unnamed here. The purpose of each function
* is inferred from its name and signature only -- confirm against the
* engine implementations.
*/
struct regengine
{
/* Takes a char_u string and an int; returns a regprog_T pointer.
* Presumably compiles a pattern. */
regprog_T *(*regcomp)(char_u*, int);
/* Takes a regprog_T pointer, returns nothing. Presumably frees it. */
void (*regfree)(regprog_T *);
/* Works on a regmatch_T (single-line matching) with a char_u string, a
* column and an int; returns an int. */
int (*regexec_nl)(regmatch_T*, char_u*, colnr_T, int);
/* Works on a regmmatch_T (multi-line matching) with a window, a buffer, a
* line number, a column and a proftime_T pointer; returns a long. */
long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T, proftime_T*);
#ifdef DEBUG
char_u *expr; /* only present when DEBUG is defined */
#endif
};
#endif /* _REGEXP_H */
|