// Source: Userland/Libraries/LibC/regex.h (blob b7e01ae9813d7ff5adf347b3408d56f2e2844ade)
/*
 * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#pragma once

#include <stddef.h>
#include <sys/types.h>

__BEGIN_DECLS

// Signed offset type used for the fields of regmatch_t (rm_so, rm_eo, rm_cnt).
typedef ssize_t regoff_t;

// Handle that is passed to regcomp(), regexec(), regerror() and regfree().
// It is declared through a typedef so that the bare name `regex_t` used by
// those prototypes is a valid type name in C as well as in C++; the struct
// tag is kept so that existing `struct regex_t` spellings keep compiling.
typedef struct regex_t {
    void* __data; // Presumably implementation-private state; callers should not rely on its contents.
} regex_t;

// Error codes that the REG_* constants of enum ReError are mapped onto.
// The numeric values are written out explicitly because other enumerations
// alias them by value.
enum __Regex_Error {
    __Regex_NoError = 0,                     // No error.
    __Regex_InvalidPattern = 1,              // The regular expression is invalid.
    __Regex_InvalidCollationElement = 2,     // An invalid collating element was referenced.
    __Regex_InvalidCharacterClass = 3,       // An invalid character class type was referenced.
    __Regex_InvalidTrailingEscape = 4,       // The pattern ends with a trailing \.
    __Regex_InvalidNumber = 5,               // The number in \digit is invalid or in error.
    __Regex_MismatchingBracket = 6,          // Unbalanced [ ].
    __Regex_MismatchingParen = 7,            // Unbalanced ( ).
    __Regex_MismatchingBrace = 8,            // Unbalanced { }.
    __Regex_InvalidBraceContent = 9,         // Invalid content in {}: not a number, number too large, more than two numbers, or first larger than second.
    __Regex_InvalidBracketContent = 10,      // Invalid content in [].
    __Regex_InvalidRange = 11,               // A range expression has an invalid endpoint.
    __Regex_InvalidRepetitionMarker = 12,    // ?, * or + is not preceded by a valid regular expression.
    __Regex_ReachedMaxRecursion = 13,        // MaximumRecursion has been reached.
    __Regex_EmptySubExpression = 14,         // A sub expression has empty content.
    __Regex_InvalidCaptureGroup = 15,        // The content of a capture group is invalid.
    __Regex_InvalidNameForCaptureGroup = 16, // The name of a capture group is invalid.
};

enum ReError {
    REG_NOERR = __Regex_NoError,
    REG_BADPAT = __Regex_InvalidPattern,            // Invalid regular expression.
    REG_ECOLLATE = __Regex_InvalidCollationElement, // Invalid collating element referenced.
    REG_ECTYPE = __Regex_InvalidCharacterClass,     // Invalid character class type referenced.
    REG_EESCAPE = __Regex_InvalidTrailingEscape,    // Trailing \ in pattern.
    REG_ESUBREG = __Regex_InvalidNumber,            // Number in \digit invalid or in error.
    REG_EBRACK = __Regex_MismatchingBracket,        // [ ] imbalance.
    REG_EPAREN = __Regex_MismatchingParen,          // \( \) or ( ) imbalance.
    REG_EBRACE = __Regex_MismatchingBrace,          // \{ \} imbalance.
    REG_BADBR = __Regex_InvalidBraceContent,        // Content of \{ \} invalid: not a number, number too large, more than two numbers, first larger than second.
    REG_ERANGE = __Regex_InvalidRange,              // Invalid endpoint in range expression.
    REG_BADRPT = __Regex_InvalidRepetitionMarker,   // ?, * or + not preceded by valid regular expression.
    REG_EMPTY_EXPR = __Regex_EmptySubExpression,    // Empty expression
    REG_ENOSYS,                                     // The implementation does not support the function.
    REG_ESPACE,                                     // Out of memory.
    REG_NOMATCH,                                    // regexec() failed to match.
};

// One match record as filled in through the regmatch_t[] parameter of regexec().
// It is declared through a typedef so that the bare name `regmatch_t` used by
// the regexec() prototype is a valid type name in C as well as in C++; the
// struct tag is kept so that existing `struct regmatch_t` spellings keep compiling.
typedef struct regmatch_t {
    regoff_t rm_so;  // byte offset from start of string to start of substring
    regoff_t rm_eo;  // byte offset from start of string of the first character after the end of substring
    regoff_t rm_cnt; // number of matches (this field is not part of the POSIX definition of regmatch_t)
} regmatch_t;

// Bit flags; each enumerator occupies exactly one bit so they can be OR-ed together.
// NOTE(review): __Regex_Last names __Regex_SkipTrimEmptyMatches (bit 13), not
// __Regex_Internal_Stateful (bit 14), so anything derived as
// `__Regex_Last << 1` lands on the same bit as __Regex_Internal_Stateful.
// Confirm that this is intended.
enum __RegexAllFlags {
    __Regex_Global = 1 << 0,                // Return all matches (don't stop after the first match).
    __Regex_Insensitive = 1 << 1,           // Case-insensitive match (ignores case of [a-zA-Z]).
    __Regex_Ungreedy = 1 << 2,              // Matching is lazy by default; a ? following a quantifier makes it greedy.
    __Regex_Unicode = 1 << 3,               // Enable all unicode features and interpret all unicode escape sequences as such.
    __Regex_Extended = 1 << 4,              // Ignore whitespace: spaces and text after a # in the pattern are ignored.
    __Regex_Extra = 1 << 5,                 // Disallow meaningless escapes: a \ followed by a letter with no special meaning is faulted.
    __Regex_MatchNotBeginOfLine = 1 << 6,   // Pattern is not forced to ^ -> search in whole string!
    __Regex_MatchNotEndOfLine = 1 << 7,     // Don't force the dollar sign, $, to always match end of the string, instead of end of the line. Ignored if the Multiline flag is set.
    __Regex_SkipSubExprResults = 1 << 8,    // Do not return sub expressions in the result.
    __Regex_StringCopyMatches = 1 << 9,     // Explicitly copy results into newly allocated strings instead of a StringView into the original string.
    __Regex_SingleLine = 1 << 10,           // Dot matches newline characters.
    __Regex_Sticky = 1 << 11,               // Only match consecutive matches, starting from where the previous match ended.
    __Regex_Multiline = 1 << 12,            // Handle newline characters: match each line, one by one.
    __Regex_SkipTrimEmptyMatches = 1 << 13, // Do not remove empty capture group results.
    __Regex_Internal_Stateful = 1 << 14,    // Internal flag; enables stateful matches.
    __Regex_Last = __Regex_SkipTrimEmptyMatches
};

// clang-format off
// Values for the cflags parameter to the regcomp() function:
#define REG_EXTENDED __Regex_Extended                // Use Extended Regular Expressions.
#define REG_ICASE __Regex_Insensitive                // Ignore case in match.
#define REG_NOSUB __Regex_SkipSubExprResults         // Report only success or fail in regexec().
#define REG_GLOBAL __Regex_Global                    // Don't stop searching for more matches.
#define REG_NEWLINE (__Regex_Multiline | REG_GLOBAL) // Change the handling of newline.

// Values for the eflags parameter to the regexec() function:
#define REG_NOTBOL __Regex_MatchNotBeginOfLine // The circumflex character (^), when taken as a special character, will not match the beginning of string.
#define REG_NOTEOL __Regex_MatchNotEndOfLine   // The dollar sign ($), when taken as a special character, will not match the end of string.

// TODO: static_assert that the flags type is wide enough, i.e.
//       sizeof(FlagsUnderlyingType) * 8 >= (regex::POSIXFlags::Last << 1), "flags type too small".
// REG_SEARCH is the bit directly above __Regex_Last. The expansion is
// parenthesized so the macro stays a single operand inside larger expressions
// such as `flags & ~REG_SEARCH` or `REG_SEARCH + 1`.
// NOTE(review): with __Regex_Last == __Regex_SkipTrimEmptyMatches this is the
// same bit as __Regex_Internal_Stateful. Confirm that the overlap is intended.
#define REG_SEARCH (__Regex_Last << 1)
// clang-format on

// Regular-expression entry points. Only the declarations live here; the
// summaries below follow the POSIX <regex.h> names and should be confirmed
// against the implementation.

// Compiles `pattern` into `preg`; `cflags` takes the REG_* cflags values defined in this header.
int regcomp(regex_t* preg, const char* pattern, int cflags);

// Matches `string` against `preg`, reporting up to `nmatch` entries through
// `pmatch`; `eflags` takes the REG_* eflags values defined in this header.
int regexec(const regex_t* preg, const char* string, size_t nmatch, regmatch_t pmatch[], int eflags);

// Writes a message for `errcode` into `errbuf`, which holds `errbuf_size` bytes.
size_t regerror(int errcode, const regex_t* preg, char* errbuf, size_t errbuf_size);

// Releases whatever regcomp() attached to `preg`.
void regfree(regex_t* preg);

__END_DECLS