diff options
Diffstat (limited to 'Libraries/LibRegex/Tests/Regex.cpp')
-rw-r--r-- | Libraries/LibRegex/Tests/Regex.cpp | 451 |
1 files changed, 451 insertions, 0 deletions
diff --git a/Libraries/LibRegex/Tests/Regex.cpp b/Libraries/LibRegex/Tests/Regex.cpp new file mode 100644 index 0000000000..8a5cb904b1 --- /dev/null +++ b/Libraries/LibRegex/Tests/Regex.cpp @@ -0,0 +1,451 @@ +/* + * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <AK/TestSuite.h> // import first, to prevent warning of ASSERT* redefinition + +#include <LibRegex/Regex.h> +#include <LibRegex/RegexDebug.h> +#include <AK/StringBuilder.h> +#include <stdio.h> + +static ECMAScriptOptions match_test_api_options(const ECMAScriptOptions options) +{ + return options; +} + +static PosixOptions match_test_api_options(const PosixOptions options) +{ + return options; +} + +TEST_CASE(regex_options_ecmascript) +{ + ECMAScriptOptions eo; + eo |= ECMAScriptFlags::Global; + + EXPECT(eo & ECMAScriptFlags::Global); + EXPECT(!(eo & ECMAScriptFlags::Insensitive)); + + eo = match_test_api_options(ECMAScriptFlags::Global | ECMAScriptFlags::Insensitive | ECMAScriptFlags::Sticky); + EXPECT(eo & ECMAScriptFlags::Global); + EXPECT(eo & ECMAScriptFlags::Insensitive); + EXPECT(eo & ECMAScriptFlags::Sticky); + EXPECT(!(eo & ECMAScriptFlags::Unicode)); + EXPECT(!(eo & ECMAScriptFlags::Multiline)); + EXPECT(!(eo & ECMAScriptFlags::SingleLine)); + + eo &= ECMAScriptFlags::Insensitive; + EXPECT(!(eo & ECMAScriptFlags::Global)); + EXPECT(eo & ECMAScriptFlags::Insensitive); + EXPECT(!(eo & ECMAScriptFlags::Multiline)); + + eo &= ECMAScriptFlags::Sticky; + EXPECT(!(eo & ECMAScriptFlags::Global)); + EXPECT(!(eo & ECMAScriptFlags::Insensitive)); + EXPECT(!(eo & ECMAScriptFlags::Multiline)); + EXPECT(!(eo & ECMAScriptFlags::Sticky)); + + eo = ~ECMAScriptFlags::Insensitive; + EXPECT(eo & ECMAScriptFlags::Global); + EXPECT(!(eo & ECMAScriptFlags::Insensitive)); + EXPECT(eo & ECMAScriptFlags::Multiline); + EXPECT(eo & ECMAScriptFlags::Sticky); +} + +TEST_CASE(regex_options_posix) +{ + PosixOptions eo; + eo |= PosixFlags::Global; + + EXPECT(eo & PosixFlags::Global); + EXPECT(!(eo & PosixFlags::Insensitive)); + + eo = match_test_api_options(PosixFlags::Global | PosixFlags::Insensitive | PosixFlags::MatchNotBeginOfLine); + EXPECT(eo & PosixFlags::Global); + EXPECT(eo & PosixFlags::Insensitive); + EXPECT(eo & PosixFlags::MatchNotBeginOfLine); + EXPECT(!(eo & PosixFlags::Unicode)); + EXPECT(!(eo & PosixFlags::Multiline)); + + eo &= PosixFlags::Insensitive; + EXPECT(!(eo & PosixFlags::Global)); + EXPECT(eo & PosixFlags::Insensitive); + EXPECT(!(eo & PosixFlags::Multiline)); + + eo &= PosixFlags::MatchNotBeginOfLine; + EXPECT(!(eo & PosixFlags::Global)); + EXPECT(!(eo & PosixFlags::Insensitive)); + EXPECT(!(eo & PosixFlags::Multiline)); + + eo = ~PosixFlags::Insensitive; + EXPECT(eo & PosixFlags::Global); + EXPECT(!(eo & PosixFlags::Insensitive)); + EXPECT(eo & PosixFlags::Multiline); +} + +TEST_CASE(regex_lexer) +{ + Lexer l("/[.*+?^${}()|[\\]\\\\]/g"); + EXPECT(l.next().type() == regex::TokenType::Slash); + EXPECT(l.next().type() == regex::TokenType::LeftBracket); + EXPECT(l.next().type() == regex::TokenType::Period); + EXPECT(l.next().type() == regex::TokenType::Asterisk); + EXPECT(l.next().type() == regex::TokenType::Plus); + EXPECT(l.next().type() == regex::TokenType::Questionmark); + EXPECT(l.next().type() == regex::TokenType::Circumflex); + EXPECT(l.next().type() == regex::TokenType::Dollar); + EXPECT(l.next().type() == regex::TokenType::LeftCurly); + EXPECT(l.next().type() == regex::TokenType::RightCurly); + EXPECT(l.next().type() == regex::TokenType::LeftParen); + EXPECT(l.next().type() == regex::TokenType::RightParen); + EXPECT(l.next().type() == regex::TokenType::Pipe); + EXPECT(l.next().type() == regex::TokenType::LeftBracket); + EXPECT(l.next().type() == regex::TokenType::EscapeSequence); + EXPECT(l.next().type() == regex::TokenType::EscapeSequence); + EXPECT(l.next().type() == regex::TokenType::RightBracket); + EXPECT(l.next().type() == regex::TokenType::Slash); + EXPECT(l.next().type() == regex::TokenType::Char); +} + +TEST_CASE(parser_error_parens) +{ + String pattern = "test()test"; + Lexer l(pattern); + PosixExtendedParser p(l); + p.parse(); + EXPECT(p.has_error()); + EXPECT(p.error() == Error::EmptySubExpression); +} + +TEST_CASE(parser_error_special_characters_used_at_wrong_place) +{ + String pattern; + Vector<char, 5> chars = { '*', '+', '?', '{' }; + StringBuilder b; + + Lexer l; + PosixExtended p(l); + + for (auto& ch : chars) { + // First in ere + b.clear(); + b.append(ch); + pattern = b.build(); + l.set_source(pattern); + p.parse(); + EXPECT(p.has_error()); + EXPECT(p.error() == Error::InvalidRepetitionMarker); + + // After vertical line + b.clear(); + b.append("a|"); + b.append(ch); + pattern = b.build(); + l.set_source(pattern); + p.parse(); + EXPECT(p.has_error()); + EXPECT(p.error() == Error::InvalidRepetitionMarker); + + // After circumflex + b.clear(); + b.append("^"); + b.append(ch); + pattern = b.build(); + l.set_source(pattern); + p.parse(); + EXPECT(p.has_error()); + EXPECT(p.error() == Error::InvalidRepetitionMarker); + + // After dollar + b.clear(); + b.append("$"); + b.append(ch); + pattern = b.build(); + l.set_source(pattern); + p.parse(); + EXPECT(p.has_error()); + EXPECT(p.error() == Error::InvalidRepetitionMarker); + + // After left parens + b.clear(); + b.append("("); + b.append(ch); + b.append(")"); + pattern = b.build(); + l.set_source(pattern); + p.parse(); + EXPECT(p.has_error()); + EXPECT(p.error() == Error::InvalidRepetitionMarker); + } +} + +TEST_CASE(parser_error_vertical_line_used_at_wrong_place) +{ + Lexer l; + PosixExtended p(l); + + // First in ere + l.set_source("|asdf"); + p.parse(); + EXPECT(p.has_error()); + EXPECT(p.error() == Error::EmptySubExpression); + + // Last in ere + l.set_source("asdf|"); + p.parse(); + EXPECT(p.has_error()); + EXPECT(p.error() == Error::EmptySubExpression); + + // After left parens + l.set_source("(|asdf)"); + p.parse(); + EXPECT(p.has_error()); + EXPECT(p.error() == Error::EmptySubExpression); + + // Proceed right parens + l.set_source("(asdf)|"); + p.parse(); + EXPECT(p.has_error()); + EXPECT(p.error() == Error::EmptySubExpression); +} + +TEST_CASE(catch_all_first) +{ + Regex<PosixExtended> re("^.*$"); + RegexResult m; + re.match("Hello World", m); + EXPECT(m.count == 1); + EXPECT(re.match("Hello World", m)); +} + +TEST_CASE(catch_all) +{ + Regex<PosixExtended> re("^.*$", PosixFlags::Global); + + EXPECT(re.has_match("Hello World")); + EXPECT(re.match("Hello World").success); + EXPECT(re.match("Hello World").count == 1); + + EXPECT(has_match("Hello World", re)); + auto res = match("Hello World", re); + EXPECT(res.success); + EXPECT(res.count == 1); + EXPECT(res.matches.size() == 1); + EXPECT(res.matches.first().view == "Hello World"); +} + +TEST_CASE(catch_all_again) +{ + Regex<PosixExtended> re("^.*$", PosixFlags::Extra); + EXPECT_EQ(has_match("Hello World", re), true); +} + +TEST_CASE(char_utf8) +{ + Regex<PosixExtended> re("😀"); + RegexResult result; + + EXPECT_EQ((result = match("Привет, мир! 😀 γειά σου κόσμος 😀 こんにちは世界", re, PosixFlags::Global)).success, true); + EXPECT_EQ(result.count, 2u); +} + +TEST_CASE(catch_all_newline) +{ + Regex<PosixExtended> re("^.*$", PosixFlags::Multiline | PosixFlags::StringCopyMatches); + RegexResult result; + auto lambda = [&result, &re]() { + String aaa = "Hello World\nTest\n1234\n"; + result = match(aaa, re); + EXPECT_EQ(result.success, true); + }; + lambda(); + EXPECT_EQ(result.count, 3u); + EXPECT_EQ(result.matches.at(0).view, "Hello World"); + EXPECT_EQ(result.matches.at(1).view, "Test"); + EXPECT_EQ(result.matches.at(2).view, "1234"); +} + +TEST_CASE(catch_all_newline_view) +{ + Regex<PosixExtended> re("^.*$", PosixFlags::Multiline); + RegexResult result; + + String aaa = "Hello World\nTest\n1234\n"; + result = match(aaa, re); + EXPECT_EQ(result.success, true); + EXPECT_EQ(result.count, 3u); + String str = "Hello World"; + EXPECT_EQ(result.matches.at(0).view, str.view()); + EXPECT_EQ(result.matches.at(1).view, "Test"); + EXPECT_EQ(result.matches.at(2).view, "1234"); +} + +TEST_CASE(catch_all_newline_2) +{ + Regex<PosixExtended> re("^.*$"); + RegexResult result; + result = match("Hello World\nTest\n1234\n", re, PosixFlags::Multiline | PosixFlags::StringCopyMatches); + EXPECT_EQ(result.success, true); + EXPECT_EQ(result.count, 3u); + EXPECT_EQ(result.matches.at(0).view, "Hello World"); + EXPECT_EQ(result.matches.at(1).view, "Test"); + EXPECT_EQ(result.matches.at(2).view, "1234"); + + result = match("Hello World\nTest\n1234\n", re); + EXPECT_EQ(result.success, true); + EXPECT_EQ(result.count, 1u); + EXPECT_EQ(result.matches.at(0).view, "Hello World\nTest\n1234\n"); +} + +TEST_CASE(match_all_character_class) +{ + Regex<PosixExtended> re("[[:alpha:]]"); + String str = "[Window]\nOpacity=255\nAudibleBeep=0\n"; + RegexResult result = match(str, re, PosixFlags::Global | PosixFlags::StringCopyMatches); + + EXPECT_EQ(result.success, true); + EXPECT_EQ(result.count, 24u); + EXPECT_EQ(result.matches.at(0).view, "W"); + EXPECT_EQ(result.matches.at(1).view, "i"); + EXPECT_EQ(result.matches.at(2).view, "n"); + EXPECT(&result.matches.at(0).view.characters_without_null_termination()[0] != &str.view().characters_without_null_termination()[1]); +} + +TEST_CASE(example_for_git_commit) +{ + Regex<PosixExtended> re("^.*$"); + auto result = re.match("Well, hello friends!\nHello World!"); + + EXPECT(result.success); + EXPECT(result.count == 1); + EXPECT(result.matches.at(0).view.starts_with("Well")); + EXPECT(result.matches.at(0).view.length() == 33); + + EXPECT(re.has_match("Well,....")); + + result = re.match("Well, hello friends!\nHello World!", PosixFlags::Multiline); + + EXPECT(result.success); + EXPECT(result.count == 2); + EXPECT(result.matches.at(0).view == "Well, hello friends!"); + EXPECT(result.matches.at(1).view == "Hello World!"); +} + +TEST_CASE(email_address) +{ + Regex<PosixExtended> re("^[A-Z0-9a-z._%+-]{1,64}@([A-Za-z0-9-]{1,63}\\.){1,125}[A-Za-z]{2,63}$"); + EXPECT(re.has_match("hello.world@domain.tld")); + EXPECT(re.has_match("this.is.a.very_long_email_address@world.wide.web")); +} + +TEST_CASE(ini_file_entries) +{ + Regex<PosixExtended> re("[[:alpha:]]*=([[:digit:]]*)|\\[(.*)\\]"); + RegexResult result; + +#ifdef REGEX_DEBUG + RegexDebug regex_dbg(stderr); + regex_dbg.print_raw_bytecode(re); + regex_dbg.print_header(); + regex_dbg.print_bytecode(re); +#endif + + String haystack = "[Window]\nOpacity=255\nAudibleBeep=0\n"; + EXPECT_EQ(re.search(haystack.view(), result, PosixFlags::Multiline), true); + EXPECT_EQ(result.count, 3u); + +#ifdef REGEX_DEBUG + for (auto& v : result.matches) + fprintf(stderr, "%s\n", v.view.to_string().characters()); +#endif + + EXPECT_EQ(result.matches.at(0).view, "[Window]"); + EXPECT_EQ(result.capture_group_matches.at(0).at(1).view, "Window"); + EXPECT_EQ(result.matches.at(1).view, "Opacity=255"); + EXPECT_EQ(result.matches.at(1).line, 1u); + EXPECT_EQ(result.matches.at(1).column, 0u); + EXPECT_EQ(result.capture_group_matches.at(1).at(0).view, "255"); + EXPECT_EQ(result.capture_group_matches.at(1).at(0).line, 1u); + EXPECT_EQ(result.capture_group_matches.at(1).at(0).column, 8u); + EXPECT_EQ(result.matches.at(2).view, "AudibleBeep=0"); + EXPECT_EQ(result.capture_group_matches.at(2).at(0).view, "0"); + EXPECT_EQ(result.capture_group_matches.at(2).at(0).line, 2u); + EXPECT_EQ(result.capture_group_matches.at(2).at(0).column, 12u); +} + +TEST_CASE(named_capture_group) +{ + Regex<PosixExtended> re("[[:alpha:]]*=(?<Test>[[:digit:]]*)"); + RegexResult result; + +#ifdef REGEX_DEBUG + RegexDebug regex_dbg(stderr); + regex_dbg.print_raw_bytecode(re); + regex_dbg.print_header(); + regex_dbg.print_bytecode(re); +#endif + + String haystack = "[Window]\nOpacity=255\nAudibleBeep=0\n"; + EXPECT_EQ(re.search(haystack, result, PosixFlags::Multiline), true); + EXPECT_EQ(result.count, 2u); + EXPECT_EQ(result.matches.at(0).view, "Opacity=255"); + EXPECT_EQ(result.named_capture_group_matches.at(0).ensure("Test").view, "255"); + EXPECT_EQ(result.matches.at(1).view, "AudibleBeep=0"); + EXPECT_EQ(result.named_capture_group_matches.at(1).ensure("Test").view, "0"); +} + +TEST_CASE(a_star) +{ + Regex<PosixExtended> re("a*"); + RegexResult result; + +#ifdef REGEX_DEBUG + RegexDebug regex_dbg(stderr); + regex_dbg.print_raw_bytecode(re); + regex_dbg.print_header(); + regex_dbg.print_bytecode(re); +#endif + + String haystack = "[Window]\nOpacity=255\nAudibleBeep=0\n"; + EXPECT_EQ(re.search(haystack.view(), result, PosixFlags::Multiline), true); + EXPECT_EQ(result.count, 32u); + EXPECT_EQ(result.matches.at(0).view.length(), 0u); + EXPECT_EQ(result.matches.at(10).view.length(), 1u); + EXPECT_EQ(result.matches.at(10).view, "a"); + EXPECT_EQ(result.matches.at(31).view.length(), 0u); +} + +TEST_CASE(simple_period_end_benchmark) +{ + Regex<PosixExtended> re("hello.$"); + RegexResult m; + EXPECT_EQ(re.search("Hello1", m), false); + EXPECT_EQ(re.search("hello1hello1", m), true); + EXPECT_EQ(re.search("hello2hell", m), false); + EXPECT_EQ(re.search("hello?", m), true); +} + +TEST_MAIN(Regex) |