/* * Copyright (c) 2020, Emanuel Sprung * * SPDX-License-Identifier: BSD-2-Clause */ #pragma once #include "RegexByteCode.h" #include "RegexMatch.h" #include "RegexOptions.h" #include "RegexParser.h" #include #include #include #include #include #include #include #include #include namespace regex { namespace Detail { struct Block { size_t start; size_t end; }; } static constexpr const size_t c_max_recursion = 5000; static constexpr const size_t c_match_preallocation_count = 0; struct RegexResult final { bool success { false }; size_t count { 0 }; Vector matches; Vector> capture_group_matches; size_t n_operations { 0 }; size_t n_capture_groups { 0 }; size_t n_named_capture_groups { 0 }; }; template class Regex; template class Matcher final { public: Matcher(Regex const* pattern, Optional::OptionsType> regex_options = {}) : m_pattern(pattern) , m_regex_options(regex_options.value_or({})) { } ~Matcher() = default; RegexResult match(RegexStringView, Optional::OptionsType> = {}) const; RegexResult match(Vector const&, Optional::OptionsType> = {}) const; typename ParserTraits::OptionsType options() const { return m_regex_options; } void reset_pattern(Badge>, Regex const* pattern) { m_pattern = pattern; } private: bool execute(MatchInput const& input, MatchState& state, size_t& operations) const; Regex const* m_pattern; typename ParserTraits::OptionsType const m_regex_options; }; template class Regex final { public: String pattern_value; regex::Parser::Result parser_result; OwnPtr> matcher { nullptr }; mutable size_t start_offset { 0 }; static regex::Parser::Result parse_pattern(StringView pattern, typename ParserTraits::OptionsType regex_options = {}); explicit Regex(String pattern, typename ParserTraits::OptionsType regex_options = {}); Regex(regex::Parser::Result parse_result, String pattern, typename ParserTraits::OptionsType regex_options = {}); ~Regex() = default; Regex(Regex&&); Regex& operator=(Regex&&); typename ParserTraits::OptionsType options() const; void print_bytecode(FILE* f = stdout) const; String error_string(Optional message = {}) const; RegexResult match(RegexStringView view, Optional::OptionsType> regex_options = {}) const { if (!matcher || parser_result.error != Error::NoError) return {}; return matcher->match(view, regex_options); } RegexResult match(Vector const& views, Optional::OptionsType> regex_options = {}) const { if (!matcher || parser_result.error != Error::NoError) return {}; return matcher->match(views, regex_options); } String replace(RegexStringView view, StringView replacement_pattern, Optional::OptionsType> regex_options = {}) const { if (!matcher || parser_result.error != Error::NoError) return {}; StringBuilder builder; size_t start_offset = 0; RegexResult result = matcher->match(view, regex_options); if (!result.success) return view.to_string(); for (size_t i = 0; i < result.matches.size(); ++i) { auto& match = result.matches[i]; builder.append(view.substring_view(start_offset, match.global_offset - start_offset).to_string()); start_offset = match.global_offset + match.view.length(); GenericLexer lexer(replacement_pattern); while (!lexer.is_eof()) { if (lexer.consume_specific('\\')) { if (lexer.consume_specific('\\')) { builder.append('\\'); continue; } auto number = lexer.consume_while(isdigit); if (auto index = number.to_uint(); index.has_value() && result.n_capture_groups >= index.value()) { builder.append(result.capture_group_matches[i][index.value() - 1].view.to_string()); } else { builder.appendff("\\{}", number); } } else { builder.append(lexer.consume_while([](auto ch) { return ch != '\\'; })); } } } builder.append(view.substring_view(start_offset, view.length() - start_offset).to_string()); return builder.to_string(); } // FIXME: replace(Vector const , ...) RegexResult search(RegexStringView view, Optional::OptionsType> regex_options = {}) const { if (!matcher || parser_result.error != Error::NoError) return {}; AllOptions options = (AllOptions)regex_options.value_or({}); if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) { options.reset_flag(AllFlags::MatchNotEndOfLine); options.reset_flag(AllFlags::MatchNotBeginOfLine); } options.reset_flag(AllFlags::Internal_Stateful); options |= AllFlags::Global; return matcher->match(view, options); } RegexResult search(Vector const& views, Optional::OptionsType> regex_options = {}) const { if (!matcher || parser_result.error != Error::NoError) return {}; AllOptions options = (AllOptions)regex_options.value_or({}); if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) { options.reset_flag(AllFlags::MatchNotEndOfLine); options.reset_flag(AllFlags::MatchNotBeginOfLine); } options.reset_flag(AllFlags::Internal_Stateful); options |= AllFlags::Global; return matcher->match(views, options); } bool match(RegexStringView view, RegexResult& m, Optional::OptionsType> regex_options = {}) const { m = match(view, regex_options); return m.success; } bool match(Vector const& views, RegexResult& m, Optional::OptionsType> regex_options = {}) const { m = match(views, regex_options); return m.success; } bool search(RegexStringView view, RegexResult& m, Optional::OptionsType> regex_options = {}) const { m = search(view, regex_options); return m.success; } bool search(Vector const& views, RegexResult& m, Optional::OptionsType> regex_options = {}) const { m = search(views, regex_options); return m.success; } bool has_match(RegexStringView view, Optional::OptionsType> regex_options = {}) const { if (!matcher || parser_result.error != Error::NoError) return false; RegexResult result = matcher->match(view, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults); return result.success; } bool has_match(Vector const& views, Optional::OptionsType> regex_options = {}) const { if (!matcher || parser_result.error != Error::NoError) return false; RegexResult result = matcher->match(views, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults); return result.success; } using BasicBlockList = Vector; static BasicBlockList split_basic_blocks(ByteCode const&); private: void run_optimization_passes(); void attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&); }; // free standing functions for match, search and has_match template RegexResult match(RegexStringView view, Regex& pattern, Optional::OptionsType> regex_options = {}) { return pattern.match(view, regex_options); } template RegexResult match(Vector const& view, Regex& pattern, Optional::OptionsType> regex_options = {}) { return pattern.match(view, regex_options); } template bool match(RegexStringView view, Regex& pattern, RegexResult&, Optional::OptionsType> regex_options = {}) { return pattern.match(view, regex_options); } template bool match(Vector const& view, Regex& pattern, RegexResult&, Optional::OptionsType> regex_options = {}) { return pattern.match(view, regex_options); } template RegexResult search(RegexStringView view, Regex& pattern, Optional::OptionsType> regex_options = {}) { return pattern.search(view, regex_options); } template RegexResult search(Vector const& views, Regex& pattern, Optional::OptionsType> regex_options = {}) { return pattern.search(views, regex_options); } template bool search(RegexStringView view, Regex& pattern, RegexResult&, Optional::OptionsType> regex_options = {}) { return pattern.search(view, regex_options); } template bool search(Vector const& views, Regex& pattern, RegexResult&, Optional::OptionsType> regex_options = {}) { return pattern.search(views, regex_options); } template bool has_match(RegexStringView view, Regex& pattern, Optional::OptionsType> regex_options = {}) { return pattern.has_match(view, regex_options); } template bool has_match(Vector const& views, Regex& pattern, Optional::OptionsType> regex_options = {}) { return pattern.has_match(views, regex_options); } } using regex::has_match; using regex::match; using regex::Regex; using regex::RegexResult;