diff options
author | AnotherTest <ali.mpfard@gmail.com> | 2020-12-06 17:02:03 +0330 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-12-06 15:38:40 +0100 |
commit | 19bf7734a49c1f86247c6b52c4831cf0575c6180 (patch) | |
tree | ff214938d0f10a55cb5e19d372c70f3ef84ede9a /Libraries/LibRegex | |
parent | 6b4281c3aa4bd421b00a83af17bc158d5fd2b883 (diff) | |
download | serenity-19bf7734a49c1f86247c6b52c4831cf0575c6180.zip |
LibRegex: Store 'String' matches inside the bytecode
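Also removes the unnecessary 'length' argument from insert_bytecode_compare_string() and insert_bytecode_compare_named_reference(), since a StringView already carries its own length.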
Diffstat (limited to 'Libraries/LibRegex')
-rw-r--r-- | Libraries/LibRegex/RegexByteCode.cpp | 14 | ||||
-rw-r--r-- | Libraries/LibRegex/RegexByteCode.h | 16 | ||||
-rw-r--r-- | Libraries/LibRegex/RegexParser.cpp | 4 |
3 files changed, 22 insertions, 12 deletions
diff --git a/Libraries/LibRegex/RegexByteCode.cpp b/Libraries/LibRegex/RegexByteCode.cpp index 3fffc193b7..551327f30e 100644 --- a/Libraries/LibRegex/RegexByteCode.cpp +++ b/Libraries/LibRegex/RegexByteCode.cpp @@ -444,14 +444,16 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(const MatchInput& input, M } else if (compare_type == CharacterCompareType::String) { ASSERT(!current_inversion_state()); - char* str = reinterpret_cast<char*>(m_bytecode->at(offset++)); - auto& length = m_bytecode->at(offset++); + const auto& length = m_bytecode->at(offset++); + StringBuilder str_builder; + for (size_t i = 0; i < length; ++i) + str_builder.append(m_bytecode->at(offset++)); // We want to compare a string that is definitely longer than the available string if (input.view.length() - state.string_position < length) return ExecutionResult::Failed_ExecuteLowPrioForks; - if (!compare_string(input, state, str, length)) + if (!compare_string(input, state, str_builder.string_view().characters_without_null_termination(), length)) return ExecutionResult::Failed_ExecuteLowPrioForks; } else if (compare_type == CharacterCompareType::CharClass) { @@ -717,9 +719,11 @@ const Vector<String> OpCode_Compare::variable_arguments_to_string(Optional<Match auto ref = m_bytecode->at(offset++); result.empend(String::format("number=%lu", ref)); } else if (compare_type == CharacterCompareType::String) { - char* str = reinterpret_cast<char*>(m_bytecode->at(offset++)); auto& length = m_bytecode->at(offset++); - result.empend(String::format("value=\"%.*s\"", length, str)); + StringBuilder str_builder; + for (size_t i = 0; i < length; ++i) + str_builder.append(m_bytecode->at(offset++)); + result.empend(String::format("value=\"%.*s\"", length, str_builder.string_view().characters_without_null_termination())); if (!view.is_null() && view.length() > state().string_position) result.empend(String::format( "compare against: \"%s\"", diff --git a/Libraries/LibRegex/RegexByteCode.h 
b/Libraries/LibRegex/RegexByteCode.h index da3f971bfe..6701d7c887 100644 --- a/Libraries/LibRegex/RegexByteCode.h +++ b/Libraries/LibRegex/RegexByteCode.h @@ -186,7 +186,7 @@ public: append(move(bytecode)); } - void insert_bytecode_compare_string(StringView view, size_t length) + void insert_bytecode_compare_string(StringView view) { ByteCode bytecode; @@ -196,8 +196,7 @@ public: ByteCode arguments; arguments.empend(static_cast<ByteCodeValueType>(CharacterCompareType::String)); - arguments.empend(reinterpret_cast<ByteCodeValueType>(view.characters_without_null_termination())); - arguments.empend(length); + arguments.insert_string(view); bytecode.empend(arguments.size()); // size of arguments bytecode.append(move(arguments)); @@ -205,7 +204,7 @@ public: append(move(bytecode)); } - void insert_bytecode_compare_named_reference(StringView name, size_t length) + void insert_bytecode_compare_named_reference(StringView name) { ByteCode bytecode; @@ -216,7 +215,7 @@ public: arguments.empend(static_cast<ByteCodeValueType>(CharacterCompareType::NamedReference)); arguments.empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination())); - arguments.empend(length); + arguments.empend(name.length()); bytecode.empend(arguments.size()); // size of arguments bytecode.append(move(arguments)); @@ -458,6 +457,13 @@ public: OpCode* get_opcode(MatchState& state) const; private: + void insert_string(const StringView& view) + { + empend((ByteCodeValueType)view.length()); + for (size_t i = 0; i < view.length(); ++i) + empend((ByteCodeValueType)view[i]); + } + ALWAYS_INLINE OpCode* get_opcode_by_id(OpCodeId id) const; static HashMap<u32, OwnPtr<OpCode>> s_opcodes; }; diff --git a/Libraries/LibRegex/RegexParser.cpp b/Libraries/LibRegex/RegexParser.cpp index f342cce512..aa22be74b2 100644 --- a/Libraries/LibRegex/RegexParser.cpp +++ b/Libraries/LibRegex/RegexParser.cpp @@ -430,7 +430,7 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si 
if (length > 1) { // last character is inserted into 'bytecode' for duplication symbol handling auto new_length = length - ((match_repetition_symbol() && length > 1) ? 1 : 0); - stack.insert_bytecode_compare_string(start_token.value(), new_length); + stack.insert_bytecode_compare_string({ start_token.value().characters_without_null_termination(), new_length }); } if ((match_repetition_symbol() && length > 1) || length == 1) // Create own compare opcode for last character before duplication symbol @@ -1099,7 +1099,7 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini } match_length_minimum += maybe_length.value(); - stack.insert_bytecode_compare_named_reference(name, name.length()); + stack.insert_bytecode_compare_named_reference(name); return true; } |