diff options
author | Linus Groh <mail@linusgroh.de> | 2021-09-13 23:20:58 +0100 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-09-14 02:48:57 +0200 |
commit | a50e33abe3a34aba965e329a615eab4d87227e4c (patch) | |
tree | a5335ac2bb27813d66a793e041ec6db1f96fc1b7 | |
parent | e4d97add3da0de8d75ddbcd1950420f33ee03904 (diff) | |
download | serenity-a50e33abe3a34aba965e329a615eab4d87227e4c.zip |
LibJS: Skip ID_{Start,Continue} property lookup for any ASCII characters
Before this change, Lexer::is_identifier_{start,middle}() would do a
Unicode property lookup via Unicode::code_point_has_property() quite
frequently, especially for common characters like .,;{}[]() etc.
Since these and any other ASCII characters not covered by the alpha /
alphanumeric check are known to not have the ID_Start / ID_Continue
properties (except '_', which has ID_Continue and is special-cased now),
we can easily avoid this function call.
-rw-r--r-- | Userland/Libraries/LibJS/Lexer.cpp | 12 |
1 file changed, 12 insertions, 0 deletions
diff --git a/Userland/Libraries/LibJS/Lexer.cpp b/Userland/Libraries/LibJS/Lexer.cpp index cd4dc8a415..3ef63870f3 100644 --- a/Userland/Libraries/LibJS/Lexer.cpp +++ b/Userland/Libraries/LibJS/Lexer.cpp @@ -408,6 +408,11 @@ Optional<u32> Lexer::is_identifier_start(size_t& identifier_length) const if (is_ascii_alpha(code_point) || code_point == '_' || code_point == '$') return code_point; + // Optimization: the first codepoint with the ID_Start property after A-Za-z is outside the + // ASCII range (0x00AA), so we can skip code_point_has_property() for any ASCII characters. + if (is_ascii(code_point)) + return {}; + static auto id_start_category = Unicode::property_from_string("ID_Start"sv); if (id_start_category.has_value() && Unicode::code_point_has_property(code_point, *id_start_category)) return code_point; @@ -436,6 +441,13 @@ Optional<u32> Lexer::is_identifier_middle(size_t& identifier_length) const if (is_ascii_alphanumeric(code_point) || (code_point == '$') || (code_point == ZERO_WIDTH_NON_JOINER) || (code_point == ZERO_WIDTH_JOINER)) return code_point; + // Optimization: the first codepoint with the ID_Continue property after A-Za-z0-9_ is outside the + // ASCII range (0x00AA), so we can skip code_point_has_property() for any ASCII characters. + if (code_point == '_') + return code_point; + if (is_ascii(code_point)) + return {}; + static auto id_continue_category = Unicode::property_from_string("ID_Continue"sv); if (id_continue_category.has_value() && Unicode::code_point_has_property(code_point, *id_continue_category)) return code_point; |