summaryrefslogtreecommitdiff
path: root/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter
diff options
context:
space:
mode:
author: Ali Mohammad Pur <ali.mpfard@gmail.com> 2021-05-20 23:15:33 +0430
committer: Andreas Kling <kling@serenityos.org> 2021-05-20 22:06:45 +0200
commit: 97a230e4efedf92879da89cce57ffcb8d26fa517 (patch)
tree: 4d19439ce5cad377170ee60e6a78140bd0766c67 /Userland/Libraries/LibWeb/HTML/SyntaxHighlighter
parent: aa7939bc6c02b474a535ea0fb7c521b3e141fd21 (diff)
download: serenity-97a230e4efedf92879da89cce57ffcb8d26fa517.zip
LibWeb: Add a super basic HTML syntax highlighter
This can currently highlight tag names and attribute names/values.
Diffstat (limited to 'Userland/Libraries/LibWeb/HTML/SyntaxHighlighter')
-rw-r--r-- Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp | 147
-rw-r--r-- Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h | 32
2 files changed, 179 insertions, 0 deletions
diff --git a/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp b/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp
new file mode 100644
index 0000000000..53dba5bf18
--- /dev/null
+++ b/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibWeb/HTML/Parser/HTMLTokenizer.h>
+#include <LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h>
+
+namespace Web::HTML {
+
+enum class AugmentedTokenKind : u32 { // Categories of highlighted spans; rehighlight() casts one of these to void* for every span it creates.
+ AttributeName, // Span over an attribute's name.
+ AttributeValue, // Span over an attribute's value.
+ OpenTag, // Span over the tag name of a start tag; first half of the pair returned by matching_token_pairs().
+ CloseTag, // Span over the tag name of an end tag; second half of that pair.
+ Comment, // Span emitted for a comment token.
+ Doctype, // Span emitted for a doctype token.
+};
+
+bool SyntaxHighlighter::is_identifier(void* token) const // Always returns false: no token value is reported as an identifier.
+{
+ if (!token) // Null token: not an identifier.
+ return false;
+ return false; // Non-null tokens get the same answer, so the check above does not change the result.
+}
+
+bool SyntaxHighlighter::is_navigatable(void*) const // Always returns false; the token argument is ignored.
+{
+ return false;
+}
+
+void SyntaxHighlighter::rehighlight(const Palette& palette) // Tokenizes the client's text as HTML, builds colored spans for tags, attributes, comments and doctypes, and hands them to the client.
+{
+ (void)palette; // NOTE(review): palette is used further down, so this cast looks redundant.
+ auto text = m_client->get_text();
+
+ Vector<GUI::TextDocumentSpan> spans; // Collected here and passed to the client once tokenizing is done.
+ auto highlight = [&](auto start_line, auto start_column, auto end_line, auto end_column, Gfx::TextAttributes attributes, AugmentedTokenKind kind) { // Appends one span built from the given start/end positions, attributes and kind.
+ spans.empend(
+ GUI::TextRange {
+ { start_line, start_column },
+ { end_line, end_column },
+ },
+ move(attributes),
+ (void*)kind, // The kind travels as an opaque pointer value; token_types_equal() compares these values.
+ false);
+ };
+
+ HTMLTokenizer tokenizer { text, "utf-8" };
+ [[maybe_unused]] enum class State { // Which language the tokenizer is currently inside; only read by the empty FIXME branches below.
+ HTML,
+ Javascript,
+ CSS,
+ } state { State::HTML };
+ for (;;) { // Runs until next_token() yields no value.
+ auto token = tokenizer.next_token();
+ if (!token.has_value())
+ break;
+
+ if (token->is_start_tag()) {
+ if (token->tag_name() == "script"sv) { // Start of a script element: switch the tokenizer to ScriptData.
+ tokenizer.switch_to(HTMLTokenizer::State::ScriptData);
+ state = State::Javascript;
+ } else if (token->tag_name() == "style"sv) { // Start of a style element: switch the tokenizer to RAWTEXT.
+ tokenizer.switch_to(HTMLTokenizer::State::RAWTEXT);
+ state = State::CSS;
+ }
+ } else if (token->is_end_tag()) {
+ if (token->tag_name().is_one_of("script"sv, "style"sv)) {
+ if (state == State::Javascript) {
+ // FIXME: Highlight javascript code here instead.
+ } else if (state == State::CSS) {
+ // FIXME: Highlight CSS code here instead.
+ }
+ state = State::HTML; // End of a script/style element: back to HTML.
+ }
+ }
+
+ size_t token_start_offset = token->is_end_tag() ? 1 : 0; // End tags shift the highlighted columns by one; presumably to skip the '/' of "</" (TODO confirm).
+
+ if (token->is_comment()) {
+ highlight( // NOTE(review): start and end are both the token's start position.
+ token->start_position().line,
+ token->start_position().column,
+ token->start_position().line,
+ token->start_position().column,
+ { palette.syntax_comment(), {} },
+ AugmentedTokenKind::Comment);
+ } else if (token->is_start_tag() || token->is_end_tag()) {
+ // FIXME: This breaks with single-character tag names.
+ highlight( // Span for the tag name, on the token's start line.
+ token->start_position().line,
+ token->start_position().column + token_start_offset,
+ token->start_position().line,
+ token->start_position().column + token->tag_name().length() + token_start_offset - 1, // End column = start column + name length + offset - 1.
+ { palette.syntax_keyword(), {} },
+ token->is_start_tag() ? AugmentedTokenKind::OpenTag : AugmentedTokenKind::CloseTag);
+
+ for (auto& attribute : token->attributes()) { // Each attribute yields two spans: one for its name, one for its value.
+ highlight(
+ attribute.name_start_position.line,
+ attribute.name_start_position.column + token_start_offset, // NOTE(review): the end-tag offset is applied to attribute columns too; confirm this is intended.
+ attribute.name_end_position.line,
+ attribute.name_end_position.column + token_start_offset,
+ { palette.syntax_identifier(), {} },
+ AugmentedTokenKind::AttributeName);
+ highlight(
+ attribute.value_start_position.line,
+ attribute.value_start_position.column + token_start_offset,
+ attribute.value_end_position.line,
+ attribute.value_end_position.column + token_start_offset,
+ { palette.syntax_string(), {} },
+ AugmentedTokenKind::AttributeValue);
+ }
+ } else if (token->is_doctype()) {
+ highlight( // NOTE(review): as with comments, start and end are both the token's start position.
+ token->start_position().line,
+ token->start_position().column,
+ token->start_position().line,
+ token->start_position().column,
+ { palette.syntax_preprocessor_statement(), {} },
+ AugmentedTokenKind::Doctype);
+ }
+ }
+
+ m_client->do_set_spans(move(spans)); // Hand the collected spans to the client.
+ m_has_brace_buddies = false; // Reset before recomputing the matching pair below.
+ highlight_matching_token_pair();
+ m_client->do_update();
+}
+
+Vector<Syntax::Highlighter::MatchingTokenPair> SyntaxHighlighter::matching_token_pairs() const // Returns a one-element list pairing OpenTag with CloseTag, each cast to void*.
+{
+ static Vector<MatchingTokenPair> pairs; // Shared across calls; returned by value.
+ if (pairs.is_empty()) { // Populate only while the list is still empty, i.e. on the first call.
+ pairs.append({ (void*)AugmentedTokenKind::OpenTag, (void*)AugmentedTokenKind::CloseTag });
+ }
+ return pairs;
+}
+
+bool SyntaxHighlighter::token_types_equal(void* token0, void* token1) const // Two token types are equal when their opaque pointer values are equal.
+{
+ return token0 == token1;
+}
+
+}
diff --git a/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h b/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h
new file mode 100644
index 0000000000..067c3c6677
--- /dev/null
+++ b/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <LibSyntax/Highlighter.h>
+
+namespace Web::HTML {
+
+class SyntaxHighlighter : public Syntax::Highlighter { // Syntax::Highlighter subclass that reports Syntax::Language::HTML.
+public:
+ SyntaxHighlighter() = default;
+ virtual ~SyntaxHighlighter() override = default;
+
+ virtual bool is_identifier(void*) const override; // Takes an opaque token value.
+ virtual bool is_navigatable(void*) const override; // Takes an opaque token value.
+
+ virtual Syntax::Language language() const override { return Syntax::Language::HTML; } // Always HTML.
+ virtual void rehighlight(const Palette&) override; // Palette supplies the colors used for the spans.
+
+protected:
+ virtual Vector<MatchingTokenPair> matching_token_pairs() const override; // Pairs of token values that should be matched with each other.
+ virtual bool token_types_equal(void*, void*) const override; // Compares two opaque token values.
+
+ size_t m_line { 1 }; // NOTE(review): not referenced in the SyntaxHighlighter.cpp added alongside this header; confirm it is needed.
+ size_t m_column { 0 }; // NOTE(review): likewise not referenced in that .cpp; confirm it is needed.
+};
+
+}