summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAli Mohammad Pur <ali.mpfard@gmail.com>2021-05-20 23:15:33 +0430
committerAndreas Kling <kling@serenityos.org>2021-05-20 22:06:45 +0200
commit97a230e4efedf92879da89cce57ffcb8d26fa517 (patch)
tree4d19439ce5cad377170ee60e6a78140bd0766c67
parentaa7939bc6c02b474a535ea0fb7c521b3e141fd21 (diff)
downloadserenity-97a230e4efedf92879da89cce57ffcb8d26fa517.zip
LibWeb: Add a super basic HTML syntax highlighter
This can currently highlight tag names and attribute names/values.
-rw-r--r--Userland/Libraries/LibSyntax/Highlighter.h9
-rw-r--r--Userland/Libraries/LibWeb/CMakeLists.txt1
-rw-r--r--Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp5
-rw-r--r--Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h4
-rw-r--r--Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp147
-rw-r--r--Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h32
6 files changed, 194 insertions, 4 deletions
diff --git a/Userland/Libraries/LibSyntax/Highlighter.h b/Userland/Libraries/LibSyntax/Highlighter.h
index dc2fe000e6..de0b6bdc2b 100644
--- a/Userland/Libraries/LibSyntax/Highlighter.h
+++ b/Userland/Libraries/LibSyntax/Highlighter.h
@@ -15,13 +15,14 @@
namespace Syntax {
enum class Language {
- PlainText,
Cpp,
- JavaScript,
- INI,
GML,
- Shell,
+ HTML,
+ INI,
+ JavaScript,
+ PlainText,
SQL,
+ Shell,
};
struct TextStyle {
diff --git a/Userland/Libraries/LibWeb/CMakeLists.txt b/Userland/Libraries/LibWeb/CMakeLists.txt
index 42c6de4158..20a62c948c 100644
--- a/Userland/Libraries/LibWeb/CMakeLists.txt
+++ b/Userland/Libraries/LibWeb/CMakeLists.txt
@@ -153,6 +153,7 @@ set(SOURCES
HTML/Parser/ListOfActiveFormattingElements.cpp
HTML/Parser/StackOfOpenElements.cpp
HTML/SubmitEvent.cpp
+ HTML/SyntaxHighlighter/SyntaxHighlighter.cpp
HTML/TagNames.cpp
HTML/WebSocket.cpp
HighResolutionTime/Performance.cpp
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
index 51ef6af76b..8dfaed9602 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
@@ -229,6 +229,11 @@ Optional<u32> HTMLTokenizer::peek_code_point(size_t offset) const
Optional<HTMLToken> HTMLTokenizer::next_token()
{
+ {
+ auto last_position = m_source_positions.last();
+ m_source_positions.clear();
+ m_source_positions.append(move(last_position));
+ }
_StartOfFunction:
if (!m_queued_tokens.is_empty())
return m_queued_tokens.dequeue();
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h
index 5edfaf2271..da9945ed78 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h
@@ -110,6 +110,10 @@ public:
Optional<HTMLToken> next_token();
void switch_to(Badge<HTMLDocumentParser>, State new_state);
+ // Overload without the Badge<HTMLDocumentParser> parameter; assigns the new
+ // state directly to m_state.
+ void switch_to(State new_state)
+ {
+ m_state = new_state;
+ }
void set_blocked(bool b) { m_blocked = b; }
bool is_blocked() const { return m_blocked; }
diff --git a/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp b/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp
new file mode 100644
index 0000000000..53dba5bf18
--- /dev/null
+++ b/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibWeb/HTML/Parser/HTMLTokenizer.h>
+#include <LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h>
+
+namespace Web::HTML {
+
+// Kinds of highlighted regions produced by the HTML highlighter. rehighlight()
+// casts one of these to void* and stores it as the data of each span it emits.
+// NOTE(review): AttributeName has the value 0, so it is stored as a null
+// pointer; confirm that span consumers do not treat null data as "no token".
+enum class AugmentedTokenKind : u32 {
+    AttributeName,
+    AttributeValue,
+    OpenTag, // Paired with CloseTag in matching_token_pairs().
+    CloseTag,
+    Comment,
+    Doctype,
+};
+
+// No HTML token kind is treated as an identifier, so this returns false for
+// every token (including a null one).
+bool SyntaxHighlighter::is_identifier(void*) const
+{
+    return false;
+}
+
+// Always returns false: no HTML token kind is reported as navigatable.
+bool SyntaxHighlighter::is_navigatable(void*) const
+{
+    return false;
+}
+
+// Re-tokenizes the client's text with HTMLTokenizer and replaces the client's
+// spans with one span per comment, doctype, tag name, attribute name and
+// attribute value, coloured from the given palette. Afterwards the matching
+// token pair highlight is refreshed and the client is asked to update.
+void SyntaxHighlighter::rehighlight(const Palette& palette)
+{
+    auto text = m_client->get_text();
+
+    Vector<GUI::TextDocumentSpan> spans;
+    // Appends one span covering the given range; the token kind is stored as
+    // the span's opaque data pointer.
+    auto highlight = [&](auto start_line, auto start_column, auto end_line, auto end_column, Gfx::TextAttributes attributes, AugmentedTokenKind kind) {
+        spans.empend(
+            GUI::TextRange {
+                { start_line, start_column },
+                { end_line, end_column },
+            },
+            move(attributes),
+            (void*)kind,
+            false);
+    };
+
+    HTMLTokenizer tokenizer { text, "utf-8" };
+    // Which language the content following the most recent tag is written in.
+    [[maybe_unused]] enum class State {
+        HTML,
+        Javascript,
+        CSS,
+    } state { State::HTML };
+    for (;;) {
+        auto token = tokenizer.next_token();
+        if (!token.has_value())
+            break;
+
+        if (token->is_start_tag()) {
+            // Put the tokenizer into the matching state so that the contents of
+            // <script> and <style> are not tokenized as regular markup.
+            if (token->tag_name() == "script"sv) {
+                tokenizer.switch_to(HTMLTokenizer::State::ScriptData);
+                state = State::Javascript;
+            } else if (token->tag_name() == "style"sv) {
+                tokenizer.switch_to(HTMLTokenizer::State::RAWTEXT);
+                state = State::CSS;
+            }
+        } else if (token->is_end_tag()) {
+            if (token->tag_name().is_one_of("script"sv, "style"sv)) {
+                if (state == State::Javascript) {
+                    // FIXME: Highlight javascript code here instead.
+                } else if (state == State::CSS) {
+                    // FIXME: Highlight CSS code here instead.
+                }
+                state = State::HTML;
+            }
+        }
+
+        // End tags get their columns shifted by one.
+        // NOTE(review): presumably this skips the '/' of "</name>" - confirm.
+        size_t token_start_offset = token->is_end_tag() ? 1 : 0;
+
+        if (token->is_comment()) {
+            // NOTE(review): start and end are the same position, so only the
+            // token's start position is covered - confirm this is intended.
+            const auto& start = token->start_position();
+            highlight(
+                start.line,
+                start.column,
+                start.line,
+                start.column,
+                { palette.syntax_comment(), {} },
+                AugmentedTokenKind::Comment);
+        } else if (token->is_start_tag() || token->is_end_tag()) {
+            const auto& start = token->start_position();
+            // FIXME: This breaks with single-character tag names.
+            highlight(
+                start.line,
+                start.column + token_start_offset,
+                start.line,
+                start.column + token->tag_name().length() + token_start_offset - 1,
+                { palette.syntax_keyword(), {} },
+                token->is_start_tag() ? AugmentedTokenKind::OpenTag : AugmentedTokenKind::CloseTag);
+
+            // Each attribute contributes two spans: one for its name and one for its value.
+            for (auto& attribute : token->attributes()) {
+                highlight(
+                    attribute.name_start_position.line,
+                    attribute.name_start_position.column + token_start_offset,
+                    attribute.name_end_position.line,
+                    attribute.name_end_position.column + token_start_offset,
+                    { palette.syntax_identifier(), {} },
+                    AugmentedTokenKind::AttributeName);
+                highlight(
+                    attribute.value_start_position.line,
+                    attribute.value_start_position.column + token_start_offset,
+                    attribute.value_end_position.line,
+                    attribute.value_end_position.column + token_start_offset,
+                    { palette.syntax_string(), {} },
+                    AugmentedTokenKind::AttributeValue);
+            }
+        } else if (token->is_doctype()) {
+            // NOTE(review): as with comments, this span starts and ends at the
+            // token's start position.
+            const auto& start = token->start_position();
+            highlight(
+                start.line,
+                start.column,
+                start.line,
+                start.column,
+                { palette.syntax_preprocessor_statement(), {} },
+                AugmentedTokenKind::Doctype);
+        }
+    }
+
+    m_client->do_set_spans(move(spans));
+    m_has_brace_buddies = false;
+    highlight_matching_token_pair();
+    m_client->do_update();
+}
+
+// Returns the token kinds that form matching pairs; for HTML this is the single
+// (OpenTag, CloseTag) pair. The list is filled on the first call and kept in a
+// function-local static; callers receive a copy.
+Vector<Syntax::Highlighter::MatchingTokenPair> SyntaxHighlighter::matching_token_pairs() const
+{
+    static Vector<MatchingTokenPair> s_tag_pairs;
+    if (!s_tag_pairs.is_empty())
+        return s_tag_pairs;
+
+    s_tag_pairs.append({ (void*)AugmentedTokenKind::OpenTag, (void*)AugmentedTokenKind::CloseTag });
+    return s_tag_pairs;
+}
+
+// Two tokens are of the same type exactly when their opaque kind pointers
+// compare equal.
+bool SyntaxHighlighter::token_types_equal(void* token0, void* token1) const
+{
+    const bool kinds_match = (token0 == token1);
+    return kinds_match;
+}
+
+}
diff --git a/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h b/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h
new file mode 100644
index 0000000000..067c3c6677
--- /dev/null
+++ b/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <LibSyntax/Highlighter.h>
+
+namespace Web::HTML {
+
+// Syntax::Highlighter implementation for HTML documents.
+class SyntaxHighlighter : public Syntax::Highlighter {
+public:
+    SyntaxHighlighter() = default;
+    virtual ~SyntaxHighlighter() override = default;
+
+    virtual bool is_identifier(void*) const override;
+    virtual bool is_navigatable(void*) const override;
+
+    // Identifies this highlighter as the HTML one.
+    virtual Syntax::Language language() const override { return Syntax::Language::HTML; }
+    virtual void rehighlight(const Palette&) override;
+
+protected:
+    virtual Vector<MatchingTokenPair> matching_token_pairs() const override;
+    virtual bool token_types_equal(void*, void*) const override;
+
+    // NOTE(review): these two members are initialized here but are not
+    // referenced by the accompanying SyntaxHighlighter.cpp - confirm they are
+    // still needed.
+    size_t m_line { 1 };
+    size_t m_column { 0 };
+};
+
+}