diff options
author | Idan Horowitz <idan.horowitz@gmail.com> | 2021-04-24 16:20:51 +0300 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-04-29 10:36:23 +0200 |
commit | 115b445dab0dfdd28a87232b3d57a1610a233d7b (patch) | |
tree | e0ca56290d2e0129c92ea9e2a402ce148a288e68 /Userland/Libraries/LibGfx | |
parent | 33fdd402b5cf16e8bb36fa499ac438b2b9134923 (diff) | |
download | serenity-115b445dab0dfdd28a87232b3d57a1610a233d7b.zip |
LibGfx: Add basic support for bidirectional text rendering
This adds a *very* simplified version of the UNICODE BIDIRECTIONAL
ALGORITHM (https://www.unicode.org/reports/tr9/) that can render most
bidirectional text but also produces awkward results in a large number
of edge cases; as such, it should probably be replaced with a
fully spec-compliant implementation at some point.
Diffstat (limited to 'Userland/Libraries/LibGfx')
-rw-r--r-- | Userland/Libraries/LibGfx/CMakeLists.txt | 1 | ||||
-rw-r--r-- | Userland/Libraries/LibGfx/Painter.cpp | 209 | ||||
-rw-r--r-- | Userland/Libraries/LibGfx/TextDirection.cpp | 34 | ||||
-rw-r--r-- | Userland/Libraries/LibGfx/TextDirection.h | 109 |
4 files changed, 349 insertions, 4 deletions
diff --git a/Userland/Libraries/LibGfx/CMakeLists.txt b/Userland/Libraries/LibGfx/CMakeLists.txt
index 02e224d787..17dd58938b 100644
--- a/Userland/Libraries/LibGfx/CMakeLists.txt
+++ b/Userland/Libraries/LibGfx/CMakeLists.txt
@@ -29,6 +29,7 @@ set(SOURCES
     Size.cpp
     StylePainter.cpp
     SystemTheme.cpp
+    TextDirection.cpp
     Triangle.cpp
     Typeface.cpp
     WindowTheme.cpp
diff --git a/Userland/Libraries/LibGfx/Painter.cpp b/Userland/Libraries/LibGfx/Painter.cpp
index dbbf5d005f..80a5456cdb 100644
--- a/Userland/Libraries/LibGfx/Painter.cpp
+++ b/Userland/Libraries/LibGfx/Painter.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
+ * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
@@ -23,6 +24,7 @@
 #include <LibGfx/CharacterBitmap.h>
 #include <LibGfx/Palette.h>
 #include <LibGfx/Path.h>
+#include <LibGfx/TextDirection.h>
 #include <math.h>
 #include <stdio.h>
 
@@ -1017,12 +1019,12 @@ struct ElidedText<Utf32View> {
 };
 
 template<typename TextType, typename DrawGlyphFunction>
-void draw_text_line(const IntRect& a_rect, const TextType& text, const Font& font, TextAlignment alignment, TextElision elision, DrawGlyphFunction draw_glyph)
+void draw_text_line(const IntRect& a_rect, const TextType& text, const Font& font, TextAlignment alignment, TextElision elision, TextDirection direction, DrawGlyphFunction draw_glyph)
 {
     auto rect = a_rect;
     TextType final_text(text);
     typename ElidedText<TextType>::Type elided_text;
-    if (elision == TextElision::Right) {
+    if (elision == TextElision::Right) { // FIXME: This needs to be specialized for bidirectional text
         int text_width = font.width(final_text);
         if (font.width(final_text) > rect.width()) {
             int glyph_spacing = font.glyph_spacing();
@@ -1074,14 +1076,22 @@ void draw_text_line(const IntRect& a_rect, const TextType& text, const Font& fon
     auto point = rect.location();
     int space_width = font.glyph_width(' ') + font.glyph_spacing();
 
+    if (direction == TextDirection::RTL) {
+        point.move_by(rect.width(), 0); // Start drawing from the end
+        space_width = -space_width; // Draw spaces backwards
+    }
+
     for (u32 code_point : final_text) {
         if (code_point == ' ') {
             point.move_by(space_width, 0);
             continue;
         }
         IntSize glyph_size(font.glyph_or_emoji_width(code_point) + font.glyph_spacing(), font.glyph_height());
+        if (direction == TextDirection::RTL)
+            point.move_by(-glyph_size.width(), 0); // If we are drawing right to left, we have to move backwards before drawing the glyph
         draw_glyph({ point, glyph_size }, code_point);
-        point.move_by(glyph_size.width(), 0);
+        if (direction == TextDirection::LTR)
+            point.move_by(glyph_size.width(), 0);
     }
 }
 
@@ -1105,9 +1115,179 @@ static inline size_t draw_text_get_length(const Utf32View& text)
     return text.length();
 }
 
+// Splits `text` into runs of code points sharing one embedding level and returns them in visual (left-to-right) order.
+// `initial_direction` selects the paragraph embedding level: 0 for LTR, 1 for RTL.
+// Code points in RTL runs are replaced by their mirrored counterparts (see get_mirror_char()).
+template<typename TextType>
+Vector<DirectionalRun> split_text_into_directional_runs(const TextType& text, TextDirection initial_direction)
+{
+    // Nothing to split; this also keeps the run splitting below from reading embedding_levels[0] of an empty vector
+    size_t text_length = text.length();
+    if (text_length == 0)
+        return {};
+
+    // FIXME: This is a *very* simplified version of the UNICODE BIDIRECTIONAL ALGORITHM (https://www.unicode.org/reports/tr9/), that can render most bidirectional text
+    // but also produces awkward results in a large number of edge cases. This should probably be replaced with a fully spec compliant implementation at some point.
+
+    // FIXME: Support HTML "dir" attribute (how?)
+    u8 paragraph_embedding_level = initial_direction == TextDirection::LTR ? 0 : 1;
+    Vector<u8> embedding_levels;
+    embedding_levels.ensure_capacity(text_length);
+    for (size_t i = 0; i < text_length; i++)
+        embedding_levels.unchecked_append(paragraph_embedding_level);
+
+    // FIXME: Support Explicit Directional Formatting Characters
+
+    Vector<BidirectionalClass> character_classes;
+    character_classes.ensure_capacity(text_length);
+    for (u32 code_point : text)
+        character_classes.unchecked_append(get_char_bidi_class(code_point));
+
+    // resolving weak types: a separator takes the class of the closest preceding strong character, or the paragraph class if there is none
+    BidirectionalClass paragraph_class = initial_direction == TextDirection::LTR ? BidirectionalClass::STRONG_LTR : BidirectionalClass::STRONG_RTL;
+    for (size_t i = 0; i < character_classes.size(); i++) {
+        if (character_classes[i] != BidirectionalClass::WEAK_SEPARATORS)
+            continue;
+        for (ssize_t j = static_cast<ssize_t>(i) - 1; j >= 0; j--) {
+            auto character_class = character_classes[j];
+            if (character_class != BidirectionalClass::STRONG_RTL && character_class != BidirectionalClass::STRONG_LTR)
+                continue;
+            character_classes[i] = character_class;
+            break;
+        }
+        if (character_classes[i] == BidirectionalClass::WEAK_SEPARATORS)
+            character_classes[i] = paragraph_class;
+    }
+
+    // resolving neutral types: a sequence of neutrals takes the direction of its neighbours if they agree, and the paragraph class otherwise
+    auto left_side = BidirectionalClass::NEUTRAL;
+    auto sequence_length = 0;
+    for (size_t i = 0; i < character_classes.size(); i++) {
+        auto character_class = character_classes[i];
+        if (left_side == BidirectionalClass::NEUTRAL) {
+            if (character_class != BidirectionalClass::NEUTRAL)
+                left_side = character_class;
+            else
+                character_classes[i] = paragraph_class;
+            continue;
+        }
+        if (character_class != BidirectionalClass::NEUTRAL) {
+            BidirectionalClass sequence_class;
+            if (bidi_class_to_direction(left_side) == bidi_class_to_direction(character_class)) {
+                sequence_class = left_side == BidirectionalClass::STRONG_RTL ? BidirectionalClass::STRONG_RTL : BidirectionalClass::STRONG_LTR;
+            } else {
+                sequence_class = paragraph_class;
+            }
+            for (auto j = 0; j < sequence_length; j++) {
+                character_classes[i - j - 1] = sequence_class;
+            }
+            sequence_length = 0;
+            left_side = character_class;
+        } else {
+            sequence_length++;
+        }
+    }
+    for (auto i = 0; i < sequence_length; i++)
+        character_classes[character_classes.size() - i - 1] = paragraph_class;
+
+    // resolving implicit levels
+    for (size_t i = 0; i < character_classes.size(); i++) {
+        auto character_class = character_classes[i];
+        if ((embedding_levels[i] % 2) == 0) {
+            if (character_class == BidirectionalClass::STRONG_RTL)
+                embedding_levels[i] += 1;
+            else if (character_class == BidirectionalClass::WEAK_NUMBERS || character_class == BidirectionalClass::WEAK_SEPARATORS)
+                embedding_levels[i] += 2;
+        } else {
+            if (character_class == BidirectionalClass::STRONG_LTR || character_class == BidirectionalClass::WEAK_NUMBERS || character_class == BidirectionalClass::WEAK_SEPARATORS)
+                embedding_levels[i] += 1;
+        }
+    }
+
+    // splitting into runs
+    auto run_code_points_start = text.begin();
+    auto next_code_points_slice = [&](auto length) {
+        Vector<u32> run_code_points;
+        run_code_points.ensure_capacity(length);
+        for (size_t j = 0; j < length; ++j, ++run_code_points_start)
+            run_code_points.unchecked_append(*run_code_points_start);
+        return run_code_points;
+    };
+    Vector<DirectionalRun> runs;
+    size_t start = 0;
+    u8 level = embedding_levels[0];
+    for (size_t i = 1; i < embedding_levels.size(); ++i) {
+        if (embedding_levels[i] == level)
+            continue;
+        auto code_points_slice = next_code_points_slice(i - start);
+        runs.append({ move(code_points_slice), level });
+        start = i;
+        level = embedding_levels[i];
+    }
+    auto code_points_slice = next_code_points_slice(embedding_levels.size() - start);
+    runs.append({ move(code_points_slice), level });
+
+    // reordering resolved levels: from the highest level down to the lowest odd level, reverse every contiguous sequence of runs at that level or higher
+    // FIXME: missing special cases for trailing whitespace characters
+    u8 minimum_level = 128;
+    u8 maximum_level = 0;
+    for (auto& run : runs) {
+        minimum_level = min(minimum_level, run.embedding_level());
+        maximum_level = max(maximum_level, run.embedding_level());
+    }
+    if ((minimum_level % 2) == 0)
+        minimum_level++;
+    auto last_run_index = runs.size() - 1;
+    while (maximum_level >= minimum_level) { // minimum_level is odd (>= 1) here, so the decrement below cannot wrap around before the loop ends
+        size_t run_index = 0;
+        while (run_index < last_run_index) {
+            while (run_index < last_run_index && runs[run_index].embedding_level() < maximum_level)
+                run_index++;
+            auto reverse_start = run_index;
+            while (run_index <= last_run_index && runs[run_index].embedding_level() >= maximum_level)
+                run_index++;
+            auto reverse_end = run_index - 1;
+            while (reverse_start < reverse_end) {
+                swap(runs[reverse_start], runs[reverse_end]);
+                reverse_start++;
+                reverse_end--;
+            }
+        }
+        maximum_level--;
+    }
+
+    // mirroring RTL mirror characters
+    for (auto& run : runs) {
+        if (run.direction() == TextDirection::LTR)
+            continue;
+        for (auto& code_point : run.code_points()) {
+            code_point = get_mirror_char(code_point);
+        }
+    }
+
+    return runs;
+}
+
+// Returns true if `text` contains a non-neutral code point whose direction differs from `initial_direction`.
+template<typename TextType>
+bool text_contains_bidirectional_text(const TextType& text, TextDirection initial_direction)
+{
+    for (u32 code_point : text) {
+        auto char_class = get_char_bidi_class(code_point);
+        if (char_class == BidirectionalClass::NEUTRAL)
+            continue;
+        if (bidi_class_to_direction(char_class) != initial_direction)
+            return true;
+    }
+    return false;
+}
+
 template<typename TextType, typename DrawGlyphFunction>
 void do_draw_text(const IntRect& rect, const TextType& text, const Font& font, TextAlignment alignment, TextElision elision, DrawGlyphFunction draw_glyph)
 {
+    if (draw_text_get_length(text) == 0)
+        return;
+
     Vector<TextType, 32> lines;
 
     size_t start_of_current_line = 0;
@@ -1161,9 +1341,30 @@ void do_draw_text(const IntRect& rect, const TextType& text, const Font& font, T
 
     for (size_t i = 0; i < lines.size(); ++i) {
         auto& line = lines[i];
+
         IntRect line_rect { bounding_rect.x(), bounding_rect.y() + static_cast<int>(i) * line_height, bounding_rect.width(), line_height };
         line_rect.intersect(rect);
-        draw_text_line(line_rect, line, font, alignment, elision, draw_glyph);
+
+        TextDirection line_direction = get_text_direction(line);
+        if (text_contains_bidirectional_text(line, line_direction)) { // Slow Path: The line contains mixed BiDi classes
+            auto directional_runs = split_text_into_directional_runs(line, line_direction);
+            auto current_dx = line_direction == TextDirection::LTR ? 0 : line_rect.width();
+            // The runs are in visual (left-to-right) order. An RTL line is laid out starting from its right edge, so it walks the runs back to front.
+            auto runs_count = directional_runs.size();
+            for (size_t run_index = 0; run_index < runs_count; ++run_index) {
+                auto& directional_run = directional_runs[line_direction == TextDirection::LTR ? run_index : runs_count - run_index - 1];
+                auto run_width = font.width(directional_run.text());
+                if (line_direction == TextDirection::RTL)
+                    current_dx -= run_width;
+                auto run_rect = line_rect.translated(current_dx, 0);
+                run_rect.set_width(run_width);
+                draw_text_line(run_rect, directional_run.text(), font, alignment, elision, directional_run.direction(), draw_glyph);
+                if (line_direction == TextDirection::LTR)
+                    current_dx += run_width;
+            }
+        } else {
+            draw_text_line(line_rect, line, font, alignment, elision, line_direction, draw_glyph);
+        }
     }
 }
 
diff --git a/Userland/Libraries/LibGfx/TextDirection.cpp b/Userland/Libraries/LibGfx/TextDirection.cpp
new file mode 100644
index 0000000000..c29e1baf61
--- /dev/null
+++ b/Userland/Libraries/LibGfx/TextDirection.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/Array.h>
+#include <LibGfx/TextDirection.h>
+
+namespace Gfx {
+
+// FIXME: These should be parsed from the official UnicodeData.txt that specifies the class for each character (this function doesn't take a large number of characters into account)
+static consteval Array<BidirectionalClass, 0x1F000> generate_char_bidi_class_lookup_table()
+{
+    Array<BidirectionalClass, 0x1F000> lookup_table {};
+    for (u32 ch = 0; ch < 0x1F000; ch++) {
+        auto char_class = BidirectionalClass::STRONG_LTR;
+        if ((ch >= 0x600 && ch <= 0x7BF) || (ch >= 0x8A0 && ch <= 0x8FF) || (ch >= 0xFB50 && ch <= 0xFDCF) || (ch >= 0xFDF0 && ch <= 0xFDFF) || (ch >= 0xFE70 && ch <= 0xFEFF) || (ch >= 0x1EE00 && ch <= 0x1EEFF))
+            char_class = BidirectionalClass::STRONG_RTL; // Arabic RTL
+        if ((ch >= 0x590 && ch <= 0x5FF) || (ch >= 0x7C0 && ch <= 0x89F) || (ch == 0x200F) || (ch >= 0xFB1D && ch <= 0xFB4F) || (ch >= 0x10800 && ch <= 0x10FFF) || (ch >= 0x1E800 && ch <= 0x1EDFF) || (ch >= 0x1EF00 && ch <= 0x1EFFF))
+            char_class = BidirectionalClass::STRONG_RTL; // Hebrew RTL
+        if ((ch >= 0x30 && ch <= 0x39) || (ch >= 0x660 && ch <= 0x669) || (ch >= 0x10D30 && ch <= 0x10E7E))
+            char_class = BidirectionalClass::WEAK_NUMBERS; // Numerals
+        if ((ch >= 0x23 && ch <= 0x25) || (ch >= 0x2B && ch <= 0x2F) || (ch == 0x3A))
+            char_class = BidirectionalClass::WEAK_SEPARATORS; // Separators
+        if ((ch >= 0x9 && ch <= 0xD) || (ch >= 0x1C && ch <= 0x22) || (ch >= 0x26 && ch <= 0x2A) || (ch >= 0x3B && ch <= 0x40) || (ch >= 0x5B && ch <= 0x60) || (ch >= 0x7B && ch <= 0x7E))
+            char_class = BidirectionalClass::NEUTRAL;
+        lookup_table[ch] = char_class;
+    }
+    return lookup_table;
+}
+constexpr Array<BidirectionalClass, 0x1F000> char_bidi_class_lookup_table = generate_char_bidi_class_lookup_table();
+
+}
diff --git a/Userland/Libraries/LibGfx/TextDirection.h b/Userland/Libraries/LibGfx/TextDirection.h
new file mode 100644
index 0000000000..87075a832e
--- /dev/null
+++ b/Userland/Libraries/LibGfx/TextDirection.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Array.h>
+#include <AK/Utf32View.h>
+#include <AK/Vector.h>
+
+namespace Gfx {
+
+enum class BidirectionalClass {
+    STRONG_LTR,
+    STRONG_RTL,
+    WEAK_NUMBERS,
+    WEAK_SEPARATORS,
+    NEUTRAL,
+};
+
+extern const Array<BidirectionalClass, 0x1F000> char_bidi_class_lookup_table;
+
+// Returns the class of `ch`; code points beyond the end of the lookup table are treated as STRONG_LTR.
+constexpr BidirectionalClass get_char_bidi_class(u32 ch)
+{
+    if (ch >= char_bidi_class_lookup_table.size())
+        return BidirectionalClass::STRONG_LTR;
+    return char_bidi_class_lookup_table[ch];
+}
+
+// FIXME: These should be parsed from the official BidiMirroring.txt that specifies the mirroring character for each character (this function doesn't take a large number of characters into account)
+constexpr u32 get_mirror_char(u32 ch)
+{
+    if (ch == 0x28)
+        return 0x29;
+    if (ch == 0x29)
+        return 0x28;
+    if (ch == 0x3C)
+        return 0x3E;
+    if (ch == 0x3E)
+        return 0x3C;
+    if (ch == 0x5B)
+        return 0x5D;
+    if (ch == 0x5D)
+        return 0x5B;
+    if (ch == 0x7B)
+        return 0x7D;
+    if (ch == 0x7D)
+        return 0x7B;
+    if (ch == 0xAB)
+        return 0xBB;
+    if (ch == 0xBB)
+        return 0xAB;
+    if (ch == 0x2039)
+        return 0x203A;
+    if (ch == 0x203A)
+        return 0x2039;
+    return ch;
+}
+
+enum class TextDirection {
+    LTR,
+    RTL,
+};
+
+// Maps a non-neutral class to a direction: STRONG_RTL is RTL, every other class is LTR.
+constexpr TextDirection bidi_class_to_direction(BidirectionalClass class_)
+{
+    VERIFY(class_ != BidirectionalClass::NEUTRAL);
+    if (class_ == BidirectionalClass::STRONG_LTR || class_ == BidirectionalClass::WEAK_NUMBERS || class_ == BidirectionalClass::WEAK_SEPARATORS)
+        return TextDirection::LTR;
+    return TextDirection::RTL;
+}
+
+// Returns the direction of the first non-neutral code point (LTR if there is none). Assumes the text has a homogeneous direction
+template<typename TextType>
+constexpr TextDirection get_text_direction(TextType text)
+{
+    for (u32 code_point : text) {
+        auto char_direction = get_char_bidi_class(code_point);
+        if (char_direction != BidirectionalClass::NEUTRAL)
+            return bidi_class_to_direction(char_direction);
+    }
+    return TextDirection::LTR;
+}
+
+// A sequence of code points that share a single embedding level; even levels are LTR, odd levels are RTL.
+class DirectionalRun {
+public:
+    DirectionalRun(Vector<u32> code_points, u8 embedding_level)
+        : m_code_points(move(code_points))
+        , m_embedding_level(embedding_level)
+    {
+    }
+
+    [[nodiscard]] Utf32View text() const { return { m_code_points.data(), m_code_points.size() }; }
+    [[nodiscard]] u8 embedding_level() const { return m_embedding_level; }
+    [[nodiscard]] TextDirection direction() const { return (m_embedding_level % 2) == 0 ? TextDirection::LTR : TextDirection::RTL; }
+
+    Vector<u32>& code_points() { return m_code_points; }
+
+private:
+    Vector<u32> m_code_points;
+    u8 m_embedding_level;
+};
+
+}