diff options
author | Idan Horowitz <idan.horowitz@gmail.com> | 2022-01-30 20:39:26 +0200 |
---|---|---|
committer | Idan Horowitz <idan.horowitz@gmail.com> | 2022-01-31 21:05:04 +0200 |
commit | cea6c81c7738ead1d2b95a25c42ccb2b5f5f11e7 (patch) | |
tree | bc104b4a884ed7cd82b44df156b1322fbc057936 /Userland | |
parent | b1d19b5917e42022669403b6983215e2680b4067 (diff) | |
download | serenity-cea6c81c7738ead1d2b95a25c42ccb2b5f5f11e7.zip |
LibJS: Implement the Intl.Segmenter FindBoundary AO
Diffstat (limited to 'Userland')
-rw-r--r-- | Userland/Libraries/LibJS/Runtime/Intl/Segmenter.cpp | 72 | ||||
-rw-r--r-- | Userland/Libraries/LibJS/Runtime/Intl/Segmenter.h | 6 | ||||
-rw-r--r-- | Userland/Libraries/LibJS/Runtime/Intl/Segments.h | 4 |
3 files changed, 82 insertions, 0 deletions
```diff
diff --git a/Userland/Libraries/LibJS/Runtime/Intl/Segmenter.cpp b/Userland/Libraries/LibJS/Runtime/Intl/Segmenter.cpp
index 89b71b47ed..8571c9501c 100644
--- a/Userland/Libraries/LibJS/Runtime/Intl/Segmenter.cpp
+++ b/Userland/Libraries/LibJS/Runtime/Intl/Segmenter.cpp
@@ -4,7 +4,10 @@
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
+#include <AK/BinarySearch.h>
+#include <AK/Utf16View.h>
 #include <LibJS/Runtime/Intl/Segmenter.h>
+#include <LibUnicode/CharacterTypes.h>
 
 namespace JS::Intl {
 
@@ -40,4 +43,73 @@ StringView Segmenter::segmenter_granularity_string() const
     }
 }
 
+// 18.8.1 FindBoundary ( segmenter, string, startIndex, direction ), https://tc39.es/ecma402/#sec-findboundary
+double find_boundary(Segmenter const& segmenter, Utf16View const& string, double start_index, Direction direction, Optional<Vector<size_t>>& boundaries_cache)
+{
+    // 1. Let locale be segmenter.[[Locale]].
+    auto const& locale = segmenter.locale();
+
+    // 2. Let granularity be segmenter.[[SegmenterGranularity]].
+    auto granularity = segmenter.segmenter_granularity();
+
+    // 3. Let len be the length of string.
+    auto length = string.length_in_code_units();
+
+    // Non-standard, populate boundaries cache
+    if (!boundaries_cache.has_value()) {
+        switch (granularity) {
+        case Segmenter::SegmenterGranularity::Grapheme:
+            boundaries_cache = Unicode::find_grapheme_segmentation_boundaries(string);
+            break;
+        case Segmenter::SegmenterGranularity::Word:
+            boundaries_cache = Unicode::find_word_segmentation_boundaries(string);
+            break;
+        case Segmenter::SegmenterGranularity::Sentence:
+            boundaries_cache = Unicode::find_sentence_segmentation_boundaries(string);
+            break;
+        default:
+            VERIFY_NOT_REACHED();
+        }
+    }
+    (void)locale; // TODO: Support locale-sensitive boundaries
+
+    // 4. If direction is before, then
+    if (direction == Direction::Before) {
+        // a. Assert: startIndex ≥ 0.
+        VERIFY(start_index >= 0);
+        // b. Assert: startIndex < len.
+        VERIFY(start_index < length);
+
+        // c. Search string for the last segmentation boundary that is preceded by at most startIndex code units from the beginning, using locale locale and text element granularity granularity.
+        size_t boundary_index;
+        binary_search(*boundaries_cache, start_index, &boundary_index);
+
+        // d. If a boundary is found, return the count of code units in string preceding it.
+        if (boundary_index < boundaries_cache->size())
+            return boundaries_cache->at(boundary_index);
+
+        // e. Return 0.
+        return 0;
+    }
+
+    // 5. Assert: direction is after.
+    VERIFY(direction == Direction::After);
+
+    // 6. If len is 0 or startIndex ≥ len, return +∞.
+    if (length == 0 || start_index >= length)
+        return INFINITY;
+
+    // 7. Search string for the first segmentation boundary that follows the code unit at index startIndex, using locale locale and text element granularity granularity.
+    size_t boundary_index;
+    binary_search(*boundaries_cache, start_index, &boundary_index);
+    ++boundary_index;
+
+    // 8. If a boundary is found, return the count of code units in string preceding it.
+    if (boundary_index < boundaries_cache->size())
+        return boundaries_cache->at(boundary_index);
+
+    // 9. Return len.
+    return length;
+}
+
 }
diff --git a/Userland/Libraries/LibJS/Runtime/Intl/Segmenter.h b/Userland/Libraries/LibJS/Runtime/Intl/Segmenter.h
index f2c9163691..652a395ea2 100644
--- a/Userland/Libraries/LibJS/Runtime/Intl/Segmenter.h
+++ b/Userland/Libraries/LibJS/Runtime/Intl/Segmenter.h
@@ -36,4 +36,10 @@ private:
     SegmenterGranularity m_segmenter_granularity { SegmenterGranularity::Grapheme }; // [[SegmenterGranularity]]
 };
 
+enum class Direction {
+    Before,
+    After,
+};
+double find_boundary(Segmenter const&, Utf16View const&, double start_index, Direction, Optional<Vector<size_t>>& boundaries_cache);
+
 }
diff --git a/Userland/Libraries/LibJS/Runtime/Intl/Segments.h b/Userland/Libraries/LibJS/Runtime/Intl/Segments.h
index 970bcdaba0..1e5a11a610 100644
--- a/Userland/Libraries/LibJS/Runtime/Intl/Segments.h
+++ b/Userland/Libraries/LibJS/Runtime/Intl/Segments.h
@@ -25,11 +25,15 @@ public:
 
     Utf16View segments_string() const { return m_segments_string.view(); }
 
+    Optional<Vector<size_t>>& boundaries_cache() const { return m_boundaries_cache; }
+
 private:
     virtual void visit_edges(Cell::Visitor&) override;
 
     Segmenter& m_segments_segmenter; // [[SegmentsSegmenter]]
     Utf16String m_segments_string; // [[SegmentsString]]
+
+    mutable Optional<Vector<size_t>> m_boundaries_cache;
 };
 
 }
```