summaryrefslogtreecommitdiff
path: root/Userland
diff options
context:
space:
mode:
author: Idan Horowitz <idan.horowitz@gmail.com> 2022-01-30 20:39:26 +0200
committer: Idan Horowitz <idan.horowitz@gmail.com> 2022-01-31 21:05:04 +0200
commit: cea6c81c7738ead1d2b95a25c42ccb2b5f5f11e7 (patch)
tree: bc104b4a884ed7cd82b44df156b1322fbc057936 /Userland
parent: b1d19b5917e42022669403b6983215e2680b4067 (diff)
downloadserenity-cea6c81c7738ead1d2b95a25c42ccb2b5f5f11e7.zip
LibJS: Implement the Intl.Segmenter FindBoundary AO
Diffstat (limited to 'Userland')
-rw-r--r--Userland/Libraries/LibJS/Runtime/Intl/Segmenter.cpp72
-rw-r--r--Userland/Libraries/LibJS/Runtime/Intl/Segmenter.h6
-rw-r--r--Userland/Libraries/LibJS/Runtime/Intl/Segments.h4
3 files changed, 82 insertions, 0 deletions
diff --git a/Userland/Libraries/LibJS/Runtime/Intl/Segmenter.cpp b/Userland/Libraries/LibJS/Runtime/Intl/Segmenter.cpp
index 89b71b47ed..8571c9501c 100644
--- a/Userland/Libraries/LibJS/Runtime/Intl/Segmenter.cpp
+++ b/Userland/Libraries/LibJS/Runtime/Intl/Segmenter.cpp
@@ -4,7 +4,10 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
+#include <AK/BinarySearch.h>
+#include <AK/Utf16View.h>
#include <LibJS/Runtime/Intl/Segmenter.h>
+#include <LibUnicode/CharacterTypes.h>
namespace JS::Intl {
@@ -40,4 +43,73 @@ StringView Segmenter::segmenter_granularity_string() const
}
}
+// 18.8.1 FindBoundary ( segmenter, string, startIndex, direction ), https://tc39.es/ecma402/#sec-findboundary -- Returns a code-unit offset into string, or +∞ per step 6; boundaries_cache is a non-standard extra parameter that this function fills on first use.
+double find_boundary(Segmenter const& segmenter, Utf16View const& string, double start_index, Direction direction, Optional<Vector<size_t>>& boundaries_cache)
+{
+ // 1. Let locale be segmenter.[[Locale]].
+ auto const& locale = segmenter.locale();
+
+ // 2. Let granularity be segmenter.[[SegmenterGranularity]].
+ auto granularity = segmenter.segmenter_granularity();
+
+ // 3. Let len be the length of string.
+ auto length = string.length_in_code_units();
+
+ // Non-standard: if the caller-supplied boundaries_cache is still empty, fill it once with the boundaries of string for this granularity; calls that pass an already-filled cache skip this work.
+ if (!boundaries_cache.has_value()) {
+ switch (granularity) {
+ case Segmenter::SegmenterGranularity::Grapheme:
+ boundaries_cache = Unicode::find_grapheme_segmentation_boundaries(string);
+ break;
+ case Segmenter::SegmenterGranularity::Word:
+ boundaries_cache = Unicode::find_word_segmentation_boundaries(string);
+ break;
+ case Segmenter::SegmenterGranularity::Sentence:
+ boundaries_cache = Unicode::find_sentence_segmentation_boundaries(string);
+ break;
+ default:
+ VERIFY_NOT_REACHED();
+ }
+ }
+ (void)locale; // TODO: Support locale-sensitive boundaries (locale is read above but not used yet; the cast only marks it as intentionally unused)
+
+ // 4. If direction is before, then
+ if (direction == Direction::Before) {
+ // a. Assert: startIndex ≥ 0.
+ VERIFY(start_index >= 0);
+ // b. Assert: startIndex < len.
+ VERIFY(start_index < length);
+
+ // c. Search string for the last segmentation boundary that is preceded by at most startIndex code units from the beginning, using locale locale and text element granularity granularity.
+ size_t boundary_index; // Deliberately left for binary_search to fill in through the out-pointer below.
+ binary_search(*boundaries_cache, start_index, &boundary_index); // Return value is ignored; only boundary_index is used. NOTE(review): assumes binary_search always writes boundary_index, even on a miss, and that on a miss it is the index of the nearest boundary not after start_index -- confirm against AK/BinarySearch.h.
+
+ // d. If a boundary is found, return the count of code units in string preceding it.
+ if (boundary_index < boundaries_cache->size()) // Bounds check before at(); also false whenever the cache is empty.
+ return boundaries_cache->at(boundary_index);
+
+ // e. Return 0.
+ return 0;
+ }
+
+ // 5. Assert: direction is after.
+ VERIFY(direction == Direction::After);
+
+ // 6. If len is 0 or startIndex ≥ len, return +∞.
+ if (length == 0 || start_index >= length)
+ return INFINITY;
+
+ // 7. Search string for the first segmentation boundary that follows the code unit at index startIndex, using locale locale and text element granularity granularity.
+ size_t boundary_index; // Filled in by binary_search through the out-pointer below.
+ binary_search(*boundaries_cache, start_index, &boundary_index); // Return value is ignored; only boundary_index is used. NOTE(review): same assumption about binary_search's out-parameter as in step 4c -- confirm.
+ ++boundary_index; // Move one entry past the index reported by the search, i.e. to the next cached boundary. NOTE(review): presumes the reported entry is not after start_index -- confirm.
+
+ // 8. If a boundary is found, return the count of code units in string preceding it.
+ if (boundary_index < boundaries_cache->size()) // False when the incremented index ran off the end of the cache.
+ return boundaries_cache->at(boundary_index);
+
+ // 9. Return len.
+ return length; // Converted to the function's double return type.
+}
+
}
diff --git a/Userland/Libraries/LibJS/Runtime/Intl/Segmenter.h b/Userland/Libraries/LibJS/Runtime/Intl/Segmenter.h
index f2c9163691..652a395ea2 100644
--- a/Userland/Libraries/LibJS/Runtime/Intl/Segmenter.h
+++ b/Userland/Libraries/LibJS/Runtime/Intl/Segmenter.h
@@ -36,4 +36,10 @@ private:
SegmenterGranularity m_segmenter_granularity { SegmenterGranularity::Grapheme }; // [[SegmenterGranularity]]
};
+enum class Direction { // Search direction taken by find_boundary.
+ Before, // Find the last boundary preceded by at most start_index code units.
+ After, // Find the first boundary following the code unit at start_index.
+};
+double find_boundary(Segmenter const&, Utf16View const&, double start_index, Direction, Optional<Vector<size_t>>& boundaries_cache); // 18.8.1 FindBoundary; boundaries_cache is a non-standard in/out parameter that is filled when it has no value yet.
+
}
diff --git a/Userland/Libraries/LibJS/Runtime/Intl/Segments.h b/Userland/Libraries/LibJS/Runtime/Intl/Segments.h
index 970bcdaba0..1e5a11a610 100644
--- a/Userland/Libraries/LibJS/Runtime/Intl/Segments.h
+++ b/Userland/Libraries/LibJS/Runtime/Intl/Segments.h
@@ -25,11 +25,15 @@ public:
Utf16View segments_string() const { return m_segments_string.view(); }
+ Optional<Vector<size_t>>& boundaries_cache() const { return m_boundaries_cache; } // Hands out a modifiable reference from a const method; this works because m_boundaries_cache is declared mutable.
+
private:
virtual void visit_edges(Cell::Visitor&) override;
Segmenter& m_segments_segmenter; // [[SegmentsSegmenter]]
Utf16String m_segments_string; // [[SegmentsString]]
+
+ mutable Optional<Vector<size_t>> m_boundaries_cache; // Non-standard (no spec slot); empty until something fills it. mutable so the const boundaries_cache() accessor can return it by non-const reference.
};
}