summaryrefslogtreecommitdiff
path: root/AK
diff options
context:
space:
mode:
author: Andreas Kling <awesomekling@gmail.com>, 2019-10-18 22:49:23 +0200
committer: Andreas Kling <awesomekling@gmail.com>, 2019-10-18 22:49:23 +0200
commit: f4e6dae6fec24df0f3ee721a964d091b62aa774e (patch)
tree: a7de636a3af657e94ae76a17774360ddc78105ef /AK
parent: ab9e6166e8c3e5df6c5ee1132324d524e3698505 (diff)
download: serenity-f4e6dae6fec24df0f3ee721a964d091b62aa774e.zip
UTF-8: Add Utf8CodepointIterator::codepoint_length_in_bytes()
This allows you to retrieve the length (in bytes) of the codepoint the iterator is currently pointing at.
Diffstat (limited to 'AK')
-rw-r--r-- AK/Utf8View.cpp 10
-rw-r--r-- AK/Utf8View.h 2
2 files changed, 12 insertions, 0 deletions
diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp
index b63f674c9a..d5b73f4319 100644
--- a/AK/Utf8View.cpp
+++ b/AK/Utf8View.cpp
@@ -139,6 +139,16 @@ Utf8CodepointIterator& Utf8CodepointIterator::operator++()
return *this;
}
+int Utf8CodepointIterator::codepoint_length_in_bytes() const // Byte length of the codepoint the iterator currently points at.
+{
+ ASSERT(m_length > 0); // Precondition: m_length must be positive (presumably the bytes left to iterate; confirm).
+ int codepoint_length_in_bytes; // Presumably an out-parameter filled in by decode_first_byte() below; it is returned as-is.
+ u32 value; // Passed to decode_first_byte() but never read in this function.
+ bool first_byte_makes_sense = decode_first_byte(*m_ptr, codepoint_length_in_bytes, value); // Only the byte at m_ptr is handed to the decoder.
+ ASSERT(first_byte_makes_sense); // No recovery path: a rejected first byte trips the assertion.
+ return codepoint_length_in_bytes;
+}
+
u32 Utf8CodepointIterator::operator*() const
{
ASSERT(m_length > 0);
diff --git a/AK/Utf8View.h b/AK/Utf8View.h
index 5db0e1b7ea..cbc61aeead 100644
--- a/AK/Utf8View.h
+++ b/AK/Utf8View.h
@@ -18,6 +18,8 @@ public:
Utf8CodepointIterator& operator++();
u32 operator*() const;
+ int codepoint_length_in_bytes() const;
+
private:
Utf8CodepointIterator(const unsigned char*, int);
const unsigned char* m_ptr { nullptr };