diff options
author | Andreas Kling <awesomekling@gmail.com> | 2019-10-18 22:49:23 +0200 |
---|---|---|
committer | Andreas Kling <awesomekling@gmail.com> | 2019-10-18 22:49:23 +0200 |
commit | f4e6dae6fec24df0f3ee721a964d091b62aa774e (patch) | |
tree | a7de636a3af657e94ae76a17774360ddc78105ef /AK | |
parent | ab9e6166e8c3e5df6c5ee1132324d524e3698505 (diff) | |
download | serenity-f4e6dae6fec24df0f3ee721a964d091b62aa774e.zip |
UTF-8: Add Utf8CodepointIterator::codepoint_length_in_bytes()
This allows you to retrieve the length (in bytes) of the codepoint the
iterator is currently pointing at.
Diffstat (limited to 'AK')
-rw-r--r-- | AK/Utf8View.cpp | 10 | ||||
-rw-r--r-- | AK/Utf8View.h | 2 |
2 files changed, 12 insertions, 0 deletions
```diff
diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp
index b63f674c9a..d5b73f4319 100644
--- a/AK/Utf8View.cpp
+++ b/AK/Utf8View.cpp
@@ -139,6 +139,16 @@ Utf8CodepointIterator& Utf8CodepointIterator::operator++()
     return *this;
 }
 
+int Utf8CodepointIterator::codepoint_length_in_bytes() const
+{
+    ASSERT(m_length > 0);
+    int codepoint_length_in_bytes;
+    u32 value;
+    bool first_byte_makes_sense = decode_first_byte(*m_ptr, codepoint_length_in_bytes, value);
+    ASSERT(first_byte_makes_sense);
+    return codepoint_length_in_bytes;
+}
+
 u32 Utf8CodepointIterator::operator*() const
 {
     ASSERT(m_length > 0);
diff --git a/AK/Utf8View.h b/AK/Utf8View.h
index 5db0e1b7ea..cbc61aeead 100644
--- a/AK/Utf8View.h
+++ b/AK/Utf8View.h
@@ -18,6 +18,8 @@ public:
     Utf8CodepointIterator& operator++();
     u32 operator*() const;
 
+    int codepoint_length_in_bytes() const;
+
 private:
     Utf8CodepointIterator(const unsigned char*, int);
     const unsigned char* m_ptr { nullptr };
```