summaryrefslogtreecommitdiff
path: root/AK/Utf32View.h
blob: 8009879c9c083d149dfb1bbcc04119163c33ccc8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/*
 * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#pragma once

#include <AK/Assertions.h>
#include <AK/Checked.h>
#include <AK/Format.h>
#include <AK/Types.h>

namespace AK {

// Forward declaration: the iterator below names Utf32View as a friend before
// the view class itself is defined.
class Utf32View;

// Iterator over a contiguous run of UTF-32 code points.
//
// The iterator carries both its current position and the number of code points
// still ahead of it, so it can detect exhaustion on its own (see done()).
// Only Utf32View can create a positioned iterator; a default-constructed one
// has a null position and a remaining count of -1, which means done() reports
// false while dereferencing or incrementing it trips VERIFY.
class Utf32CodePointIterator {
    friend class Utf32View;

public:
    Utf32CodePointIterator() = default;
    ~Utf32CodePointIterator() = default;

    // Iterators compare equal only if both the position and the remaining
    // count agree.
    bool operator==(Utf32CodePointIterator const& other) const
    {
        if (m_ptr != other.m_ptr)
            return false;
        return m_length == other.m_length;
    }

    // Steps forward by one code point. Asserts that at least one remains.
    Utf32CodePointIterator& operator++()
    {
        VERIFY(m_length > 0);
        ++m_ptr;
        --m_length;
        return *this;
    }

    // Signed distance, in code points, from `other` to this iterator.
    ssize_t operator-(Utf32CodePointIterator const& other) const
    {
        return m_ptr - other.m_ptr;
    }

    // Reads the current code point. Asserts that at least one remains.
    u32 operator*() const
    {
        VERIFY(m_length > 0);
        return m_ptr[0];
    }

    // NOTE: This returns {} if the peek is at or past EOF.
    // Only declared here; the definition is not part of this class body.
    Optional<u32> peek(size_t offset = 0) const;

    // Every UTF-32 code point occupies exactly one u32.
    constexpr int code_point_length_in_bytes() const { return sizeof(u32); }

    // True once no code points remain.
    bool done() const { return m_length == 0; }

private:
    // Positions the iterator at `ptr` with `length` code points remaining.
    Utf32CodePointIterator(u32 const* ptr, size_t length)
        : m_ptr(ptr)
        , m_length(static_cast<ssize_t>(length))
    {
    }

    u32 const* m_ptr { nullptr };
    ssize_t m_length { -1 };
};

class Utf32View {
public:
    using Iterator = Utf32CodePointIterator;

    Utf32View() = default;
    Utf32View(u32 const* code_points, size_t length)
        : m_code_points(code_points)
        , m_length(length)
    {
        VERIFY(code_points || length == 0);
    }

    Utf32CodePointIterator begin() const
    {
        return { begin_ptr(), m_length };
    }

    Utf32CodePointIterator end() const
    {
        return { end_ptr(), 0 };
    }

    u32 at(size_t index) const
    {
        VERIFY(index < m_length);
        return m_code_points[index];
    }

    u32 operator[](size_t index) const { return at(index); }

    u32 const* code_points() const { return m_code_points; }
    bool is_empty() const { return m_length == 0; }
    bool is_null() const { return !m_code_points; }
    size_t length() const { return m_length; }

    size_t iterator_offset(Utf32CodePointIterator const& it) const
    {
        VERIFY(it.m_ptr >= m_code_points);
        VERIFY(it.m_ptr < m_code_points + m_length);
        return ((ptrdiff_t)it.m_ptr - (ptrdiff_t)m_code_points) / sizeof(u32);
    }

    Utf32View substring_view(size_t offset, size_t length) const
    {
        VERIFY(offset <= m_length);
        VERIFY(!Checked<size_t>::addition_would_overflow(offset, length));
        VERIFY((offset + length) <= m_length);
        return Utf32View(m_code_points + offset, length);
    }

private:
    u32 const* begin_ptr() const
    {
        return m_code_points;
    }
    u32 const* end_ptr() const
    {
        return m_code_points + m_length;
    }

    u32 const* m_code_points { nullptr };
    size_t m_length { 0 };
};

// Formatter specialization for Utf32View, built on top of the StringView
// formatter. format() is only declared in this header.
template<>
struct Formatter<Utf32View> : Formatter<StringView> {
    ErrorOr<void> format(FormatBuilder& builder, Utf32View const& view);
};

}

// When USING_AK_GLOBALLY evaluates to non-zero, make Utf32View usable without
// the AK:: qualifier.
#if USING_AK_GLOBALLY
using AK::Utf32View;
#endif