summaryrefslogtreecommitdiff
path: root/Userland/Libraries/LibWeb/Infra/Strings.cpp
blob: 726caddd4750a0ac00d6ae3d29f04291079e0a46 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
/*
 * Copyright (c) 2022, Linus Groh <linusg@serenityos.org>
 * Copyright (c) 2022, networkException <networkexception@serenityos.org>
 * Copyright (c) 2023, Kenneth Myhra <kennethmyhra@serenityos.org>
 * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/CharacterTypes.h>
#include <AK/DeprecatedString.h>
#include <AK/Utf16View.h>
#include <AK/Utf8View.h>
#include <LibWeb/Infra/CharacterTypes.h>
#include <LibWeb/Infra/Strings.h>

namespace Web::Infra {

// https://infra.spec.whatwg.org/#ascii-case-insensitive
// Returns true when `a` and `b` are equal after ASCII-lowercasing every code point.
// Only ASCII letters are folded; all other code points must match exactly.
bool is_ascii_case_insensitive_match(StringView a, StringView b)
{
    // A string A is an ASCII case-insensitive match for a string B,
    // if the ASCII lowercase of A is the ASCII lowercase of B.

    Utf8View lhs { a };
    Utf8View rhs { b };

    // Strings with a different number of code points can never match. Checking this
    // first also guarantees that both iterators below run out at the same time.
    if (lhs.length() != rhs.length())
        return false;

    // Walk both views in lockstep, comparing the lowercased code points pairwise.
    auto rhs_position = rhs.begin();
    for (auto lhs_position = lhs.begin(); lhs_position != lhs.end(); ++lhs_position) {
        auto rhs_code_point = *rhs_position;
        ++rhs_position;

        if (to_ascii_lowercase(*lhs_position) != to_ascii_lowercase(rhs_code_point))
            return false;
    }

    return true;
}

// https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace
// Returns a copy of `string` in which every run of ASCII whitespace is collapsed into a
// single U+0020 SPACE, with leading and trailing ASCII whitespace removed.
DeprecatedString strip_and_collapse_whitespace(StringView string)
{
    // Replace any sequence of one or more consecutive code points that are ASCII whitespace in the string with a single U+0020 SPACE code point.
    StringBuilder collapsed;

    // True while the most recently emitted code point is the single space standing in for
    // the current whitespace run. Only whitespace ever produces a trailing 0x20 byte in the
    // builder, so this flag is equivalent to asking whether the builder ends with ' '.
    bool previous_was_space = false;

    for (auto code_point : Utf8View { string }) {
        if (!Infra::is_ascii_whitespace(code_point)) {
            collapsed.append_code_point(code_point);
            previous_was_space = false;
        } else if (!previous_was_space) {
            collapsed.append(' ');
            previous_was_space = true;
        }
    }

    // ...and then remove any leading and trailing ASCII whitespace from that string.
    auto collapsed_view = collapsed.string_view();
    return collapsed_view.trim(Infra::ASCII_WHITESPACE);
}

// https://infra.spec.whatwg.org/#code-unit-prefix
// Returns true when `potential_prefix` is a code unit prefix of `input`, i.e. when the
// UTF-16 code units of `potential_prefix` equal the leading code units of `input`.
// Both arguments are UTF-8 and are transcoded to UTF-16 before comparison.
bool is_code_unit_prefix(StringView potential_prefix, StringView input)
{
    auto potential_prefix_utf16 = utf8_to_utf16(potential_prefix).release_value_but_fixme_should_propagate_errors();
    auto input_utf16 = utf8_to_utf16(input).release_value_but_fixme_should_propagate_errors();

    Utf16View potential_prefix_view(potential_prefix_utf16);
    Utf16View input_view(input_utf16);

    // NOTE: "Length" in the spec means the number of UTF-16 code units. The StringView
    //       parameters report their length in UTF-8 bytes, which is larger for any non-ASCII
    //       text, so all bounds checks must use the UTF-16 views to stay within range.
    auto const potential_prefix_length = potential_prefix_view.length_in_code_units();
    auto const input_length = input_view.length_in_code_units();

    // 1. Let i be 0.
    size_t i = 0;

    // 2. While true:
    while (true) {
        // 1. If i is greater than or equal to potentialPrefix’s length, then return true.
        if (i >= potential_prefix_length)
            return true;

        // 2. If i is greater than or equal to input’s length, then return false.
        if (i >= input_length)
            return false;

        // 3. Let potentialPrefixCodeUnit be the ith code unit of potentialPrefix.
        auto potential_prefix_code_unit = potential_prefix_view.code_unit_at(i);

        // 4. Let inputCodeUnit be the ith code unit of input.
        auto input_code_unit = input_view.code_unit_at(i);

        // 5. Return false if potentialPrefixCodeUnit is not inputCodeUnit.
        if (potential_prefix_code_unit != input_code_unit)
            return false;

        // 6. Set i to i + 1.
        ++i;
    }
}

// https://infra.spec.whatwg.org/#scalar-value-string
// Returns a copy of `string` in which every surrogate code point is replaced by
// U+FFFD REPLACEMENT CHARACTER. Fails only if building the result string fails.
ErrorOr<String> convert_to_scalar_value_string(StringView string)
{
    // To convert a string into a scalar value string, replace any surrogates with U+FFFD.
    StringBuilder scalar_value_builder;
    auto utf8_view = Utf8View { string };
    for (u32 code_point : utf8_view) {
        if (is_unicode_surrogate(code_point))
            code_point = 0xFFFD;
        // NOTE: The code point must be appended as a code point so it is encoded as UTF-8.
        //       try_append() would select its `char` overload and truncate the value to a
        //       single byte, corrupting every non-ASCII code point (including U+FFFD itself).
        TRY(scalar_value_builder.try_append_code_point(code_point));
    }
    return scalar_value_builder.to_string();
}

// https://infra.spec.whatwg.org/#ascii-lowercase
// Returns a copy of `string` with ASCII upper alphas (A-Z) replaced by their lowercase
// counterparts; every other code point is passed through unchanged.
// Fails only if building the result string fails.
ErrorOr<String> to_ascii_lower_case(StringView string)
{
    // To ASCII lowercase a string, replace all ASCII upper alphas in the string with their
    // corresponding code point in ASCII lower alpha.
    StringBuilder string_builder;
    auto utf8_view = Utf8View { string };
    for (u32 code_point : utf8_view) {
        code_point = to_ascii_lowercase(code_point);
        // NOTE: Append as a code point so non-ASCII input is re-encoded as UTF-8.
        //       try_append() would select its `char` overload and truncate the value to a
        //       single byte.
        TRY(string_builder.try_append_code_point(code_point));
    }
    return string_builder.to_string();
}

// https://infra.spec.whatwg.org/#ascii-uppercase
// Returns a copy of `string` with ASCII lower alphas (a-z) replaced by their uppercase
// counterparts; every other code point is passed through unchanged.
// Fails only if building the result string fails.
ErrorOr<String> to_ascii_upper_case(StringView string)
{
    // To ASCII uppercase a string, replace all ASCII lower alphas in the string with their
    // corresponding code point in ASCII upper alpha.
    StringBuilder string_builder;
    auto utf8_view = Utf8View { string };
    for (u32 code_point : utf8_view) {
        code_point = to_ascii_uppercase(code_point);
        // NOTE: Append as a code point so non-ASCII input is re-encoded as UTF-8.
        //       try_append() would select its `char` overload and truncate the value to a
        //       single byte.
        TRY(string_builder.try_append_code_point(code_point));
    }
    return string_builder.to_string();
}

}