summaryrefslogtreecommitdiff
path: root/Userland
diff options
context:
space:
mode:
author Tim Schumacher <timschumi@gmx.de> 2021-06-05 12:44:01 +0200
committer Andreas Kling <kling@serenityos.org> 2021-09-16 16:42:40 +0200
commit a1617faf44b97141c93626eddae8c690bcb7dca9 (patch)
tree 4228b3c44900fbcb489e80def7e725bfb05f2b5e /Userland
parent 405f3d0aa3be30de6892bc9ea9303f0611ede5f6 (diff)
download serenity-a1617faf44b97141c93626eddae8c690bcb7dca9.zip
LibC: Implement mbsinit and mbrtowc
Diffstat (limited to 'Userland')
-rw-r--r-- Userland/Libraries/LibC/wchar.cpp 152
-rw-r--r-- Userland/Libraries/LibC/wchar.h 1
2 files changed, 147 insertions, 6 deletions
diff --git a/Userland/Libraries/LibC/wchar.cpp b/Userland/Libraries/LibC/wchar.cpp
index b3bca30b3f..8a70fd4b04 100644
--- a/Userland/Libraries/LibC/wchar.cpp
+++ b/Userland/Libraries/LibC/wchar.cpp
@@ -6,8 +6,53 @@
#include <AK/Assertions.h>
#include <AK/Format.h>
+#include <errno.h>
#include <wchar.h>
+// Puts the conversion state back into its initial form by overwriting it with
+// a zero-initialised mbstate_t.
+static void mbstate_reset(mbstate_t* state)
+{
+    mbstate_t initial_state = {};
+    *state = initial_state;
+}
+
+// Counts the bytes currently held in the state: the index of the first zero
+// entry in state->bytes, or the full array size when no entry is zero.
+static unsigned int mbstate_stored_bytes(mbstate_t* state)
+{
+    unsigned int count = 0;
+
+    while (count < sizeof(state->bytes) && state->bytes[count] != 0) {
+        count++;
+    }
+
+    return count;
+}
+
+// Determines the total length of the UTF-8 sequence introduced by the first
+// byte stored in the state. Returns 1 through 4 for a usable lead byte and 0
+// when that byte cannot start a sequence.
+static unsigned int mbstate_expected_bytes(mbstate_t* state)
+{
+    unsigned char const lead = state->bytes[0];
+
+    // 0xxxxxxx: single-byte sequence
+    if (lead < 0x80) {
+        return 1;
+    }
+
+    // 10xxxxxx: a continuation byte cannot start a sequence
+    if (lead < 0xC0) {
+        return 0;
+    }
+
+    // 110xxxxx: two-byte sequence
+    if (lead < 0xE0) {
+        return 2;
+    }
+
+    // 1110xxxx: three-byte sequence
+    if (lead < 0xF0) {
+        return 3;
+    }
+
+    // 11110xxx: four-byte sequence
+    if (lead < 0xF8) {
+        return 4;
+    }
+
+    // 11111xxx: not a valid lead byte
+    return 0;
+}
+
extern "C" {
size_t wcslen(const wchar_t* str)
@@ -168,10 +213,96 @@ wint_t btowc(int c)
return c;
}
-size_t mbrtowc(wchar_t*, const char*, size_t, mbstate_t*)
+// Converts the next multibyte (UTF-8) character from s into a wide character.
+//
+// pwc   - receives the decoded code point; may be nullptr to discard it.
+// s     - input bytes; nullptr is shorthand for reading a single null byte.
+// n     - maximum number of bytes that may be read from s.
+// state - conversion state carried across calls; nullptr selects a function-
+//         local static state that all such calls share.
+//
+// Returns the number of bytes consumed by this call, 0 if the decoded
+// character is the null wide character, (size_t)-2 if the bytes seen so far
+// form an incomplete sequence (they stay stored in state), or (size_t)-1 with
+// errno set to EILSEQ for an invalid sequence (state is reset).
+//
+// NOTE(review): overlong encodings, surrogates and values above U+10FFFF are
+// not rejected here; confirm whether callers rely on stricter validation.
+size_t mbrtowc(wchar_t* pwc, const char* s, size_t n, mbstate_t* state)
{
- dbgln("FIXME: Implement mbrtowc()");
- TODO();
+    static mbstate_t anonymous_state = {};
+
+    if (state == nullptr) {
+        state = &anonymous_state;
+    }
+
+    // A null s is defined as the call mbrtowc(nullptr, "", 1, state). Routing
+    // it through the regular path makes an initial state yield 0 and a pending
+    // partial sequence yield EILSEQ. Comparing expected and stored byte counts
+    // instead can never succeed, because an initial state expects 1 byte but
+    // stores 0.
+    if (s == nullptr) {
+        pwc = nullptr;
+        s = "";
+        n = 1;
+    }
+
+    // Zero readable bytes can never complete a character: report "incomplete"
+    // instead of 0, which would claim that a null character was decoded.
+    if (n == 0) {
+        return static_cast<size_t>(-2);
+    }
+
+    size_t consumed_bytes = 0;
+    size_t stored_bytes = mbstate_stored_bytes(state);
+
+    // Start a new sequence if no lead byte is pending from an earlier call
+    if (stored_bytes == 0) {
+        state->bytes[0] = s[0];
+        consumed_bytes = 1;
+        stored_bytes = 1;
+    }
+
+    size_t expected_bytes = mbstate_expected_bytes(state);
+
+    // The lead byte does not start any valid sequence
+    if (expected_bytes == 0) {
+        mbstate_reset(state);
+        errno = EILSEQ;
+        return static_cast<size_t>(-1);
+    }
+
+    // Counting stored bytes directly (rather than a "needed" difference) keeps
+    // the write index below expected_bytes even for a malformed state.
+    while (stored_bytes < expected_bytes) {
+        if (consumed_bytes == n) {
+            // Input exhausted; the partial sequence stays in state
+            return static_cast<size_t>(-2);
+        }
+
+        unsigned char c = s[consumed_bytes];
+
+        // Continuation bytes have to start with 0b10xxxxxx
+        if ((c & 0b11000000) != 0b10000000) {
+            mbstate_reset(state);
+            errno = EILSEQ;
+            return static_cast<size_t>(-1);
+        }
+
+        state->bytes[stored_bytes++] = c;
+        consumed_bytes++;
+    }
+
+    wchar_t codepoint = state->bytes[0];
+
+    // Strip the length marker bits from the lead byte of multi-byte sequences
+    if (expected_bytes > 1) {
+        codepoint &= (1 << (7 - expected_bytes)) - 1;
+    }
+
+    for (size_t i = 1; i < expected_bytes; i++) {
+        // Each continuation byte contributes its low 6 bits
+        codepoint = (codepoint << 6) | (state->bytes[i] & 0b111111);
+    }
+
+    if (pwc) {
+        *pwc = codepoint;
+    }
+
+    // There is no shift state to keep, so clear the entire state
+    mbstate_reset(state);
+
+    if (codepoint == 0) {
+        return 0;
+    }
+
+    return consumed_bytes;
}
size_t mbrlen(const char*, size_t, mbstate_t*)
@@ -198,9 +329,18 @@ int wctob(wint_t)
TODO();
}
-int mbsinit(const mbstate_t*)
+// Reports whether state is an initial conversion state. Returns 1 when state
+// is a null pointer or every byte stored in it is zero, and 0 otherwise.
+int mbsinit(const mbstate_t* state)
{
- dbgln("FIXME: Implement mbsinit()");
- TODO();
+    if (state == nullptr) {
+        return 1;
+    }
+
+    // OR all bytes together; any non-zero byte means a sequence is pending
+    unsigned char pending = 0;
+    for (size_t i = 0; i < sizeof(state->bytes); i++) {
+        pending |= state->bytes[i];
+    }
+
+    return pending == 0 ? 1 : 0;
}
}
diff --git a/Userland/Libraries/LibC/wchar.h b/Userland/Libraries/LibC/wchar.h
index a707bbf23f..2db7dc4da6 100644
--- a/Userland/Libraries/LibC/wchar.h
+++ b/Userland/Libraries/LibC/wchar.h
@@ -19,6 +19,7 @@ typedef __WINT_TYPE__ wint_t;
typedef unsigned long int wctype_t;
typedef struct {
+ unsigned char bytes[4]; // Bytes of a partially read multibyte sequence; all zero in the initial state
} mbstate_t;
size_t wcslen(const wchar_t*);