diff options
author | Thijs Schreijer <thijs@thijsschreijer.nl> | 2024-05-06 11:44:47 +0200 |
---|---|---|
committer | Thijs Schreijer <thijs@thijsschreijer.nl> | 2024-05-20 12:43:55 +0200 |
commit | dcd5d62501e61e0f6901d4d4687ab56430a4b8a7 (patch) | |
tree | 4501938052c0f62279eaae66c34811d4b5232fa2 | |
parent | 1d64b5790f26760cb830336ccca9d51474b73ae8 (diff) | |
download | luasystem-dcd5d62501e61e0f6901d4d4687ab56430a4b8a7.zip |
add example for reading a line from the terminal, non-blocking
Handles utf8, and character width
-rw-r--r-- | examples/compat.lua | 5 | ||||
-rw-r--r-- | examples/readline.lua | 476 | ||||
-rw-r--r-- | luasystem-scm-0.rockspec | 1 | ||||
-rw-r--r-- | spec/04-term_spec.lua | 192 | ||||
-rw-r--r-- | src/term.c | 330 | ||||
-rw-r--r-- | src/wcwidth.c | 285 | ||||
-rw-r--r-- | src/wcwidth.h | 21 | ||||
-rw-r--r-- | system/init.lua | 126 |
8 files changed, 1358 insertions, 78 deletions
diff --git a/examples/compat.lua b/examples/compat.lua index c00d44a..a59d964 100644 --- a/examples/compat.lua +++ b/examples/compat.lua @@ -5,12 +5,15 @@ local sys = require "system" -if sys.is_windows then +if sys.windows then -- Windows holds multiple copies of environment variables, to ensure `getenv` -- returns what `setenv` sets we need to use the `system.getenv` instead of -- `os.getenv`. os.getenv = sys.getenv -- luacheck: ignore + -- Set console output to UTF-8 encoding. + sys.setconsoleoutputcp(65001) + -- Set up the terminal to handle ANSI escape sequences on Windows. if sys.isatty(io.stdout) then sys.setconsoleflags(io.stdout, sys.getconsoleflags(io.stdout) + sys.COF_VIRTUAL_TERMINAL_PROCESSING) diff --git a/examples/readline.lua b/examples/readline.lua new file mode 100644 index 0000000..f1e6258 --- /dev/null +++ b/examples/readline.lua @@ -0,0 +1,476 @@ +local sys = require("system") + + +-- Mapping of key-sequences to key-names +local key_names = { + ["\27[C"] = "right", + ["\27[D"] = "left", + ["\127"] = "backspace", + ["\27[3~"] = "delete", + ["\27[H"] = "home", + ["\27[F"] = "end", + ["\27"] = "escape", + ["\9"] = "tab", + ["\27[Z"] = "shift-tab", +} + +if sys.windows then + key_names["\13"] = "enter" +else + key_names["\10"] = "enter" +end + + +-- Mapping of key-names to key-sequences +local key_sequences = {} +for k, v in pairs(key_names) do + key_sequences[v] = k +end + + +-- bell character +local function bell() + io.write("\7") + io.flush() +end + + +-- generate string to move cursor horizontally +-- positive goes right, negative goes left +local function cursor_move_horiz(n) + if n == 0 then + return "" + end + return "\27[" .. (n > 0 and n or -n) .. (n > 0 and "C" or "D") +end + + +-- -- generate string to move cursor vertically +-- -- positive goes down, negative goes up +-- local function cursor_move_vert(n) +-- if n == 0 then +-- return "" +-- end +-- return "\27[" .. (n > 0 and n or -n) .. 
(n > 0 and "B" or "A") +-- end + + +-- -- log to the line above the current line +-- local function log(...) +-- local arg = { n = select("#", ...), ...} +-- for i = 1, arg.n do +-- arg[i] = tostring(arg[i]) +-- end +-- arg = " " .. table.concat(arg, " ") .. " " + +-- io.write(cursor_move_vert(-1), arg, cursor_move_vert(1), cursor_move_horiz(-#arg)) +-- end + + +-- UTF8 character size in bytes (1 for an invalid first byte >= 248) +-- @tparam number b the byte value of the first byte of a UTF8 character +local function utf8size(b) + return b < 128 and 1 or b < 224 and 2 or b < 240 and 3 or b < 248 and 4 or 1 -- fallback keeps the result numeric, so utf8parse always advances +end + + + +local utf8parse do + local utf8_value_mt = { + __tostring = function(self) + return table.concat(self, "") + end, + } + + -- Parses a UTF8 string into list of individual characters. + -- key 'chars' gets the length in UTF8 characters, whilst # returns the length + -- for display (to handle double-width UTF8 chars). + -- in the list the double-width characters are followed by an empty string. + -- @tparam string s the UTF8 string to parse + -- @treturn table the list of characters + function utf8parse(s) + local t = setmetatable({ chars = 0 }, utf8_value_mt) + local i = 1 + while i <= #s do + local b = s:byte(i) + local w = utf8size(b) + local char = s:sub(i, i + w - 1) + t[#t + 1] = char + t.chars = t.chars + 1 + if sys.utf8cwidth(char) == 2 then + -- double width character, add empty string to keep the length of the + -- list the same as the character width on screen + t[#t + 1] = "" + end + i = i + w + end + return t + end +end + + + +-- inline tests for utf8parse +-- do +-- local t = utf8parse("a你b好c") +-- assert(t[1] == "a") +-- assert(t[2] == "你") -- double width +-- assert(t[3] == "") +-- assert(t[4] == "b") +-- assert(t[5] == "好") -- double width +-- assert(t[6] == "") +-- assert(t[7] == "c") +-- assert(#t == 7) -- size as displayed +-- end + + + +-- readline class + +local readline = {} +readline.__index = readline + + +--- Create a new readline object. 
+-- @tparam table opts the options for the readline object +-- @tparam[opt=""] string opts.prompt the prompt to display +-- @tparam[opt=80] number opts.max_length the maximum length of the input +-- @tparam[opt=""] string opts.value the default value +-- @tparam[opt=`#value + 1`] number opts.position the 1-based cursor position in the input (clamped to the valid range) +-- @tparam[opt={"\10"/"\13"}] table opts.exit_keys an array of keys that will cause the readline to exit +-- @treturn readline the new readline object +function readline.new(opts) + local value = utf8parse(opts.value or "") + local prompt = utf8parse(opts.prompt or "") + local pos = math.floor(opts.position or (#value + 1)) + pos = math.max(math.min(pos, (#value + 1)), 1) + local len = math.floor(opts.max_length or 80) + if len < 1 then + error("max_length must be at least 1", 2) + end + + if value.chars > len then + error("value is longer than max_length", 2) + end + + local exit_keys = {} + for _, key in ipairs(opts.exit_keys or {}) do + exit_keys[key] = true + end + if next(exit_keys) == nil then + -- nothing provided, default to Enter-key (exit_keys is a set: key-sequence -> true) + exit_keys[key_sequences.enter] = true + end + + local self = { + value = value, -- the default value + max_length = len, -- the maximum length of the input + prompt = prompt, -- the prompt to display + position = pos, -- the current position in the input + drawn_before = false, -- if the prompt has been drawn + exit_keys = exit_keys, -- the keys that will cause the readline to exit + } + + setmetatable(self, readline) + return self +end + + + +-- draw the prompt and the input value, and position the cursor. +local function draw(self, redraw) + if redraw or not self.drawn_before then + -- we are at start of prompt + self.drawn_before = true + else + -- we are at current cursor position, move to start of prompt + io.write(cursor_move_horiz(-(#self.prompt + self.position - 1))) -- position is 1-based: the cursor is (prompt + position - 1) columns past the prompt start + end + -- write prompt & value + io.write(tostring(self.prompt) .. 
tostring(self.value)) + -- clear remainder of input size + io.write(string.rep(" ", self.max_length - self.value.chars)) + io.write(cursor_move_horiz(-(self.max_length - self.value.chars))) + -- move to cursor position + io.write(cursor_move_horiz(-(#self.value + 1 - self.position))) + io.flush() +end + + +local handle_key do -- keyboard input handler + + local key_handlers + key_handlers = { + left = function(self) + if self.position == 1 then + bell() + return + end + + local new_pos = self.position - 1 + while self.value[new_pos] == "" do -- skip empty strings; double width chars + new_pos = new_pos - 1 + end + + io.write(cursor_move_horiz(-(self.position - new_pos))) + io.flush() + self.position = new_pos + end, + + right = function(self) + if self.position == #self.value + 1 then + bell() + return + end + + local new_pos = self.position + 1 + while self.value[new_pos] == "" do -- skip empty strings; double width chars + new_pos = new_pos + 1 + end + + io.write(cursor_move_horiz(new_pos - self.position)) + io.flush() + self.position = new_pos + end, + + backspace = function(self) + if self.position == 1 then + bell() + return + end + + while self.value[self.position - 1] == "" do -- remove empty strings; double width chars + io.write(cursor_move_horiz(-1)) + self.position = self.position - 1 + table.remove(self.value, self.position) + end + -- remove char itself + io.write(cursor_move_horiz(-1)) + self.position = self.position - 1 + table.remove(self.value, self.position) + self.value.chars = self.value.chars - 1 + draw(self) + end, + + home = function(self) + io.write(cursor_move_horiz(1 - self.position)) + io.flush() -- flush like left/right do, so the move is sent immediately + self.position = 1 + end, + + ["end"] = function(self) + io.write(cursor_move_horiz(#self.value + 1 - self.position)) + io.flush() -- flush like left/right do, so the move is sent immediately + self.position = #self.value + 1 + end, + + delete = function(self) + if self.position > #self.value then + bell() + return + end + + key_handlers.right(self) + key_handlers.backspace(self) + 
end, + } + + + -- handles a single input key/ansi-sequence. + -- @tparam string key the key or ansi-sequence (from `system.readansi`) + -- @tparam string keytype the type of the key, either "char" or "ansi" (from `system.readansi`) + -- @treturn string status the status of the key handling, either "ok", "exit_key" or an error message + function handle_key(self, key, keytype) + if self.exit_keys[key] then + -- registered exit key + return "exit_key" + end + + local handler = key_handlers[key_names[key] or true ] + if handler then + handler(self) + return "ok" + end + + if keytype == "ansi" then + -- we got an ansi sequence, but dunno how to handle it, ignore + -- print("unhandled ansi: ", key:sub(2,-1), string.byte(key, 1, -1)) + bell() + return "ok" + end + + -- just a single key + if key < " " then + -- control character + bell() + return "ok" + end + + if self.value.chars >= self.max_length then + bell() + return "ok" + end + + -- insert the key into the value + if sys.utf8cwidth(key) == 2 then + -- double width character, insert empty string after it + table.insert(self.value, self.position, "") + table.insert(self.value, self.position, key) + self.position = self.position + 2 + io.write(cursor_move_horiz(2)) + else + table.insert(self.value, self.position, key) + self.position = self.position + 1 + io.write(cursor_move_horiz(1)) + end + self.value.chars = self.value.chars + 1 + draw(self) + return "ok" + end +end + + + +--- Get_size returns the maximum size of the input box (prompt + input). +-- The size is in rows and columns. Columns is determined by +-- the prompt and the `max_length * 2` (characters can be double-width). +-- @treturn number the number of rows (always 1) +-- @treturn number the number of columns +function readline:get_size() + return 1, #self.prompt + self.max_length * 2 +end + + + +--- Get coordinates of the cursor in the input box (prompt + input). +-- The coordinates are 1-based. 
They are returned as row and column, within the +-- size as reported by `get_size`. +-- @treturn number the row of the cursor (always 1) +-- @treturn number the column of the cursor +function readline:get_cursor() + return 1, #self.prompt + self.position +end + + + +--- Set the coordinates of the cursor in the input box (prompt + input). +-- The coordinates are 1-based. They are expected to be within the +-- size as reported by `get_size`, and beyond the prompt. +-- If the position is invalid, it will be corrected. +-- Use the results to check if the position was adjusted. +-- @tparam number row the row of the cursor (always 1) +-- @tparam number col the column of the cursor +-- @return results of get_cursor +function readline:set_cursor(row, col) + local l_prompt = #self.prompt + local l_value = #self.value + + if col < l_prompt + 1 then + col = l_prompt + 1 + elseif col > l_prompt + l_value + 1 then + col = l_prompt + l_value + 1 + end + + while self.value[col - l_prompt] == "" do + col = col - 1 -- on an empty string, so move back to start of double-width char + end + + local new_pos = col - l_prompt + + io.write(cursor_move_horiz(new_pos - self.position)) -- emit the move; positive delta goes right + io.flush() + + self.position = new_pos + return self:get_cursor() +end + + + +--- Read a line of input from the user. +-- It will first print the `prompt` and then wait for input. Ensure the cursor +-- is at the correct position before calling this function. This function will +-- do all cursor movements in a relative way. +-- Can be called again after an exit-key or timeout has occurred. Just make sure +-- the cursor is at the same position where it was when it returned the last time. +-- Alternatively the cursor can be set to the position of the prompt (the position +-- the cursor was in before the first call), and the parameter `redraw` can be set +-- to `true`. 
+-- @tparam[opt=math.huge] number timeout the maximum time to wait for input in seconds +-- @tparam[opt=false] boolean redraw if `true` the prompt will be redrawn (cursor must be at prompt position!) +-- @treturn[1] string the input string as entered the user +-- @treturn[1] string the exit-key used to exit the readline (see `new`) +-- @treturn[2] nil when input is incomplete +-- @treturn[2] string error message, the reason why the input is incomplete, `"timeout"`, or an error reading a key +function readline:__call(timeout, redraw) + draw(self, redraw) + timeout = timeout or math.huge + local timeout_end = sys.gettime() + timeout + + while true do + local key, keytype = sys.readansi(timeout_end - sys.gettime()) + if not key then + -- error or timeout + return nil, keytype + end + + local status = handle_key(self, key, keytype) + if status == "exit_key" then + return tostring(self.value), key + + elseif status ~= "ok" then + error("unknown status received: " .. tostring(status)) + end + end +end + + + +-- return readline + + + + +-- setup Windows console to handle ANSI processing +local of_in = sys.getconsoleflags(io.stdin) +local cp_in = sys.getconsolecp() +-- sys.setconsolecp(65001) +sys.setconsolecp(850) +local of_out = sys.getconsoleflags(io.stdout) +local cp_out = sys.getconsoleoutputcp() +sys.setconsoleoutputcp(65001) +sys.setconsoleflags(io.stdout, sys.getconsoleflags(io.stdout) + sys.COF_VIRTUAL_TERMINAL_PROCESSING) +sys.setconsoleflags(io.stdin, sys.getconsoleflags(io.stdin) + sys.CIF_VIRTUAL_TERMINAL_INPUT) + +-- setup Posix terminal to use non-blocking mode, and disable line-mode +local of_attr = sys.tcgetattr(io.stdin) +local of_block = sys.getnonblock(io.stdin) +sys.setnonblock(io.stdin, true) +sys.tcsetattr(io.stdin, sys.TCSANOW, { + lflag = of_attr.lflag - sys.L_ICANON - sys.L_ECHO, -- disable canonical mode and echo +}) + + +local rl = readline.new{ + prompt = "Enter something: ", + max_length = 60, + value = "Hello, 你-好 World 🚀!", + -- position = 
2, + exit_keys = {key_sequences.enter, "\27", "\t", "\27[Z"}, -- enter, escape, tab, shift-tab +} + + +local result, key = rl() +print("") -- newline after input, to move cursor down from the input line +print("Result (string): '" .. result .. "'") +print("Result (bytes):", result:byte(1,-1)) +print("Exit-Key (bytes):", key:byte(1,-1)) + + +-- Clean up afterwards +sys.setnonblock(io.stdin, false) +sys.setconsoleflags(io.stdout, of_out) +sys.setconsoleflags(io.stdin, of_in) +sys.tcsetattr(io.stdin, sys.TCSANOW, of_attr) +sys.setnonblock(io.stdin, of_block) +sys.setconsolecp(cp_in) +sys.setconsoleoutputcp(cp_out) diff --git a/luasystem-scm-0.rockspec b/luasystem-scm-0.rockspec index dac3d9b..00a442c 100644 --- a/luasystem-scm-0.rockspec +++ b/luasystem-scm-0.rockspec @@ -60,6 +60,7 @@ local function make_platform(plat) 'src/random.c', 'src/term.c', 'src/bitflags.c', + 'src/wcwidth.c', }, defines = defines[plat], libraries = libraries[plat], diff --git a/spec/04-term_spec.lua b/spec/04-term_spec.lua index 9ca37e9..ee4145a 100644 --- a/spec/04-term_spec.lua +++ b/spec/04-term_spec.lua @@ -4,6 +4,19 @@ require("spec.helpers") describe("Terminal:", function() + local wincodepage + + setup(function() + wincodepage = system.getconsoleoutputcp() + assert(system.setconsoleoutputcp(65001)) + end) + + teardown(function() + assert(system.setconsoleoutputcp(wincodepage)) + end) + + + describe("isatty()", function() local newtmpfile = require("pl.path").tmpname @@ -93,7 +106,7 @@ describe("Terminal:", function() - describe("getconsoleflags()", function() + pending("getconsoleflags()", function() pending("returns the consoleflags, if called without flags", function() print"1" @@ -111,4 +124,181 @@ for k,v in pairs(debug.getinfo(system.isatty)) do print(k,v) end end) end) + + + + pending("setconsoleflags()", function() + + pending("sets the consoleflags, if called with flags", function() + end) + + end) + + + + pending("tcgetattr()", function() + + pending("sets the consoleflags, 
if called with flags", function() + end) + + end) + + + + pending("tcsetattr()", function() + + pending("sets the consoleflags, if called with flags", function() + end) + + end) + + + + pending("getconsolecp()", function() + + pending("sets the consoleflags, if called with flags", function() + end) + + end) + + + + pending("setconsolecp()", function() + + pending("sets the consoleflags, if called with flags", function() + end) + + end) + + + + pending("getconsoleoutputcp()", function() + + pending("sets the consoleflags, if called with flags", function() + end) + + end) + + + + pending("setconsoleoutputcp()", function() + + pending("sets the consoleflags, if called with flags", function() + end) + + end) + + + + pending("getnonblock()", function() + + pending("sets the consoleflags, if called with flags", function() + end) + + end) + + + + pending("setnonblock()", function() + + pending("sets the consoleflags, if called with flags", function() + end) + + end) + + + + pending("termsize()", function() + + pending("sets the consoleflags, if called with flags", function() + end) + + end) + + + + describe("utf8cwidth()", function() + + local ch1 = string.char(226, 130, 172) -- "€" single + local ch2 = string.char(240, 159, 154, 128) -- "🚀" double + local ch3 = string.char(228, 189, 160) -- "你" double + local ch4 = string.char(229, 165, 189) -- "好" double + + it("handles zero width characters", function() + assert.same({0}, {system.utf8cwidth("")}) -- empty string returns 0-size + assert.same({nil, 'Character width determination failed'}, {system.utf8cwidth("\a")}) -- bell character + assert.same({nil, 'Character width determination failed'}, {system.utf8cwidth("\27")}) -- escape character + end) + + it("handles single width characters", function() + assert.same({1}, {system.utf8cwidth("a")}) + assert.same({1}, {system.utf8cwidth(ch1)}) + end) + + it("handles double width characters", function() + assert.same({2}, {system.utf8cwidth(ch2)}) + assert.same({2}, 
{system.utf8cwidth(ch3)}) + assert.same({2}, {system.utf8cwidth(ch4)}) + end) + + it("returns the width of the first character in the string", function() + assert.same({nil, 'Character width determination failed'}, {system.utf8cwidth("\a" .. ch1)}) -- bell character + EURO + assert.same({1}, {system.utf8cwidth(ch1 .. ch2)}) + assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. ch4)}) + end) + + end) + + + + describe("utf8swidth()", function() + + local ch1 = string.char(226, 130, 172) -- "€" single + local ch2 = string.char(240, 159, 154, 128) -- "🚀" double + local ch3 = string.char(228, 189, 160) -- "你" double + local ch4 = string.char(229, 165, 189) -- "好" double + + it("handles zero width characters", function() + assert.same({0}, {system.utf8swidth("")}) -- empty string returns 0-size + assert.same({nil, 'Character width determination failed'}, {system.utf8swidth("\a")}) -- bell character + assert.same({nil, 'Character width determination failed'}, {system.utf8swidth("\27")}) -- escape character + end) + + it("handles multi-character UTF8 strings", function() + assert.same({15}, {system.utf8swidth("hello " .. ch1 .. ch2 .. " world")}) + assert.same({16}, {system.utf8swidth("hello " .. ch3 .. ch4 .. 
" world")}) + end) + + end) + + + + pending("termbackup()", function() + + end) + + + + pending("termrestore()", function() + + end) + + + + pending("termwrap()", function() + + end) + + + + pending("readkey()", function() + + end) + + + + pending("readansi()", function() + + end) + end) @@ -15,6 +15,7 @@ #ifdef _WIN32 # include <windows.h> +# include <locale.h> #else # include <termios.h> # include <string.h> @@ -22,8 +23,16 @@ # include <fcntl.h> # include <sys/ioctl.h> # include <unistd.h> +# include <wchar.h> +# include <locale.h> #endif + +// Windows does not have a wcwidth function, so we use compatibilty code from +// http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c by Markus Kuhn +#include "wcwidth.h" + + #ifdef _WIN32 // after an error is returned, GetLastError() result can be passed to this function to get a string // representation of the error on the stack. @@ -423,7 +432,7 @@ static int lst_getconsoleflags(lua_State *L) // see https://github.com/luaposix/luaposix /*** -Get termios state. +Get termios state (Posix). The terminal attributes is a table with the following fields: - `iflag` input flags @@ -511,7 +520,7 @@ static int lst_tcgetattr(lua_State *L) /*** -Set termios state. +Set termios state (Posix). This function will set the flags as given. The `I_`, `O_`, and `L_` constants are available on the module table. They are the respective @@ -689,13 +698,28 @@ static int lst_getnonblock(lua_State *L) * Reading keyboard input *-------------------------------------------------------------------------*/ +#ifdef _WIN32 +// Define a static buffer for UTF-8 characters +static char utf8_buffer[4]; +static int utf8_buffer_len = 0; +static int utf8_buffer_index = 0; +#endif + + /*** -Reads a key from the console non-blocking. +Reads a key from the console non-blocking. This function should not be called +directly, but through the `system.readkey` or `system.readansi` functions. It +will return the next byte from the input stream, or `nil` if no key was pressed. 
+ On Posix, `io.stdin` must be set to non-blocking mode using `setnonblock` -before calling this function. Otherwise it will block. +before calling this function. Otherwise it will block. No conversions are +done on Posix, so the byte read is returned as-is. -@function readkey -@treturn[1] integer the key code of the key that was pressed +On Windows this reads a wide character and converts it to UTF-8. Multi-byte +sequences will be buffered internally and returned one byte at a time. + +@function _readkey +@treturn[1] integer the byte read from the input stream @treturn[2] nil if no key was pressed @treturn[3] nil on error @treturn[3] string error message @@ -703,20 +727,87 @@ before calling this function. Otherwise it will block. */ static int lst_readkey(lua_State *L) { #ifdef _WIN32 - if (_kbhit()) { - int ch = _getch(); - if (ch == EOF) { - // Error handling for end-of-file or read error - lua_pushnil(L); - lua_pushliteral(L, "_getch error"); - return 2; + if (utf8_buffer_len > 0) { + // Buffer not empty, return the next byte + lua_pushinteger(L, (unsigned char)utf8_buffer[utf8_buffer_index]); + utf8_buffer_index++; + utf8_buffer_len--; + // printf("returning from buffer: %d\n", luaL_checkinteger(L, -1)); + if (utf8_buffer_len == 0) { + utf8_buffer_index = 0; } - lua_pushinteger(L, (unsigned char)ch); return 1; } - return 0; + + if (!_kbhit()) { + return 0; + } + + wchar_t wc = _getwch(); + // printf("----\nread wchar_t: %x\n", wc); + if (wc == WEOF) { + lua_pushnil(L); + lua_pushliteral(L, "read error"); + return 2; + } + + if (sizeof(wchar_t) == 2) { + // printf("2-byte wchar_t\n"); + // only 2 bytes wide, not 4 + if (wc >= 0xD800 && wc <= 0xDBFF) { + // printf("2-byte wchar_t, received high, getting low...\n"); + + // we got a high surrogate, so we need to read the next one as the low surrogate + if (!_kbhit()) { + lua_pushnil(L); + lua_pushliteral(L, "incomplete surrogate pair"); + return 2; + } + + wchar_t wc2 = _getwch(); + // printf("read wchar_t 2: 
%x\n", wc2); + if (wc2 == WEOF) { + lua_pushnil(L); + lua_pushliteral(L, "read error"); + return 2; + } + + if (wc2 < 0xDC00 || wc2 > 0xDFFF) { + lua_pushnil(L); + lua_pushliteral(L, "invalid surrogate pair"); + return 2; + } + // printf("2-byte pair complete now\n"); + wchar_t wch_pair[2] = { wc, wc2 }; + utf8_buffer_len = WideCharToMultiByte(CP_UTF8, 0, wch_pair, 2, utf8_buffer, sizeof(utf8_buffer), NULL, NULL); + + } else { + // printf("2-byte wchar_t, no surrogate pair\n"); + // not a high surrogate, so we can handle just the 2 bytes directly + utf8_buffer_len = WideCharToMultiByte(CP_UTF8, 0, &wc, 1, utf8_buffer, sizeof(utf8_buffer), NULL, NULL); + } + + } else { + // printf("4-byte wchar_t\n"); + // 4 bytes wide, so handle as UTF-32 directly + utf8_buffer_len = WideCharToMultiByte(CP_UTF8, 0, &wc, 1, utf8_buffer, sizeof(utf8_buffer), NULL, NULL); + } + // printf("utf8_buffer_len: %d\n", utf8_buffer_len); + utf8_buffer_index = 0; + if (utf8_buffer_len <= 0) { + lua_pushnil(L); + lua_pushliteral(L, "UTF-8 conversion error"); + return 2; + } + + lua_pushinteger(L, (unsigned char)utf8_buffer[utf8_buffer_index]); + utf8_buffer_index++; + utf8_buffer_len--; + // printf("returning from buffer: %x\n", luaL_checkinteger(L, -1)); + return 1; #else + // Posix implementation char ch; ssize_t bytes_read = read(STDIN_FILENO, &ch, 1); if (bytes_read > 0) { @@ -782,6 +873,205 @@ static int lst_termsize(lua_State *L) { /*------------------------------------------------------------------------- + * utf8 conversion and support + *-------------------------------------------------------------------------*/ + +// Function to convert a single UTF-8 character to a Unicode code point (uint32_t) +// To prevent having to do codepage/locale changes, we use a custom implementation +int utf8_to_wchar(const char *utf8, size_t len, mk_wchar_t *codepoint) { + if (len == 0) { + return -1; // No input provided + } + + unsigned char c = (unsigned char)utf8[0]; + if (c <= 0x7F) { + *codepoint = 
c; + return 1; + } else if ((c & 0xE0) == 0xC0) { + if (len < 2) return -1; // Not enough bytes + *codepoint = ((utf8[0] & 0x1F) << 6) | (utf8[1] & 0x3F); + return 2; + } else if ((c & 0xF0) == 0xE0) { + if (len < 3) return -1; // Not enough bytes + *codepoint = ((utf8[0] & 0x0F) << 12) | ((utf8[1] & 0x3F) << 6) | (utf8[2] & 0x3F); + return 3; + } else if ((c & 0xF8) == 0xF0) { + if (len < 4) return -1; // Not enough bytes + *codepoint = ((utf8[0] & 0x07) << 18) | ((utf8[1] & 0x3F) << 12) | ((utf8[2] & 0x3F) << 6) | (utf8[3] & 0x3F); + return 4; + } else { + // Invalid UTF-8 character + return -1; + } +} + + +/*** +Get the width of a utf8 character for terminal display. +@function utf8cwidth +@tparam string utf8_char the utf8 character to check, only the width of the first character will be returned +@treturn[1] int the display width in columns of the first character in the string (0 for an empty string) +@treturn[2] nil +@treturn[2] string error message +*/ +int lst_utf8cwidth(lua_State *L) { + const char *utf8_char; + size_t utf8_len; + utf8_char = luaL_checklstring(L, 1, &utf8_len); + int width = 0; + + mk_wchar_t wc; + + if (utf8_len == 0) { + lua_pushinteger(L, 0); + return 1; + } + + // Convert the UTF-8 string to a wide character + int bytes_processed = utf8_to_wchar(utf8_char, utf8_len, &wc); + if (bytes_processed == -1) { + lua_pushnil(L); + lua_pushstring(L, "Invalid UTF-8 character"); + return 2; + } + + // Get the width of the wide character + width = mk_wcwidth(wc); + if (width == -1) { + lua_pushnil(L); + lua_pushstring(L, "Character width determination failed"); + return 2; + } + + lua_pushinteger(L, width); + return 1; +} + + + + +/*** +Get the width of a utf8 string for terminal display. 
+@function utf8swidth +@tparam string utf8_string the utf8 string to check +@treturn[1] int the display width of the string in columns (0 for an empty string) +@treturn[2] nil +@treturn[2] string error message +*/ +int lst_utf8swidth(lua_State *L) { + const char *utf8_str; + size_t utf8_len; + utf8_str = luaL_checklstring(L, 1, &utf8_len); + int total_width = 0; + + if (utf8_len == 0) { + lua_pushinteger(L, 0); + return 1; + } + + int bytes_processed = 0; + size_t i = 0; + mk_wchar_t wc; + + while (i < utf8_len) { + bytes_processed = utf8_to_wchar(utf8_str + i, utf8_len - i, &wc); + if (bytes_processed == -1) { + lua_pushnil(L); + lua_pushstring(L, "Invalid UTF-8 character"); + return 2; + } + + int width = mk_wcwidth(wc); + if (width == -1) { + lua_pushnil(L); + lua_pushstring(L, "Character width determination failed"); + return 2; + } + + total_width += width; + i += bytes_processed; + } + + lua_pushinteger(L, total_width); + return 1; +} + + + +/*------------------------------------------------------------------------- + * Windows codepage functions + *-------------------------------------------------------------------------*/ + + +/*** +Gets the current console code page (Windows). +@function getconsolecp +@treturn[1] int the current code page (always 65001 on Posix systems) +*/ +static int lst_getconsolecp(lua_State *L) { + unsigned int cp = 65001; +#ifdef _WIN32 + cp = GetConsoleCP(); +#endif + lua_pushinteger(L, cp); + return 1; +} + + + +/*** +Sets the current console code page (Windows). +@function setconsolecp +@tparam int cp the code page to set, use 65001 for UTF-8 +@treturn[1] bool `true` on success (always `true` on Posix systems), `false` on failure +*/ +static int lst_setconsolecp(lua_State *L) { + unsigned int cp = (unsigned int)luaL_checkinteger(L, 1); + int success = TRUE; +#ifdef _WIN32 + success = SetConsoleCP(cp); // nonzero on success, zero on failure +#endif + lua_pushboolean(L, success); + return 1; +} + + + +/*** +Gets the current console output code page (Windows). 
+@function getconsoleoutputcp +@treturn[1] int the current code page (always 65001 on Posix systems) +*/ +static int lst_getconsoleoutputcp(lua_State *L) { + unsigned int cp = 65001; +#ifdef _WIN32 + cp = GetConsoleOutputCP(); +#endif + lua_pushinteger(L, cp); + return 1; +} + + + +/*** +Sets the current console output code page (Windows). +@function setconsoleoutputcp +@tparam int cp the code page to set, use 65001 for UTF-8 +@treturn[1] bool `true` on success (always `true` on Posix systems), `false` on failure +*/ +static int lst_setconsoleoutputcp(lua_State *L) { + unsigned int cp = (unsigned int)luaL_checkinteger(L, 1); + int success = TRUE; +#ifdef _WIN32 + success = SetConsoleOutputCP(cp); // nonzero on success, zero on failure +#endif + lua_pushboolean(L, success); + return 1; +} + + + +/*------------------------------------------------------------------------- * Initializes module *-------------------------------------------------------------------------*/ @@ -791,10 +1081,16 @@ static luaL_Reg func[] = { { "setconsoleflags", lst_setconsoleflags }, { "tcgetattr", lst_tcgetattr }, { "tcsetattr", lst_tcsetattr }, - { "getnonblock", lst_setnonblock }, + { "getnonblock", lst_getnonblock }, { "setnonblock", lst_setnonblock }, - { "readkey", lst_readkey }, + { "_readkey", lst_readkey }, { "termsize", lst_termsize }, + { "utf8cwidth", lst_utf8cwidth }, + { "utf8swidth", lst_utf8swidth }, + { "getconsolecp", lst_getconsolecp }, + { "setconsolecp", lst_setconsolecp }, + { "getconsoleoutputcp", lst_getconsoleoutputcp }, + { "setconsoleoutputcp", lst_setconsoleoutputcp }, { NULL, NULL } }; diff --git a/src/wcwidth.c b/src/wcwidth.c new file mode 100644 index 0000000..6032158 --- /dev/null +++ b/src/wcwidth.c @@ -0,0 +1,285 @@ +// This file was modified from the original versions, check "modified:" comments for details +// Character range updates (both the table and the +1 check) were generated using ChatGPT. + +/* + * This is an implementation of wcwidth() and wcswidth() (defined in + * IEEE Std 1003.1-2001) for Unicode. 
+ * + * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html + * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html + * + * In fixed-width output devices, Latin characters all occupy a single + * "cell" position of equal width, whereas ideographic CJK characters + * occupy two such cells. Interoperability between terminal-line + * applications and (teletype-style) character terminals using the + * UTF-8 encoding requires agreement on which character should advance + * the cursor by how many cell positions. No established formal + * standards exist at present on which Unicode character shall occupy + * how many cell positions on character terminals. These routines are + * a first attempt of defining such behavior based on simple rules + * applied to data provided by the Unicode Consortium. + * + * For some graphical characters, the Unicode standard explicitly + * defines a character-cell width via the definition of the East Asian + * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes. + * In all these cases, there is no ambiguity about which width a + * terminal shall use. For characters in the East Asian Ambiguous (A) + * class, the width choice depends purely on a preference of backward + * compatibility with either historic CJK or Western practice. + * Choosing single-width for these characters is easy to justify as + * the appropriate long-term solution, as the CJK practice of + * displaying these characters as double-width comes from historic + * implementation simplicity (8-bit encoded characters were displayed + * single-width and 16-bit ones double-width, even for Greek, + * Cyrillic, etc.) and not any typographic considerations. + * + * Much less clear is the choice of width for the Not East Asian + * (Neutral) class. Existing practice does not dictate a width for any + * of these characters. 
It would nevertheless make sense + * typographically to allocate two character cells to characters such + * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be + * represented adequately with a single-width glyph. The following + * routines at present merely assign a single-cell width to all + * neutral characters, in the interest of simplicity. This is not + * entirely satisfactory and should be reconsidered before + * establishing a formal standard in this area. At the moment, the + * decision which Not East Asian (Neutral) characters should be + * represented by double-width glyphs cannot yet be answered by + * applying a simple rule from the Unicode database content. Setting + * up a proper standard for the behavior of UTF-8 character terminals + * will require a careful analysis not only of each Unicode character, + * but also of each presentation form, something the author of these + * routines has avoided to do so far. + * + * http://www.unicode.org/unicode/reports/tr11/ + * + * Markus Kuhn -- 2007-05-26 (Unicode 5.0) + * + * Permission to use, copy, modify, and distribute this software + * for any purpose and without fee is hereby granted. The author + * disclaims all warranties with regard to this software. 
+ * + * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + */ + +#include "wcwidth.h" // modified: used to define mk_wchar_t + +struct interval { + int first; + int last; +}; + +/* auxiliary function for binary search in interval table + * + * Returns 1 when ucs lies inside one of the [first, last] intervals of + * table, 0 otherwise. The table must be sorted in ascending order for the + * search to be valid. Note that max is the index of the LAST entry (not the + * number of entries): table[max] is dereferenced by the range check below. + */ +static int bisearch(mk_wchar_t ucs, const struct interval *table, int max) { // modified: use mk_wchar_t + int min = 0; + int mid; + + /* quick rejection: ucs is outside the span covered by the whole table */ + if (ucs < table[0].first || ucs > table[max].last) + return 0; + while (max >= min) { + mid = (min + max) / 2; + if (ucs > table[mid].last) + min = mid + 1; + else if (ucs < table[mid].first) + max = mid - 1; + else + return 1; + } + + return 0; +} + + +/* The following two functions define the column width of an ISO 10646 + * character as follows: + * + * - The null character (U+0000) has a column width of 0. + * + * - Other C0/C1 control characters and DEL will lead to a return + * value of -1. + * + * - Non-spacing and enclosing combining characters (general + * category code Mn or Me in the Unicode database) have a + * column width of 0. + * + * - SOFT HYPHEN (U+00AD) has a column width of 1. + * + * - Other format characters (general category code Cf in the Unicode + * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0. + * + * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF) + * have a column width of 0. + * + * - Spacing characters in the East Asian Wide (W) or East Asian + * Full-width (F) category as defined in Unicode Technical + * Report #11 have a column width of 2. + * + * - All remaining characters (including all printable + * ISO 8859-1 and WGL4 characters, Unicode control characters, + * etc.) have a column width of 1. + * + * This implementation assumes that mk_wchar_t characters are encoded + * in ISO 10646. 
+ */ + +int mk_wcwidth(mk_wchar_t ucs) // modified: use mk_wchar_t +{ + /* sorted list of non-overlapping intervals of non-spacing characters */ + /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ + static const struct interval combining[] = { // modified: added new ranges to the list + { 0x0300, 0x036F }, { 0x0483, 0x0489 }, { 0x0591, 0x05BD }, + { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C5 }, + { 0x05C7, 0x05C7 }, { 0x0600, 0x0605 }, { 0x0610, 0x061A }, + { 0x061C, 0x061C }, { 0x064B, 0x065F }, { 0x0670, 0x0670 }, + { 0x06D6, 0x06DC }, { 0x06DF, 0x06E4 }, { 0x06E7, 0x06E8 }, + { 0x06EA, 0x06ED }, { 0x0711, 0x0711 }, { 0x0730, 0x074A }, + { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x07FD, 0x07FD }, + { 0x0816, 0x0819 }, { 0x081B, 0x0823 }, { 0x0825, 0x0827 }, + { 0x0829, 0x082D }, { 0x0859, 0x085B }, { 0x08D3, 0x08E1 }, + { 0x08E3, 0x0903 }, { 0x093A, 0x093C }, { 0x093E, 0x094F }, + { 0x0951, 0x0957 }, { 0x0962, 0x0963 }, { 0x0981, 0x0983 }, + { 0x09BC, 0x09BC }, { 0x09BE, 0x09C4 }, { 0x09C7, 0x09C8 }, + { 0x09CB, 0x09CD }, { 0x09D7, 0x09D7 }, { 0x09E2, 0x09E3 }, + { 0x09FE, 0x09FE }, { 0x0A01, 0x0A03 }, { 0x0A3C, 0x0A3C }, + { 0x0A3E, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, + { 0x0A51, 0x0A51 }, { 0x0A70, 0x0A71 }, { 0x0A75, 0x0A75 }, + { 0x0A81, 0x0A83 }, { 0x0ABC, 0x0ABC }, { 0x0ABE, 0x0AC5 }, + { 0x0AC7, 0x0AC9 }, { 0x0ACB, 0x0ACD }, { 0x0AE2, 0x0AE3 }, + { 0x0AFA, 0x0AFF }, { 0x0B01, 0x0B03 }, { 0x0B3C, 0x0B3C }, + { 0x0B3E, 0x0B44 }, { 0x0B47, 0x0B48 }, { 0x0B4B, 0x0B4D }, + { 0x0B55, 0x0B57 }, { 0x0B62, 0x0B63 }, { 0x0B82, 0x0B82 }, + { 0x0BBE, 0x0BC2 }, { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD }, + { 0x0BD7, 0x0BD7 }, { 0x0C00, 0x0C04 }, { 0x0C3E, 0x0C44 }, + { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, + { 0x0C62, 0x0C63 }, { 0x0C81, 0x0C83 }, { 0x0CBC, 0x0CBC }, + { 0x0CBE, 0x0CC4 }, { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD }, + { 0x0CD5, 0x0CD6 }, { 0x0CE2, 0x0CE3 }, { 0x0D00, 0x0D03 }, + { 0x0D3B, 
0x0D3C }, { 0x0D3E, 0x0D44 }, { 0x0D46, 0x0D48 }, + { 0x0D4A, 0x0D4D }, { 0x0D57, 0x0D57 }, { 0x0D62, 0x0D63 }, + { 0x0D82, 0x0D83 }, { 0x0DCF, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, + { 0x0DD8, 0x0DDF }, { 0x0DF2, 0x0DF3 }, { 0x0E31, 0x0E31 }, + { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, + { 0x0EB4, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, + { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, + { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, + { 0x0F8D, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, + { 0x102D, 0x1030 }, { 0x1032, 0x1037 }, { 0x1039, 0x103A }, + { 0x103D, 0x103E }, { 0x1058, 0x1059 }, { 0x105E, 0x1060 }, + { 0x1071, 0x1074 }, { 0x1082, 0x1082 }, { 0x1085, 0x1086 }, + { 0x108D, 0x108D }, { 0x109D, 0x109D }, + { 0x1160, 0x11FF }, /* modified: restored Hangul Jamo medial vowels and final consonants (documented as width 0) */ + { 0x135D, 0x135F }, + { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, + { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, + { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, + { 0x180B, 0x180E }, { 0x1885, 0x1886 }, { 0x18A9, 0x18A9 }, + { 0x1920, 0x1922 }, { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, + { 0x1939, 0x193B }, { 0x1A17, 0x1A18 }, { 0x1A1B, 0x1A1B }, + { 0x1A56, 0x1A56 }, { 0x1A58, 0x1A5E }, { 0x1A60, 0x1A60 }, + { 0x1A62, 0x1A62 }, { 0x1A65, 0x1A6C }, { 0x1A73, 0x1A7C }, + { 0x1A7F, 0x1A7F }, { 0x1AB0, 0x1ACE }, { 0x1B00, 0x1B03 }, + { 0x1B34, 0x1B34 }, { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, + { 0x1B42, 0x1B42 }, { 0x1B6B, 0x1B73 }, { 0x1B80, 0x1B82 }, + { 0x1BA1, 0x1BA1 }, { 0x1BA6, 0x1BA7 }, { 0x1BAA, 0x1BAA }, + { 0x1BAB, 0x1BAD }, { 0x1BE6, 0x1BE6 }, { 0x1BE8, 0x1BE9 }, + { 0x1BED, 0x1BED }, { 0x1BEF, 0x1BF1 }, { 0x1C2C, 0x1C33 }, + { 0x1C36, 0x1C37 }, { 0x1CD0, 0x1CD2 }, { 0x1CD4, 0x1CE8 }, + { 0x1CED, 0x1CED }, { 0x1CF4, 0x1CF4 }, { 0x1CF8, 0x1CF9 }, + { 0x1DC0, 0x1DF9 }, { 0x1DFB, 0x1DFF }, + /* modified: restored zero-width and bidi format characters (Cf), incl. ZERO WIDTH SPACE U+200B and ZWJ U+200D */ + { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2064 }, + { 0x2066, 0x206F }, { 0x20D0, 0x20DC }, + { 0x20E1, 0x20E1 }, { 0x20E5, 0x20F0 }, { 0x2CEF, 0x2CF1 }, + { 0x2D7F, 0x2D7F }, { 0x2DE0, 0x2DFF }, { 0x302A, 0x302D }, + { 0x3099, 0x309A }, { 
0xA66F, 0xA672 }, { 0xA674, 0xA67D }, + { 0xA69E, 0xA69F }, { 0xA6F0, 0xA6F1 }, { 0xA802, 0xA802 }, + { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 }, + { 0xA82C, 0xA82C }, { 0xA8C4, 0xA8C5 }, { 0xA8E0, 0xA8F1 }, + { 0xA8FF, 0xA8FF }, { 0xA926, 0xA92D }, { 0xA947, 0xA951 }, + { 0xA980, 0xA982 }, { 0xA9B3, 0xA9B3 }, { 0xA9B6, 0xA9B9 }, + { 0xA9BC, 0xA9BD }, { 0xA9E5, 0xA9E5 }, { 0xAA29, 0xAA2E }, + { 0xAA31, 0xAA32 }, { 0xAA35, 0xAA36 }, { 0xAA43, 0xAA43 }, + { 0xAA4C, 0xAA4C }, { 0xAA7C, 0xAA7C }, { 0xAAB0, 0xAAB0 }, + { 0xAAB2, 0xAAB4 }, { 0xAAB7, 0xAAB8 }, { 0xAABE, 0xAABF }, + { 0xAAC1, 0xAAC1 }, { 0xAAEB, 0xAAEB }, { 0xAAEE, 0xAAEF }, + { 0xAAF5, 0xAAF6 }, { 0xABE3, 0xABE4 }, { 0xABE6, 0xABE7 }, + { 0xABE9, 0xABEA }, { 0xABEC, 0xABED }, { 0xFB1E, 0xFB1E }, + { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE2F }, + { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB }, /* modified: restored BOM / interlinear annotation format characters (Cf) */ + { 0x101FD, 0x101FD }, + { 0x102E0, 0x102E0 }, { 0x10376, 0x1037A }, { 0x10A01, 0x10A03 }, + { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F }, { 0x10A38, 0x10A3A }, + { 0x10A3F, 0x10A3F }, { 0x10AE5, 0x10AE6 }, { 0x10D24, 0x10D27 }, + { 0x10EAB, 0x10EAC }, { 0x10F46, 0x10F50 }, { 0x10F82, 0x10F85 }, + { 0x11000, 0x11002 }, { 0x11038, 0x11046 }, { 0x1107F, 0x11082 }, + { 0x110B0, 0x110BA }, { 0x11100, 0x11102 }, { 0x11127, 0x11134 }, + { 0x11145, 0x11146 }, { 0x11173, 0x11173 }, { 0x11180, 0x11182 }, + { 0x111B3, 0x111C0 }, { 0x111C9, 0x111CC }, { 0x1122C, 0x11237 }, + { 0x1123E, 0x1123E }, { 0x112DF, 0x112EA }, { 0x11300, 0x11303 }, + { 0x1133B, 0x1133C }, { 0x1133E, 0x11344 }, { 0x11347, 0x11348 }, + { 0x1134B, 0x1134D }, { 0x11357, 0x11357 }, { 0x11362, 0x11363 }, + { 0x11435, 0x11446 }, { 0x1145E, 0x1145E }, { 0x114B0, 0x114C3 }, + { 0x115AF, 0x115B5 }, { 0x115B8, 0x115C0 }, { 0x115DC, 0x115DD }, + { 0x11630, 0x11640 }, { 0x116AB, 0x116B7 }, { 0x1171D, 0x1172B }, + { 0x1182C, 0x1183A }, { 0x11930, 0x11935 }, { 0x11937, 0x11938 }, + { 0x1193B, 0x1193E }, { 0x11940, 0x11940 }, { 0x11942, 0x11942 }, + { 0x119D1, 0x119D7 }, { 0x119DA, 0x119E0 }, { 0x11A01, 0x11A0A 
}, + { 0x11A33, 0x11A39 }, { 0x11A3B, 0x11A3E }, { 0x11A47, 0x11A47 }, + { 0x11A51, 0x11A5B }, { 0x11A8A, 0x11A96 }, { 0x11A98, 0x11A99 }, + { 0x11C30, 0x11C36 }, { 0x11C38, 0x11C3D }, { 0x11C3F, 0x11C3F }, + { 0x11C92, 0x11CA7 }, { 0x11CAA, 0x11CB0 }, { 0x11CB2, 0x11CB3 }, + { 0x11CB5, 0x11CB6 }, { 0x11D31, 0x11D36 }, { 0x11D3A, 0x11D3A }, + { 0x11D3C, 0x11D3D }, { 0x11D3F, 0x11D45 }, { 0x11D47, 0x11D47 }, + { 0x11D90, 0x11D91 }, { 0x11D95, 0x11D95 }, { 0x11D97, 0x11D97 }, + { 0x11EF3, 0x11EF4 }, { 0x13430, 0x13438 }, { 0x16AF0, 0x16AF4 }, + { 0x16B30, 0x16B36 }, { 0x16F4F, 0x16F4F }, { 0x16F8F, 0x16F92 }, + { 0x1BC9D, 0x1BC9E }, { 0x1BCA0, 0x1BCA3 }, { 0x1D167, 0x1D169 }, + { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD }, + { 0x1D242, 0x1D244 }, { 0x1DA00, 0x1DA36 }, { 0x1DA3B, 0x1DA6C }, + { 0x1DA75, 0x1DA75 }, { 0x1DA84, 0x1DA84 }, { 0x1DA9B, 0x1DA9F }, + { 0x1DAA1, 0x1DAAF }, { 0x1E000, 0x1E006 }, { 0x1E008, 0x1E018 }, + { 0x1E01B, 0x1E021 }, { 0x1E023, 0x1E024 }, { 0x1E026, 0x1E02A }, + { 0x1E130, 0x1E136 }, { 0x1E2AE, 0x1E2AE }, { 0x1E2EC, 0x1E2EF }, + { 0x1E4EC, 0x1E4EF }, { 0x1E8D0, 0x1E8D6 }, { 0x1E944, 0x1E94A }, /* modified: dropped the overlapping duplicate 0x1E947-0x1E94A, the table must hold non-overlapping intervals */ + { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F }, /* modified: restored tag characters (Cf) */ + { 0xE0100, 0xE01EF } + }; + + /* test for 8-bit control characters */ + if (ucs == 0) + return 0; + if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) + return -1; + + /* binary search in table of non-spacing characters */ + if (bisearch(ucs, combining, + sizeof(combining) / sizeof(struct interval) - 1)) + return 0; + + /* if we arrive here, ucs is not a combining or C0/C1 control character */ + + return 1 + + (ucs >= 0x1100 && + (ucs <= 0x115f || /* Hangul Jamo init. consonants */ + ucs == 0x2329 || ucs == 0x232a || + (ucs >= 0x2e80 && ucs <= 0xa4cf && + ucs != 0x303f) || /* CJK ... 
Yi */ + (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */ + (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */ + (ucs >= 0xfe10 && ucs <= 0xfe19) || /* Vertical forms */ + (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */ + (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */ + (ucs >= 0xffe0 && ucs <= 0xffe6) || + (ucs >= 0x1f300 && ucs <= 0x1f64f) || /* modified: added Emoticons */ + (ucs >= 0x1f680 && ucs <= 0x1f6ff) || /* modified: added Transport and Map Symbols */ + (ucs >= 0x1f900 && ucs <= 0x1f9ff) || /* modified: added Supplemental Symbols and Pictographs */ + (ucs >= 0x20000 && ucs <= 0x2fffd) || + (ucs >= 0x30000 && ucs <= 0x3fffd))); +} + + +/* Returns the total column width of at most n characters of pwcs, stopping + * early at a NUL terminator, or -1 if any character is non-printable. */ +int mk_wcswidth(const mk_wchar_t *pwcs, size_t n) // modified: use mk_wchar_t +{ + int w, width = 0; + + /* modified: test the remaining count BEFORE dereferencing, so a buffer of exactly n characters without a NUL terminator is never read past its end */ + for (; n-- > 0 && *pwcs; pwcs++) + if ((w = mk_wcwidth(*pwcs)) < 0) + return -1; + else + width += w; + + return width; +} + diff --git a/src/wcwidth.h b/src/wcwidth.h new file mode 100644 index 0000000..f2fee11 --- /dev/null +++ b/src/wcwidth.h @@ -0,0 +1,21 @@ +// wcwidth.h + +// Windows does not have a wcwidth function, so we use compatibility code from +// http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c by Markus Kuhn + +#ifndef MK_WCWIDTH_H +#define MK_WCWIDTH_H + +#include <stddef.h> // size_t, used by mk_wcswidth (not guaranteed by <stdint.h>) + +#ifdef _WIN32 +#include <stdint.h> +typedef uint32_t mk_wchar_t; // Windows wchar_t can be 16-bit, we need 32-bit +#else +#include <wchar.h> +typedef wchar_t mk_wchar_t; // Posix wchar_t is 32-bit so just use that +#endif + +int mk_wcwidth(mk_wchar_t ucs); +int mk_wcswidth(const mk_wchar_t *pwcs, size_t n); + +#endif // MK_WCWIDTH_H diff --git a/system/init.lua b/system/init.lua index 893cd91..c232cd2 100644 --- a/system/init.lua +++ b/system/init.lua @@ -2,45 +2,11 @@ -- @module init local sys = require 'system.core' -local global_backup -- global backup for terminal settings - - - -local add_gc_method do - -- feature detection; __GC meta-method, not available in all Lua versions - local has_gc = 
false - local tt = setmetatable({}, { -- luacheck: ignore - __gc = function() has_gc = true end - }) - - -- clear table and run GC to trigger - tt = nil - collectgarbage() - collectgarbage() - - - if has_gc then - -- use default GC mechanism since it is available - function add_gc_method(t, f) - setmetatable(t, { __gc = f }) - end - else - -- create workaround using a proxy userdata, typical for Lua 5.1 - function add_gc_method(t, f) - local proxy = newproxy(true) - getmetatable(proxy).__gc = function() - t["__gc_proxy"] = nil - f(t) - end - t["__gc_proxy"] = proxy - end - end -end --- Returns a backup of terminal setting for stdin/out/err. --- Handles terminal/console flags and non-block flags on the streams. +-- Handles terminal/console flags, Windows codepage, and non-block flags on the streams. -- Backs up terminal/console flags only if a stream is a tty. -- @return table with backup of terminal settings function sys.termbackup() @@ -63,6 +29,9 @@ function sys.termbackup() backup.block_out = sys.getnonblock(io.stdout) backup.block_err = sys.getnonblock(io.stderr) + backup.consoleoutcodepage = sys.getconsoleoutputcp() + backup.consolecp = sys.getconsolecp() + return backup end @@ -82,25 +51,65 @@ function sys.termrestore(backup) if backup.block_in ~= nil then sys.setnonblock(io.stdin, backup.block_in) end if backup.block_out ~= nil then sys.setnonblock(io.stdout, backup.block_out) end if backup.block_err ~= nil then sys.setnonblock(io.stderr, backup.block_err) end + + if backup.consoleoutcodepage then sys.setconsoleoutputcp(backup.consoleoutcodepage) end + if backup.consolecp then sys.setconsolecp(backup.consolecp) end return true end ---- Backs up terminal settings and restores them on application exit. --- Calls `termbackup` to back up terminal settings and sets up a GC method to --- automatically restore them on application exit (also works on Lua 5.1). 
--- @treturn[1] boolean true --- @treturn[2] nil if the backup was already created --- @treturn[2] string error message -function sys.autotermrestore() - if global_backup then - return nil, "global terminal backup was already set up" +do -- autotermrestore + local global_backup -- global backup for terminal settings + + + local add_gc_method do + -- feature detection; __GC meta-method, not available in all Lua versions + local has_gc = false + local tt = setmetatable({}, { -- luacheck: ignore + __gc = function() has_gc = true end + }) + + -- clear table and run GC to trigger + tt = nil + collectgarbage() + collectgarbage() + + + if has_gc then + -- use default GC mechanism since it is available + function add_gc_method(t, f) + setmetatable(t, { __gc = f }) + end + else + -- create workaround using a proxy userdata, typical for Lua 5.1 + function add_gc_method(t, f) + local proxy = newproxy(true) + getmetatable(proxy).__gc = function() + t["__gc_proxy"] = nil + f(t) + end + t["__gc_proxy"] = proxy + end + end + end + + + --- Backs up terminal settings and restores them on application exit. + -- Calls `termbackup` to back up terminal settings and sets up a GC method to + -- automatically restore them on application exit (also works on Lua 5.1). + -- @treturn[1] boolean true + -- @treturn[2] nil if the backup was already created + -- @treturn[2] string error message + function sys.autotermrestore() + if global_backup then + return nil, "global terminal backup was already set up" + end + global_backup = sys.termbackup() + add_gc_method(global_backup, function(self) + sys.termrestore(self) end) + return true end - global_backup = sys.termbackup() - add_gc_method(global_backup, function(self) - sys.termrestore(self) end) - return true end @@ -208,12 +217,9 @@ end do - local _readkey = sys.readkey - local interval = 0.1 - --- Reads a single byte from the console, with a timeout. 
- -- This function uses `system.sleep` to wait in increments of 0.1 seconds until either a byte is - -- available or the timeout is reached. + -- This function uses `system.sleep` to wait until either a byte is available or the timeout is reached. + -- The sleep period is exponentially backing off, starting at 0.0125 seconds, with a maximum of 0.2 seconds. -- It returns immediately if a byte is available or if `timeout` is less than or equal to `0`. -- @tparam number timeout the timeout in seconds. -- @treturn[1] integer the key code of the key that was received @@ -224,11 +230,13 @@ do error("arg #1 to readkey, expected timeout in seconds, got " .. type(timeout), 2) end - local key = _readkey() + local interval = 0.0125 + local key = sys._readkey() while key == nil and timeout > 0 do - sys.sleep(interval) + sys.sleep(math.min(interval, timeout)) timeout = timeout - interval - key = _readkey() + interval = math.min(0.2, interval * 2) + key = sys._readkey() end if key then @@ -246,14 +254,14 @@ do local utf8_length -- length of utf8 sequence currently being processed local unpack = unpack or table.unpack - -- Reads a single key, if it is the start of ansi escape sequence then it reads - -- the full sequence. + --- Reads a single key, if it is the start of ansi escape sequence then it reads + -- the full sequence. The key can be a multi-byte string in case of multibyte UTF-8 character. -- This function uses `system.readkey`, and hence `system.sleep` to wait until either a key is -- available or the timeout is reached. -- It returns immediately if a key is available or if `timeout` is less than or equal to `0`. -- In case of an ANSI sequence, it will return the full sequence as a string. -- @tparam number timeout the timeout in seconds. 
- -- @treturn[1] string the character that was received, or a complete ANSI sequence + -- @treturn[1] string the character that was received (can be multi-byte), or a complete ANSI sequence -- @treturn[1] string the type of input: `"char"` for a single key, `"ansi"` for an ANSI sequence -- @treturn[2] nil in case of an error -- @treturn[2] string error message; `"timeout"` if the timeout was reached. |