summaryrefslogtreecommitdiff
path: root/script/encoder/init.lua
blob: 0011265a07c6dca77d6d70a93a644c23b3a9f36d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
local ansi    = require 'encoder.ansi'
local utf16   = require 'encoder.utf16'
-- Little- and big-endian UTF-16 codecs, created once when the module loads and
-- used by len/offset/encode/decode through their `fromutf8` / `toutf8` functions.
-- The second argument is the code point of the U+FFFD literal; presumably the
-- codec substitutes it for input it cannot convert -- TODO confirm against
-- encoder.utf16.
local utf16le = utf16('le', utf8.codepoint '�')
local utf16be = utf16('be', utf8.codepoint '�')

---@alias encoder.encoding '"utf8"'|'"ansi"'|'"utf16"'|'"utf16le"'|'"utf16be"'

---@alias encoder.bom '"no"'|'"yes"'|'"auto"'

-- Module table: the public functions are attached to it and it is returned
-- as the module's value at the end of the file.
local m = {}

---Measure the byte range `s[i..j]` in units of `encoding`.
---
---For the UTF-16 encodings the slice is transcoded from UTF-8 and the result
---is the number of 16-bit code units (encoded byte length divided by two).
---For 'utf8' the call is forwarded to `utf8.len` in lax mode, so the result
---follows that function's contract (it reports failure instead of a count
---when the slice contains an invalid byte sequence).
---Any other encoding is logged as an error and the byte length of the slice
---is returned as a fallback.
---@param encoding encoder.encoding
---@param s string
---@param i? integer # first byte of the slice, defaults to 1
---@param j? integer # last byte of the slice, defaults to #s
---@return integer
function m.len(encoding, s, i, j)
    i = i or 1
    j = j or #s
    -- Plain 'utf16' is handled as little-endian, matching offset/encode/decode.
    -- (The second comparison used to repeat 'utf16', which sent 'utf16le'
    -- to the error fallback below.)
    if encoding == 'utf16'
    or encoding == 'utf16le' then
        local us = utf16le.fromutf8(s:sub(i, j))
        -- Two bytes per UTF-16 code unit.
        return #us // 2
    end
    if encoding == 'utf16be' then
        local us = utf16be.fromutf8(s:sub(i, j))
        return #us // 2
    end
    if encoding == 'utf8' then
        return utf8.len(s, i, j, true)
    end
    log.error('Unsupport len encoding:', encoding)
    -- Fallback: treat every byte as one unit.
    return j - i + 1
end

---Resolve a UTF-16 based position to a byte position using `codec`.
---Only the text from byte `i` up to the first CR or LF is inspected.
local function utf16offset(codec, s, n, i)
    local line = s:match('[^\r\n]*', i)
    if line:find '[\x80-\xff]' then
        -- Non-ASCII present: transcode, keep the first n-1 code units
        -- (2 bytes each) and measure how many UTF-8 bytes they occupy.
        local units = codec.fromutf8(line)
        local head  = codec.toutf8(units:sub(1, n * 2 - 2))
        return #head + i
    end
    -- Pure ASCII line: one byte per code unit.
    return n + i - 1
end

---Translate the `n`-th unit (counted in `encoding`, starting at byte `i`)
---into a byte position within `s`.
---
---UTF-16 encodings are resolved on the line that starts at `i`; 'utf8' is
---forwarded to `utf8.offset`. Any other encoding is logged as an error and
---`n + i - 1` is returned, i.e. one byte per unit is assumed.
---@param encoding encoder.encoding
---@param s string
---@param n integer
---@param i? integer # starting byte, defaults to 1
---@return integer
function m.offset(encoding, s, n, i)
    i = i or 1
    if encoding == 'utf16' or encoding == 'utf16le' then
        return utf16offset(utf16le, s, n, i)
    elseif encoding == 'utf16be' then
        return utf16offset(utf16be, s, n, i)
    elseif encoding == 'utf8' then
        return utf8.offset(s, n, i)
    end
    log.error('Unsupport offset encoding:', encoding)
    return n + i - 1
end

---Convert UTF-8 `text` to `encoding`, optionally prefixed with a byte order mark.
---
---BOM rules as implemented here:
---  * 'utf8'   - BOM (EF BB BF) only when `bom` is 'yes'.
---  * 'utf16*' - BOM (FF FE for LE, FE FF for BE) when `bom` is 'yes' or 'auto'.
---  * 'ansi'   - `bom` is ignored.
---An unknown encoding is logged as an error and `text` is returned unchanged.
---@param encoding encoder.encoding
---@param text string
---@param bom encoder.bom
---@return string
function m.encode(encoding, text, bom)
    if encoding == 'utf8' then
        if bom ~= 'yes' then
            return text
        end
        return '\xEF\xBB\xBF' .. text
    elseif encoding == 'ansi' then
        return ansi.fromutf8(text)
    elseif encoding == 'utf16' or encoding == 'utf16le' then
        local encoded = utf16le.fromutf8(text)
        if bom == 'yes' or bom == 'auto' then
            return '\xFF\xFE' .. encoded
        end
        return encoded
    elseif encoding == 'utf16be' then
        local encoded = utf16be.fromutf8(text)
        if bom == 'yes' or bom == 'auto' then
            return '\xFE\xFF' .. encoded
        end
        return encoded
    end
    log.error('Unsupport encode encoding:', encoding)
    return text
end

---Convert `text` from `encoding` to UTF-8.
---
---'utf8' input is returned untouched; 'ansi' and the UTF-16 variants are
---handed to their codec's `toutf8` (plain 'utf16' is read as little-endian).
---No BOM handling is performed in this function itself.
---An unknown encoding is logged as an error and `text` is returned unchanged.
---@param encoding encoder.encoding
---@param text string
---@return string
function m.decode(encoding, text)
    if encoding == 'utf8' then
        return text
    end
    if encoding == 'ansi' then
        return ansi.toutf8(text)
    end
    if encoding == 'utf16'
    or encoding == 'utf16le' then
        return utf16le.toutf8(text)
    end
    if encoding == 'utf16be' then
        return utf16be.toutf8(text)
    end
    -- Name the failing operation correctly: this message previously said
    -- "encode", copied from m.encode.
    log.error('Unsupport decode encoding:', encoding)
    return text
end

return m