1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
local ansi = require 'encoder.ansi'
local utf16 = require 'encoder.utf16'
-- UTF-16 codecs, one per byte order, built by the `encoder.utf16` factory.
-- The second argument is the codepoint of the character literal passed to
-- `utf8.codepoint`; presumably it is the substitute the codec emits for data
-- it cannot convert -- TODO confirm against `encoder.utf16`.
local utf16le = utf16('le', utf8.codepoint '�')
local utf16be = utf16('be', utf8.codepoint '�')
-- Encoding names accepted by the functions of this module.
-- NOTE(review): `m.encode` and `m.decode` also handle 'ansi', which this
-- alias does not list.
---@alias encoder.encoding '"utf8"'|'"utf16"'|'"utf16le"'|'"utf16be"'
-- Byte-order-mark policy consumed by `m.encode`.
---@alias encoder.bom '"no"'|'"yes"'|'"auto"'
-- Module table; the public functions are attached to it below and it is
-- returned at the end of the file.
local m = {}
---Measures the byte range `s[i..j]` (a UTF-8 string) in units of `encoding`.
---
---* 'utf16' / 'utf16le' / 'utf16be': the range is converted to UTF-16 and the
---  number of 16-bit code units is returned.
---* 'utf8': delegates to `utf8.len(s, i, j, true)` (lax mode), which returns
---  a falsy value plus the offending byte position when the range is not a
---  valid sequence.
---* anything else: an error is logged and the byte count `j - i + 1` is
---  returned as a fallback.
---@param encoding encoder.encoding
---@param s string
---@param i? integer # first byte of the range, defaults to 1
---@param j? integer # last byte of the range, defaults to #s
---@return integer?
function m.len(encoding, s, i, j)
    i = i or 1
    j = j or #s
    -- Plain 'utf16' is treated as little-endian, matching offset/encode/decode.
    if encoding == 'utf16'
    or encoding == 'utf16le' then
        local us = utf16le.fromutf8(s:sub(i, j))
        -- Two bytes per UTF-16 code unit.
        return #us // 2
    end
    if encoding == 'utf16be' then
        local us = utf16be.fromutf8(s:sub(i, j))
        return #us // 2
    end
    if encoding == 'utf8' then
        return utf8.len(s, i, j, true)
    end
    log.error('Unsupport len encoding:', encoding)
    return j - i + 1
end
---Maps the `n`-th unit of `encoding`, counted from byte `i` of the UTF-8
---string `s`, to a byte position in `s`.
---
---For the UTF-16 encodings only the text from `i` up to the next CR/LF is
---considered. When that text contains no byte >= 0x80 the answer is simply
---`n + i - 1`; otherwise the text is converted to UTF-16, cut to its first
---`n - 1` code units, converted back to UTF-8, and the byte length of that
---prefix is added to `i`.
---
---For 'utf8' the call is forwarded to `utf8.offset(s, n, i)`. Any other
---encoding logs an error and falls back to `n + i - 1`.
---@param encoding encoder.encoding
---@param s string
---@param n integer
---@param i integer
function m.offset(encoding, s, n, i)
    i = i or 1

    if encoding == 'utf8' then
        return utf8.offset(s, n, i)
    end

    -- Select the UTF-16 codec; plain 'utf16' means little-endian here.
    local codec
    if encoding == 'utf16le' or encoding == 'utf16' then
        codec = utf16le
    elseif encoding == 'utf16be' then
        codec = utf16be
    end

    if codec == nil then
        log.error('Unsupport offset encoding:', encoding)
        return n + i - 1
    end

    local line = s:match('[^\r\n]*', i)
    local hasHighByte = line:find '[\x80-\xff]'
    if not hasHighByte then
        -- No byte >= 0x80 in the line, so the mapping is one-to-one.
        return n + i - 1
    end

    local units = codec.fromutf8(line)
    -- Keep the first n - 1 code units (two bytes each) and measure them in UTF-8.
    local prefix = codec.toutf8(units:sub(1, n * 2 - 2))
    return #prefix + i
end
---Converts the UTF-8 string `text` into `encoding`, optionally prefixing a
---byte order mark.
---
---* 'utf8': the UTF-8 BOM (EF BB BF) is prepended only when `bom` is 'yes'.
---* 'ansi': converted through `ansi.fromutf8`; `bom` is ignored.
---* 'utf16' / 'utf16le': converted to little-endian UTF-16; FF FE is
---  prepended when `bom` is 'yes' or 'auto'.
---* 'utf16be': converted to big-endian UTF-16; FE FF is prepended when `bom`
---  is 'yes' or 'auto'.
---* anything else: an error is logged and `text` is returned unchanged.
---@param encoding encoder.encoding
---@param text string
---@param bom encoder.bom
---@return string
function m.encode(encoding, text, bom)
    if encoding == 'ansi' then
        return ansi.fromutf8(text)
    end

    if encoding == 'utf8' then
        -- 'auto' does not add a BOM for UTF-8; only an explicit 'yes' does.
        return bom == 'yes' and ('\xEF\xBB\xBF' .. text) or text
    end

    -- Both UTF-16 flavours write a BOM for 'yes' as well as for 'auto'.
    local withBom = bom == 'yes' or bom == 'auto'

    if encoding == 'utf16le' or encoding == 'utf16' then
        local encoded = utf16le.fromutf8(text)
        if withBom then
            return '\xFF\xFE' .. encoded
        end
        return encoded
    end

    if encoding == 'utf16be' then
        local encoded = utf16be.fromutf8(text)
        if withBom then
            return '\xFE\xFF' .. encoded
        end
        return encoded
    end

    log.error('Unsupport encode encoding:', encoding)
    return text
end
---Converts `text` from `encoding` into UTF-8.
---
---* 'utf8': returned unchanged.
---* 'ansi': converted through `ansi.toutf8`.
---* 'utf16' / 'utf16le': converted through the little-endian UTF-16 codec.
---* 'utf16be': converted through the big-endian UTF-16 codec.
---* anything else: an error is logged and `text` is returned unchanged.
---
---This function does not inspect or strip a byte order mark itself; `text`
---is handed to the codec as-is.
---@param encoding encoder.encoding
---@param text string
---@return string
function m.decode(encoding, text)
    if encoding == 'utf8' then
        return text
    end
    if encoding == 'ansi' then
        return ansi.toutf8(text)
    end
    -- Plain 'utf16' is treated as little-endian, as in m.encode.
    if encoding == 'utf16'
    or encoding == 'utf16le' then
        return utf16le.toutf8(text)
    end
    if encoding == 'utf16be' then
        return utf16be.toutf8(text)
    end
    -- Report the failing operation as "decode" so logs point at this function.
    log.error('Unsupport decode encoding:', encoding)
    return text
end
return m
|