diff options
Diffstat (limited to 'server/src/parser')
-rw-r--r-- | server/src/parser/ast.lua | 377 | ||||
-rw-r--r-- | server/src/parser/grammar.lua | 278 | ||||
-rw-r--r-- | server/src/parser/init.lua | 2 | ||||
-rw-r--r-- | server/src/parser/lines.lua | 122 |
4 files changed, 670 insertions, 109 deletions
diff --git a/server/src/parser/ast.lua b/server/src/parser/ast.lua new file mode 100644 index 00000000..2c434511 --- /dev/null +++ b/server/src/parser/ast.lua @@ -0,0 +1,377 @@ +local tonumber = tonumber +local string_char = string.char +local utf8_char = utf8.char + +local defs = { + Nil = function (pos) + return { + type = 'nil', + start = pos, + finish = pos + 2, + } + end, + True = function (pos) + return { + type = 'boolean', + start = pos, + finish = pos + 3, + [1] = true, + } + end, + False = function (pos) + return { + type = 'boolean', + start = pos, + finish = pos + 4, + [1] = false, + } + end, + String = function (start, str, finish) + return { + type = 'string', + start = start, + finish = finish - 1, + [1] = str, + } + end, + Char10 = function (char) + char = tonumber(char) + if not char or char < 0 or char > 255 then + -- TODO 记录错误 + return '' + end + return string_char(char) + end, + Char16 = function (char) + return string_char(tonumber(char, 16)) + end, + CharUtf8 = function (char) + char = tonumber(char, 16) + if not char or char < 0 or char > 0x10ffff then + -- TODO 记录错误 + return '' + end + return utf8_char(char) + end, + Number = function (start, number, finish) + return { + type = 'number', + start = start, + finish = finish - 1, + [1] = tonumber(number), + } + end, + Name = function (start, str, finish) + return { + type = 'name', + start = start, + finish = finish - 1, + [1] = str, + } + end, + Simple = function (first, ...) + if ... then + return { + type = 'simple', + first, ..., + } + elseif first == '' then + return nil + else + return first + end + end, + Index = function (exp) + exp.index = true + return exp + end, + Call = function (arg) + if arg == nil then + return { + type = 'call' + } + end + if arg.type == 'list' then + arg.type = 'call' + return arg + end + local obj = { + type = 'call', + [1] = arg, + } + return obj + end, + Binary = function (...) + local e1, op = ... + if not op then + return e1 + end + local args = {...} + local e1 = args[1] + local e2 + for i = 2, #args, 2 do + op, e2 = args[i], args[i+1] + e1 = { + type = op, + [1] = e1, + [2] = e2, + } + end + return e1 + end, + Unary = function (...) + local e1, op = ... + if not op then + return e1 + end + local args = {...} + local e1 = args[#args] + for i = #args - 1, 1, -1 do + op = args[i] + e1 = { + type = op, + [1] = e1, + } + end + return e1 + end, + DOTS = function (start) + return { + type = '...', + start = start, + finish = start + 2, + } + end, + COLON = function (start) + return { + type = ':', + start = start, + finish = start, + } + end, + Function = function (start, name, arg, ...) + local obj = { + type = 'function', + start = start, + name = name, + arg = arg, + ... + } + local max = #obj + obj.finish = obj[max] - 1 + obj[max] = nil + return obj + end, + LocalFunction = function (start, name, arg, ...) + local obj = { + type = 'localfunction', + start = start, + name = name, + arg = arg, + ... + } + local max = #obj + obj.finish = obj[max] - 1 + obj[max] = nil + return obj + end, + Table = function (start, table, finish) + if table then + table.start = start + table.finish = finish - 1 + else + table = { + type = 'table', + start = start, + finish = finish - 1, + } + end + return table + end, + TableFields = function (...) + if ... == '' then + return nil + else + return { + type = 'table', + ..., + } + end + end, + NewField = function (key, value) + return { + type = 'pair', + key, value, + } + end, + NewIndex = function (key, value) + key.index = true + return { + type = 'pair', + key, value, + } + end, + List = function (first, second, ...) + if second then + return { + type = 'list', + first, second, ... + } + elseif first == '' then + return nil + else + return first + end + end, + Set = function (keys, values) + return { + type = 'set', + keys, values, + } + end, + Local = function (keys, values) + return { + type = 'local', + keys, values, + } + end, + DoBody = function (...) + if ... == '' then + return { + type = 'do', + } + else + return { + type = 'do', + ... + } + end + end, + Do = function (start, action, finish) + action.start = start + action.finish = finish - 1 + return action + end, + Break = function () + return { + type = 'break', + } + end, + Return = function (exp) + if exp == nil or exp == '' then + exp = { + type = 'return' + } + else + if exp.type == 'list' then + exp.type = 'return' + else + exp = { + type = 'return', + [1] = exp, + } + end + end + return exp + end, + Label = function (name) + name.type = 'label' + return name + end, + GoTo = function (name) + name.type = 'goto' + return name + end, + IfBlock = function (exp, ...) + return { + filter = exp, + ... + } + end, + ElseIfBlock = function (exp, ...) + return { + filter = exp, + ... + } + end, + ElseBlock = function (...) + return { + ... + } + end, + If = function (start, ...) + local obj = { + type = 'if', + start = start, + ... + } + local max = #obj + obj.finish = obj[max] - 1 + obj[max] = nil + return obj + end, + LoopDef = function (arg, min, max, step) + return arg, min, max, step + end, + Loop = function (start, arg, min, max, step, ...) + local obj = { + type = 'loop', + start = start, + arg = arg, + min = min, + max = max, + step = step, + ... + } + local max = #obj + obj.finish = obj[max] - 1 + obj[max] = nil + return obj + end, + In = function (start, arg, exp, ...) + local obj = { + type = 'in', + start = start, + arg = arg, + exp = exp, + ... + } + local max = #obj + obj.finish = obj[max] - 1 + obj[max] = nil + return obj + end, + While = function (start, filter, ...) + local obj = { + type = 'while', + start = start, + filter = filter, + ... + } + local max = #obj + obj.finish = obj[max] - 1 + obj[max] = nil + return obj + end, + Repeat = function (start, ...) + local obj = { + type = 'repeat', + start = start, + ... + } + local max = #obj + obj.finish = obj[max] - 1 + obj.filter = obj[max-1] + obj[max] = nil + obj[max-1] = nil + return obj + end, +} + +return function (self, lua, mode) + local gram, err = self.grammar(lua, mode, defs) + if not gram then + return nil, err + end + return gram +end diff --git a/server/src/parser/grammar.lua b/server/src/parser/grammar.lua index c172dcac..0efe4056 100644 --- a/server/src/parser/grammar.lua +++ b/server/src/parser/grammar.lua @@ -2,10 +2,36 @@ local re = require 'parser.relabel' local m = require 'lpeglabel' local calcline = require 'parser.calcline' + local scriptBuf = '' local compiled = {} local parser +local RESERVED = { + ['and'] = true, + ['break'] = true, + ['do'] = true, + ['else'] = true, + ['elseif'] = true, + ['end'] = true, + ['false'] = true, + ['for'] = true, + ['function'] = true, + ['goto'] = true, + ['if'] = true, + ['in'] = true, + ['local'] = true, + ['nil'] = true, + ['not'] = true, + ['or'] = true, + ['repeat'] = true, + ['return'] = true, + ['then'] = true, + ['true'] = true, + ['until'] = true, + ['while'] = true, +} + local defs = setmetatable({}, {__index = function (self, key) self[key] = function (...) if parser[key] then @@ -29,6 +55,16 @@ defs.en = '\n' defs.er = '\r' defs.et = '\t' defs.ev = '\v' +defs.NotReserved = function (_, _, str) + if RESERVED[str] then + return false + end + return true, str +end + +defs.first = function (first, ...) + return first +end local eof = re.compile '!. / %{SYNTAX_ERROR}' local function grammar(tag) @@ -70,33 +106,31 @@ grammar 'Common' [[ Cut <- ![a-zA-Z0-9_] X16 <- [a-fA-F0-9] -AND <- Sp 'and' Cut +AND <- Sp {'and'} Cut BREAK <- Sp 'break' Cut DO <- Sp 'do' Cut ELSE <- Sp 'else' Cut ELSEIF <- Sp 'elseif' Cut END <- Sp 'end' Cut -FALSE <- Sp {} -> FALSE - 'false' Cut +FALSE <- Sp 'false' Cut FOR <- Sp 'for' Cut FUNCTION <- Sp 'function' Cut GOTO <- Sp 'goto' Cut IF <- Sp 'if' Cut IN <- Sp 'in' Cut LOCAL <- Sp 'local' Cut -NIL <- Sp {} -> NIL - 'nil' Cut -NOT <- Sp 'not' Cut -OR <- Sp 'or' Cut +NIL <- Sp 'nil' Cut +NOT <- Sp {'not'} Cut +OR <- Sp {'or'} Cut REPEAT <- Sp 'repeat' Cut RETURN <- Sp 'return' Cut THEN <- Sp 'then' Cut -TRUE <- Sp {} -> TRUE - 'true' Cut +TRUE <- Sp 'true' Cut UNTIL <- Sp 'until' Cut WHILE <- Sp 'while' Cut -Esc <- '\' EChar +Esc <- '\' -> '' + EChar EChar <- 'a' -> ea / 'b' -> eb / 'f' -> ef @@ -108,39 +142,39 @@ EChar <- 'a' -> ea / '"' / "'" / %nl - / 'z' (%nl / %s)* -> '' - / 'x' X16 X16 - / [0-9] [0-9]? [0-9]? - / 'u{' X16^+1^-6 '}' + / ('z' (%nl / %s)*) -> '' + / ('x' {X16 X16}) -> Char16 + / ([0-9] [0-9]? [0-9]?) -> Char10 + / ('u{' {X16^+1^-6} '}')-> CharUtf8 -Comp <- Sp CompList +Comp <- Sp {CompList} CompList <- '<=' / '>=' / '<' / '>' / '~=' / '==' -BOR <- Sp '|' -BXOR <- Sp '~' -BAND <- Sp '&' -Bshift <- Sp BshiftList +BOR <- Sp {'|'} +BXOR <- Sp {'~'} +BAND <- Sp {'&'} +Bshift <- Sp {BshiftList} BshiftList <- '<<' / '>>' -Concat <- Sp '..' -Adds <- Sp AddsList +Concat <- Sp {'..'} +Adds <- Sp {AddsList} AddsList <- '+' / '-' -Muls <- Sp MulsList +Muls <- Sp {MulsList} MulsList <- '*' / '//' / '/' / '%' -Unary <- Sp UnaryList +Unary <- Sp {UnaryList} UnaryList <- 'not' / '#' / '-' / '~' -POWER <- Sp '^' +POWER <- Sp {'^'} PL <- Sp '(' PR <- Sp ')' @@ -150,36 +184,33 @@ TL <- Sp '{' TR <- Sp '}' COMMA <- Sp ',' SEMICOLON <- Sp ';' -DOTS <- Sp {} -> DOTSPos - '...' -> DOTS +DOTS <- Sp ({} '...') -> DOTS DOT <- Sp '.' -COLON <- Sp {} -> COLONPos - ':' -> COLON +COLON <- Sp ({} ':') -> COLON LABEL <- Sp '::' ASSIGN <- Sp '=' ]] grammar 'Nil' [[ -Nil <- NIL +Nil <- Sp ({} -> Nil) NIL ]] grammar 'Boolean' [[ -Boolean <- TRUE - / FALSE +Boolean <- Sp ({} -> True) TRUE + / Sp ({} -> False) FALSE ]] grammar 'String' [[ String <- Sp ({} StringDef {}) -> String -StringDef <- '"' {(Esc / !%nl !'"' .)*} -> ShortString '"' - / "'" {(Esc / !%nl !"'" .)*} -> ShortString "'" - / '[' {:eq: '='* :} '[' {(!StringClose .)*} -> LongString StringClose +StringDef <- '"' {~(Esc / !%nl !'"' .)*~} -> first '"' + / "'" {~(Esc / !%nl !"'" .)*~} -> first "'" + / '[' {:eq: '='* :} '[' {(!StringClose .)*} -> first StringClose StringClose <- ']' =eq ']' ]] grammar 'Number' [[ -Number <- Sp {} -> NumberPos - NumberDef -> Number +Number <- Sp ({} {NumberDef} {}) -> Number NumberDef <- Number16 / Number10 Number10 <- Integer10 Float10 @@ -192,25 +223,27 @@ Float16 <- ('.' X16*)? ([pP] [+-]? [1-9]? [0-9]*)? ]] grammar 'Name' [[ -Name <- Sp {} -> NamePos - {[a-zA-Z_] [a-zA-Z0-9_]*} -> Name +Name <- Sp ({} NameBody {}) + -> Name +NameBody <- ([a-zA-Z_] [a-zA-Z0-9_]*) + => NotReserved ]] grammar 'Exp' [[ Exp <- ExpOr -ExpOr <- ExpAnd (OR ExpAnd)* -ExpAnd <- ExpCompare (AND ExpCompare)* -ExpCompare <- ExpBor (Comp ExpBor)* -ExpBor <- ExpBxor (BOR ExpBxor)* -ExpBxor <- ExpBand (BXOR ExpBand)* -ExpBand <- ExpBshift (BAND ExpBshift)* -ExpBshift <- ExpConcat (Bshift ExpConcat)* -ExpConcat <- ExpAdds (Concat ExpAdds)* -ExpAdds <- ExpMuls (Adds ExpMuls)* -ExpMuls <- ExpUnary (Muls ExpUnary)* -ExpUnary <- (Unary ExpPower) - / ExpPower -ExpPower <- ExpUnit (POWER ExpUnary)* +ExpOr <- (ExpAnd (OR ExpAnd)*) -> Binary +ExpAnd <- (ExpCompare (AND ExpCompare)*) -> Binary +ExpCompare <- (ExpBor (Comp ExpBor)*) -> Binary +ExpBor <- (ExpBxor (BOR ExpBxor)*) -> Binary +ExpBxor <- (ExpBand (BXOR ExpBand)*) -> Binary +ExpBand <- (ExpBshift (BAND ExpBshift)*) -> Binary +ExpBshift <- (ExpConcat (Bshift ExpConcat)*) -> Binary +ExpConcat <- (ExpAdds (Concat ExpConcat)*) -> Binary +ExpAdds <- (ExpMuls (Adds ExpMuls)*) -> Binary +ExpMuls <- (ExpUnary (Muls ExpUnary)*) -> Binary +ExpUnary <- ( (Unary+ ExpPower)) -> Unary + / ExpPower +ExpPower <- (ExpUnit (POWER ExpUnary)*) -> Binary ExpUnit <- Nil / Boolean / String @@ -224,107 +257,134 @@ Simple <- (Prefix (Suffix)*) -> Simple Prefix <- PL Exp PR / Name -ColonName <- (COLON Name) - -> ColonName Suffix <- DOT Name - / ColonName + / COLON Name / Table / String - / BL Exp BR - / PL ArgList? PR + / BL Exp -> Index BR + / PL (ExpList -> Call) PR +ExpList <- (Exp (COMMA Exp)*)? + -> List +NameList <- (Name (COMMA Name)*)? + -> List ArgList <- (Arg (COMMA Arg)*)? - -> ArgList + -> List Arg <- DOTS - / Exp + / Name -Table <- (TL TableFields? TR) +Table <- Sp ({} TL TableFields TR {}) -> Table -TableFields <- TableField (TableSep TableField)* TableSep? +TableFields <- (TableField (TableSep TableField)* TableSep?)? + -> TableFields TableSep <- COMMA / SEMICOLON TableField <- NewIndex / NewField / Exp NewIndex <- (BL Exp BR ASSIGN Exp) -> NewIndex -NewField <- Name ASSIGN Exp +NewField <- (Name ASSIGN Exp) + -> NewField -Function <- FunctionLoc / FunctionDef -FunctionLoc <- (LOCAL FUNCTION FuncName PL ArgList PR) -> FunctionLoc - (!END Action)* -> Function - END -FunctionDef <- (FUNCTION FuncName PL ArgList PR) -> FunctionDef - (!END Action)* -> Function +Function <- Sp ({} FunctionBody {}) + -> Function +FunctionBody<- FUNCTION FuncName PL ArgList PR + Action* END -FuncName <- (Name (FuncSuffix)*)? - -> FuncName +FuncName <- (Name? (FuncSuffix)*) + -> Simple FuncSuffix <- DOT Name - / ColonName + / COLON Name -- 纯占位,修改了 `relabel.lua` 使重复定义不抛错 -Action <- !. . +Action <- !END . ]] grammar 'Action' [[ -Action <- SEMICOLON / Do / Break / Return / Label / GoTo / If / For / While / Repeat / Function / Set / Local / Call +Action <- SEMICOLON + / Do + / Break + / Return + / Label + / GoTo + / If + / For + / While + / Repeat + / Function + / LocalFunction + / Local + / Set + / Call -ExpList <- Exp (COMMA Exp)* -NameList <- (Name (COMMA Name)*) -> NameList -SimpleList <- (Simple (COMMA Simple)*) -> SimpleList +SimpleList <- (Simple (COMMA Simple)*) + -> List -Do <- DO -> DoDef - (!END Action)* -> Do - END +Do <- Sp ({} DO DoBody END {}) + -> Do +DoBody <- Action* + -> DoBody Break <- BREAK + -> Break -Return <- RETURN !END ExpList? +Return <- RETURN ExpList? + -> Return -Label <- LABEL Name LABEL +Label <- LABEL Name -> Label LABEL -GoTo <- GOTO Name +GoTo <- GOTO Name -> GoTo -If <- IfPart - ElseIfPart* - ElsePart? +If <- Sp ({} IfBody {}) + -> If +IfBody <- (IfPart -> IfBlock) + (ElseIfPart -> ElseIfBlock)* + (ElsePart -> ElseBlock)? END - -> EndIf -IfPart <- (IF Exp THEN) -> IfDef - (!ELSEIF !ELSE !END Action)* -> If -ElseIfPart <- (ELSEIF Exp THEN) -> ElseIfDef - (!ELSE !ELSEIF !END Action)* -> ElseIf -ElsePart <- ELSE -> ElseDef - (!END Action)* -> Else +IfPart <- IF Exp THEN + Action* +ElseIfPart <- ELSEIF Exp THEN + Action* +ElsePart <- ELSE + Action* For <- Loop / In -Loop <- (FOR LoopStart LoopFinish LoopStep? DO) -> LoopDef - (!END Action)* -> Loop +Loop <- Sp ({} LoopBody {}) + -> Loop +LoopBody <- (FOR LoopStart LoopFinish LoopStep? DO) -> LoopDef + Action* END -LoopStart <- (Name ASSIGN Exp) -> LoopStart +LoopStart <- Name ASSIGN Exp LoopFinish <- COMMA Exp LoopStep <- COMMA Exp -In <- (FOR NameList IN ExpList DO) -> InDef - (!END Action)* -> In +In <- Sp ({} InBody {}) + -> In +InBody <- FOR NameList IN ExpList DO + Action* END -While <- (WHILE Exp DO) -> WhileDef - (!END Action)* -> While +While <- Sp ({} WhileBody {}) + -> While +WhileBody <- WHILE Exp DO + Action* END -Repeat <- REPEAT -> RepeatDef - (!UNTIL Action)* -> Repeat - (UNTIL Exp) -> Until +Repeat <- Sp ({} RepeatBody {}) + -> Repeat +RepeatBody <- REPEAT + Action* + UNTIL Exp -Set <- (LOCAL NameList ASSIGN ExpList) - -> LocalSet - / (SimpleList ASSIGN ExpList) +Local <- (LOCAL NameList (ASSIGN ExpList)?) + -> Local +Set <- (SimpleList ASSIGN ExpList) -> Set -Local <- LOCAL NameList - -> LocalVar - Call <- Simple - -> Call + +LocalFunction + <- Sp ({} LOCAL FunctionBody {}) + -> LocalFunction ]] grammar 'Lua' [[ @@ -333,7 +393,7 @@ Lua <- (Sp Action)* Sp return function (lua, mode, parser_) parser = parser_ or {} - mode = mode or 'lua' + mode = mode or 'Lua' local r, e, pos = compiled[mode]:match(lua) if not r then local err = errorpos(lua, pos, e) diff --git a/server/src/parser/init.lua b/server/src/parser/init.lua index 3216fa39..9a98b787 100644 --- a/server/src/parser/init.lua +++ b/server/src/parser/init.lua @@ -2,6 +2,8 @@ local api = { grammar = require 'parser.grammar', split = require 'parser.split', calcline = require 'parser.calcline', + ast = require 'parser.ast', + lines = require 'parser.lines', } return api diff --git a/server/src/parser/lines.lua b/server/src/parser/lines.lua new file mode 100644 index 00000000..ea41e927 --- /dev/null +++ b/server/src/parser/lines.lua @@ -0,0 +1,122 @@ +local m = require 'lpeglabel' + +local function utf8_len(buf, start, finish) + local len, pos = utf8.len(buf, start, finish) + if len then + return len + end + return 1 + utf8_len(buf, start, pos-1) + utf8_len(buf, pos+1, finish) +end + +local function Line(pos, str, ...) + local line = {...} + local sp = 0 + local tab = 0 + for i = 1, #line do + if line[i] == ' ' then + sp = sp + 1 + else + tab = tab + 1 + end + line[i] = nil + end + line[1] = pos + line[2] = sp + line[3] = tab + return line +end + +local parser = m.P{ +'Lines', +Lines = m.Ct(m.V'Line'^0 * m.V'LastLine'), +Line = m.Cp() * m.C(m.V'Indent' * (1 - m.V'Nl')^0 * m.V'Nl') / Line, +LastLine= m.Cp() * m.C(m.V'Indent' * (1 - m.V'Nl')^0) / Line, +Nl = m.P'\r\n' + m.S'\r\n', +Indent = m.C(m.S' \t')^0, +} + +local mt = {} +mt.__index = mt + +function mt:position(row, col, code) + if row < 1 then + return 1 + end + if row > #self then + if code == 'utf8' then + return utf8_len(self.buf) + 1 + else + return #self.buf + 1 + end + end + local line = self[row] + local next_line = self[row+1] + local start = line[1] + local finish + if next_line then + finish = next_line[1] - 1 + else + finish = #self.buf + 1 + end + local pos + if code == 'utf8' then + pos = utf8.offset(self.buf, col, start) or finish + else + pos = start + col - 1 + end + if pos < start then + pos = start + elseif pos > finish then + pos = finish + end + return pos +end + +function mt:rowcol(pos, code) + if pos < 1 then + return 1, 1 + end + if pos > #self.buf + 1 then + local start = self[#self][1] + if code == 'utf8' then + return #self, utf8_len(self.buf, start) + 1 + else + return #self, #self.buf - start + 2 + end + end + local min = 1 + local max = #self + for _ = 1, 100 do + local row = (max - min) // 2 + min + local start = self[row][1] + if pos < start then + max = row + elseif pos > start then + local next_start = self[row + 1][1] + if pos < next_start then + if code == 'utf8' then + return row, utf8_len(self.buf, start, pos) + else + return row, pos - start + 1 + end + elseif pos > next_start then + min = row + else + return row + 1, 1 + end + else + return row, 1 + end + end + error('rowcol failed!') +end + +return function (self, buf) + local lines, err = parser:match(buf) + if not lines then + return nil, err + end + lines.buf = buf + + return setmetatable(lines, mt) +end |