diff options
Diffstat (limited to 'script/parser/grammar.lua')
-rw-r--r-- | script/parser/grammar.lua | 538 |
1 files changed, 538 insertions, 0 deletions
-- script/parser/grammar.lua
--
-- Builds the LPeg (relabel) grammars used by the parser. Each `grammar`
-- call below registers one named grammar; the module returns a function
-- that matches a piece of Lua source against one of them.

local re = require 'parser.relabel'
local m = require 'lpeglabel'
local ast = require 'parser.ast'

-- Accumulated grammar text; every `grammar` call prepends its script here.
local scriptBuf = ''
-- Compiled patterns, keyed by grammar tag ('Comment', 'Sp', ..., 'Lua').
local compiled = {}
-- Definition table handed to `re.compile`; the helpers below are added to it.
local defs = ast.defs

-- `goto` may be used as a name; its validity is checked later.
local RESERVED = {
    ['and']      = true,
    ['break']    = true,
    ['do']       = true,
    ['else']     = true,
    ['elseif']   = true,
    ['end']      = true,
    ['false']    = true,
    ['for']      = true,
    ['function'] = true,
    ['if']       = true,
    ['in']       = true,
    ['local']    = true,
    ['nil']      = true,
    ['not']      = true,
    ['or']       = true,
    ['repeat']   = true,
    ['return']   = true,
    ['then']     = true,
    ['true']     = true,
    ['until']    = true,
    ['while']    = true,
}

-- Line break: CRLF first, otherwise a single CR or LF.
defs.nl = (m.P'\r\n' + m.S'\r\n')
-- Inline whitespace (space or tab) and its negative predicate.
defs.s  = m.S' \t'
defs.S  = - defs.s
-- Replacement characters for the single-letter escapes handled by `EChar`.
defs.ea = '\a'
defs.eb = '\b'
defs.ef = '\f'
defs.en = '\n'
defs.er = '\r'
defs.et = '\t'
defs.ev = '\v'
-- Patterns that consume nothing and map their position capture to a constant.
defs['nil']   = m.Cp() / function () return nil end
defs['false'] = m.Cp() / function () return false end
-- Match-time checks used as `NameBody=>NotReserved` / `NameBody=>Reserved`:
-- the match succeeds only when the captured word is not / is a reserved word.
defs.NotReserved = function (_, _, str)
    if RESERVED[str] then
        return false
    end
    return true
end
defs.Reserved = function (_, _, str)
    if RESERVED[str] then
        return true
    end
    return false
end
defs.None = function () end
-- Position capture shifted forward by one.
defs.np = m.Cp() / function (n) return n+1 end

-- Raise LPeg's backtrack stack limit (presumably for deeply nested input).
m.setmaxstack(1000)

-- Succeeds only at end of input; otherwise throws the SYNTAX_ERROR label.
local eof = re.compile '!. / %{SYNTAX_ERROR}'

--- Register a grammar under `tag`.
-- Returns a function that takes the grammar text, prepends it to the text
-- accumulated so far (so the new script's first rule comes first in the
-- buffer and earlier rules stay available) and compiles the result,
-- followed by the end-of-input check.
-- @param tag  key under which the compiled pattern is stored
local function grammar(tag)
    return function (script)
        scriptBuf = script .. '\r\n' .. scriptBuf
        compiled[tag] = re.compile(scriptBuf, defs) * eof
    end
end

--- Build the error record returned when a match fails.
-- @param pos  failure position; 0 is used when it is missing
-- @param err  failure label, stored unchanged in the `err` field
local function errorpos(pos, err)
    return {
        type   = 'UNKNOWN',
        start  = pos or 0,
        finish = pos or 0,
        err    = err,
    }
end

grammar 'Comment' [[
Comment     <-  LongComment
            /   '--' ShortComment
LongComment <-  ('--[' {} {:eq: '='* :} {} '['
                {(!CommentClose .)*}
                (CommentClose / {}))
            ->  LongComment
            /   (
                {} '/*' {}
                (!'*/' .)*
                {} '*/' {}
                )
            ->  CLongComment
CommentClose<-  ']' =eq ']'
ShortComment<-  ({} {(!%nl .)*} {})
            ->  ShortComment
]]

grammar 'Sp' [[
Sp          <-  (Comment / %nl / %s)*
Sps         <-  (Comment / %nl / %s)+
]]

grammar 'Common' [[
Word        <-  [a-zA-Z0-9_]
Cut         <-  !Word
X16         <-  [a-fA-F0-9]
Rest        <-  (!%nl .)*

AND         <-  Sp {'and'} Cut
BREAK       <-  Sp 'break' Cut
FALSE       <-  Sp 'false' Cut
GOTO        <-  Sp 'goto' Cut
LOCAL       <-  Sp 'local' Cut
NIL         <-  Sp 'nil' Cut
NOT         <-  Sp 'not' Cut
OR          <-  Sp {'or'} Cut
RETURN      <-  Sp 'return' Cut
TRUE        <-  Sp 'true' Cut

DO          <-  Sp {} 'do' {} Cut
            /   Sp({} 'then' {} Cut) -> ErrDo
IF          <-  Sp {} 'if' {} Cut
ELSE        <-  Sp {} 'else' {} Cut
ELSEIF      <-  Sp {} 'elseif' {} Cut
END         <-  Sp {} 'end' {} Cut
FOR         <-  Sp {} 'for' {} Cut
FUNCTION    <-  Sp {} 'function' {} Cut
IN          <-  Sp {} 'in' {} Cut
REPEAT      <-  Sp {} 'repeat' {} Cut
THEN        <-  Sp {} 'then' {} Cut
            /   Sp({} 'do' {} Cut) -> ErrThen
UNTIL       <-  Sp {} 'until' {} Cut
WHILE       <-  Sp {} 'while' {} Cut


Esc         <-  '\' -> ''
                EChar
EChar       <-  'a' -> ea
            /   'b' -> eb
            /   'f' -> ef
            /   'n' -> en
            /   'r' -> er
            /   't' -> et
            /   'v' -> ev
            /   '\'
            /   '"'
            /   "'"
            /   %nl
            /   ('z' (%nl / %s)*) -> ''
            /   ({} 'x' {X16 X16}) -> Char16
            /   ([0-9] [0-9]? [0-9]?) -> Char10
            /   ('u{' {} {Word*} '}') -> CharUtf8
            -- 错误处理
            /   'x' {} -> MissEscX
            /   'u' !'{' {} -> MissTL
            /   'u{' Word* !'}' {} -> MissTR
            /   {} -> ErrEsc

BOR         <-  Sp {'|'}
BXOR        <-  Sp {'~'} !'='
BAND        <-  Sp {'&'}
Bshift      <-  Sp {BshiftList}
BshiftList  <-  '<<'
            /   '>>'
Concat      <-  Sp {'..'}
Adds        <-  Sp {AddsList}
AddsList    <-  '+'
            /   '-'
Muls        <-  Sp {MulsList}
MulsList    <-  '*'
            /   '//'
            /   '/'
            /   '%'
Unary       <-  Sp {} {UnaryList}
UnaryList   <-  NOT
            /   '#'
            /   '-'
            /   '~' !'='
POWER       <-  Sp {'^'}

BinaryOp    <-( Sp {} {'or'} Cut
            /   Sp {} {'and'} Cut
            /   Sp {} {'<=' / '>=' / '<'!'<' / '>'!'>' / '~=' / '=='}
            /   Sp {} ({} '=' {}) -> ErrEQ
            /   Sp {} ({} '!=' {}) -> ErrUEQ
            /   Sp {} {'|'}
            /   Sp {} {'~'}
            /   Sp {} {'&'}
            /   Sp {} {'<<' / '>>'}
            /   Sp {} {'..'} !'.'
            /   Sp {} {'+' / '-'}
            /   Sp {} {'*' / '//' / '/' / '%'}
            /   Sp {} {'^'}
            )-> BinaryOp
UnaryOp     <-( Sp {} {'not' Cut / '#' / '~' !'=' / '-' !'-'}
            )-> UnaryOp

PL          <-  Sp '('
PR          <-  Sp ')'
BL          <-  Sp '[' !'[' !'='
BR          <-  Sp ']'
TL          <-  Sp '{'
TR          <-  Sp '}'
COMMA       <-  Sp ({} ',')
            ->  COMMA
SEMICOLON   <-  Sp ({} ';')
            ->  SEMICOLON
DOTS        <-  Sp ({} '...')
            ->  DOTS
DOT         <-  Sp ({} '.' !'.')
            ->  DOT
COLON       <-  Sp ({} ':' !':')
            ->  COLON
LABEL       <-  Sp '::'
ASSIGN      <-  Sp '=' !'='
AssignOrEQ  <-  Sp ({} '==' {})
            ->  ErrAssign
            /   Sp '='

DirtyBR     <-  BR / {} -> MissBR
DirtyTR     <-  TR / {} -> MissTR
DirtyPR     <-  PR / {} -> MissPR
DirtyLabel  <-  LABEL / {} -> MissLabel
NeedEnd     <-  END / {} -> MissEnd
NeedDo      <-  DO / {} -> MissDo
NeedAssign  <-  ASSIGN / {} -> MissAssign
NeedComma   <-  COMMA / {} -> MissComma
NeedIn      <-  IN / {} -> MissIn
NeedUntil   <-  UNTIL / {} -> MissUntil
NeedThen    <-  THEN / {} -> MissThen
]]

grammar 'Nil' [[
Nil         <-  Sp ({} -> Nil) NIL
]]

grammar 'Boolean' [[
Boolean     <-  Sp ({} -> True) TRUE
            /   Sp ({} -> False) FALSE
]]

grammar 'String' [[
String      <-  Sp ({} StringDef {})
            ->  String
StringDef   <-  {'"'}
                {~(Esc / !%nl !'"' .)*~} -> 1
                ('"' / {} -> MissQuote1)
            /   {"'"}
                {~(Esc / !%nl !"'" .)*~} -> 1
                ("'" / {} -> MissQuote2)
            /   ('[' {} {:eq: '='* :} {} '[' %nl?
                {(!StringClose .)*} -> 1
                (StringClose / {}))
            ->  LongString
StringClose <-  ']' =eq ']'
]]

grammar 'Number' [[
Number      <-  Sp ({} {NumberDef} {}) -> Number
                NumberSuffix?
                ErrNumber?
NumberDef   <-  Number16 / Number10
NumberSuffix<-  ({} {[uU]? [lL] [lL]}) -> FFINumber
            /   ({} {[iI]}) -> ImaginaryNumber
ErrNumber   <-  ({} {([0-9a-zA-Z] / '.')+}) -> UnknownSymbol

Number10    <-  Float10 Float10Exp?
            /   Integer10 Float10? Float10Exp?
Integer10   <-  [0-9]+ ('.' [0-9]*)?
Float10     <-  '.' [0-9]+
Float10Exp  <-  [eE] [+-]? [0-9]+
            /   ({} [eE] [+-]? {}) -> MissExponent

Number16    <-  '0' [xX] Float16 Float16Exp?
            /   '0' [xX] Integer16 Float16? Float16Exp?
Integer16   <-  X16+ ('.' X16*)?
            /   ({} {Word*}) -> MustX16
Float16     <-  '.' X16+
            /   '.' ({} {Word*}) -> MustX16
Float16Exp  <-  [pP] [+-]? [0-9]+
            /   ({} [pP] [+-]? {}) -> MissExponent
]]

grammar 'Name' [[
Name        <-  Sp ({} NameBody {})
            ->  Name
NameBody    <-  {[a-zA-Z_] [a-zA-Z0-9_]*}
FreeName    <-  Sp ({} {NameBody=>NotReserved} {})
            ->  Name
KeyWord     <-  Sp NameBody=>Reserved
MustName    <-  Name / DirtyName
DirtyName   <-  {} -> DirtyName
]]

grammar 'Exp' [[
Exp         <-  (UnUnit BinUnit*)
            ->  Binary
BinUnit     <-  (BinaryOp UnUnit?)
            ->  SubBinary
UnUnit      <-  ExpUnit
            /   (UnaryOp+ (ExpUnit / MissExp))
            ->  Unary
ExpUnit     <-  Nil
            /   Boolean
            /   String
            /   Number
            /   Dots
            /   Table
            /   Function
            /   Simple

Simple      <-  {| Prefix (Sp Suffix)* |}
            ->  Simple
Prefix      <-  Sp ({} PL DirtyExp DirtyPR {})
            ->  Paren
            /   Single
Single      <-  FreeName
            ->  Single
Suffix      <-  SuffixWithoutCall
            /   ({} PL SuffixCall DirtyPR {})
            ->  Call
SuffixCall  <-  Sp ({} {| (COMMA / Exp)+ |} {})
            ->  PackExpList
            /   %nil
SuffixWithoutCall
            <-  (DOT (Name / MissField))
            ->  GetField
            /   ({} BL DirtyExp DirtyBR {})
            ->  GetIndex
            /   (COLON (Name / MissMethod) NeedCall)
            ->  GetMethod
            /   ({} {| Table |} {})
            ->  Call
            /   ({} {| String |} {})
            ->  Call
NeedCall    <-  (!(Sp CallStart) {} -> MissPL)?
MissField   <-  {} -> MissField
MissMethod  <-  {} -> MissMethod
CallStart   <-  PL
            /   TL
            /   '"'
            /   "'"
            /   '[' '='* '['

DirtyExp    <-  Exp
            /   {} -> DirtyExp
MaybeExp    <-  Exp / MissExp
MissExp     <-  {} -> MissExp
ExpList     <-  Sp {| MaybeExp (Sp ',' MaybeExp)* |}

Dots        <-  DOTS
            ->  VarArgs

Table       <-  Sp ({} TL {| TableField* |} DirtyTR {})
            ->  Table
TableField  <-  COMMA
            /   SEMICOLON
            /   NewIndex
            /   NewField
            /   Exp
Index       <-  BL DirtyExp DirtyBR
NewIndex    <-  Sp ({} Index NeedAssign DirtyExp {})
            ->  NewIndex
NewField    <-  Sp ({} MustName ASSIGN DirtyExp {})
            ->  NewField

Function    <-  FunctionBody
            ->  Function
FuncArgs    <-  Sp ({} PL {| FuncArg+ |} DirtyPR {})
            ->  FuncArgs
            /   PL DirtyPR %nil
FuncArgsMiss<-  {} -> MissPL DirtyPR %nil
FuncArg     <-  DOTS
            /   Name
            /   COMMA
FunctionBody<-  FUNCTION FuncArgs
                    {| (!END Action)* |}
                NeedEnd
            /   FUNCTION FuncArgsMiss
                    {| %nil |}
                NeedEnd

-- 纯占位,修改了 `relabel.lua` 使重复定义不抛错
Action      <-  !END .
]]

-- NOTE(review): `InExpList` below repeats the `!DO` predicate (`!DO !DO !END`).
-- The duplicate is redundant but harmless; it is kept as found. Confirm
-- whether `!IN !DO !END` (as in `InNameList`) was intended.
grammar 'Action' [[
Action      <-  Sp (CrtAction / UnkAction)
CrtAction   <-  Semicolon
            /   Do
            /   Break
            /   Return
            /   Label
            /   GoTo
            /   If
            /   For
            /   While
            /   Repeat
            /   NamedFunction
            /   LocalFunction
            /   Local
            /   Set
            /   Call
            /   ExpInAction
UnkAction   <-  ({} {Word+})
            ->  UnknownAction
            /   ({} '//' {} (LongComment / ShortComment))
            ->  CCommentPrefix
            /   ({} {. (!Sps !CrtAction .)*})
            ->  UnknownAction
ExpInAction <-  Sp ({} Exp {})
            ->  ExpInAction

Semicolon   <-  Sp ';'
SimpleList  <-  {| Simple (Sp ',' Simple)* |}

Do          <-  Sp ({}
                'do' Cut
                    {| (!END Action)* |}
                NeedEnd)
            ->  Do

Break       <-  Sp ({} BREAK {})
            ->  Break

Return      <-  Sp ({} RETURN ReturnExpList {})
            ->  Return
ReturnExpList
            <-  Sp {| Exp (Sp ',' MaybeExp)* |}
            /   Sp {| !Exp !',' |}
            /   ExpList

Label       <-  Sp ({} LABEL MustName DirtyLabel {})
            ->  Label

GoTo        <-  Sp ({} GOTO MustName {})
            ->  GoTo

If          <-  Sp ({} {| IfHead IfBody* |} NeedEnd)
            ->  If

IfHead      <-  Sp (IfPart {}) -> IfBlock
            /   Sp (ElseIfPart {}) -> ElseIfBlock
            /   Sp (ElsePart {}) -> ElseBlock
IfBody      <-  Sp (ElseIfPart {}) -> ElseIfBlock
            /   Sp (ElsePart {}) -> ElseBlock
IfPart      <-  IF DirtyExp NeedThen
                    {| (!ELSEIF !ELSE !END Action)* |}
ElseIfPart  <-  ELSEIF DirtyExp NeedThen
                    {| (!ELSEIF !ELSE !END Action)* |}
ElsePart    <-  ELSE
                    {| (!ELSEIF !ELSE !END Action)* |}

For         <-  Loop / In

Loop        <-  LoopBody
            ->  Loop
LoopBody    <-  FOR LoopArgs NeedDo
                    {} {| (!END Action)* |}
                NeedEnd
LoopArgs    <-  MustName AssignOrEQ
                ({} {| (COMMA / !DO !END Exp)* |} {})
            ->  PackLoopArgs

In          <-  InBody
            ->  In
InBody      <-  FOR InNameList NeedIn InExpList NeedDo
                    {} {| (!END Action)* |}
                NeedEnd
InNameList  <-  ({} {| (COMMA / !IN !DO !END Name)* |} {})
            ->  PackInNameList
InExpList   <-  ({} {| (COMMA / !DO !DO !END Exp)* |} {})
            ->  PackInExpList

While       <-  WhileBody
            ->  While
WhileBody   <-  WHILE DirtyExp NeedDo
                    {| (!END Action)* |}
                NeedEnd

Repeat      <-  (RepeatBody {})
            ->  Repeat
RepeatBody  <-  REPEAT
                    {| (!UNTIL Action)* |}
                NeedUntil DirtyExp

LocalAttr   <-  {| (Sp '<' Sp MustName Sp LocalAttrEnd)+ |}
            ->  LocalAttr
LocalAttrEnd<-  '>' / {} -> MissGT
Local       <-  Sp ({} LOCAL LocalNameList ((AssignOrEQ ExpList) / %nil) {})
            ->  Local
Set         <-  Sp ({} SimpleList AssignOrEQ ExpList {})
            ->  Set
LocalNameList
            <-  {| LocalName (Sp ',' LocalName)* |}
LocalName   <-  (MustName LocalAttr?)
            ->  LocalName

Call        <-  Simple
            ->  SimpleCall

LocalFunction
            <-  Sp ({} LOCAL FunctionNamedBody)
            ->  LocalFunction

NamedFunction
            <-  FunctionNamedBody
            ->  NamedFunction
FunctionNamedBody
            <-  FUNCTION FuncName FuncArgs
                    {| (!END Action)* |}
                NeedEnd
            /   FUNCTION FuncName FuncArgsMiss
                    {| %nil |}
                NeedEnd
FuncName    <-  {| Single (Sp SuffixWithoutCall)* |}
            ->  Simple
            /   {} -> MissName %nil
]]

grammar 'Lua' [[
Lua         <-  Head?
                ({} {| Action* |} {}) -> Lua
                Sp
Head        <-  '#' (!%nl .)*
]]

--- Match `lua` against the grammar registered under `mode`.
-- Falls back to the 'Lua' grammar when `mode` names no compiled grammar.
-- @param self  unused here; kept so existing callers keep working
-- @param lua   source text to match
-- @param mode  grammar tag, e.g. 'Lua', 'Exp', 'Action'
-- @return the match result on success; on failure `nil` plus an error
--         record (see `errorpos`) carrying the failure position and label
return function (self, lua, mode)
    local gram = compiled[mode] or compiled['Lua']
    -- On failure the match yields nil, the failure label and the position.
    local r, label, pos = gram:match(lua)
    if not r then
        -- Forward the label so the record's `err` field is actually filled.
        return nil, errorpos(pos, label)
    end

    return r
end