diff options
Diffstat (limited to 'script/parser/grammar.lua')
-rw-r--r-- | script/parser/grammar.lua | 630 |
1 files changed, 630 insertions, 0 deletions
-- diff --git a/script/parser/grammar.lua b/script/parser/grammar.lua new file mode 100644 index 00000000..568b8544 --- /dev/null +++ b/script/parser/grammar.lua @@ -0,0 +1,630 @@
--
-- Lua source grammar, written as LPeg `re` fragments and compiled with the
-- label-aware `parser.relabel` / `lpeglabel` modules. The module returns a
-- function that matches a source string against one of the compiled grammars.
local re = require 'parser.relabel'
local m = require 'lpeglabel'
local ast = require 'parser.ast'

-- Accumulated grammar text; every `grammar` call prepends its fragment here.
local scriptBuf = ''
-- Compiled patterns, keyed by the tag passed to `grammar`.
local compiled = {}
local parser
-- Definition table handed to `re.compile`; names in it are referenced from
-- the grammars as `%name`, `-> name` or `=> name`.
local defs = ast.defs

-- `goto` is deliberately absent: it may be used as a name, and whether that
-- is legal is decided later.
local RESERVED = {
    ['and']      = true,
    ['break']    = true,
    ['do']       = true,
    ['else']     = true,
    ['elseif']   = true,
    ['end']      = true,
    ['false']    = true,
    ['for']      = true,
    ['function'] = true,
    ['if']       = true,
    ['in']       = true,
    ['local']    = true,
    ['nil']      = true,
    ['not']      = true,
    ['or']       = true,
    ['repeat']   = true,
    ['return']   = true,
    ['then']     = true,
    ['true']     = true,
    ['until']    = true,
    ['while']    = true,
}

-- Line break: "\r\n" is tried first so it is consumed as a single unit.
defs.nl = (m.P'\r\n' + m.S'\r\n')
-- Horizontal blank (space or tab) and its negative lookahead.
defs.s  = m.S' \t'
defs.S  = - defs.s
-- Replacement characters for the single-letter escapes in rule `EChar`.
defs.ea = '\a'
defs.eb = '\b'
defs.ef = '\f'
defs.en = '\n'
defs.er = '\r'
defs.et = '\t'
defs.ev = '\v'
-- Position captures mapped through a function returning a constant
-- (referenced from the grammars as `%nil`; `%false` is not used in this file).
defs['nil']   = m.Cp() / function () return nil end
defs['false'] = m.Cp() / function () return false end
-- Match-time predicate (`NameBody=>NotReserved`): true unless `str` is a
-- key of RESERVED.
defs.NotReserved = function (_, _, str)
    return not RESERVED[str]
end
-- Inverse predicate: true only when `str` is a key of RESERVED.
defs.Reserved = function (_, _, str)
    return RESERVED[str] == true
end
-- Capture handler that returns nothing.
defs.None = function () end
-- Position capture shifted forward by one.
defs.np = m.Cp() / function (n) return n+1 end

m.setmaxstack(1000)

-- Appended to every compiled grammar: succeed only at end of input,
-- otherwise throw the SYNTAX_ERROR label.
local eof = re.compile '!. / %{SYNTAX_ERROR}'

--- Build a registrar for the grammar fragment named `tag`.
-- The returned function prepends `script` to the shared buffer, compiles the
-- whole buffer with `defs`, and stores the result (followed by `eof`) in
-- `compiled[tag]`. Because fragments are prepended, the rules of the newest
-- fragment come first in the compiled text, and rules from every earlier
-- fragment remain available to it.
-- @param tag string key under which the compiled pattern is stored
-- @return function(script) that performs the compilation
local function grammar(tag)
    return function (script)
        scriptBuf = script .. '\r\n' .. scriptBuf
        compiled[tag] = re.compile(scriptBuf, defs) * eof
    end
end

--- Build the error record returned when matching fails.
-- @param pos failure position; 0 is used when it is nil/false
-- @param err optional error value stored as-is in the `err` field
-- @return table { type = 'UNKNOWN', start, finish, err }
local function errorpos(pos, err)
    return {
        type   = 'UNKNOWN',
        start  = pos or 0,
        finish = pos or 0,
        err    = err,
    }
end

-- Comments: `--` short comments, `--[=*[ ... ]=*]` long comments, and
-- C-style `/* ... */` blocks (captured via CLongComment).
grammar 'Comment' [[
Comment         <- LongComment / '--' ShortComment
LongComment     <- ('--[' {} {:eq: '='* :} {} '['
                    {(!CommentClose .)*}
                    (CommentClose {} / {} {}))
                -> LongComment
                /  (
                    {} '/*' {}
                    (!'*/' .)*
                    {} '*/' {}
                   )
                -> CLongComment
CommentClose    <- ']' =eq ']'
ShortComment    <- (!%nl .)*
]]

-- Skippable material: optional (`Sp`) and mandatory (`Sps`).
grammar 'Sp' [[
Sp  <- (Comment / %nl / %s)*
Sps <- (Comment / %nl / %s)+
]]

-- Keywords, escape sequences, operators, punctuation and the
-- `Dirty*` / `Need*` recovery rules shared by the other fragments.
grammar 'Common' [[
Word        <- [a-zA-Z0-9_]
Cut         <- !Word
X16         <- [a-fA-F0-9]
Rest        <- (!%nl .)*

AND         <- Sp {'and'}    Cut
BREAK       <- Sp 'break'    Cut
DO          <- Sp 'do'       Cut
            /  Sp ({} 'then' Cut {}) -> ErrDo
ELSE        <- Sp 'else'     Cut
ELSEIF      <- Sp 'elseif'   Cut
END         <- Sp 'end'      Cut
FALSE       <- Sp 'false'    Cut
FOR         <- Sp 'for'      Cut
FUNCTION    <- Sp 'function' Cut
GOTO        <- Sp 'goto'     Cut
IF          <- Sp 'if'       Cut
IN          <- Sp 'in'       Cut
LOCAL       <- Sp 'local'    Cut
NIL         <- Sp 'nil'      Cut
NOT         <- Sp 'not'      Cut
OR          <- Sp {'or'}     Cut
REPEAT      <- Sp 'repeat'   Cut
RETURN      <- Sp 'return'   Cut
THEN        <- Sp 'then'     Cut
            /  Sp ({} 'do' Cut {}) -> ErrThen
TRUE        <- Sp 'true'     Cut
UNTIL       <- Sp 'until'    Cut
WHILE       <- Sp 'while'    Cut

Esc         <- '\' -> ''
               EChar
EChar       <- 'a' -> ea
            /  'b' -> eb
            /  'f' -> ef
            /  'n' -> en
            /  'r' -> er
            /  't' -> et
            /  'v' -> ev
            /  '\'
            /  '"'
            /  "'"
            /  %nl
            /  ('z' (%nl / %s)*)       -> ''
            /  ({} 'x' {X16 X16})      -> Char16
            /  ([0-9] [0-9]? [0-9]?)   -> Char10
            /  ('u{' {} {Word*} '}')   -> CharUtf8
            -- 错误处理
            /  'x' {}                  -> MissEscX
            /  'u' !'{' {}             -> MissTL
            /  'u{' Word* !'}' {}      -> MissTR
            /  {}                      -> ErrEsc

BOR         <- Sp {'|'}
BXOR        <- Sp {'~'} !'='
BAND        <- Sp {'&'}
Bshift      <- Sp {BshiftList}
BshiftList  <- '<<'
            /  '>>'
Concat      <- Sp {'..'}
Adds        <- Sp {AddsList}
AddsList    <- '+'
            /  '-'
Muls        <- Sp {MulsList}
MulsList    <- '*'
            /  '//'
            /  '/'
            /  '%'
Unary       <- Sp {} {UnaryList}
UnaryList   <- NOT
            /  '#'
            /  '-'
            /  '~' !'='
POWER       <- Sp {'^'}

BinaryOp    <- Sp {} {'or'} Cut
            /  Sp {} {'and'} Cut
            /  Sp {} {'<=' / '>=' / '<'!'<' / '>'!'>' / '~=' / '=='}
            /  Sp {} ({} '=' {}) -> ErrEQ
            /  Sp {} ({} '!=' {}) -> ErrUEQ
            /  Sp {} {'|'}
            /  Sp {} {'~'}
            /  Sp {} {'&'}
            /  Sp {} {'<<' / '>>'}
            /  Sp {} {'..'} !'.'
            /  Sp {} {'+' / '-'}
            /  Sp {} {'*' / '//' / '/' / '%'}
            /  Sp {} {'^'}
UnaryOp     <- Sp {} {'not' Cut / '#' / '~' !'=' / '-' !'-'}

PL          <- Sp '('
PR          <- Sp ')'
BL          <- Sp '[' !'[' !'='
BR          <- Sp ']'
TL          <- Sp '{'
TR          <- Sp '}'
COMMA       <- Sp ','
SEMICOLON   <- Sp ';'
DOTS        <- Sp ({} '...') -> DOTS
DOT         <- Sp ({} '.' !'.') -> DOT
COLON       <- Sp ({} ':' !':') -> COLON
LABEL       <- Sp '::'
ASSIGN      <- Sp '=' !'='
AssignOrEQ  <- Sp ({} '==' {}) -> ErrAssign
            /  Sp '='

Nothing     <- {} -> Nothing

DirtyBR     <- BR {} / {} -> MissBR
DirtyTR     <- TR {} / {} -> MissTR
DirtyPR     <- PR {} / {} -> DirtyPR
DirtyLabel  <- LABEL / {} -> MissLabel
NeedPR      <- PR / {} -> MissPR
NeedEnd     <- END / {} -> MissEnd
NeedDo      <- DO / {} -> MissDo
NeedAssign  <- ASSIGN / {} -> MissAssign
NeedComma   <- COMMA / {} -> MissComma
NeedIn      <- IN / {} -> MissIn
NeedUntil   <- UNTIL / {} -> MissUntil
]]

grammar 'Nil' [[
Nil         <- Sp ({} -> Nil) NIL
]]

grammar 'Boolean' [[
Boolean     <- Sp ({} -> True)  TRUE
            /  Sp ({} -> False) FALSE
]]

-- Quoted strings (with escape substitution) and `[=*[ ... ]=*]` long strings.
grammar 'String' [[
String      <- Sp ({} StringDef {})
            -> String
StringDef   <- {'"'}
               {~(Esc / !%nl !'"' .)*~} -> 1
               ('"' / {} -> MissQuote1)
            /  {"'"}
               {~(Esc / !%nl !"'" .)*~} -> 1
               ("'" / {} -> MissQuote2)
            /  ('[' {} {:eq: '='* :} {} '[' %nl?
               {(!StringClose .)*} -> 1
               (StringClose / {}))
            -> LongString
StringClose <- ']' =eq ']'
]]

-- Decimal and hexadecimal numbers, plus the `LL` / `ULL` / `i` suffixes
-- and recovery for malformed digits and exponents.
grammar 'Number' [[
Number      <- Sp ({} {NumberDef} {}) -> Number
               NumberSuffix?
               ErrNumber?
NumberDef   <- Number16 / Number10
NumberSuffix<- ({} {[uU]? [lL] [lL]})      -> FFINumber
            /  ({} {[iI]})                 -> ImaginaryNumber
ErrNumber   <- ({} {([0-9a-zA-Z] / '.')+}) -> UnknownSymbol

Number10    <- Float10 Float10Exp?
            /  Integer10 Float10? Float10Exp?
Integer10   <- [0-9]+ ('.' [0-9]*)?
Float10     <- '.' [0-9]+
Float10Exp  <- [eE] [+-]? [0-9]+
            /  ({} [eE] [+-]? {}) -> MissExponent

Number16    <- '0' [xX] Float16 Float16Exp?
            /  '0' [xX] Integer16 Float16? Float16Exp?
Integer16   <- X16+ ('.' X16*)?
            /  ({} {Word*}) -> MustX16
Float16     <- '.' X16+
            /  '.' ({} {Word*}) -> MustX16
Float16Exp  <- [pP] [+-]? [0-9]+
            /  ({} [pP] [+-]? {}) -> MissExponent
]]

-- Identifiers. `FreeName` additionally rejects RESERVED words through the
-- `NotReserved` match-time predicate.
grammar 'Name' [[
Name        <- Sp ({} NameBody {})
            -> Name
NameBody    <- {[a-zA-Z_] [a-zA-Z0-9_]*}
FreeName    <- Sp ({} {NameBody=>NotReserved} {})
            -> Name
MustName    <- Name / DirtyName
DirtyName   <- {} -> DirtyName
]]

-- Expressions, tables and anonymous functions. The trailing `Action`, `Set`
-- and `Emmy` rules are placeholders: per the in-grammar note, `relabel.lua`
-- was modified so that the later redefinitions do not raise an error.
grammar 'Exp' [[
Exp         <- (UnUnit (BinaryOp (UnUnit / {} -> DirtyExp))*)
            -> Exp
UnUnit      <- ExpUnit
            /  UnaryOp+ (ExpUnit / {} -> DirtyExp)
ExpUnit     <- Nil
            /  Boolean
            /  String
            /  Number
            /  DOTS -> DotsAsExp
            /  Table
            /  Function
            /  Simple

Simple      <- (Prefix (Sp Suffix)*)
            -> Simple
Prefix      <- Sp ({} PL DirtyExp DirtyPR)
            -> Prefix
            /  FreeName
Index       <- ({} BL DirtyExp DirtyBR) -> Index
Suffix      <- DOT Name / DOT {} -> MissField
            /  Method (!(Sp CallStart) {} -> MissPL)?
            /  ({} Table {}) -> Call
            /  ({} String {}) -> Call
            /  Index
            /  ({} PL CallArgList DirtyPR) -> Call
Method      <- COLON Name / COLON {} -> MissMethod
CallStart   <- PL
            /  TL
            /  '"'
            /  "'"
            /  '[' '='* '['

DirtyExp    <- Exp
            /  {} -> DirtyExp
MaybeExp    <- Exp / MissExp
MissExp     <- {} -> MissExp
ExpList     <- Sp (MaybeExp (COMMA (MaybeExp))*)
            -> List
MustExpList <- Sp (Exp      (COMMA (MaybeExp))*)
            -> List
CallArgList <- Sp ({} (COMMA {} / Exp)+ {})
            -> CallArgList
            /  %nil
NameList    <- (MustName (COMMA MustName)*)
            -> List

ArgList     <- (DOTS -> DotsAsArg / Name / Sp {} COMMA)*
            -> ArgList

Table       <- Sp ({} TL TableFields? DirtyTR)
            -> Table
TableFields <- (Emmy / TableSep {} / TableField)+
TableSep    <- COMMA / SEMICOLON
TableField  <- NewIndex / NewField / Exp
NewIndex    <- Sp (Index NeedAssign DirtyExp)
            -> NewIndex
NewField    <- (MustName ASSIGN DirtyExp)
            -> NewField

Function    <- Sp ({} FunctionBody {})
            -> Function
FuncArg     <- PL {} ArgList {} NeedPR
            /  {} {} -> MissPL Nothing {}
FunctionBody<- FUNCTION BlockStart FuncArg
               (Emmy / !END Action)*
               BlockEnd
               NeedEnd

BlockStart  <- {} -> BlockStart
BlockEnd    <- {} -> BlockEnd

-- 纯占位,修改了 `relabel.lua` 使重复定义不抛错
Action      <- !END .
Set         <- END
Emmy        <- '---@'
]]

-- Statements. Ends with another placeholder `Emmy` rule that the 'Emmy'
-- fragment below redefines.
grammar 'Action' [[
Action      <- Sp (CrtAction / UnkAction)
CrtAction   <- Semicolon
            /  Do
            /  Break
            /  Return
            /  Label
            /  GoTo
            /  If
            /  For
            /  While
            /  Repeat
            /  NamedFunction
            /  LocalFunction
            /  Local
            /  Set
            /  Call
            /  ExpInAction
UnkAction   <- ({} {Word+})
            -> UnknownAction
            /  ({} '//' {} (LongComment / ShortComment))
            -> CCommentPrefix
            /  ({} {. (!Sps !CrtAction .)*})
            -> UnknownAction
ExpInAction <- Sp ({} Exp {})
            -> ExpInAction

Semicolon   <- SEMICOLON
            -> Skip
SimpleList  <- (Simple (COMMA Simple)*)
            -> List

Do          <- Sp ({} 'do' Cut DoBody NeedEnd {})
            -> Do
DoBody      <- (Emmy / !END Action)*
            -> DoBody

Break       <- BREAK ({} Semicolon* AfterBreak?)
            -> Break
AfterBreak  <- Sp !END !UNTIL !ELSEIF !ELSE Action
BreakStart  <- {} -> BreakStart
BreakEnd    <- {} -> BreakEnd

Return      <- (ReturnBody Semicolon* AfterReturn?)
            -> AfterReturn
ReturnBody  <- Sp ({} RETURN MustExpList? {})
            -> Return
AfterReturn <- Sp !END !UNTIL !ELSEIF !ELSE Action

Label       <- Sp ({} LABEL MustName DirtyLabel {}) -> Label

GoTo        <- Sp ({} GOTO MustName {}) -> GoTo

If          <- Sp ({} IfBody {})
            -> If
IfHead      <- (IfPart     -> IfBlock)
            /  ({} ElseIfPart -> ElseIfBlock)
            -> MissIf
            /  ({} ElsePart   -> ElseBlock)
            -> MissIf
IfBody      <- IfHead
               (ElseIfPart -> ElseIfBlock)*
               (ElsePart   -> ElseBlock)?
               NeedEnd
IfPart      <- IF DirtyExp THEN
               {} (Emmy / !ELSEIF !ELSE !END Action)* {}
            /  IF DirtyExp {}->MissThen
               {} {}
ElseIfPart  <- ELSEIF DirtyExp THEN
               {} (Emmy / !ELSE !ELSEIF !END Action)* {}
            /  ELSEIF DirtyExp {}->MissThen
               {} {}
ElsePart    <- ELSE
               {} (Emmy / !END Action)* {}

For         <- Loop / In
            /  FOR

Loop        <- Sp ({} LoopBody {})
            -> Loop
LoopBody    <- FOR LoopStart LoopFinish LoopStep NeedDo
               BreakStart
               (Emmy / !END Action)*
               BreakEnd
               NeedEnd
LoopStart   <- MustName AssignOrEQ DirtyExp
LoopFinish  <- NeedComma DirtyExp
LoopStep    <- COMMA DirtyExp
            /  NeedComma Exp
            /  Nothing

In          <- Sp ({} InBody {})
            -> In
InBody      <- FOR InNameList NeedIn ExpList NeedDo
               BreakStart
               (Emmy / !END Action)*
               BreakEnd
               NeedEnd
InNameList  <- &IN DirtyName
            /  NameList

While       <- Sp ({} WhileBody {})
            -> While
WhileBody   <- WHILE DirtyExp NeedDo
               BreakStart
               (Emmy / !END Action)*
               BreakEnd
               NeedEnd

Repeat      <- Sp ({} RepeatBody {})
            -> Repeat
RepeatBody  <- REPEAT
               BreakStart
               (Emmy / !UNTIL Action)*
               BreakEnd
               NeedUntil DirtyExp

LocalTag    <- (Sp '<' Sp MustName Sp LocalTagEnd)*
            -> LocalTag
LocalTagEnd <- '>' / {} -> MissGT
Local       <- (LOCAL LocalNameList (AssignOrEQ ExpList)?)
            -> Local
Set         <- (SimpleList AssignOrEQ ExpList?)
            -> Set
LocalNameList
            <- (LocalName (COMMA LocalName)*)
            -> List
LocalName   <- (MustName LocalTag)
            -> LocalName

Call        <- Simple
            -> SimpleCall

LocalFunction
            <- Sp ({} LOCAL FunctionNamedBody {})
            -> LocalFunction

NamedFunction
            <- Sp ({} FunctionNamedBody {})
            -> NamedFunction
FunctionNamedBody
            <- FUNCTION FuncName BlockStart FuncArg
               (Emmy / !END Action)*
               BlockEnd
               NeedEnd
FuncName    <- (MustName (DOT MustName)* FuncMethod?)
            -> Simple
FuncMethod  <- COLON Name / COLON {} -> MissMethod

-- 占位
Emmy        <- '---@'
]]

-- `---@...` annotations and plain `---` documentation comments.
grammar 'Emmy' [[
Emmy            <- EmmyAction
                /  EmmyComments
EmmyAction      <- EmmySp '---' %s* '@' EmmyBody ShortComment
EmmySp          <- (!'---' Comment / %s / %nl)*
EmmyComments    <- EmmyComment+
                -> EmmyComment
EmmyComment     <- EmmySp '---' %s* !'@' {(!%nl .)*}
EmmyBody        <- 'class'    %s+ EmmyClass    -> EmmyClass
                /  'type'     %s+ EmmyType     -> EmmyType
                /  'alias'    %s+ EmmyAlias    -> EmmyAlias
                /  'param'    %s+ EmmyParam    -> EmmyParam
                /  'return'   %s+ EmmyReturn   -> EmmyReturn
                /  'field'    %s+ EmmyField    -> EmmyField
                /  'generic'  %s+ EmmyGeneric  -> EmmyGeneric
                /  'vararg'   %s+ EmmyVararg   -> EmmyVararg
                /  'language' %s+ EmmyLanguage -> EmmyLanguage
                /  'see'      %s+ EmmySee      -> EmmySee
                /  'overload' %s+ EmmyOverLoad -> EmmyOverLoad
                /  EmmyIncomplete

EmmyName        <- ({} {[a-zA-Z_] [a-zA-Z0-9_]*})
                -> EmmyName
MustEmmyName    <- EmmyName / DirtyEmmyName
DirtyEmmyName   <- {} -> DirtyEmmyName
EmmyLongName    <- ({} {(!%nl .)+})
                -> EmmyName
EmmyIncomplete  <- MustEmmyName
                -> EmmyIncomplete

EmmyClass       <- (MustEmmyName EmmyParentClass?)
EmmyParentClass <- %s* {} ':' %s* MustEmmyName

EmmyType        <- EmmyFunctionType
                /  EmmyTableType
                /  EmmyArrayType
                /  EmmyCommonType
EmmyCommonType  <- EmmyTypeNames
                -> EmmyCommonType
EmmyTypeNames   <- EmmyTypeName (%s* {} '|' %s* !String EmmyTypeName)*
EmmyTypeName    <- EmmyFunctionType
                /  EmmyTableType
                /  EmmyArrayType
                /  MustEmmyName
EmmyTypeEnum    <- %s* (%nl %s* '---')? '|' EmmyEnum
                -> EmmyTypeEnum
EmmyEnum        <- %s* {'>'?} %s* String (EmmyEnumComment / (!%nl !'|' .)*)
EmmyEnumComment <- %s* '#' %s* {(!%nl .)*}

EmmyAlias       <- MustEmmyName %s* EmmyType EmmyTypeEnum*

EmmyParam       <- MustEmmyName %s* EmmyType %s* EmmyOption %s* EmmyTypeEnum*
EmmyOption      <- Table?
                -> EmmyOption

EmmyReturn      <- {} %nil     %nil                {} Table -> EmmyOption
                /  {} EmmyType (%s* EmmyName/%nil) {} EmmyOption

EmmyField       <- (EmmyFieldAccess MustEmmyName %s* EmmyType)
EmmyFieldAccess <- ({'public'}    Cut %s*)
                /  ({'protected'} Cut %s*)
                /  ({'private'}   Cut %s*)
                /  {} -> 'public'

EmmyGeneric     <- EmmyGenericBlock
                   (%s* ',' %s* EmmyGenericBlock)*
EmmyGenericBlock<- (MustEmmyName %s* (':' %s* EmmyType)?)
                -> EmmyGenericBlock

EmmyVararg      <- EmmyType

EmmyLanguage    <- MustEmmyName

EmmyArrayType   <- ({} MustEmmyName -> EmmyCommonType {} '[' DirtyBR)
                -> EmmyArrayType
                /  ({} PL EmmyCommonType DirtyPR '[' DirtyBR)
                -> EmmyArrayType

EmmyTableType   <- ({} 'table' Cut '<' %s* EmmyType %s* ',' %s* EmmyType %s* '>' {})
                -> EmmyTableType

EmmyFunctionType<- ({} 'fun' Cut %s* EmmyFunctionArgs %s* EmmyFunctionRtns {})
                -> EmmyFunctionType
EmmyFunctionArgs<- ('(' %s* EmmyFunctionArg %s* (',' %s* EmmyFunctionArg %s*)* DirtyPR)
                -> EmmyFunctionArgs
                /  '(' %nil DirtyPR -> None
                /  %nil
EmmyFunctionRtns<- (':' %s* EmmyType (%s* ',' %s* EmmyType)*)
                -> EmmyFunctionRtns
                /  %nil
EmmyFunctionArg <- MustEmmyName %s* ':' %s* EmmyType

EmmySee         <- {} MustEmmyName %s* '#' %s* MustEmmyName {}
EmmyOverLoad    <- EmmyFunctionType
]]

-- Whole-file entry rule: optional `#...` first line, then statements.
grammar 'Lua' [[
Lua         <- Head?
               (Emmy / Action)* -> Lua
               BlockEnd
               Sp
Head        <- '#' (!%nl .)*
]]

--- Match `lua` against a compiled grammar.
-- @param self unused
-- @param lua  source text to match
-- @param mode tag of the grammar to use (one of the names passed to
--             `grammar` above); falls back to 'Lua' when no pattern is
--             stored under that tag
-- @return the first match result on success; on failure `nil` followed by
--         the table built by `errorpos` from the third value `match` returned
return function (self, lua, mode)
    local gram = compiled[mode] or compiled['Lua']
    -- NOTE(review): the second return value of `match` (presumably the
    -- lpeglabel failure label) is discarded, so the record's `err` field is
    -- always nil - confirm whether callers would want it forwarded.
    local r, _, pos = gram:match(lua)
    if not r then
        local err = errorpos(pos)
        return nil, err
    end

    return r
end