Module:User:Cscott/llpeg

return (function()

local builders = {}

local function register(name, f)

builders[name] = f

end

register('advent.compat', function() return require Module:User:Cscott/compat end)

register('llpeg.types', function(myrequire)

myrequire('strict')

local compat = myrequire('advent.compat')

local CHARMAX = 0x7F -- maximum codepoint for charsets

-- metatable for pattern objects; will be filled in later

local metareg = {}

local enum = function(keys)

local Enum = {}

Enum.__index = Enum

function Enum:__tostring() return self.name end

function Enum:pairs() return keys end

function Enum:type() return Enum end

for name, value in pairs(keys) do

Enum[name] = setmetatable({ name = name, value = value }, Enum)

end

return Enum

end

local CapKind = enum{

close = "close", -- not used in trees */

position = "position",

const = "constant", -- ktable[key] is Lua constant

backref = "backref", -- ktable[key] is "name" of group to get capture

arg = "argument", -- 'key' is arg's number

simple = "simple", -- next node is pattern

table = "table", -- next node is pattern

["function"] = "function", -- ktable[key] is function; next node is pattern

acc = "acc", -- ktable[key] is function; next node is pattern

query = "query", -- ktable[key] is table; next node is pattern

string = "string", -- ktable[key] is string; next node is pattern

num = "num", -- numbered capture; 'key' is number of value to return

subst = "substitution", -- substitution capture; next node is pattern

fold = "fold", -- ktable[key] is function; next node is pattern

runtime = "runtime", -- not used in trees (is uses another type for tree)

group = "group", -- ktable[key] is group's "name"

}

local TTag = enum{

Char = "char", -- 'n' has unicode codepoint

Set = "set", -- 'set' has sparse array codepoint->true for codepoint <=CHARMAX

-- 'rest' indicates whether all codepoints > CHARMAX should be

-- part of the set (true) or not (false)

Any = "any",

True = "true",

False = "false",

UTFR = "utf8.range", --[[ range of UTF-8 codepoints;

'from' has initial codepoint; 'to' has final codepoint ]]--

Rep = "rep", -- 'sib1' *

Seq = "seq", -- 'sib1' 'sib2'

Choice = "choice", -- 'sib1' / 'sib2'

Not = "not", -- !'sib1'

And = "and", -- &'sib1'

Call = "call", -- 'sib2' is rule being called; otherwise same as TOpenCall

OpenCall = "opencall", -- 'key' is rule name

Rule = "rule", --[[ 'key' is rule name (but key == nil for unused rules);

'sib1' is rule's pattern pre-rule; 'sib2' is next rule;

'n' is rule's sequential number, 'name' is rule name (even

for unused rules) ]]--

XInfo = "xinfo", -- extra info (not used)

Grammar = "grammar", -- 'sib1' is initial (and first) rule, 'n' is # rules

Behind = "behind", -- 'sib1' is pattern, 'n' is how much to go back

Capture = "capture", --[[ captures: 'cap' is kind of capture (enum 'CapKind');

'key' is Lua value associated with capture;

'sib1' is capture body ]]--

RunTime = "run-time", --[[ run-time capture: 'key' is Lua function;

'sib1' is capture body ]]--

Throw = "throw", -- labeled failure: 'key' is label's name,

-- sib2 is associated recovery rule

}

local PE = enum{

nullable = "nullable",

nofail = "nofail",

}

-- virtual machine instructions

local Opcode = enum{

Any = "any", -- if no char, fail

Char = "char", -- if char != aux, fail

Set = "set", -- if char not in buff, fail

TestAny = "testany", -- in no char, jump to 'offset'

TestChar = "testchar", -- if char != aux, jump to 'offset'

TestSet = "testset", -- if char not in buff, jump to 'offset'

Span = "span", -- read a span of chars in buff

UTFR = "utf-range", -- if codepoint not in range [offset, utf_to], fail

Behind = "behind", -- walk back 'aux' characters (fail if not possible)

Ret = "ret", -- return from a rule

End = "end", -- end of pattern

Choice = "choice", -- stack a choice; next fail will jump to 'offset'

PredChoice = "pred_choice", -- labeled failure: stack a choice; changes label env next fail will jump to 'offset'

Jmp = "jmp", -- jump to 'offset'

Call = "call", -- call rule at 'offset'

OpenCall = "open_call", -- call rule number 'key' (must be closed to a ICall)

Commit = "commit", -- pop choice and jump to 'offset'

PartialCommit = "partial_commit", -- update top choice to current position and jump

BackCommit = "back_commit", -- backtrack like "fail" but jump to its own 'offset'

FailTwice = "failtwice", -- pop one choice and then fail

Fail = "fail", -- go back to saved state on choice and jump to saved offset

Giveup = "giveup", -- internal use

FullCapture = "fullcapture", -- complete capture of last 'off' chars

OpenCapture = "opencapture", -- start a capture

CloseCapture = "closecapture",

CloseRunTime = "closeruntime",

Throw = "throw", -- fails with a given label --labeled failure

ThrowRec = "throw_rec", -- fails with a given label and call rule at 'offset' --labeled failure

Empty = "--", -- to fill empty slots left by optimizations

}

-- helper for visitor pattern definitions

function define(dispatch, which, f)

for _,v in pairs(which) do

assert(v ~= nil) -- catch typos

dispatch[v] = f

end

end

local numsiblings = {}

define(numsiblings, {

TTag.Char, TTag.Set, TTag.Any,

TTag.True, TTag.False, TTag.UTFR,

TTag.Call, TTag.OpenCall,

TTag.Throw,

}, 0)

define(numsiblings, {

TTag.Rep, TTag.Not, TTag.And, TTag.Grammar,

TTag.Behind, TTag.Capture, TTag.RunTime,

}, 1)

define(numsiblings, {

TTag.Seq, TTag.Choice, TTag.Rule,

}, 2)

-- more help for visitor functions

local function_name_registry = {}

function register_fname(name, f)

assert(type(name) == "string")

assert(type(f) == "function")

function_name_registry[f] = name

end

function report_ferror(f, msg)

local fname = function_name_registry[f]

if fname ~= nil then

msg = fname .. ": " .. msg

end

error(msg)

end

function define_type_visitor(tbl)

local dispatch = {}

for keys,func in pairs(tbl) do

if type(keys) ~= "table" then

keys = { keys }

end

define(dispatch, keys, func)

end

local visit

visit = function(val, ...)

local a = dispatch["assert"]

if a ~= nil then a(val, ...) end -- assert preconditions

local ty = type(val)

if ty == 'table' and getmetatable(val) == metareg then

ty = 'pattern'

end

local f = dispatch[ty]

if f ~= nil then return f(val, ...) end

f = dispatch.default

if f == nil then

report_ferror(visit, "no default for " .. ty)

end

return f(val, ...)

end

return visit

end

function define_tree_visitor(tbl, opt_name)

local dispatch = {}

for keys,func in pairs(tbl) do

if type(keys) ~= "table" or getmetatable(keys) == TTag then

keys = { keys }

end

define(dispatch, keys, func)

end

local visit

visit = function(tree, ...)

if tree == nil then report_ferror(visit, "nil tree") end

local a = dispatch["assert"]

if a ~= nil then a(tree, ...) end -- assert preconditions

local f = dispatch[tree.tag]

if f ~= nil then return f(tree, ...) end

f = dispatch.default

if f == nil then

report_ferror(visit, "no default for " .. tree.tag)

end

return f(tree, ...)

end

return visit

end

function define_opcode_visitor(tbl)

local dispatch = {}

for keys,func in pairs(tbl) do

if type(keys) ~= "table" or getmetatable(keys) == Opcode then

keys = { keys }

end

define(dispatch, keys, func)

end

local visit

visit = function(op, ...)

if op == nil then report_ferror(visit, "nil op") end

local a = dispatch["assert"]

if a ~= nil then a(op, ...) end -- assert preconditions

local f = dispatch[op.code]

if f ~= nil then return f(op, ...) end

f = dispatch.default

if f == nil then

report_ferror(visit, "no default for " .. op.code)

end

return f(op, ...)

end

return visit

end

-- helper for module imports

function from(mod, list)

local result = {}

for _,v in ipairs(list) do

table.insert(result, mod[v])

end

return compat.unpack(result)

end

function newcharset()

return setmetatable({

tag = TTag.Set,

code = nil,

rest = false,

set = {}

}, metareg)

end

local fullset = newcharset()

for i = 0,CHARMAX do

fullset.set[i] = true

end

fullset.rest = true -- make sure non-ascii unicode chars are included!

assert(fullset.tag == TTag.Set)

return {

CHARMAX = CHARMAX,

CapKind = CapKind,

Opcode = Opcode,

PE = PE,

TTag = TTag,

define = define,

define_tree_visitor = define_tree_visitor,

enum = enum,

from = from,

fullset = fullset,

metareg = metareg,

newcharset = newcharset,

numsiblings = numsiblings,

register_fname = register_fname,

}

end)

register('llpeg.print', function(myrequire)

myrequire('strict')

local compat = myrequire('advent.compat')

local from = myrequire('llpeg.types').from

local

CHARMAX,

CapKind,

Opcode,

TTag,

define,

define_tree_visitor,

numsiblings,

_ = from(myrequire('llpeg.types'), {

'CHARMAX',

'CapKind',

'Opcode',

'TTag',

'define',

'define_tree_visitor',

'numsiblings',

})

function printcharset(tree)

local result = "["

local i = 0

while i <= CHARMAX do

local first = i

while tree.set[i] and i <= CHARMAX do

i = i + 1

end

if first == (i - 1) then -- unary range

result = result .. string.format("(%02x)", first)

elseif first < (i-1) then -- non-empty range

result = result .. string.format("(%02x-%02x)", first, i - 1)

end

i = i + 1

end

if tree.rest then

result = result .. "(80-FFFF)"

end

return result .. "]"

end

function printjmp(op, pc)

return "-> " .. op.target

end

local printinst_helper = define_opcode_visitor{

[Opcode.Char] = function(op, pc)

return string.format("'%c' (%02x)", op.aux, op.aux)

end,

[Opcode.TestChar] = function(op, pc)

return string.format("'%c' (%02x)", op.aux, op.aux) .. printjmp(op, pc)

end,

[Opcode.UTFR] = function(op, pc)

return string.format("%d - %d", op.from, op.to)

end,

[Opcode.FullCapture] = function(op, pc)

return string.format("%s (size = %s) (idx = %s)",

op.cap.value, op.aux, op.key)

end,

[Opcode.OpenCapture] = function(op, pc)

return string.format("%s (idx = %s)",

op.cap.value, op.key)

end,

[Opcode.Set] = function(op, pc)

return printcharset(op)

end,

[Opcode.TestSet] = function(op, pc)

return printcharset(op) .. printjmp(op, pc)

end,

[Opcode.Span] = function(op, pc)

return printcharset(op)

end,

[Opcode.OpenCall] = function(op, pc)

return string.format("-> %d", op.target) -- rule number

end,

[Opcode.Behind] = function(op, pc)

return string.format("%d", op.aux)

end,

[{Opcode.Jmp, Opcode.Call, Opcode.Commit, Opcode.Choice,

Opcode.PartialCommit, Opcode.BackCommit, Opcode.TestAny,

Opcode.PredChoice}] = function(op, pc)

return printjmp(op, pc)

end,

[Opcode.Throw] = function(op, pc) -- labeled failure

return string.format("(idx = %s)", op.key)

end,

[Opcode.ThrowRec] = function(op, pc)

return printjmp(op, pc) .. string.format("(idx = %s)", op.key)

end,

default = function() return '' end,

}

function printinst(pc, op, accum)

table.insert(accum, string.format("%02d: %s ", pc, op.code.value))

table.insert(accum, printinst_helper(op, pc))

table.insert(accum, "\n")

return accum

end

function printpatt(code, accum)

for pc,op in ipairs(code) do

printinst(pc, op, accum)

end

return accum

end

function printcap(cap, indent)

print(string.format("%s%s", string.rep(' ', indent), cap))

end

function printcap2close(captures, ncaptures, i, indent)

local head = captures[i]

i = i + 1

printcap(head, indent) -- print head capture

while i <= ncaptures and head:inside(captures[i]) do

i = printcap2close(captures, ncaptures, i, indent + 2) -- print nested captures

end

if i <= ncaptures and head:isopencap() then

assert(captures[i]:isclosecap())

printcap(captures[i], indent) -- print and skip close capture

i = i + 1

end

return i

end

function printcaplist(captures, ncaptures)

-- for debugging, first print a raw list of captures

if ncaptures == nil then ncaptures = #captures end

for i=1,ncaptures do

printcap(captures[i], 0)

end

print(">======");

local i=1

while i <= ncaptures and not captures[i]:isclosecap() do

i = printcap2close(captures, ncaptures, i, 0)

end

if i > ncaptures then

print("")

end

print("=======");

end

local printtree_helper = define_tree_visitor{

[TTag.Char] = function(tree)

local c = compat.utf8char(tree.n)

if c:find("%C") ~= nil then -- printable?

return " '" .. c .. "'"

else

return string.format(" (%02X)", tree.n)

end

end,

[TTag.Set] = function(tree)

return printcharset(tree)

end,

[TTag.UTFR] = function(tree)

return " " .. tree.from .. " - " .. tree.to

end,

[{TTag.OpenCall, TTag.Call}] = function(tree)

local ret = string.format(" key: %s", tree.key)

local rule = tree.sib2

if rule ~= nil then

ret = ret .. " (rule: " .. rule.n .. ")"

end

return ret

end,

[TTag.Behind] = function(tree)

return " " .. tree.n

end,

[TTag.Capture] = function(tree)

return string.format(" kind: '%s' key: %s", tree.cap.value, tree.key)

end,

[TTag.Rule] = function(tree)

return string.format(" key: %s", tree.key)

end,

[TTag.XInfo] = function(tree)

return " n: " .. tree.n

end,

[TTag.Grammar] = function(tree)

return " " .. tree.n -- number of rules

end,

[TTag.Throw] = function(tree)

return string.format(" key: %s", tree.key)

end,

default = function(tree) return '' end

}

function printtree(tree, indent, accum)

local sibs = numsiblings[tree.tag]

table.insert(accum, string.rep(' ', indent))

table.insert(accum, tree.tag.value)

table.insert(accum, printtree_helper(tree))

table.insert(accum, "\n")

if tree.tag == TTag.Rule then

sibs = 1 -- don't print sib2

elseif tree.tag == TTag.Grammar then

local rule = tree.sib1

for i=1,tree.n do

printtree(rule, indent + 2, accum)

rule = rule.sib2

end

sibs = 0 -- siblings already handled

end

if sibs >= 1 then

printtree(tree.sib1, indent + 2, accum)

if sibs >= 2 then

printtree(tree.sib2, indent + 2, accum)

end

end

return accum

end

local PREFIX = "" -- could also be "l." or "lpeg." etc

local printrepl_helper

printrepl_helper = define_tree_visitor{

[TTag.True] = function(tree, buf)

table.insert(buf, PREFIX .. 'P""')

end,

[TTag.Any] = function(tree, buf)

table.insert(buf, PREFIX .. 'P(1)')

end,

[TTag.Char] = function(tree, buf)

table.insert(buf, PREFIX .. 'P"')

local c = compat.utf8char(tree.n)

if c:find("%C") ~= nil then -- printable?

table.insert(buf, c)

else

table.insert(buf, string.format('\\%02X', tree.n))

end

table.insert(buf, '"')

end,

[TTag.Set] = function(tree, buf)

local nbuf = {}

local insertchar = function(cp)

local c = compat.utf8char(cp)

if string.find(c, "^[^%w%p ]") ~= nil then

table.insert(nbuf, string.format('\\x%02X', cp))

else

table.insert(nbuf, c)

end

end

local nargs = 0

local inserttwo = function(cp1, cp2)

if nargs > 0 then table.insert(nbuf, ',') end

nargs = nargs + 1

table.insert(nbuf, '"')

insertchar(cp1)

insertchar(cp2)

table.insert(nbuf, '"')

end

local i = 0

while i <= CHARMAX do

local first = i

while tree.set[i] and i <= CHARMAX do

i = i + 1

end

if first == (i - 1) then -- unary range

inserttwo(first, first)

elseif first < (i-1) then -- non-empty range

inserttwo(first, i-1)

end

i = i + 1

end

local r = table.concat(nbuf)

if nargs == 1 then

r = PREFIX .. 'S' .. r

else

r = PREFIX .. 'S(' .. r .. ')'

end

if tree.rest then

table.insert(buf, '(')

table.insert(buf, r)

table.insert(buf, ' + ')

table.insert(buf, PREFIX)

table.insert(buf, 'utfR(0x80, 0x10FFFF))')

else

table.insert(buf, r)

end

end,

[TTag.UTFR] = function(tree, buf)

table.insert(buf, string.format("%sutfR(0x%04X, 0x%04X)", PREFIX, tree.from, tree.to))

end,

[{TTag.OpenCall, TTag.Call}] = function(tree, buf)

table.insert(buf, string.format('%sV"%s"', PREFIX, tree.key))

end,

[TTag.Not] = function(tree, buf)

table.insert(buf, '-(')

printrepl_helper(tree.sib1, buf)

table.insert(buf, ')')

end,

[TTag.Seq] = function(tree, buf)

table.insert(buf, "(")

printrepl_helper(tree.sib1, buf)

table.insert(buf, " * ")

printrepl_helper(tree.sib2, buf)

table.insert(buf, ")")

end,

[TTag.Choice] = function(tree, buf)

table.insert(buf, "(")

printrepl_helper(tree.sib1, buf)

table.insert(buf, " + ")

printrepl_helper(tree.sib2, buf)

table.insert(buf, ")")

end,

[TTag.Rep] = function(tree, buf)

printrepl_helper(tree.sib1, buf)

table.insert(buf, "^0")

end,

--[[

[TTag.Behind] = function(tree)

return " " .. tree.n

end,

]]--

[TTag.Capture] = function(tree, buf)

local repl = define_type_visitor{

string = function(v)

return '"' .. v .. '"' -- xxx should handle escapes

end,

default = tostring,

}

local name = nil

local show_patt = false

local show_key = false

if tree.cap == CapKind.simple then

name = 'C'

show_patt = true

elseif tree.cap == CapKind.subst then

name = 'Cs'

show_patt = true

elseif tree.cap == CapKind.table then

name = 'Ct'

show_patt = true

elseif tree.cap == CapKind.pos then

name = 'Cp'

elseif tree.cap == CapKind.arg then

name = 'Carg'

show_key = true

elseif tree.cap == CapKind.backref then

name = 'Cb'

show_key = true

elseif tree.cap == CapKind.group then

name = 'Cg'

show_patt = true

show_key = (tree.key ~= nil)

end

if name ~= nil then

table.insert(buf, PREFIX)

table.insert(buf, name)

table.insert(buf, '(')

if show_patt then

printrepl_helper(tree.sib1, buf)

if show_key then

table.insert(buf, ', ')

end

end

if show_key then

table.insert(buf, repl(tree.key))

end

table.insert(buf, ')')

return

end

if tree.cap == CapKind.string or

tree.cap == CapKind.num or

tree.cap == CapKind.query or

tree.cap == CapKind['function'] then

printrepl_helper(tree.sib1, buf)

table.insert(buf, ' / ')

table.insert(buf, repl(tree.key))

return

end

-- fallback

table.insert(buf, string.format("", tostring(tree.tag)))

end,

[TTag.Rule] = function(tree, buf)

local key = tree.name

if type(key) == 'number' then key = string.format("[%d]", key) end

table.insert(buf, key)

table.insert(buf, " = ")

printrepl_helper(tree.sib1, buf)

end,

[TTag.Grammar] = function(tree, buf)

table.insert(buf, PREFIX .. "P{")

local rule = tree.sib1

local r = {}

local first_rule_name = rule.name

r[first_rule_name] = rule

rule = rule.sib2

local names = {}

for i=2,tree.n do

table.insert(names, rule.name)

r[rule.name] = rule

rule = rule.sib2

end

-- sort rule names

table.sort(names)

table.insert(names, 1, first_rule_name)

-- now print in order

for _,name in ipairs(names) do

printrepl_helper(r[name], buf)

table.insert(buf, ", ")

end

table.insert(buf, "}")

end,

--[[

[TTag.Throw] = function(tree)

return " key: " .. tree.key .. " (rule: " .. tree.sib2.cap .. ")"

end,

]]--

default = function(tree, buf)

table.insert(buf, string.format("", tostring(tree.tag)))

end,

}

function printrepl(tree)

local buf = {}

printrepl_helper(tree, buf)

return table.concat(buf)

end

return {

printcaplist = printcaplist,

printcharset = printcharset,

printinst = printinst,

printpatt = printpatt,

printrepl = printrepl,

printtree = printtree,

}

end)

register('llpeg.code', function(myrequire)

myrequire('strict')

local compat = myrequire('advent.compat')

local from = myrequire('llpeg.types').from

local

CHARMAX,

CapKind,

Opcode,

PE,

TTag,

define,

define_tree_visitor,

fullset,

newcharset,

numsiblings,

register_fname,

_ = from(myrequire('llpeg.types'), {

'CHARMAX',

'CapKind',

'Opcode',

'PE',

'TTag',

'define',

'define_tree_visitor',

'fullset',

'newcharset',

'numsiblings',

'register_fname',

})

local printinst = myrequire('llpeg.print').printinst

local TRACE_INSTRUCTIONS = false

-- signals a "no-instruction"

local NOINST = nil

-- don't optimize captures longer than this

local MAXOFF = 15

-- forward declarations

local codegen

local CompileState = {}

CompileState.__index = CompileState

--[[

  • {======================================================
  • Analysis and some optimizations
  • =======================================================

]]--

--[[

  • Check whether a charset is empty (returns IFail), singleton (IChar),
  • full (IAny), or none of those (ISet). When singleton, '*c' returns
  • which character it is. (When generic set, the set was the input,
  • so there is no need to return it.)

]]--

function charsettype(cs)

local count = 0

local candidate

for i,_ in pairs(cs.set) do

candidate = i

count = count + 1

end

if cs.rest then

if count == (CHARMAX + 1) then

return Opcode.Any -- full set

end

elseif count == 0 then

return Opcode.Fail -- empty set

elseif count == 1 then

return Opcode.Char, candidate -- single char

end

return Opcode.Set -- neither full nor empty nor singleton

end

-- A few basic operations on charsets; returns new object

function cs_clone(cs)

local result = newcharset()

for i,_ in pairs(cs.set) do

result.set[i] = true

end

result.rest = cs.rest

return result

end

function cs_complement(cs)

local result = newcharset()

for i=0,CHARMAX do

if not cs.set[i] then

result.set[i] = true

end

end

result.rest = not cs.rest

return result

end

function cs_intersection(a, b)

local result = newcharset()

for i,_ in pairs(a.set) do

if a.set[i] and b.set[i] then

result.set[i] = true

end

end

result.rest = a.rest and b.rest

return result

end

function cs_union(a, b)

local result = newcharset()

for i=0,CHARMAX do

if a.set[i] or b.set[i] then

result.set[i] = true

end

end

result.rest = a.rest or b.rest

return result

end

function cs_diff(a, b)

local result = newcharset()

for i=0,CHARMAX do

if a.set[i] and not b.set[i] then

result.set[i] = true

end

end

result.rest = a.rest and not b.rest

return result

end

function cs_disjoint(a, b)

if a.rest == b.rest then return false end

for i,_ in pairs(a.set) do

if b.set[i] then return false end

end

for i,_ in pairs(b.set) do

if a.set[i] then return false end

end

return true

end

function cs_equal(a, b)

if a.rest ~= b.rest then return false end

for i,_ in pairs(a.set) do

if not b.set[i] then return false end

end

for i,_ in pairs(b.set) do

if not a.set[i] then return false end

end

return true

end

--[[

  • If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a
  • charset and return it; else return nil.

]]--

local tocharset = define_tree_visitor{

[TTag.Set] = function(v)

return v -- copy set

end,

[TTag.Char] = function(v)

-- only one char

if v.n <= CHARMAX then

local t = newcharset()

t.set[v.n] = true

return t

else

return nil

end

end,

[TTag.Any] = function(v)

return fullset

end,

[TTag.False] = function(v)

return newcharset()

end,

default = function(v)

return nil

end,

}

register_fname("tocharset", tocharset)

--[[

  • Visit a TCall node taking care to stop recursion. If node not yet
  • visited, return 'f(rule for call)', otherwise return 'def' (default
  • value)

]]--

function CompileState:callrecursive(tree, f, default_value, ...)

if tree.tag ~= TTag.Call then

error("unexpected tree tag")

end

local rule = self.grammar.ruletab[tree.key]

if rule.tag ~= TTag.Rule then

error("unexpected tree sibling")

end

if tree.seen == true then

return default_value -- node already visited

else

-- first visit

local oldseen = tree.seen

tree.seen = true

local result = f(rule, ...)

tree.seen = oldseen -- restore tree

return result

end

end

--[[

  • Check whether a pattern tree has captures

]]--

local hascaptures

hascaptures = define_tree_visitor{

[{TTag.Capture, TTag.RunTime}] = function(tree, cs)

return true

end,

[TTag.Call] = function(tree, cs)

assert(cs ~= nil)

return cs:callrecursive(tree, hascaptures, false, cs)

end,

[TTag.Rule] = function(tree, cs)

-- do not follow siblings

return hascaptures(tree.sib1, cs)

end,

[TTag.OpenCall] = function(tree, cs)

error("should not happen")

end,

[TTag.Grammar] = function(tree, cs)

-- make a fake compile state to hold the grammar, if necessary

if cs == nil then cs = CompileState:new(nil) end

return cs:withGrammar(tree, hascaptures, tree.sib1, cs)

end,

default = function(tree, cs)

local n = numsiblings[tree.tag]

if n == 1 then

return hascaptures(tree.sib1, cs) -- tail call

elseif n == 2 then

if hascaptures(tree.sib1, cs) then return true end

return hascaptures(tree.sib2, cs) -- tail call

elseif n == 0 then

return false

else

error("how many siblings does this have?")

end

end,

}

function CompileState:hascaptures(t) return hascaptures(t, self) end

register_fname("hascaptures", hascaptures)

--[[

  • Checks how a pattern behaves regarding the empty string,
  • in one of two different ways:
  • A pattern is *nullable* if it can match without consuming any character;
  • A pattern is *nofail* if it never fails for any string
  • (including the empty string).
  • The difference is only for predicates and run-time captures;
  • for other patterns, the two properties are equivalent.
  • (With predicates, &'a' is nullable but not nofail. Of course,
  • nofail => nullable.)
  • These functions are all convervative in the following way:
  • p is nullable => nullable(p)
  • nofail(p) => p cannot fail
  • The function assumes that TOpenCall is not nullable;
  • this will be checked again when the grammar is fixed.
  • Run-time captures can do whatever they want, so the result
  • is conservative.

]]--

local checkaux

checkaux = define_tree_visitor{

[{

TTag.Char, TTag.Set, TTag.Any, TTag.UTFR, TTag.False,

TTag.OpenCall, TTag.Throw,

}] = function(tree, pred, cs)

return false -- not nullable

end,

[{TTag.Rep,TTag.True}] = function(tree, pred, cs)

return true -- no fail

end,

[{TTag.Not,TTag.Behind}] = function(tree, pred, cs)

-- can match empty, but can fail

if pred == PE.nofail then

return false

else

return true

end

end,

[TTag.And] = function(tree, pred, cs)

-- can match empty; fail iff body does

if pred == PE.nullable then

return true

end

return checkaux(tree.sib1, pred, cs) -- tail call

end,

[TTag.RunTime] = function(tree, pred, cs)

-- can fail; match empty iff body does

if pred == PE.nofail then

return false

end

return checkaux(tree.sib1, pred, cs) -- tail call

end,

[TTag.Seq] = function(tree, pred, cs)

if not checkaux(tree.sib1, pred, cs) then

return false

end

return checkaux(tree.sib2, pred, cs) -- tail call

end,

[TTag.Choice] = function(tree, pred, cs)

if checkaux(tree.sib2, pred, cs) then

return true

end

return checkaux(tree.sib1, pred, cs) -- tail call

end,

[{ TTag.Capture, TTag.Rule, TTag.XInfo, }] = function(tree, pred, cs)

return checkaux(tree.sib1, pred, cs)

end,

[TTag.Grammar] = function(tree, pred, cs)

-- make a fake compile state to hold the grammar, if necessary

if cs == nil then cs = CompileState:new(nil) end

return cs:withGrammar(tree, checkaux, tree.sib1, pred, cs)

end,

[TTag.Call] = function(tree, pred, cs)

-- open calls are assumed not nullable; checked again after grammar

-- is fixed

if cs == nil then return false end

return checkaux(cs.grammar.ruletab[tree.key], pred, cs)

end,

}

register_fname("checkaux", checkaux)

function nofail(t, cs) return checkaux(t, PE.nofail, cs) end

function CompileState:nofail(t) return nofail(t, self) end

function nullable(t, cs) return checkaux(t, PE.nullable, cs) end

function CompileState:nullable(t) return nullable(t, self) end

function nullable_with_grammar(t, grm)

local cs = CompileState:new(nil)

return cs:withGrammar(grm, nullable, t, cs)

end

-- Note that we are counting characters, not bytes

local fixedlen, fixedlen_helper

fixedlen_helper = define_tree_visitor{

[{TTag.Char, TTag.Set, TTag.Any, TTag.UTFR}] = function(tree, len)

return len + 1

end,

[{TTag.False, TTag.True, TTag.Not, TTag.And, TTag.Behind}] = function(tree, len)

return len

end,

[{TTag.Rep, TTag.RunTime, TTag.OpenCall, TTag.Throw,}] = function(tree, len)

return -1 -- variable

end,

[{TTag.Capture, TTag.Rule, TTag.XInfo,}] = function(tree, len, cs)

return fixedlen_helper(tree.sib1, len, cs)

end,

[TTag.Grammar] = function(tree, len, cs)

-- make a fake compile state to hold the grammar, if necessary

if cs == nil then cs = CompileState:new(nil) end

return cs:withGrammar(tree, fixedlen_helper, tree.sib1, len, cs)

end,

[TTag.Call] = function(tree, len, cs)

-- If evaluating outside the context of a grammar, conservatively

-- return "variable"

if cs == nil then return -1 end

-- otherwise, carefully recurse

local n1 = cs:callrecursive(tree, fixedlen, -1, cs)

if n1 < 0 then return -1 end -- variable

return len + n1

end,

[TTag.Seq] = function(tree, len, cs)

local n1 = fixedlen_helper(tree.sib1, len, cs)

if n1 < 0 then return -1 end -- variable

return fixedlen_helper(tree.sib2, n1, cs)

end,

[TTag.Choice] = function(tree, len, cs)

local n1 = fixedlen_helper(tree.sib1, len, cs)

local n2 = fixedlen_helper(tree.sib2, len, cs)

if n1 ~= n2 or n1 < 0 then

return -1

else

return n1

end

end,

}

function fixedlen(tree, cs)

return fixedlen_helper(tree, 0, cs) -- supply default 0 for 'len'

end

function CompileState:fixedlen(t) return fixedlen(t, self) end

register_fname("fixedlen_helper", fixedlen_helper)

--[[

  • Computes the 'first set' of a pattern.
  • The result is a conservative aproximation:
  • match p ax -> x (for some x) ==> a belongs to first(p)
  • or
  • a not in first(p) ==> match p ax -> fail (for all x)
  • The set 'follow' is the first set of what follows the
  • pattern (full set if nothing follows it).
  • The function returns 0 when this resulting set can be used for
  • test instructions that avoid the pattern altogether.
  • A non-zero return can happen for two reasons:
  • 1) match p -> ==> return has bit 1 set
  • (tests cannot be used because they would always fail for an empty input);
  • 2) there is a match-time capture ==> return has bit 2 set
  • (optimizations should not bypass match-time captures).

]]--

local getfirst

getfirst = define_tree_visitor{

[TTag.Char] = function(t, follow, cs)

if t.n <= CHARMAX then return 0, tocharset(t) end

-- conservative approximation!

local s = newcharset()

s.rest = true

return 0, s

end,

[{ TTag.Set, TTag.Any, TTag.False }] = function(t, follow, cs)

return 0, tocharset(t)

end,

[TTag.UTFR] = function(t, follow, cs)

-- conservative approximation!

local firstset = newcharset()

if t.from <= CHARMAX then

for i=t.from, math.min(CHARMAX, t.to) do

firstset.set[i] = true

end

end

if t.to > CHARMAX then

-- conservative approximation of non-ascii unicode range

firstset.rest = true

end

return 0, firstset

end,

[TTag.True] = function(t, follow, cs)

return 1, follow -- 1 because this accepts the empty string

end,

[TTag.Throw] = function(t, follow, cs)

-- labeled failure: must always throw the label

return 1, fullset

end,

[TTag.Choice] = function(t, follow, cs)

local firstset = newcharset()

local e1,e1set = getfirst(t.sib1, follow, cs)

local e2,e2set = getfirst(t.sib2, follow, cs)

local firstset = cs_union(e1set, e2set)

local ret = 0 -- awkward lua5.1 way to say "e1 | e2"

if (e1 % 2) == 1 or (e2 % 2) == 1 then

ret = ret + 1

end

e1,e2 = compat.rshift(e1, 1), compat.rshift(e2, 1)

if (e1 % 2) == 1 or (e2 % 2) == 1 then

ret = ret + 2

end

return ret, firstset

end,

[TTag.Seq] = function(t, follow, cs)

if not nullable(t.sib1, cs) then

-- when p1 is not nullable, p2 has nothing to contribute

return getfirst(t.sib1, fullset, cs) -- tail call

else -- FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl))

local e2,csaux = getfirst(t.sib2, follow, cs)

local e1,firstset = getfirst(t.sib1, csaux, cs)

if e1 == 0 then

return 0, firstset -- 'e1' ensures that first can be used

elseif compat.rshift(e1, 1) % 2 == 1 or compat.rshift(e2, 1) % 2 == 1 then

-- one of the children has a matchtime?

return 2, firstset -- pattern has a matchtime capture

else

return e2, firstset -- else depends on e2

end

end

end,

[TTag.Rep] = function(t, follow, cs)

local _,firstset = getfirst(t.sib1, follow, cs)

return 1, cs_union(firstset, follow, cs) -- accepts the empty string

end,

[{ TTag.Capture,TTag.Rule,TTag.XInfo }] = function(t, follow, cs)

return getfirst(t.sib1, follow, cs) -- tail call

end,

[TTag.Grammar] = function(t, follow, cs)

return cs:withGrammar(t, getfirst, t.sib1, follow, cs)

end,

[TTag.RunTime] = function(t, follow, cs)

-- function invalidates any follow info

local e,firstset = getfirst(t.sib1, fullset, cs)

if e ~= 0 then

-- function is not "protected"?

return 2,firstset

else

-- pattern inside capture ensures first can be used

return 0,firstset

end

end,

[TTag.Call] = function(t, follow, cs)

local rule = cs.grammar.ruletab[t.key]

return getfirst(rule, follow, cs) -- tail call

end,

[TTag.And] = function(t, follow, cs)

local e,firstset = getfirst(t.sib1, follow, cs)

return e, cs_intersection(firstset, follow, cs)

end,

[{ TTag.Not, TTag.Behind }] = function(t, follow, cs)

if t.tag == TTag.Not then

local firstset = tocharset(t.sib1)

if firstset ~= nil then

return 1,cs_complement(firstset) -- could match empty input

end

end

-- the TNot or TBehind gives no new information

-- call getfirst only to check for math-time captures

local e,_ = getfirst(t.sib1, follow, cs)

if e%2 == 0 then e = e + 1 end -- set the lsb; could match empty input

return e, follow -- uses follow

end,

}

function CompileState:getfirst(t, follow) return getfirst(t, follow, self) end

register_fname("getfirst", getfirst)

--[[

  • If 'headfail(tree)' true, then 'tree' can fail only depending on the
  • next character of the subject.

-- ie, a single character of lookahead is enough to evaluate the pattern

-- rooted at this node

]]--

local headfail

headfail = define_tree_visitor{

[{TTag.Char, TTag.Set, TTag.Any,

TTag.False}] = function(t, cs)

return true

end,

[{TTag.True, TTag.Rep, TTag.RunTime, TTag.Not,

-- even though we are codepoint-based, we don't have a TestUTFR instruction

-- so we can't use a Test instruction on the first character, which is

-- implicitly what headfail is checking for.

TTag.UTFR,

TTag.Behind, TTag.Throw}] = function(t, cs)

return false

end,

[{TTag.Capture, TTag.Rule,

TTag.XInfo, TTag.And}] = function(t, cs)

return headfail(t.sib1, cs) -- tail call

end,

[TTag.Grammar] = function(t, cs)

return cs:withGrammar(t, headfail, t.sib1, cs)

end,

[TTag.Call] = function(t, cs)

local rule = cs.grammar.ruletab[t.key]

return headfail(rule, cs) -- tail call

end,

[TTag.Seq] = function(t, cs)

if not nofail(t.sib2, cs) then

-- if the second child could possibly fail, then we can't

-- evaluate the entire seq based just on the first child

return false

end

return headfail(t.sib1, cs) -- tail call

end,

[TTag.Choice] = function(t, cs)

-- both children need to be headfail for this to be headfail

if not headfail(t.sib1, cs) then

return false

end

return headfail(t.sib2, cs) -- tail call

end,

}

function CompileState:headfail(t) return headfail(t, self) end

register_fname("headfail", headfail)

--[[

  • Check whether the code generation for the given tree can benefit
  • from a follow set (to avoid computing the follow set when it is
  • not needed)

]]--

local needfollow

needfollow = define_tree_visitor{

[{TTag.Char, TTag.Set, TTag.Any, TTag.UTFR,

TTag.False, TTag.True, TTag.And, TTag.Not,

TTag.RunTime, TTag.Grammar, TTag.Call, TTag.Behind,

TTag.Throw, }] = function(tree) return false end,

[{TTag.Choice, TTag.Rep}] = function(tree) return true end,

[TTag.Capture] = function(tree) return needfollow(tree.sib1) end,

[TTag.Seq] = function(tree) return needfollow(tree.sib2) end,

}

register_fname("needfollow", needfollow)

--[[

  • ======================================================
  • Code generation
  • ======================================================

]]--

local Instruction = {}

Instruction.__index = Instruction

function Instruction:new(arg)

local opcode = arg[1]

if opcode == nil then error("no opcode") end

-- target is rule # for open calls before correction, and absolute pc after

local instr = {

code = opcode,

exec = opcode.exec, -- copy the exec function from the opcode!

aux = arg.aux, -- used for the "primary argument"

key = arg.key, -- used for string-valued arguments

target = arg.target, -- used for jmp-like instructions

from = arg.from, -- inclusive start, for ranges

to = arg.to, -- inclusive end, for ranges

set = arg.set, -- charset <= CHARMAX

rest = arg.rest, -- include characters above CHARMAX?

cap = arg.cap, -- used for "capture kind"

}

setmetatable(instr, self)

instr:setCode(opcode) -- opportunity to add tracing logic!

return instr

end

function Instruction:setCode(opcode)

self.code = opcode

local exec = opcode.exec

if TRACE_INSTRUCTIONS then

local str

self.exec = function(self, state, ...)

if str == nil then

str = table.concat(printinst(0, self, { "Executing " })):gsub("\n","")

end

print(state.bytePos, state.codepoint, str)

return exec(self, state, ...)

end

else

self.exec = exec

end

end

-- state for the compiler

function CompileState:new(p)

local cs = {

p = p,

}

setmetatable(cs, self)

return cs

end

function CompileState:withGrammar(g, f, ...)

local lastGrammar = self.grammar

self.grammar = g

local result = compat.pack(f(...))

self.grammar = lastGrammar

return compat.unpack(result)

end

function CompileState:codegen(tree, opt, tt, fl)

assert(fl.tag == TTag.Set)

-- just a little helper

return codegen(tree, self, opt, tt, fl)

end

function CompileState:getinstr(i)

return self.p.code[i]

end

function CompileState:addinstruction(arg)

local code = self.p.code

table.insert(code, Instruction:new(arg))

return #code

end

function CompileState:gethere()

local code = self.p.code

return 1 + #code

end

function CompileState:jumptothere(pc, where)

if pc ~= NOINST then

local code = self.p.code

code[pc].target = where

end

end

function CompileState:jumptohere(pc)

self:jumptothere(pc, self:gethere())

end

function codethrow(cs, throw)

local rule = nil

if cs.grammar ~= nil then

-- we only lookup/match *string* rule names, not numeric indices

rule = cs.grammar.ruletab[tostring(throw.key)]

end

if rule ~= nil then

return cs:addinstruction{

Opcode.ThrowRec,

key=throw.key, -- rule name / error label

target=rule.n -- recovery rule number

}

else

return cs:addinstruction{

Opcode.Throw,

key=throw.key, -- rule name / error label

-- no recovery rule

}

end

end

function codeutfr(cs, tree)

return cs:addinstruction{

Opcode.UTFR,

from = tree.from,

to = tree.to,

}

end

--[[

  • Code an IChar instruction, or IAny if there is an equivalent
  • test dominating it

]]--

function codechar(cs, codepoint, tt)

if tt ~= NOINST and

cs:getinstr(tt).code == Opcode.TestChar and

cs:getinstr(tt).aux == codepoint then

cs:addinstruction{Opcode.Any}

else

cs:addinstruction{Opcode.Char, aux=codepoint,}

end

end

--[[

  • Add a charset posfix to an instruction

]]--

function addcharset(cs, codepoint)

--[[

static void addcharset (CompileState *compst, const byte *cs) {

int p = gethere(compst);

int i;

for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++)

nextinstruction(compst); /* space for buffer */

/* fill buffer with charset */

loopset(j, getinstr(compst, p).buff[j] = cs[j]);

]]--

end

--[[

  • code a char set, optimizing unit sets for IChar, "complete"
  • sets for IAny, and empty sets for IFail; also use an IAny
  • when instruction is dominated by an equivalent test.

]]--

function codecharset(cs, tree, tt)

local op,codepoint = charsettype(tree)

if op == Opcode.Char then

return codechar(cs, codepoint, tt)

elseif op == Opcode.Set then

-- non-trivial set?

if tt ~= NOINST and

cs:getinstr(tt).code == Opcode.TestSet and

cs_equal(tree, cs:getinstr(tt)) then

return cs:addinstruction{Opcode.Any}

else

return cs:addinstruction{

Opcode.Set,

set = tree.set, -- XXX ensure immutable

rest = tree.rest,

}

end

else

return cs:addinstruction{op} -- Any or Fail

end

end

--[[

  • code a test set, optimizing unit sets for ITestChar, "complete"
  • sets for ITestAny, and empty sets for IJmp (always fails).
  • 'e' is nonzero iff test should accept the empty string. (Test
  • instructions in the current VM never accept the empty string.)

]]--

function codetestset(cs, tree, e)

if e ~= 0 then return NOINST end

local op,codepoint = charsettype(tree)

if op == Opcode.Fail then

return cs:addinstruction{Opcode.Jmp, target = NOINST} -- always jump

elseif op == Opcode.Any then

return cs:addinstruction{Opcode.TestAny, target = NOINST}

elseif op == Opcode.Char then

return cs:addinstruction{

Opcode.TestChar,

target = NOINST,

aux = codepoint,

}

elseif op == Opcode.Set then

return cs:addinstruction{

Opcode.TestSet,

target = NOINST,

set = tree.set, -- XXX ensure immutable

rest = tree.rest,

}

else

error("unreachable")

end

end

--[[

  • == behind n;

    (where n = fixedlen(p))

]]--

function codebehind(cs, tree)

if tree.n > 0 then

cs:addinstruction{ Opcode.Behind, aux = tree.n }

end

return cs:codegen(tree.sib1, false, NOINST, fullset)

end

--[[

  • Choice; optimizations:
  • - when p1 is headfail or when first(p1) and first(p2) are disjoint,
  • than a character not in first(p1) cannot go to p1 and a character
  • in first(p1) cannot go to p2, either because p1 will accept
  • (headfail) or because it is not in first(p2) (disjoint).
  • (The second case is not valid if p1 accepts the empty string,
  • as then there is no character at all...)
  • - when p2 is empty and opt is true; a IPartialCommit can reuse
  • the Choice already active in the stack.

]]--

function codechoice(cs, p1, p2, opt, fl)

local emptyp2 = (p2.tag == TTag.True)

local e1, cs1 = cs:getfirst(p1, fullset)

local headfailp1 = cs:headfail(p1)

local e2, cs2

if not headfailp1 and e1 == 0 then

e2, cs2 = cs:getfirst(p2, fl) -- avoid computing unless necessary

end

if headfailp1 or (e1 == 0 and cs_disjoint(cs1, cs2)) then

-- == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2:

local test = codetestset(cs, cs1, 0)

local jmp = NOINST

cs:codegen(p1, false, test, fl)

if not emptyp2 then

jmp = cs:addinstruction{Opcode.Jmp, target = NOINST }

end

cs:jumptohere(test)

cs:codegen(p2, opt, NOINST, fl)

cs:jumptohere(jmp)

elseif opt and emptyp2 then

-- p1? == IPartialCommit; p1

cs:jumptohere(cs:addinstruction{Opcode.PartialCommit, target = NOINST})

cs:codegen(p1, true, NOINST, fullset)

else

-- ==

-- test(first(p1)) -> L1; choice L1; ; commit L2; L1: ; L2:

local test = codetestset(cs, cs1, e1)

local pchoice = cs:addinstruction{Opcode.Choice, target = NOINST}

cs:codegen(p1, emptyp2, test, fullset)

local pcommit = cs:addinstruction{Opcode.Commit, target = NOINST}

cs:jumptohere(pchoice)

cs:jumptohere(test)

cs:codegen(p2, opt, NOINST, fl)

cs:jumptohere(pcommit)

end

end

--[[

  • And predicate
  • optimization: fixedlen(p) = n ==> <&p> ==

    ; behind n

  • (valid only when 'p' has no captures)

]]--

function codeand(cs, tree, tt)

--[[ labeled failure: optimization disabled because in case of a failure it

does not report the expected error position (the current subject position

when begin the matching of <&p>) ]]--

local pchoice = cs:addinstruction{Opcode.PredChoice, target = NOINST}

cs:codegen(tree, false, tt, fullset)

local pcommit = cs:addinstruction{Opcode.BackCommit, target = NOINST}

cs:jumptohere(pchoice)

cs:addinstruction{Opcode.Fail}

cs:jumptohere(pcommit)

end

--[[

  • Captures: if pattern has fixed (and not too big) length, and it
  • has no nested captures, use a single IFullCapture instruction
  • after the match; otherwise, enclose the pattern with OpenCapture -
  • CloseCapture.

]]--

function codecapture(cs, tree, tt, fl)

local len = cs:fixedlen(tree.sib1)

if len >= 0 and len <= MAXOFF and not cs:hascaptures(tree.sib1) then

cs:codegen(tree.sib1, false, tt, fl)

cs:addinstruction{

Opcode.FullCapture,

cap = tree.cap,

key = tree.key, -- capture name

aux = len,

}

else

assert(tree.cap ~= nil)

cs:addinstruction({

Opcode.OpenCapture,

cap = tree.cap,

key = tree.key, -- capture name

})

cs:codegen(tree.sib1, false, tt, fl)

cs:addinstruction({

Opcode.CloseCapture,

cap = CapKind.close,

})

end

end

function coderuntime(cs, tree, tt)

cs:addinstruction({

Opcode.OpenCapture,

cap = CapKind.group,

key = tree.key, -- capture *function*

})

cs:codegen(tree.sib1, false, tt, fullset)

cs:addinstruction({

Opcode.CloseRunTime,

cap = CapKind.close,

})

end

--[[

  • Repetition; optimizations:
  • When pattern is a charset, can use special instruction ISpan.
  • When pattern is head fail, or if it starts with characters that
  • are disjoint from what follows the repetions, a simple test
  • is enough (a fail inside the repetition would backtrack to fail
  • again in the following pattern, so there is no need for a choice).
  • When 'opt' is true, the repetion can reuse the Choice already
  • active in the stack.

]]--

function coderep(cs, tree, opt, fl)

local st = tocharset(tree)

if st ~= nil then

return cs:addinstruction{

Opcode.Span,

set = st.set,

rest = st.rest,

}

end

local e1,st = cs:getfirst(tree, fullset)

if cs:headfail(tree) or (e1 == 0 and cs_disjoint(st, fl)) then

-- L1: test (fail(p1)) -> L2;

; jmp L1; L2:

local test = codetestset(cs, st, 0)

cs:codegen(tree, false, test, fullset)

local jmp = cs:addinstruction{Opcode.Jmp, target = NOINST}

cs:jumptohere(test)

cs:jumptothere(jmp, test)

else

-- test(fail(p1)) -> L2; choice L2; L1:

; partialcommit L1; L2:

-- or (if 'opt'): partialcommit L1; L1:

; partialcommit L1;

local test = codetestset(cs, st, e1)

local pchoice = NOINST

if opt then

cs:jumptohere(cs:addinstruction{Opcode.PartialCommit, target = NOINST})

else

pchoice = cs:addinstruction{Opcode.Choice, target = NOINST}

end

local l2 = cs:gethere()

cs:codegen(tree, false, NOINST, fullset)

local commit = cs:addinstruction{Opcode.PartialCommit, target = NOINST}

cs:jumptothere(commit, l2)

cs:jumptohere(pchoice)

cs:jumptohere(test)

end

end

--[[

  • Not predicate; optimizations:
  • In any case, if first test fails, 'not' succeeds, so it can jump to
  • the end. If pattern is headfail, that is all (it cannot fail
  • in other parts); this case includes 'not' of simple sets. Otherwise,
  • use the default code (a choice plus a failtwice).

]]--

function codenot(cs, tree)

local e,st = cs:getfirst(tree, fullset)

local test = codetestset(cs, st, e)

if cs:headfail(tree) then

-- test (fail(p1)) -> L1; fail; L1:

cs:addinstruction{Opcode.Fail}

else

-- test(fail(p))-> L1; choice L1;

; failtwice; L1:

local pchoice = cs:addinstruction{Opcode.PredChoice, target = NOINST }

cs:codegen(tree, false, NOINST, fullset)

cs:addinstruction{Opcode.FailTwice}

cs:jumptohere(pchoice)

end

cs:jumptohere(test)

end

-- find the final destination of a sequence of jumps

function finaltarget(code, pc)

while code[pc].code == Opcode.Jmp do

pc = code[pc].target

end

return pc

end

-- final label (after traversing any jumps)

function finallabel(code, pc)

return finaltarget(code, code[pc].target)

end

--[[

  • change open calls to calls, using list 'positions' to find
  • correct offsets; also optimize tail calls

]]--

function correctcalls(cs, positions, from, to)

local code = cs.p.code

for i=from,(to-1) do

local op = code[i]

if op.code == Opcode.OpenCall or op.code == Opcode.ThrowRec then

local n = op.target -- rule number

local rule = positions[n] -- rule position

if rule == from or code[rule - 1].code == Opcode.Ret then

-- sanity check! ok!

else

error("bad rule position")

end

if op.code == Opcode.OpenCall then

if code[finaltarget(code, i+1)].code == Opcode.Ret then

-- call; ret => tail call

op:setCode(Opcode.Jmp)

else

op:setCode(Opcode.Call) -- open call no more

end

end

op.target = rule

end

end

end

--[[

  • Code for a grammar:
  • call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:

]]--

function codegrammar(cs, tree)

local firstcall = cs:addinstruction{Opcode.Call, target = NOINST} -- call initial rule

local jumptoend = cs:addinstruction{Opcode.Jmp, target = NOINST} -- jump to the end

local start = cs:gethere() -- here starts the initial rule

cs:jumptohere(firstcall)

local positions = {}

local rule = tree.sib1

for i=1,tree.n do

local pattern = rule.sib1

positions[i] = cs:gethere() -- save rule position

cs:codegen(rule.sib1, false, NOINST, fullset) -- code rule

cs:addinstruction{Opcode.Ret}

rule = rule.sib2

end

if rule.tag ~= TTag.True then error("impossible") end

cs:jumptohere(jumptoend)

correctcalls(cs, positions, start, cs:gethere())

end

function codecall(cs, tree)

local rule = cs.grammar.ruletab[tree.key]

assert(rule ~= nil)

assert(rule.n ~= nil)

return cs:addinstruction{

Opcode.OpenCall, -- to be corrected later

target = rule.n -- rule number

}

end

--[[

  • Code first child of a sequence
  • (second child is called in-place to allow tail call)
  • Return 'tt' for second child

]]--

function codeseq1(cs, p1, p2, tt, fl)

assert(fl.tag == TTag.Set)

if needfollow(p1) then

local _, fl1 = cs:getfirst(p2, fl) -- p1 follow is p2 first

cs:codegen(p1, false, tt, fl1)

else

-- use fullset as follow

cs:codegen(p1, false, tt, fullset)

end

if cs:fixedlen(p1) ~= 0 then -- can 'p1' consume anything?

return NOINST -- invalidate test

else

return tt -- else 'tt' still protects sib2

end

end

--[[

  • Main code-generation function: dispatch to auxiliar functions
  • according to kind of tree. ('needfollow' should return true
  • only for consructions that use 'fl'.)

]]--

--[[

  • code generation is recursive; 'opt' indicates that the code is being
  • generated as the last thing inside an optional pattern (so, if that
  • code is optional too, it can reuse the 'IChoice' already in place for
  • the outer pattern). 'tt' points to a previous test protecting this
  • code (or NOINST). 'fl' is the follow set of the pattern.

]]--

codegen = define_tree_visitor{

[TTag.Char] = function(tree, cs, opt, tt, fl)

return codechar(cs, tree.n, tt)

end,

[TTag.Any] = function(tree, cs, opt, tt, fl)

return cs:addinstruction{Opcode.Any}

end,

[TTag.Set] = function(tree, cs, opt, tt, fl)

return codecharset(cs, tree, tt)

end,

[TTag.True] = function(tree, cs, opt, tt, fl)

return -- do nothing

end,

[TTag.False] = function(tree, cs, opt, tt, fl)

return cs:addinstruction{Opcode.Fail}

end,

[TTag.UTFR] = function(tree, cs, opt, tt, fl)

return codeutfr(cs, tree)

end,

[TTag.Choice] = function(tree, cs, opt, tt, fl)

return codechoice(cs, tree.sib1, tree.sib2, opt, fl)

end,

[TTag.Rep] = function(tree, cs, opt, tt, fl)

return coderep(cs, tree.sib1, opt, fl)

end,

[TTag.Behind] = function(tree, cs, opt, tt, fl)

return codebehind(cs, tree)

end,

[TTag.Not] = function(tree, cs, opt, tt, fl)

return codenot(cs, tree.sib1)

end,

[TTag.And] = function(tree, cs, opt, tt, fl)

return codeand(cs, tree.sib1, tt)

end,

[TTag.Capture] = function(tree, cs, opt, tt, fl)

return codecapture(cs, tree, tt, fl)

end,

[TTag.RunTime] = function(tree, cs, opt, tt, fl)

return coderuntime(cs, tree, tt)

end,

[TTag.Grammar] = function(tree, cs, opt, tt, fl)

return cs:withGrammar(tree, codegrammar, cs, tree)

end,

[TTag.Call] = function(tree, cs, opt, tt, fl)

return codecall(cs, tree)

end,

[TTag.Seq] = function(tree, cs, opt, tt, fl)

tt = codeseq1(cs, tree.sib1, tree.sib2, tt, fl) -- code 'p1'

return cs:codegen(tree.sib2, opt, tt, fl) -- tail call

end,

[TTag.Throw] = function(tree, cs, opt, tt, fl)

return codethrow(cs, tree)

end,

["assert"] = function(tree, cs, opt, tt, fl)

assert(fl.tag == TTag.Set)

assert(opt ~= 0)

end,

}

register_fname("codegen", codegen)

--[[

  • Optimize jumps and other jump-like instructions.
  • * Update labels of instructions with labels to their final
  • destinations (e.g., choice L1; ... L1: jmp L2: becomes
  • choice L2)
  • * Jumps to other instructions that do jumps become those
  • instructions (e.g., jump to return becomes a return; jump
  • to commit becomes a commit)

]]--

function peephole(cs)

local code = cs.p.code

local jmpswitch

local switch = define_opcode_visitor{

-- instructions with labels

[{Opcode.Choice, Opcode.Call, Opcode.Commit, Opcode.PartialCommit,

Opcode.BackCommit, Opcode.TestChar, Opcode.TestSet,

Opcode.TestAny}] = function(op, i)

cs:jumptothere(i, finallabel(code, i))

end,

[Opcode.Jmp] = function(op, i)

local ft = finaltarget(code, i)

jmpswitch(code[ft], i, ft) -- jumping to what?

end,

default = function() end,

}

jmpswitch = define_opcode_visitor{

-- instructions with unconditional implicit jumps

[{Opcode.Ret,Opcode.Fail,Opcode.FailTwice,Opcode.End}] = function(op, i, ft)

code[i]:setCode(code[ft].code) -- jump becomes that instruction

end,

-- instructions with unconditional explicit jumps

[{Opcode.Commit, Opcode.PartialCommit, Opcode.BackCommit}] = function(op, i, ft)

local fft = finallabel(code, ft)

code[i]:setCode(code[ft].code) -- jump becomes that instruction

cs:jumptothere(i, fft) -- with an optimized target

switch(code[i], i) -- reoptimize the label

end,

default = function(op, i, ft)

cs:jumptothere(i, ft) -- optimize label

end,

}

for i=1,#code do

switch(code[i], i)

end

end

-- thread the instructions to speed up dispatch during execution

function thread(cs)

local code = cs.p.code

for i=1,#code-1 do

code[i].next = code[i+1]

if code[i].target ~= nil then

code[i].branch = code[code[i].target]

end

end

end

function compile(p)

local compst = CompileState:new(p)

p.code = {}

assert(fullset.tag == TTag.Set)

compst:codegen(p, false, NOINST, fullset)

compst:addinstruction{Opcode.End}

peephole(compst)

thread(compst)

return p.code

end

return {

Instruction = Instruction,

compile = compile,

cs_clone = cs_clone,

cs_complement = cs_complement,

cs_diff = cs_diff,

cs_intersection = cs_intersection,

cs_union = cs_union,

fixedlen = fixedlen,

hascaptures = hascaptures,

nofail = nofail,

nullable = nullable,

nullable_with_grammar = nullable_with_grammar,

tocharset = tocharset,

}

end)

register('llpeg.utf8util', function(myrequire)

myrequire('strict')

local utf8util = {}

function utf8util.codepointAt(s, pos)

local c1 = string.byte(s, pos)

if c1 <= 0x7F then

return c1, 1

end

local c2 = string.byte(s, pos + 1)

if c1 <= 0xDF then

return ((c1 % 0x20 ) * 0x40) + (c2 % 0x40), 2

end

local c3 = string.byte(s, pos + 2)

if c1 <= 0xEF then

return (((c1 % 0x10) * 0x40) + (c2 % 0x40)) * 0x40 + (c3 % 0x40), 3

end

local c4 = string.byte(s, pos + 3)

if c1 <= 0xF7 then

return ((((c1 % 0x08) * 0x40) + (c2 % 0x40)) * 0x40 + (c3 % 0x40)) * 0x40 + (c4 % 0x40), 4

end

error( "bad utf8" )

end

-- same as utf8.offset in Lua 5.3 standard library

function utf8util.offset(s, n, i)

if n > 0 then error("unimplemented") end

while n < 0 do

i = i - 1

if i < 1 then return nil end

local c = string.byte(s, i)

if c < 0x80 or c > 0xBF then

n = n + 1

end

end

return i

end

return utf8util

end)

register('llpeg.list', function(myrequire)

local List = {}

List.__index = List

function List:new()

return setmetatable({ n = 0 }, self)

end

function List:__len()

return self.n

end

function List:push(val)

local n = self.n + 1

self[n] = val

self.n = n

end

function List:pop()

local n = self.n

assert(n > 0)

local old = self[n]

self[n] = nil

self.n = n - 1

return old

end

function List:insert(pos, val)

for i=self.n,pos,-1 do

self[i+1] = self[i]

end

self[pos] = val

self.n = self.n + 1

end

return List

end)

register('llpeg.cap', function(myrequire)

myrequire('strict')

local compat = myrequire('advent.compat')

local from = myrequire('llpeg.types').from

local

CapKind,

_ = from(myrequire('llpeg.types'), {

'CapKind',

})

local

printcaplist,

_ = from(myrequire('llpeg.print'), {

'printcaplist',

})

local List = myrequire('llpeg.list')

local MAXRECLEVEL = 200

local Capture = {}

Capture.__index = Capture

-- kind is CapKind of the capture

-- bytePos is the subject position (in bytes)

-- byteLen is the length of the capture (in bytes)

-- extra is extra info (group name, arg index, etc)

function Capture:new(kind, bytePos, byteLen, extra)

assert(getmetatable(kind) == CapKind)

return setmetatable({

kind = kind, bytePos = bytePos, byteLen = byteLen, extra = extra,

}, self)

end

function Capture:__tostring()

return string.format("Capture{kind=%s, pos=%d, len=%s, extra=%s}",

self.kind, self.bytePos, self.byteLen, self.extra)

end

function Capture:isopencap()

return self.byteLen == nil

end

-- true if c2 is (any number of levels) inside self

function Capture:inside(c2)

if self:isopencap() then

return not c2:isclosecap()

else

return c2.bytePos < (self.bytePos + self.byteLen)

end

end

function Capture:isclosecap()

return self.kind == CapKind.close

end

--[[

  • Return the size of capture 'cap'. If it is an open capture, 'close'
  • must be its corresponding close.

]]--

function Capture:size(close)

if self:isopencap() then

assert(close:isclosecap())

return close.bytePos - self.bytePos

else

return self.byteLen

end

end

function CapKind:newCapture(bytePos, byteLen, extra)

return Capture:new(self, bytePos, byteLen, extra)

end

local CapState = {}

CapState.__index = CapState

-- Capture cap: current capture

-- Capture ocap: (original) capture list

-- int ptop: index of last argument to 'match'

-- string s: original string

-- int valuecached: value stored in cache slot

-- int reclevel: recursion level

function CapState:new(captures, source, extraArgs)

return setmetatable({

captures = captures,

index = 1,

source = source,

valuecached = {},

reclevel = 0,

extraArgs = extraArgs,

}, self)

end

function CapState:cap() -- helper

return self.captures[self.index]

end

function CapState:advance() -- helper

local i = self.index

local cap = self.captures[i]

self.index = i + 1

return cap, i

end

function CapState:substr(start, len) -- helper

return string.sub(self.source, start, start + len - 1)

end

function CapState:skipclose(head)

if head:isopencap() then

assert(self.captures[self.index]:isclosecap())

self.index = self.index + 1

end

end

function CapState:closesize(head)

return head:size(self:cap())

end

--[[

  • Go to the next capture at the same level

]]--

function CapState:nextcap()

local cap = self:cap()

if cap:isopencap() then -- must look for a close

local n = 0 -- number of opens waiting a close

while true do -- look for corresponding close

self.index = self.index + 1

cap = self:cap()

if cap:isopencap() then

n = n + 1

elseif cap:isclosecap() then

if n == 0 then break end

n = n - 1

end

end

self.index = self.index + 1 -- skip last close (or entire single capture)

else

self.index = self.index + 1

while cap:inside(self:cap()) do

self.index = self.index + 1 -- skip captures inside the current one

end

end

end

--[[

  • Goes back in a list of captures looking for an open capture
  • corresponding to a close

]]--

function CapState:findopen(i) -- captures[i] is the close that we want to match

assert(self.captures[i]:isclosecap())

local n = 0 -- number of closes waiting an open

while i > 1 do

i = i - 1

local cap = self.captures[i]

if cap:isclosecap() then

n = n + 1 -- one more open to skip

elseif cap:isopencap() then

if n == 0 then return cap,i end

n = n - 1

end

end

error("couldn't find open")

end

--[[

  • Checks whether group 'grp' is visible to 'ref', that is, 'grp' is
  • not nested inside a full capture that does not contain 'ref'. (We
  • only need to care for full captures because the search at 'findback'
  • skips open-end blocks; so, if 'grp' is nested in a non-full capture,
  • 'ref' is also inside it.) To check this, we search backward for the
  • inner full capture enclosing 'grp'. A full capture cannot contain
  • non-full captures, so a close capture means we cannot be inside a
  • full capture anymore.

]]--

function CapState:capvisible(igrp, ref)

local i = igrp

local grp = self.captures[igrp]

while i > 1 do

i = i - 1

local cap = self.captures[i]

if cap:isclosecap() then

return true -- can stop the search

elseif cap:inside(grp) then -- is 'grp' inside cap?

return cap:inside(ref) -- ok iff cap also contains ref

end

end

return true -- 'grp' is not inside any capture

end

--[[

  • Try to find a named group capture with the name given;
  • goes backward from 'i'.

]]--

function CapState:findback(name, i)

if i == nil then i = self.index end

local ref = self.captures[i]

while i > 1 do

i = i - 1

local cap = self.captures[i]

if cap:isclosecap() or not cap:inside(ref) then

if cap:isclosecap() then

cap,i = self:findopen(i)

end

if cap.kind == CapKind.group and self:capvisible(i, ref) then

if cap.extra == name then

return cap,i

end

end

end

end

error("back reference '"..name.."' not found")

end

function CapState:getcaptures()

local result = List:new()

while not self:cap():isclosecap() do

self:pushcapture(result)

end

return result

end

function CapState:pushcapture(result)

self.reclevel = self.reclevel + 1

if self.reclevel > MAXRECLEVEL then

error("subcapture nesting too deep")

end

local cap = self.captures[self.index]

assert(cap.kind.push ~= nil)

local res = cap.kind.push(self, cap, result)

self.reclevel = self.reclevel - 1

return res

end

-- helper functions for pushcapture

--[[

  • Push on the Lua stack all values generated by nested captures inside
  • the current capture. Returns number of values pushed. 'addextra'
  • makes it push the entire match after all captured values. The
  • entire match is pushed also if there are no other nested values,
  • so the function never returns zero.

]]--

function CapState:pushnestedvalues(result, addextra)

local head = self:advance()

local n = 0 -- number of pushed subvalues

-- repeat for all nested patterns

while head:inside(self:cap()) do

n = n + self:pushcapture(result)

end

if addextra or n == 0 then -- need extra?

result:push(self:substr(head.bytePos, self:closesize(head)))

n = n + 1

end

self:skipclose(head)

return n

end

--[[

  • Push only the first value generated by nested captures

]]--

function CapState:pushonenestedvalue(result)

local n = self:pushnestedvalues(result, false)

if n == 0 then

result:push(nil) -- ensure there's exactly one value

return 1

end

while n > 1 do

result:pop() -- pop extra values

n = n - 1

end

return n

end

-- visitor patterns for pushcapture

function CapKind.position.push(capstate, cap, result)

result:push(cap.bytePos)

capstate.index = capstate.index + 1

return 1

end

function CapKind.const.push(capstate, cap, result)

result:push(cap.extra)

capstate.index = capstate.index + 1

return 1

end

function CapKind.arg.push(capstate, cap, result)

local n = cap.extra

if n > capstate.extraArgs.n then

error(string.format("reference to absent extra argument #%d", n))

end

result:push(capstate.extraArgs[n])

capstate.index = capstate.index + 1

return 1

end

function CapKind.simple.push(capstate, cap, result)

local k = capstate:pushnestedvalues(result, true)

-- reorder so that the whole match is the first result, not the last

local last = result:pop()

result:insert(2 + #result - k, last)

return k

end

-- missing a bunch

--[[

  • Table capture: creates a new table and populates it with nested
  • captures.

]]--

function CapKind.table.push(capstate, cap, result) -- aka tablecap

local t = {}

result:push(t)

local head = capstate:advance()

local n = 0

while head:inside(capstate:cap()) do

cap = capstate:cap()

if cap.kind == CapKind.group and cap.extra ~= nil then -- named group?

capstate:pushonenestedvalue(result)

t[cap.extra] = result:pop() -- move it into table

else -- not a named group

local k = capstate:pushcapture(result)

for i=k,1,-1 do

t[n + i] = result:pop() -- move it into table (indexed)

end

n = n + k

end

end

capstate:skipclose(head)

return 1 -- number of values pushed (only the table)

end

--[[

  • Table-query capture

]]--

function CapKind.query.push(capstate, cap, result) -- aka querycap

capstate:pushonenestedvalue(result)

local key = result:pop()

local tbl = cap.extra

assert(type(tbl) == "table")

local val = tbl[key]

if val ~= nil then

result:push(val)

return 1

else

return 0

end

end

--[[

  • Fold capture

]]--

function CapKind.fold.push(capstate, cap, result) -- aka foldcap

local f = cap.extra

assert(type(f) == "function")

local head = capstate:advance()

if capstate:cap():isclosecap() then

-- no nested captures? (large subject)

error("no initial value for fold capture")

end

local args = List:new()

local n = capstate:pushcapture(args)

if n == 0 then

-- nested captures with no values?

error("no initial value for fold capture")

end

local accum = args[1] -- leave only one result for accumulator

while head:inside(capstate:cap()) do

args = List:new()

args:push( accum ) -- put accumulator first

n = capstate:pushcapture(args) -- get next capture's values

accum = f(compat.unpack(args))

end

capstate:skipclose(head)

-- only accumulator left in result

result:push(accum)

return 1

end

--[[

  • Function capture

]]--

CapKind["function"].push = function(capstate, cap, result)

local f = cap.extra

assert(type(f) == "function")

local args = List:new()

local n = capstate:pushnestedvalues(args, false)

local r = compat.pack(f(compat.unpack(args)))

for i=1,r.n do

result:push(r[i])

end

return r.n

end

--[[

  • Accumulator capture

]]--

function CapKind.acc.push(capstate, cap, result) -- aka accumulatorcap

if #result == 0 then

error("no previous value for accumulator capture")

end

local f = cap.extra

assert(type(f) == "function")

local prev = #result

local args = List:new()

args:push(result[prev])

local n = capstate:pushnestedvalues(args, false)

result[prev] = f(compat.unpack(args))

return 0 -- did not add any extra value

end

--[[

  • Select capture

]]--

function CapKind.num.push(capstate, cap, result) -- aka numcap

local idx = cap.extra -- value to select

if idx == 0 then -- no values?

capstate:nextcap() -- skip entire capture

return 0 -- no value produced

else

local vals = List:new()

local n = capstate:pushnestedvalues(vals, false)

if n < idx then -- invalid index?

error("no capture '"..idx.."'")

else

result:push(vals[idx])

return 1

end

end

end

function CapState:runtimecap(closePos)

local close = self.captures[closePos]

local open,openPos = self:findopen(closePos) -- get open group capture

assert(open.kind == CapKind.group)

self.index = openPos -- set state to the open capture

local args = List:new()

args:push( self.source) -- original subject

args:push( close.bytePos ) -- current position

local n = self:pushnestedvalues(args, false) -- push nested captures

local func = open.extra

local funcRet = compat.pack(func(compat.unpack(args)))

local res = closePos - openPos -- number of captures to be removed

return res, funcRet

end

function CapKind.runtime.push(capstate, cap, result) -- aka runtimecap

result:push(cap.extra)

capstate.index = capstate.index + 1

return 1

end

local MAXSTRCAPS = 10

--[[

  • Collect values from current capture into array 'cps'. Current
  • capture must be Cstring (first call) or Csimple (recursive calls).
  • (In first call, fills %0 with whole match for Cstring.)
  • Returns number of elements in the array that were filled.

]]--

function CapState:getstrcaps(cps, n)

local k = n

n = n + 1

cps[k] = {

isstring = true, -- get string value

bytePos = self:cap().bytePos, -- starts here

}

local head = self:advance()

while head:inside(self:cap()) do

if n > MAXSTRCAPS then -- too many captures?

self:nextcap() -- skip extra captures (will not need them)

elseif self:cap().kind == CapKind.Simple then -- string?

n = self:getstrcaps(cps, n) -- put info into array

else

cps[n] = {

isstring = false, -- not a string

cap = self.index, -- keep original capture

}

self:nextcap()

n = n + 1

end

end

cps[k].endPos = head.bytePos + self:closesize(head)

self:skipclose(head)

return n

end

function CapState:addonestring(buffer, what)

local cap = self:cap()

if cap.kind == CapKind.string then

-- add capture directly to buffer

return stringcap(self, buffer)

elseif cap.kind == CapKind.subst then

-- add capture directly to buffer

return substcap(self, buffer)

elseif cap.kind == CapKind.acc then

error("invalid context for an accumulator capture")

end

local result = List:new()

local n = self:pushcapture(result)

if n == 0 then return 0 end -- no values to add

local val = result[1] -- take only one result (the first)

if type(val) == "number" then

val = tostring(val)

elseif type(val) ~= "string" then

error("invalid "..what.." value (a "..type(val)..")")

end

table.insert(buffer, val)

return 1

end

--[[

  • String capture: add result to buffer 'b' (instead of pushing
  • it into the stack)

]]--

function stringcap(capstate, buffer)

local fmt = capstate:cap().extra

local cps = {}

local n = capstate:getstrcaps(cps, 1) - 1 -- collect nested captures

local sawEscape = false

for _,c in compat.utf8codes(fmt) do

if sawEscape then

sawEscape = false

if c < 48 or c > 57 then -- not followed by a digit

table.insert(buffer, compat.utf8char(c))

else

local l = 1 + c - 48 -- capture index (1-based)

if l > n then

error("invalid capture index ("..(l-1)..")")

elseif cps[l].isstring then

table.insert(buffer, capstate:substr(cps[l].bytePos, cps[l].endPos - cps[l].bytePos))

else

-- go back to evaluate that nested capture

local curr = capstate.index

capstate.index = cps[l].cap

if capstate:addonestring(buffer, "capture") == 0 then

error("no values in capture index "..l)

end

capstate.index = curr

end

end

elseif c ~= 37 then -- not a % escape?

table.insert(buffer, compat.utf8char(c))

else

sawEscape = true

end

end

return 1

end

--[[

  • Substitution capture: add result to buffer 'b'

]]--

function substcap(capstate, buffer)

local head = capstate:advance()

local curr = head.bytePos

while head:inside(capstate:cap()) do

local cap = capstate:cap()

local nextPos = cap.bytePos

local s = capstate:substr(curr, nextPos - curr)

table.insert(buffer, s) -- add text up to capture

if capstate:addonestring(buffer, "replacement") == 0 then

-- no capture value, keep original text in final result

curr = nextPos

else

-- continue after match

local lastCap = capstate.captures[capstate.index - 1]

curr = nextPos + cap:size(lastCap)

end

end

-- add last piece of text

local s = capstate:substr(curr, head.bytePos + capstate:closesize(head) - curr)

table.insert(buffer, s)

capstate:skipclose(head)

end

function CapKind.subst.push(capstate, cap, result) -- aka substcap

local buffer = {}

substcap(capstate, buffer)

result:push(table.concat(buffer))

return 1

end

function CapKind.string.push(capstate, cap, result) -- aka stringcap

local buffer = {}

stringcap(capstate, buffer)

result:push(table.concat(buffer))

return 1

end

function CapKind.group.push(capstate, cap, result)

if cap.extra == nil then -- anonymous group?

return capstate:pushnestedvalues(result, false) -- add all nested values

else -- named group: add no values

capstate:nextcap()

return 0

end

end

--[[

  • Back-reference capture. Return number of values pushed.

]]--

function CapKind.backref.push(capstate, cap, result)

local curr = capstate.index

local _,i = capstate:findback(cap.extra)

capstate.index = i

local n = capstate:pushnestedvalues(result, false)

capstate.index = curr + 1

return n

end

return {

CapState = CapState,

Capture = Capture,

}

end)

register('llpeg.vm', function(myrequire)

myrequire('strict')

local compat = myrequire('advent.compat')

local utf8util = myrequire('llpeg.utf8util')

local from = myrequire('llpeg.types').from

local

CHARMAX,

CapKind,

Opcode,

enum,

_ = from(myrequire('llpeg.types'), {

'CHARMAX',

'CapKind',

'Opcode',

'enum',

})

local

CapState,

Capture,

__ = from(myrequire('llpeg.cap'), {

'CapState',

'Capture',

})

local

Instruction,

___ = from(myrequire('llpeg.code'), {

'Instruction',

})

local

printcaplist,

___ = from(myrequire('llpeg.print'), {

'printcaplist',

})

local LFAIL = {}

local InsidePred = enum{

OUTPRED = 0,

INPRED = 1,

}

local Stack = {}

Stack.__index = Stack

-- Stack prev: previous entry in the stack

-- int bytePos: saved position, or NULL for calls

-- Instruction pc: saved instruction

-- int caplevel

-- int labenv -- for labeled failure

-- bool predchoice -- for labeled failure

function Stack:new(prev, bytePos, pc, caplevel, labenv, predchoice)

return setmetatable({

prev = prev,

bytePos = bytePos, pc = pc, caplevel = caplevel,

labenv = labenv, predchoice = predchoice,

}, self)

end

function Stack:__tostring()

return string.format(

"Stack{ bytePos=%d, caplevel=%d, labenv=%s, predchoice=%s }",

self.bytePos, self.caplevel, self.labenv, self.predchoice

)

end

function Stack:print()

local s = self

while s ~= nil do

print("Stack", s)

s = s.prev

end

end

local MatchResult = {}

MatchResult.__index = MatchResult

function MatchResult:new()

return setmetatable({

labelf = nil, -- failure label

sfail = -1, -- farthest failure

}, self)

end

local State = {}

State.__index = State

function State:new(source, bytePos, ...)

local giveup = Instruction:new{Opcode.Giveup}

local insidepred = InsidePred.OUTPRED -- label environment is off inside predicates

local stack = Stack:new(nil, bytePos, giveup, 0, insidepred, nil)

local cp,cpLen

if bytePos <= #source then

cp, cpLen = utf8util.codepointAt(source, bytePos)

else

cp, cpLen = nil, nil

end

assert(bytePos ~= nil)

return setmetatable({

source = source, -- the source string

bytePos = bytePos, -- current position in the string, in bytes

codepoint = cp, -- the codepoint at 'bytePos' in 'source'

codepointLen = cpLen, -- the length of the codepoint at 'bytePos'

stack = stack, -- top of stack

captures = {}, -- list of captures

captop = 1, -- point to first empty slot in captures (1-based)

extraArgs = compat.pack(...),

-- labeled failures:

insidepred = insidepred,

labelf = nil, -- failure label

sfail = -1, -- farthest failure

}, self)

end

function State:advance()

return self:resetPosTo(self.bytePos + self.codepointLen)

end

function State:resetPosTo(newPos)

assert(newPos ~= nil)

self.bytePos = newPos

local source = self.source

if newPos <= #source then

local cp, cpLen = utf8util.codepointAt(source, newPos)

self.codepoint = cp

self.codepointLen = cpLen

return cp

else

self.codepoint = nil

self.codepointLen = nil

return nil

end

end

function State:backtrack(n)

local off = utf8util.offset(self.source, -n, self.bytePos)

if off == nil then return false end -- can't backtrack that far!

self:resetPosTo(off)

return true

end

function State:updatefarthest(label)

self.labelf = label

if self.bytePos > self.sfail then

self.sfail = self.bytePos

end

end

function State:pushcapture(cap)

self.captures[self.captop] = cap

self.captop = self.captop + 1

end

function State:fail()

-- pattern failed, try to backtrack

local lastStack

repeat

lastStack = self.stack

self.stack = lastStack.prev

until lastStack.bytePos ~= nil

self:resetPosTo(lastStack.bytePos)

self.captop = lastStack.caplevel

self.insidepred = lastStack.labenv -- labeled failure

return lastStack.pc:exec(self)

end

function State:giveup()

local r = nil

local lab = "fail"

local errpos = self.sfail

if self.labelf ~= nil and self.labelf ~= LFAIL then

lab = self.labelf

end

return r, lab, errpos

end

function State:getcaptures()

local results = {}

if self.captures[1].kind == CapKind.close then -- are there any captures?

return results -- no captures

end

return CapState:new(self.captures, self.source, self.extraArgs):getcaptures()

end

function Opcode.End:exec(state)

state:pushcapture(CapKind.close:newCapture(state.bytePos, 0))

-- trim table to proper length

while #state.captures > state.captop - 1 do

table.remove(state.captures)

end

-- printcaplist(state.captures, #state.captures) -- for debugging

local results = state:getcaptures()

if #results == 0 then -- no captured values?

return state.bytePos -- return only end position

else

return compat.unpack(results)

end

end

function Opcode.Giveup:exec(state)

return state:giveup()

end

function Opcode.Ret:exec(state)

local lastStack = state.stack

state.stack = lastStack.prev

return lastStack.pc:exec(state)

end

function Opcode.Any:exec(state)

if state.codepoint ~= nil then

state:advance()

return self.next:exec(state)

end

state:updatefarthest(LFAIL)

return state:fail()

end

function Opcode.TestAny:exec(state)

if state.codepoint ~= nil then

return self.next:exec(state)

else

return self.branch:exec(state)

end

end

function Opcode.UTFR:exec(state)

local cp = state.codepoint

if cp ~= nil and self.from <= cp and cp <= self.to then

state:advance()

return self.next:exec(state)

end

state:updatefarthest(LFAIL)

return state:fail()

end

function Opcode.Char:exec(state)

if state.codepoint == self.aux then

state:advance()

return self.next:exec(state)

end

state:updatefarthest(LFAIL)

return state:fail()

end

function Opcode.TestChar:exec(state)

if state.codepoint == self.aux then

return self.next:exec(state)

else

return self.branch:exec(state)

end

end

function Opcode.Set:exec(state)

local cp = state.codepoint

if cp ~= nil then

if cp <= CHARMAX then

if self.set[cp] then

state:advance()

return self.next:exec(state)

end

else

if self.rest then

state:advance()

return self.next:exec(state)

end

end

end

state:updatefarthest(LFAIL)

return state:fail()

end

function Opcode.TestSet:exec(state)

local cp = state.codepoint

if cp ~= nil then

if cp <= CHARMAX then

if self.set[cp] then

return self.next:exec(state)

end

elseif self.rest then

return self.next:exec(state)

end

end

return self.branch:exec(state)

end

function Opcode.Behind:exec(state)

local n = self.aux

-- XXX SLOW self.aux is in *characters* not *bytes*

if state:backtrack(n) then

return self.next:exec(state)

end

state:updatefarthest(LFAIL)

return state:fail()

end

function Opcode.Span:exec(state)

local cp = state.codepoint

while true do

local match = false

if cp ~= nil then

if cp <= CHARMAX then

if self.set[cp] then match = true end

else

if self.rest then match = true end

end

end

if not match then break end

cp = state:advance()

end

return self.next:exec(state)

end

function Opcode.Jmp:exec(state)

return self.branch:exec(state)

end

function Opcode.Choice:exec(state)

state.stack = Stack:new(

state.stack, state.bytePos, self.branch, state.captop, state.insidepred

)

return self.next:exec(state)

end

function Opcode.PredChoice:exec(state)

state.stack = Stack:new(

state.stack, state.bytePos, self.branch, state.captop, state.insidepred,

true -- predchoice

)

state.insidepred = InsidePred.INPRED

return self.next:exec(state)

end

function Opcode.Call:exec(state)

state.stack = Stack:new(

state.stack, nil, self.next

)

return self.branch:exec(state)

end

function Opcode.Commit:exec(state)

state.stack = state.stack.prev

return self.branch:exec(state)

end

function Opcode.PartialCommit:exec(state)

local stack = state.stack

stack.bytePos = state.bytePos

stack.caplevel = state.captop

return self.branch:exec(state)

end

function Opcode.BackCommit:exec(state)

local stack = state.stack

state.stack = stack.prev -- pop the stack

state:resetPosTo(stack.bytePos) -- but reset the position to that stored

state.insidepred = stack.labenv -- labeled failure

state.captop = stack.caplevel

return self.branch:exec(state)

end

function Opcode.Throw:exec(state)

if state.insidepred == InsidePred.OUTPRED then

state.labelf = self.key

-- pop entire stack

while state.stack.prev ~= nil do

state.stack = state.stack.prev

end

else

state.labelf = LFAIL

-- pop until you read a 'predchoice' state

while not state.stack.predchoice do

state.stack = state.stack.prev

end

end

state.sfail = state.bytePos

return state:fail()

end

function Opcode.ThrowRec:exec(state) -- with recovery

state.sfail = state.bytePos

if state.insidepred == InsidePred.OUTPRED then

state.labelf = self.key

state.stack = Stack:new(

state.stack, nil, self.next, state.captop

)

return self.branch:exec(state)

else

state.labelf = LFAIL

-- pop until you read a 'predchoice' state

while not state.stack.predchoice do

state.stack = state.stack.prev

end

return state:fail()

end

end

function Opcode.FailTwice:exec(state)

state.stack = state.stack.prev

state:updatefarthest(LFAIL)

return state:fail()

end

function Opcode.Fail:exec(state)

state:updatefarthest(LFAIL)

return state:fail()

end

function Opcode.CloseRunTime:exec(state)

-- close the group

state:pushcapture(self.cap:newCapture(state.bytePos, 0))

-- trim table to proper length

while #state.captures > state.captop - 1 do

table.remove(state.captures)

end

local cs = CapState:new(state.captures, state.source, state.extraArgs)

local n, funcRet = cs:runtimecap(state.captop - 1)

state.captop = state.captop - n -- remove nested captures

-- resdyncaptures: resolve returned values in `funcRet`

-- first argument false=fail, true=keep current pos, number=next position

local firstArg = funcRet[1]

if funcRet.n == 0 then

firstArg = false -- returning void means we'll fail

end

if not firstArg then -- if it is falsey, discard rest of returned vals & fail

state:updatefarthest(LFAIL)

return state:fail() -- tail call

elseif type(firstArg) == "boolean" then

-- keep current position, nothing needs to be done

else

local npos = tonumber(firstArg)

if npos < state.bytePos or npos > (1 + #(state.source)) then

error("invalid position returned by match-time capture")

end

state:resetPosTo(npos)

end

-- push the rest of the funcRet values as new captures

local n = funcRet.n - 1 -- number of new captures

if n == 0 then -- no new captures?

state.captop = state.captop - 1 -- remove open group

else

-- new captures, keep original open group

-- add new captures + close group to 'capture' list

-- adddyncaptures:

assert(state.captures[state.captop - 1].kind == CapKind.group)

assert(state.captures[state.captop - 1]:isopencap())

-- make group capture an anonymous group (this used to hold match-time f)

state.captures[state.captop - 1].extra = nil

for i=2,funcRet.n do -- add runtime captures

state:pushcapture(CapKind.runtime:newCapture(state.bytePos, 0, funcRet[i]))

end

-- close group

state:pushcapture(CapKind.close:newCapture(state.bytePos, 0))

end

return self.next:exec(state)

end

local MAXLOP = 20

function findopen(captures, i, currPos)

i = i - 1 -- check last captop

local cap = captures[i]

if (not cap:isopencap()) and cap.bytePos == currPos then

return nil -- current one cannot be a full capture

end

-- else, look for an 'open' capture

for j=1,MAXLOP do

if cap:isopencap() then -- open capture?

return cap,i -- that's the one to be closed

elseif cap.kind == CapKind.close then

return nil -- a full capture should not nest a non-full one

end

i = i - 1

if i<1 then break end

cap = captures[i]

end

return nil -- not found within allowed search limit

end

function Opcode.CloseCapture:exec(state)

-- if possible, turn capture into a full capture

assert(state.captop > 1)

local open,_ = findopen(state.captures, state.captop, state.bytePos)

if open ~= nil then -- if possible, turn capture into a full capture

open.byteLen = state.bytePos - open.bytePos

else

-- non-nil length to mark entry as closed

state:pushcapture(self.cap:newCapture(state.bytePos, 0))

end

return self.next:exec(state)

end

function Opcode.OpenCapture:exec(state)

state:pushcapture(self.cap:newCapture(

-- byteLen = nil marks entry as open

state.bytePos, nil, self.key

))

return self.next:exec(state)

end

function Opcode.FullCapture:exec(state)

-- XXX SLOW: self.aux is in *characters* not *bytes*

local nPos = utf8util.offset(state.source, -self.aux, state.bytePos)

state:pushcapture(self.cap:newCapture(

nPos, state.bytePos - nPos, self.key

))

return self.next:exec(state)

end

function match(s, init, code, ...)

local state = State:new(s, init, ...)

return code[1]:exec(state)

end

return {

match = match,

}

end)

register('llpeg', function(myrequire)

local VERSION = '0.0.1'

local MAXSTACK = 400 -- maximum backtracking

local MAXBEHIND = 255 -- maximum look-behind

local MAXRULES = 1000 -- maximum number of rules

local LPEG_COMPAT = true

myrequire('strict')

local compat = myrequire('advent.compat')

local from = myrequire('llpeg.types').from

local

CHARMAX,

CapKind,

TTag,

define,

define_tree_visitor,

metareg,

newcharset,

numsiblings,

_ = from(myrequire('llpeg.types'), {

'CHARMAX',

'CapKind',

'TTag',

'define',

'define_tree_visitor',

'metareg',

'newcharset',

'numsiblings',

})

local

compile,

cs_diff,

cs_union,

fixedlen,

hascaptures,

nofail,

nullable,

nullable_with_grammar,

tocharset,

__ = from(myrequire('llpeg.code'), {

'compile',

'cs_diff',

'cs_union',

'fixedlen',

'hascaptures',

'nofail',

'nullable',

'nullable_with_grammar',

'tocharset',

})

local

match,

___ = from(myrequire('llpeg.vm'), {

'match',

})

local

printpatt,

printrepl,

printtree,

____ = from(myrequire('llpeg.print'), {

'printpatt',

'printrepl',

'printtree',

})

function checkint(v)

if type(v) == 'string' then

v = tonumber(v)

end

if type(v) ~= "number" then

error("not a number")

end

return math.floor(v)

end

local is_pattern = define_type_visitor{

pattern = function() return true end,

default = function() return false end,

}

local ptype = define_type_visitor{

pattern = function() return "pattern" end,

default = function(v) return type(v) end,

}

function val2str(v)

if type(v) == 'number' then return tostring(v) end

if type(v) == 'string' then return v end

return string.format("(a %s)", ptype(v))

end

-- lpltree.c --

function newtree(tag)

local t = {

tag = tag,

code = nil,

}

setmetatable(t, metareg)

return t

end

function newleaf(tag, n)

return setmetatable({

tag = tag,

code = nil,

n = n,

}, metareg)

end

function newroot1sib(tag, sib1)

return setmetatable({

tag = tag,

code = nil,

sib1 = sib1,

}, metareg)

end

function newroot2sib(tag, sib1, sib2)

return setmetatable({

tag = tag,

code = nil,

sib1 = sib1,

sib2 = sib2,

}, metareg)

end

-- Build a sequence of #s nodes from the array 's' with the tag 'tag' --

function fillseq(tag, s)

if type(s) == 'number' then

local len = checkint(s)

s = setmetatable({}, {__len = function() return len end})

end

if #s == 0 then

return newleaf(tag, 0)

end

local i = #s

local result = newleaf(tag, s[i])

while i > 1 do

i = i - 1

result = newroot2sib(

TTag.Seq,

newleaf(tag, s[i]),

result

)

end

return result

end

--[[ Numbers as patterns:

0 == true (always match); n == TAny repeated 'n' times;

-n == not (TAny repeated 'n' times)

]]--

function numtree(n)

n = checkint(n)

if n == 0 then

return newleaf(TTag.True)

elseif n > 0 then

return fillseq(TTag.Any, n) -- sequence of 'n' anys

else

return newroot1sib(TTag.Not, fillseq(TTag.Any, -n))

end

end

-- Convert value v to a pattern

local getpatt = define_type_visitor{

["string"] = function(s)

if #s == 0 then

return newleaf(TTag.True) -- always match if string is empty

end

local cp = {}

for _,c in compat.utf8codes(s) do

table.insert(cp, c)

end

return fillseq(TTag.Char, cp)

end,

["number"] = function(n)

return numtree(n)

end,

["boolean"] = function(b)

if b then

return newleaf(TTag.True)

else

return newleaf(TTag.False)

end

end,

["function"] = function(f)

return setmetatable({

tag = TTag.RunTime,

code = nil,

key = f,

sib1 = newleaf(TTag.True),

}, metareg)

end,

["pattern"] = function(v)

return v

end,

["table"] = function(v)

return newgrammar(v)

end,

default = function(v)

error("Not a pattern")

end,

}

-- labeled failure begin

function newthrowleaf(label)

return setmetatable({

tag = TTag.Throw,

code = nil,

sib2 = nil, -- no recovery rule associated (yet)

key = label,

}, metareg)

end

-- labeled failure end

function lp_P(v)

return getpatt(v)

end

--[[

  • sequence operator; optimizations:
  • false x => false, x true => x, true x => x
  • (cannot do x . false => false because x may have runtime captures)

]]--

function lp_seq(tree1, tree2)

tree1 = getpatt(tree1)

tree2 = getpatt(tree2)

if tree1.tag == TTag.False or tree2.tag == TTag.True then

-- false . x = false, x . true = x

return tree1

elseif tree1.tag == TTag.True then

-- true . x = x

return tree2

else

return newroot2sib(TTag.Seq, tree1, tree2)

end

end

--[[

  • choice operator; optimizations:
  • charset / charset => charset
  • true / x => true, x / false => x, false / x => x
  • (x / true is not equivalent to true)

]]--

function lp_choice(t1, t2)

t1 = getpatt(t1)

t2 = getpatt(t2)

local t1c = tocharset(t1)

local t2c = tocharset(t2)

if t1c ~= nil and t2c ~= nil then

local t = cs_union(t1c, t2c)

return t

elseif nofail(t1) or t2.tag == TTag.False then

-- true / x => true, x / false => x

return t1

elseif t1.tag == TTag.False then

-- false / x => x

return t2

else

return newroot2sib(TTag.Choice, t1, t2)

end

end

--[[

p^n

]]--

function lp_star(p, n)

local tree1 = getpatt(p)

n = checkint(n)

if n >= 0 then -- seq tree1 (seq tree1 ... (seq tree1 (rep tree1)))

if nullable(tree1) then

error("loop body may accept empty string")

end

local tree = newroot1sib(TTag.Rep, tree1)

while n > 0 do

tree = newroot2sib(TTag.Seq, tree1, tree)

n = n - 1

end

return tree

else -- choice (seq tree1 ... choice tree1 true ...) true

n = -n

local tree = newroot2sib( -- at most 1

TTag.Choice,

tree1,

newleaf(TTag.True)

)

while n > 1 do

tree = newroot2sib( -- at most (n-1)

TTag.Seq,

tree1,

tree

)

tree = newroot2sib(TTag.Choice, tree, newleaf(TTag.True))

n = n - 1

end

return tree

end

end

--[[

  • #p == &p

]]--

function lp_and(v)

return newroot1sib(TTag.And, getpatt(v))

end

--[[

  • -p == !p

]]--

function lp_not(v)

return newroot1sib(TTag.Not, getpatt(v))

end

--[[

  • [t1 - t2] == Seq (Not t2) t1
  • If t1 and t2 are charsets, make their difference.

]]--

function lp_sub(t1, t2)

t1 = getpatt(t1)

t2 = getpatt(t2)

local t1c = tocharset(t1)

local t2c = tocharset(t2)

if t1c ~= nil and t2c ~= nil then

return cs_diff(t1c, t2c)

else

return newroot2sib(

TTag.Seq,

newroot1sib(TTag.Not, t2),

t1

)

end

end

--[[

A set with the given characters

]]--

function lp_set(s)

local t = newcharset()

local extra = nil

for _,c in compat.utf8codes(s) do

if c > CHARMAX then

-- non ascii, we can't use charset for these

local one = newleaf(TTag.Char, c)

if extra == nil then

extra = one

else

extra = newroot2sib(TTag.Choice, extra, one)

end

else

t.set[c] = true

end

end

if extra == nil then

return t

else

return newroot2sib(TTag.Choice, t, extra)

end

end

function lp_range(...)

local t = newcharset()

local extra = nil

for _,v in ipairs{...} do

if type(v) ~= "string" then

error("argument must be string")

else

local first, second

for _,c in compat.utf8codes(v) do

if first == nil then

first = c

elseif second == nil then

second = c

else

error("range must have two characters")

end

end

if first == nil or second == nil then

error("range must have two characters")

end

if first > second then

if LPEG_COMPAT then

-- ignore, just silently create an empty range

else

error("empty range")

end

elseif second <= CHARMAX then -- ascii range

for i = first, second do

t.set[i] = true

end

else

local r = lp_utfr(first, second)

if extra == nil then

extra = r

else

extra = newroot2sib(TTag.Choice, extra, one)

end

end

end

end

if extra == nil then

return t

else

return newroot2sib(TTag.Choice, t, extra)

end

end

function lp_utfr(from, to)

from = checkint(from)

to = checkint(to)

if from > to then

error("empty range")

end

if to > 0x10FFFF then

error("invalid code point")

end

if to <= CHARMAX then -- ascii range?

local t = newcharset() -- code it as a regular charset

for i = from, to do

t.set[i] = true

end

return t

end

-- multibyte utf-8 range

return setmetatable({

tag = TTag.UTFR,

code = nil,

from = from,

to = to,

}, metareg)

end

--[[

Look-behind predicate

]]--

function lp_behind(v)

local tree1 = getpatt(v)

local n = fixedlen(tree1)

if n < 0 then

error("pattern may not have fixed length")

end

if hascaptures(tree1) then

error("pattern has captures")

end

if n > MAXBEHIND then

error("pattern too long to look behind")

end

return setmetatable({

tag = TTag.Behind,

code = nil,

sib1 = tree1,

n = n,

}, metareg)

end

-- labeled failure begin --

--[[

  • Throws a label

]]--

local lp_throw = define_type_visitor{

[{"string","number"}] = newthrowleaf,

default = function() error("not a string") end,

}

-- labeled failure end --

--[[

  • Create a non-terminal

]]--

function lp_V(v)

if v == nil then

error("non-nil value expected")

end

return setmetatable({

tag = TTag.Call,

code = nil,

key = v,

}, metareg)

end

--[[

  • Create a tree for a non-empty capture, with a body and
  • optionally with an associated Lua value (at index 'labelidx' in the
  • stack)

]]--

function capture_aux(capkind, patt, val)

local t = newroot1sib(TTag.Capture, getpatt(patt))

t.cap = capkind

t.key = val

return t

end

function newemptycap(capkind, val)

return capture_aux(capkind, newleaf(TTag.True), val)

end

--[[

  • Captures with syntax p / v
  • (function capture, query capture, string capture, or number capture)

]]--

local divcapture_helper = define_type_visitor{

["function"] = function(v, p)

return capture_aux(CapKind["function"], p, v)

end,

["table"] = function(v, p)

return capture_aux(CapKind.query, p, v)

end,

["string"] = function(v, p)

return capture_aux(CapKind.string, p, v)

end,

["number"] = function(v, p)

v = checkint(v)

if v < 0 or v > 65536 then

error("invalid number")

end

return capture_aux(CapKind.num, p, v)

end,

default = function(v)

error("unexpected "..ptype(v).." as 2nd operand to LPeg '/'") end,

}

function lp_divcapture(p, v)

return divcapture_helper(v, p) -- dispatch on v

end

function lp_acccapture(p, v)

return capture_aux(CapKind.acc, p, v)

end

-- the match for patt with the values from nested captures replacing their

-- matches

function lp_substcapture(patt)

return capture_aux(CapKind.subst, patt)

end

-- a table with all captures from patt

function lp_tablecapture(patt)

return capture_aux(CapKind.table, patt)

end

-- the values produced by patt, optionally tagged with key

function lp_groupcapture(patt, key)

-- key can be nil

return capture_aux(CapKind.group, patt, key)

end

-- folding capture (deprecated)

function lp_foldcapture(patt, func)

if type(func) ~= "function" then

error("Bad function argument")

end

return capture_aux(CapKind.fold, patt, func)

end

-- the match for patt plus all captures made by patt

function lp_simplecapture(patt)

return capture_aux(CapKind.simple, patt)

end

-- the current position (matches the empty string)

function lp_poscapture()

return newemptycap(CapKind.position)

end

-- the value of the nth extra argument to lpeg.match (matches the empty string)

function lp_argcapture(n)

n = checkint(n)

if n <= 0 or n > 65536 then error("invalid argument index") end

return newemptycap(CapKind.arg, n)

end

-- the value produced by the previous group capture named `key`

-- (matches the empty string)

function lp_backref(key)

return newemptycap(CapKind.backref, key)

end

-- Constant capture (matches the empty string)

function lp_constcapture(...)

local arg = compat.pack(...)

if arg.n == 0 then -- no values?

return newleaf(TTag.True) -- no capture

else

local i = arg.n

local tree = newemptycap(CapKind.const, arg[i])

while i > 1 do

i = i - 1

tree = newroot2sib(

TTag.Seq,

newemptycap(CapKind.const, arg[i]),

tree

)

end

if arg.n == 1 then

-- single constant capture

return tree

else

-- create a group capture with all values

return lp_groupcapture( tree )

end

end

end

-- the returns of function applied to the captures of patt; the application

-- is done at match time

function lp_matchtime(patt, func)

if type(func) ~= 'function' then

error('not a function')

end

return setmetatable({

tag = TTag.RunTime,

code = nil,

key = func,

sib1 = getpatt(patt),

}, metareg)

end

--======================================================--

--[[

  • ======================================================
  • Grammar - Tree generation
  • ======================================================

]]--

--[[

  • push on the stack the index and the pattern for the
  • initial rule of grammar at index 'arg' in the stack;
  • also add that index into position table.

]]--

function getfirstrule(tbl)

local first_name, first_rule

first_name = tbl[1]

-- is this the name of an initial rule?

if type(first_name) == 'number' or type(first_name) == 'string' then

first_rule = tbl[first_name] -- get associated rule

else

first_name,first_rule = 1,first_name

end

if not is_pattern(first_rule) then

if first_rule == nil then

error("grammar has no initial rule")

else

error(string.format("initial rule '%s' is not a pattern", first_name))

end

end

-- rule position (after TGrammar)

-- return map from name to position, and from position to name

return { [first_name] = 1 }, { first_name }

end

--[[

  • traverse grammar at index 'arg', pushing all its keys and patterns
  • into the stack. Create a new table (before all pairs key-pattern) to
  • collect all keys and their associated positions in the final tree
  • (the "position table").
  • Return the number of rules and (in 'totalsize') the total size
  • for the new tree.

]]--

function collectrules(tbl)

-- find the first rule and put in position table

local postab, rpostab = getfirstrule(tbl)

-- collect and sort rule names (for repeatability)

local names = {}

for k,v in pairs(tbl) do

if k == 1 or postab[k] == 1 then -- initial rule?

-- skip the initial rules, it's already in the position table

else

table.insert(names, k)

end

end

table.sort(names, function(a,b)

return tostring(a) < tostring(b)

end)

-- fill out rule, name, and position maps

for _,k in ipairs(names) do

local v = tbl[k]

if not is_pattern(v) then

error("rule '" .. val2str(k) .. "' is not a pattern")

end

table.insert(rpostab, k)

postab[k] = #rpostab

end

return postab, rpostab

end

function buildgrammar(g, tbl, postab, rpostab)

local trees = {}

for i,name in ipairs(rpostab) do

local rule = setmetatable({

tag = TTag.Rule,

code = nil,

key = nil, -- will be fixed when rule is used

n = i, -- rule number

name = name,

sib1 = tbl[name], -- pattern

sib2 = nil,

}, metareg)

table.insert(trees, rule)

g.ruletab[name] = rule

end

-- link up siblings

for i = 1, #trees-1 do

trees[i].sib2 = trees[i+1]

end

trees[#trees].sib2 = newleaf(TTag.True) -- finish list of rules

g.sib1 = trees[1]

end

--[[

  • Check whether a tree has potential infinite loops

]]--

function checkloops(grammar, tree)

local n = numsiblings[tree.tag]

if tree.tag == TTag.Rep and nullable_with_grammar(tree.sib1, grammar) then

return true

elseif tree.tag == TTag.Grammar then

return false -- sub-grammars already checked

elseif n == 1 then

return checkloops(grammar, tree.sib1) -- tail call

elseif n == 2 then

if checkloops(grammar, tree.sib1) then

return true

else

return checkloops(grammar, tree.sib2) -- tail call

end

elseif n == 0 then

return false

else

error("surprising number of siblings")

end

end

--[[

  • Give appropriate error message for 'verifyrule'. If a rule appears
  • twice in 'passed', there is path from it back to itself without
  • advancing the subject.

]]--

function verifyerror(grammar, passed, npassed)

local i, j

for i = npassed,1,-1 do -- search for a repetition

for j = i-1,1,-1 do

if passed[i] == passed[j] then

error(string.format("rule '%s' may be left recursive", val2str(passed[i])))

end

end

end

error("too many left calls in grammar")

end

--[[

  • Check whether a rule can be left recursive; raise an error in that
  • case; otherwise return 1 iff pattern is nullable.
  • The return value is used to check sequences, where the second pattern
  • is only relevant if the first is nullable.
  • Parameter 'nb' works as an accumulator, to allow tail calls in
  • choices. ('nb' true makes function returns true.)
  • Parameter 'passed' is a list of already visited rules, 'npassed'
  • counts the elements in 'passed'.
  • Assume ktable at the top of the stack.

]]--

local verifyrule

verifyrule = define_tree_visitor{

[{

TTag.Char, TTag.Set, TTag.Any, TTag.False, TTag.UTFR,

TTag.Throw, -- labeled failure

}] = function(tree, g, passed, n, nb)

return nb -- cannot pass from here

end,

[{

TTag.True, TTag.Behind, -- look-behind cannot have calls

}] = function(tree, g, passed, n, nb)

return true

end,

[{ TTag.Not, TTag.And, TTag.Rep, }] = function(tree, g, passed, n, nb)

return verifyrule(tree.sib1, g, passed, n, true) -- tail call

end,

[{ TTag.Capture, TTag.RunTime, TTag.XInfo, }] = function(tree, g, passed, n, nb)

return verifyrule(tree.sib1, g, passed, n, nb) -- tail call

end,

[ TTag.Call ] = function(tree, g, passed, n, nb)

local rule = g.ruletab[tree.key] -- look up rule

return verifyrule(rule, g, passed, n, nb) -- tail call

end,

[ TTag.Seq ] = function(tree, g, passed, n, nb)

-- only check 2nd child if first is nb

if not verifyrule(tree.sib1, g, passed, n, false) then

return nb

else

-- note that we don't propagate new npassed from 1st child

return verifyrule(tree.sib2, g, passed, n, nb) -- tail call

end

end,

[ TTag.Choice ] = function(tree, g, passed, n, nb)

-- must check both children

nb = verifyrule(tree.sib1, g, passed, n, nb)

-- note that we don't propagate new npassed from 1st child

return verifyrule(tree.sib2, g, passed, n, nb) -- tail call

end,

[ TTag.Rule ] = function(tree, g, passed, n, nb)

if n >= MAXRULES then -- too many steps?

return verifyerror(g, passed, n) -- error

else

passed[n+1] = tree.key -- add rule to path

return verifyrule(tree.sib1, g, passed, n + 1, nb) -- tail call

end

end,

[ TTag.Grammar ] = function(tree, g, passed, n, nb)

return nullable(tree) -- sub-grammar cannot be left recursive

end,

}

function verifygrammar(grammar)

local passed = {}

-- check left-recursive rules

local rule = grammar.sib1

while rule.tag == TTag.Rule do

if rule.key ~= nil then -- skip unused rules

verifyrule(rule.sib1, grammar, passed, 0, false)

end

rule = rule.sib2

end

if rule.tag ~= TTag.True then

error("assertion failure")

end

-- check infinite loops inside rules

rule = grammar.sib1

while rule.tag == TTag.Rule do

if rule.key ~= nil then -- skip unused rules

if checkloops(grammar, rule.sib1) then

error("empty loop in rule '" .. val2str(rule.name) .. "'")

end

end

rule = rule.sib2

end

if rule.tag ~= TTag.True then

error("assertion failure")

end

end

--[[

  • Fix a TOpenCall into a TCall node, using table 'postable' to
  • translate a key to its rule address in the tree. Raises an
  • error if key does not exist.

]]--

function fixonecall(g, t, postab)

local name = t.key

local rule = g.ruletab[name]

if t.tag == TTag.Call then

if rule == nil then

error(string.format("rule '%s' undefined in given grammar", val2str(name)))

end

-- unlike our upstream, we don't clone patterns when we build a grammar

-- so we can't mutate this tree w/o possibly breaking any other grammars

-- which might hold an alias of this call. So we don't distinguish

-- Call and OpenCall and we don't mutate the tag here and

-- don't link it up. However, we can mutate the Rule

-- as those are not shared

rule.key = name -- mark this as used

elseif rule ~= nil then -- TTag.Throw

-- As before, we can't mutate the tree

rule.key = name -- mark this as used

end

end

--[[

  • Transform left associative constructions into right
  • associative ones, for sequence and choice; that is:
  • (t11 + t12) + t2 => t11 + (t12 + t2)
  • (t11 * t12) * t2 => t11 * (t12 * t2)
  • (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2))

]]--

function correctassociativity (tree)

local tag = tree.tag

if tag ~= TTag.Choice and tag ~= TTag.Seq then

error("impossible")

end

local t1 = tree.sib1

while t1.tag == tree.tag do

local t11, t12 = t1.sib1, t1.sib2

local t2 = tree.sib2

-- don't mutate t1 in place as others may be keeping copies of it;

-- mutating 'tree' in place is okay as we're not changing its semantics

tree.sib1 = t11

tree.sib2 = newroot2sib(tag, t12, t2)

t1 = tree.sib1

end

return tree

end

--[[

  • Make final adjustments in a tree. Fix open calls in tree 't',
  • making them refer to their respective rules or raising appropriate
  • errors (if not inside a grammar). Correct associativity of associative
  • constructions (making them right associative). Assume that tree's
  • ktable is at the top of the stack (for error messages).

]]--

local finalfix_helper = define_tree_visitor{

[TTag.Grammar] = function(t)

return t -- subgrammars were already fixed

end,

[TTag.Call] = function(t, g, postab)

if g == nil then

error("rule '" .. val2str(t.key) .. "' used outside a grammar")

else

return fixonecall(g, t, postab)

end

end,

[TTag.Throw] = function(t, g, postab)

if g ~= nil then

return fixonecall(g, t, postab)

end

end,

[{TTag.Seq, TTag.Choice}] = function(t, g, postab)

return correctassociativity(t)

end,

default = function(t) return t end,

}

function finalfix(g, t, postab)

finalfix_helper(t, g, postab)

if t.tag == TTag.Grammar then return end

local n = numsiblings[t.tag]

if n == 1 then

return finalfix(g, t.sib1, postab) -- tail call

elseif n == 2 then

finalfix(g, t.sib1, postab)

return finalfix(g, t.sib2, postab) -- tail call

elseif n == 0 then

return

else

error("strange number of siblings")

end

end

--[[

  • Give a name for the initial rule if it is not referenced

]]--

function initialrulename(grammar)

if grammar.sib1.key == nil then -- initial rule is not referenced?

grammar.sib1.key = grammar.sib1.name

end

end

function newgrammar(tbl)

local postab, rpostab = collectrules(tbl)

local g = setmetatable({

tag = TTag.Grammar,

code = nil,

sib1 = nil, -- will fill this in

n = #rpostab, -- number of rules

ruletab = {}, -- map rule names to rules

}, metareg)

buildgrammar(g, tbl, postab, rpostab)

finalfix(g, g.sib1, postab)

initialrulename(g)

verifygrammar(g)

return g

end

-- ====================================================== --

function prepcompile(p)

finalfix(nil, p, {}) -- correct associativity

return compile(p)

end

function lp_printtree(patt, c)

local tree = getpatt(patt)

if c then

finalfix(nil, tree, {}) -- correct associativity

end

print("[]") -- for compatibility, this is a fake 'ktable'

io.write(table.concat(printtree(tree, 0, {})))

end

function lp_printcode(patt)

local p = getpatt(patt)

if p.code == nil then

prepcompile(p)

end

print("[]") -- for compatibility, this is a fake 'ktable'

io.write(table.concat(printpatt(p.code, {})))

end

--[[

  • Get the initial position for the match, interpreting negative
  • values from the end of the subject. Result is 1-based.

]]--

function initposition(ii, len)

if ii > 0 then -- positive index?

if ii <= len then -- inside the string?

return ii -- return it (no correction to 0-base)

else

return len + 1 -- crop at the end

end

else -- negative index

if (-ii) <= len then -- inside the string?

return len + 1 - (-ii) -- return position from the end

else

return 1

end

end

end

-- Main match function

function lp_match(pattern, subject, init, ...)

local p = getpatt(pattern)

if p.code == nil then prepcompile(p) end

local code = p.code

if type(subject) ~= 'string' then error("subject is not a string") end

local i

if init == nil then

i = 1

else

i = initposition(checkint(init), #subject)

end

return match(subject, i, code, ...)

end

--[[

  • ======================================================
  • Library creation and functions not related to matching
  • ======================================================

]]--

function lp_setmax(lim)

lim = 0 + lim -- convert to integer

if lim <= 0 then

error("out of range")

end

MAXSTACK = lim

end

local lp_type = define_type_visitor{

pattern = function() return "pattern" end,

default = function() return nil end,

}

function lp_gc(p)

p._code = nil

end

function createcat(charspec)

local t = newcharset()

for i=0,CHARMAX do -- XXX not unicode safe

local s = compat.utf8char(i)

if s:find(charspec) ~= nil then

t.set[i] = true

end

end

return t

end

function lp_locale(tbl)

if tbl == nil then

tbl = {}

end

tbl.alnum = createcat("%w")

tbl.alpha = createcat("%a")

tbl.cntrl = createcat("%c")

tbl.digit = createcat("%d")

tbl.graph = createcat("[%p%w]") -- printable except space

tbl.lower = createcat("%l")

tbl.print = createcat("%C") -- printable = "not a control character"?

tbl.punct = createcat("%p") -- "printable but not space or alnum

tbl.space = createcat("%s")

tbl.upper = createcat("%u")

tbl.xdigit = createcat("%x")

return tbl

end

-- lpltree.c --

metareg.__mul = lp_seq

metareg.__add = lp_choice

metareg.__pow = lp_star

metareg.__gc = lp_gc

metareg.__len = lp_and

metareg.__div = lp_divcapture

metareg.__mod = lp_acccapture

metareg.__unm = lp_not

metareg.__sub = lp_sub

metareg.__tostring = printrepl

local pattreg = {

ptree = lp_printtree,

pcode = lp_printcode,

match = lp_match,

B = lp_behind,

V = lp_V,

C = lp_simplecapture,

Cc = lp_constcapture,

Cmt = lp_matchtime,

Cb = lp_backref,

Carg = lp_argcapture,

Cp = lp_poscapture,

Cs = lp_substcapture,

Ct = lp_tablecapture,

Cf = lp_foldcapture,

Cg = lp_groupcapture,

P = lp_P,

S = lp_set,

R = lp_range,

utfR = lp_utfr,

locale = lp_locale,

version = "LLPegLabel " .. VERSION,

setmaxstack = lp_setmax,

type = lp_type,

T = lp_throw, -- labeled failure throw

}

metareg.__index = pattreg

return pattreg

end)

local modules = {}

modules['bit32'] = require('bit32')

modules['string'] = require('string')

modules['strict'] = {}

modules['table'] = require('table')

local function myrequire(name)

if modules[name] == nil then

modules[name] = true

modules[name] = (builders[name])(myrequire)

end

return modules[name]

end

return myrequire('llpeg')

end)()