-- Module:User:Cscott/lpegrex

return (function()

local builders = {}

-- Remember builder function `f` under the module name `name`.
-- `myrequire` later looks the builder up in `builders` and calls it the first
-- time the module is requested.
local function register(name, f)
  builders[name] = f
end

-- External wiki modules this bundle depends on. `require` needs the module
-- title as a string; quoted strings are used so the titles survive tools that
-- treat double square brackets as markup.
register('llpeg', function() return require('Module:User:Cscott/llpeg') end)

register('advent.compat', function() return require('Module:User:Cscott/compat') end)

register('llpeg.lpegrex', function(myrequire)

--[[

LPegRex - LPeg Regular Expression eXtended

v0.2.2 - 3/Jun/2021

Eduardo Bart - edub4rt@gmail.com

https://github.com/edubart/lpegrex

Check the project page for documentation on how to use.

See end of file for LICENSE.

]]

-- LPegRex depends on LPegLabel.

local lpeg = myrequire('llpeg')

local compat = myrequire('advent.compat') -- lua 5.1 compatibility

local andP = compat.len -- &p for patterns

-- Increase LPEG max stack, because the default is too low to use with complex grammars.

lpeg.setmaxstack(1024)

-- The LPegRex module table; public functions are attached to it below.
local lpegrex = {}

-- Compiled-pattern caches for `match`, `find` and `gsub`.
-- They are (re)created by `lpegrex.updatelocale`.
local mcache, fcache, gcache

-- Default values of the options consulted while compiling a pattern.
local defrexoptions = {
  tag = 'tag',                 -- node field that receives the node tag
  pos = 'pos',                 -- node field that receives the start position
  endpos = 'endpos',           -- node field that receives the end position
  SKIP = 'SKIP',               -- rule referenced after every token/keyword
  NAME_SUFFIX = 'NAME_SUFFIX', -- rule that must not match right after a keyword
}

-- Options of the compilation in progress (taken from `defs.__options` by
-- `lpegrex.compile`); nil outside of a compilation.
local rexoptions

-- LPegRex syntax errors: maps each failure label thrown by the syntax pattern
-- to the human readable message reported by `lpegrex.compile`.
local ErrorInfo = {
  NoPatt = "no pattern found",
  ExtraChars = "unexpected characters after the pattern",

  -- a pattern was expected after an operator or opening delimiter
  ExpPatt1 = "expected a pattern after '/'",
  ExpPatt2 = "expected a pattern after '&'",
  ExpPatt3 = "expected a pattern after '!'",
  ExpPatt4 = "expected a pattern after '('",
  ExpPatt5 = "expected a pattern after ':'",
  ExpPatt6 = "expected a pattern after '{~'",
  ExpPatt7 = "expected a pattern after '{|'",
  ExpPatt8 = "expected a pattern after '<-'",
  ExpPattOrClose = "expected a pattern or closing '}' after '{'",

  -- something other than a pattern was expected
  ExpNumName = "expected a number, '+', '-' or a name (no space) after '^'",
  ExpCap = "expected a string, number, '{}' or name after '->'",
  ExpName1 = "expected the name of a rule after '=>'",
  ExpName2 = "expected the name of a rule after '=' (no space)",
  ExpName3 = "expected the name of a rule after '<' (no space)",
  ExpName4 = "expected a name, number or string rule after '$' (no space)",
  ExpName5 = "expected a name or string rule after '@' (no space)",
  ExpLab1 = "expected a label after '{'",
  ExpTokOrKey = "expected a keyword or token string after '`'",
  ExpNameOrLab = "expected a name or label after '%' (no space)",
  ExpItem = "expected at least one item after '[' or '^'",

  -- missing closing delimiters
  MisClose1 = "missing closing ')'",
  MisClose2 = "missing closing ':}'",
  MisClose3 = "missing closing '~}'",
  MisClose4 = "missing closing '|}'",
  MisClose5 = "missing closing '}'", -- for the captures
  MisClose6 = "missing closing '>'",
  MisClose7 = "missing closing '}'", -- for the labels
  MisClose8 = "missing closing ']'",

  -- unterminated quoted strings
  MisTerm1 = "missing terminating single quote",
  MisTerm2 = "missing terminating double quote",
  MisTerm3 = "missing terminating backtick quote",
}

-- Localize some functions used in compiled PEGs.

local char = string.char

local utf8char = compat.utf8char

local select, tonumber = select, tonumber

local insert = table.insert

-- Pattern matching any character.

local Any = lpeg.P(1)

-- Predefined patterns and helper functions. Pattern strings reach these
-- entries by name (they are looked up by `getuserdef` when the name is not
-- found in the user supplied definitions).
local Predef = {
  -- single characters
  nl = lpeg.P("\n"), -- new line
  ca = lpeg.P("\a"), -- audible bell
  cb = lpeg.P("\b"), -- backspace
  ct = lpeg.P("\t"), -- horizontal tab
  cn = lpeg.P("\n"), -- new line
  cv = lpeg.P("\v"), -- vertical tab
  cf = lpeg.P("\f"), -- form feed
  cr = lpeg.P("\r"), -- carriage return
  sp = lpeg.S(" \n\r\t\f\v"), -- space, \n, \r, \t, \f or \v
  --utf8 = lpeg.R("\0\x7F", "\xC2\xFD") * lpeg.R("\x80\xBF")^0,
  --utf8seq = lpeg.R("\xC2\xFD") * lpeg.R("\x80\xBF")^0,
  ascii = lpeg.utfR(0, 0x7F),

  -- constant producers
  tonil = function() return nil end,
  totrue = function() return true end,
  tofalse = function() return false end,
  toemptytable = function() return {} end,

  -- converters from a numeral string `s` written in base `base`
  tochar = function(s, base) return char(tonumber(s, base)) end,
  toutf8char = function(s, base) return utf8char(tonumber(s, base)) end,
  tonumber = tonumber,
}

-- Fold tables to the left (use only with `~>`).
-- The accumulated table `lhs` becomes the first element of `rhs`, and `rhs`
-- is the new accumulator.
-- Example: ({1}, {2}, {3}) -> {{{1}, 2}, 3}
function Predef.foldleft(lhs, rhs)
  insert(rhs, 1, lhs)
  return rhs
end

-- Fold tables to the right (use only with `->`).
-- Each following table is appended to the previous one, nesting to the right.
-- Example: ({1}, {2}, {3}) -> {1, {2, {3}}}
function Predef.foldright(first, ...)
  -- nothing to fold when the first extra value is absent (or false)
  if not (...) then
    return first
  end
  local lhs = first
  local count = select('#', ...)
  for i = 1, count do
    local rhs = select(i, ...)
    local n = compat.len(lhs)
    lhs[n + 1] = rhs
    lhs = rhs
  end
  return first
end

-- Fold tables to the left in reverse order (use only with `->`).
-- Each following table is inserted at the front of the previous one.
-- Example: ({1}, {2}, {3}) -> {{{3}, 2}, 1}
function Predef.rfoldleft(first, ...)
  -- nothing to fold when the first extra value is absent (or false)
  if not (...) then
    return first
  end
  local rhs = first
  local count = select('#', ...)
  for i = 1, count do
    local lhs = select(i, ...)
    insert(rhs, 1, lhs)
    rhs = lhs
  end
  return first
end

-- Fold tables to the right in reverse order (use only with `~>`).
-- The accumulated table `lhs` is appended to `rhs`, and `rhs` is the new
-- accumulator.
-- Example: ({1}, {2}, {3}) -> {3, {2, {1}}}
function Predef.rfoldright(lhs, rhs)
  local n = compat.len(rhs)
  rhs[n + 1] = lhs
  return rhs
end

-- Updates the pre-defined character classes to the current locale and
-- resets the pattern caches.
function lpegrex.updatelocale()
  lpeg.locale(Predef)

  -- One-letter aliases of the locale classes: the lower case letter names the
  -- class itself, the upper case letter names its complement.
  local aliases = {
    {'a', 'alpha'}, {'c', 'cntrl'}, {'d', 'digit'}, {'g', 'graph'},
    {'l', 'lower'}, {'p', 'punct'}, {'s', 'space'}, {'u', 'upper'},
    {'w', 'alnum'}, {'x', 'xdigit'},
  }
  for _, alias in ipairs(aliases) do
    Predef[alias[1]] = Predef[alias[2]]
  end
  for _, alias in ipairs(aliases) do
    local short = alias[1]
    Predef[short:upper()] = Any - Predef[short]
  end

  -- Start with fresh caches because the locale changed. Values are held
  -- weakly so the caches do not keep compiled patterns alive.
  local weakmt = {__mode = "v"}
  mcache = {}
  fcache = {}
  gcache = {}
  setmetatable(mcache, weakmt)
  setmetatable(fcache, weakmt)
  setmetatable(gcache, weakmt)
end

-- Fill predefined classes using the default locale.

lpegrex.updatelocale()

-- Create LPegRex syntax pattern.

local function mkrex()

local l = lpeg

local lmt = getmetatable(Any)

-- Match `pattern`; when it fails, throw the labeled failure `label`.
local function expect(pattern, label)
  local throw = l.T(label)
  return pattern + throw
end

-- Build the concatenation of `n` copies of pattern `p` by repeated squaring:
-- each round folds in the current power when the lowest remaining bit of the
-- count is set, then squares the power and halves the count.
local function mult(p, n)
  local result = l.P(true)
  local power, remaining = p, n
  while remaining >= 1 do
    if remaining % 2 >= 1 then
      result = result * power
    end
    power = power * power
    remaining = remaining / 2
  end
  return result
end

-- Match-time check used by the `=name` back reference: succeeds when the
-- subject `s` at position `i` continues with the text `c`, returning the
-- position right after it; returns nothing otherwise.
local function equalcap(s, i, c)
  local stop = compat.len(c) + i
  if s:sub(i, stop - 1) ~= c then
    return
  end
  return stop
end

-- Resolve the name `id`: user definitions in `defs` take precedence over the
-- predefined entries in `Predef`. Raises an error when neither has a (truthy)
-- value for it.
local function getuserdef(id, defs)
  local v = defs and defs[id] or Predef[id]
  if v then
    return v
  end
  error("name '" .. id .. "' undefined")
end

-- Return the option `id` for the compilation in progress: the value from
-- `rexoptions` when one is set there (even `false`), else the default from
-- `defrexoptions`.
local function getopt(id)
  if rexoptions then
    local override = rexoptions[id]
    if override ~= nil then
      return override
    end
  end
  return defrexoptions[id]
end

-- Context of the grammar currently being generated: the rule table, the
-- keyword patterns by keyword, and the token registry.
local G, Gkeywords, Gtokens

-- Start a new grammar context and return its (empty) rule table.
local function begindef()
  G = {}
  Gkeywords = {}
  Gtokens = {}
  return G
end

-- Finish the grammar started by `begindef`: synthesize the TOKEN rule, wrap
-- the rules with tracing when `lpegrex.debug` is set, reset the grammar
-- context and compile the rule table `t` into a pattern.
local function enddef(t)
  -- generate TOKEN rule: ordered choice of all tokens, in registration order
  if Gtokens and #Gtokens > 0 then
    local choice
    for _, tokname in ipairs(Gtokens) do
      local tokpatt = Gtokens[tokname]
      if choice then
        choice = choice + tokpatt
      else
        choice = tokpatt
      end
    end
    G.TOKEN = choice
  end
  if lpegrex.debug then
    -- Always-succeeding match-time capture that writes `fmt` (formatted with
    -- the rule name, line and column) to stderr.
    local function trace(fmt, rulename)
      return lpeg.Cmt(lpeg.P(true), function(s, p)
        local lineno, colno = lpegrex.calcline(s, p)
        io.stderr:write(string.format(fmt, rulename, lineno, colno))
        return true
      end)
    end
    for k, patt in pairs(G) do
      -- index 1 holds the name of the initial rule, not a pattern
      if k ~= 1 then
        local enter = trace('ENTER %s (%d:%d)\n', k)
        local leave = trace('LEAVE %s (%d:%d)\n', k)
        G[k] = enter * patt * leave
      end
    end
  end
  -- cleanup grammar context
  G, Gkeywords, Gtokens = nil, nil, nil
  return l.P(t)
end

-- Add rule `k` with pattern `exp` to the grammar table `t` and return `t`.
-- Raises an error when `t` already has a rule of that name.
local function adddef(t, k, exp)
  if t[k] then
    error("'"..k.."' already defined as a rule")
  end
  t[k] = exp
  return t
end

-- Register the first rule of a grammar: its name `n` is stored at index 1
-- (the initial rule of the grammar table) before the rule itself is added.
local function firstdef(t, n, r)
  t[1] = n
  local grammar = adddef(t, n, r)
  return grammar
end

-- Reference to the grammar rule (non terminal) `n`. `b` tells whether the
-- reference appears inside a grammar; outside of one an error is raised.
local function NT(n, b)
  if b then
    return l.V(n)
  end
  error("rule '"..n.."' used outside a grammar")
end

-- Lexical building blocks of the LPegRex syntax.
-- S skips whitespace and `--` comments running to the end of the line.
local S = (Predef.space + "--" * (Any - Predef.nl)^0)^0
-- names start with a letter or underscore
local NamePrefix = l.R("AZ", "az", "__")
-- characters allowed after the first one: letters, underscore and digits
local WordSuffix = l.R("AZ", "az", "__", "09")
-- a '-' is accepted inside a name only when a word character follows it
local NameSuffix = (WordSuffix + (l.P"-" * andP(WordSuffix)))^0
-- captured identifier
local Name = l.C(NamePrefix * NameSuffix)
-- characters a token is made of: punctuation other than underscore
local TokenDigit = Predef.punct - "_"
-- arrows introducing node, table and plain rule definitions
local NodeArrow = S * "<=="

local TableArrow = S * "<-|"

local RuleArrow = S * (l.P"<--" + "<-")

local Arrow = NodeArrow + TableArrow + RuleArrow
-- unsigned / optionally negative decimal numerals, converted with `tonumber`
local Num = l.C(l.R"09"^1) * S / tonumber

local SignedNum = l.C(l.P"-"^-1 * l.R"09"^1) * S / tonumber
-- single or double quoted string; captures the text between the quotes
local String = "'" * l.C((Any - "'")^0) * expect("'", "MisTerm1")

+ '"' * l.C((Any - '"')^0) * expect('"', "MisTerm2")
-- backtick quoted token (punctuation) and keyword (starts like a name)
local Token = "`" * l.C(TokenDigit * (TokenDigit - '`')^0) * expect("`", "MisTerm3")

local Keyword = "`" * l.C(NamePrefix * (Any - "`")^0) * expect('`', "MisTerm3")
-- `x-y` inside a class: the '-' is dropped and the two characters go to l.R
local Range = l.Cs(Any * (l.P"-"/"") * (Any - "]")) / l.R
-- the first extra match argument, i.e. the `defs` table given to `compile`
local Defs = l.Carg(1)

local NamedDef = Name * Defs -- a defined name only has meaning in a given environment
-- `%name`: resolved through `getuserdef`
local Defined = "%" * NamedDef / getuserdef
-- one item of a character class, converted to a pattern
local Item = (Defined + Range + l.C(Any)) / l.P
-- `[...]` character class: the items are combined with ordered choice
local Class =

"["

* (l.C(l.P"^"^-1)) -- optional complement symbol

* l.Cf(expect(Item, "ExpItem") * (Item - "]")^0, lmt.__add)

/ function(c, p) return c == "^" and Any - p or p end

* expect("]", "MisClose8")

-- Pattern for a defined name used as the operand of a capture operator: it
-- produces, as one group, the resolved definition followed by the pattern
-- combinator `f` that will be applied to it.
local function defwithfunc(f)
  local resolved = NamedDef / getuserdef
  return l.Cg(resolved * l.Cc(f))
end

-- Make the newly registered token `s` coexist with the tokens already known:
-- whenever one token is a prefix of another, the rule of the shorter one is
-- guarded so that it does not match where the longer one matches.
local function updatetokens(s)
  -- true when `text` begins with `prefix` (plain comparison, no patterns)
  local function startswith(text, prefix)
    return text:find(prefix, 1, true) == 1
  end
  for _, toks in ipairs(Gtokens) do
    if toks ~= s then
      if startswith(toks, s) then
        G[s] = -G[toks] * G[s]
      elseif startswith(s, toks) then
        G[toks] = -G[s] * G[toks]
      end
    end
  end
end

-- Return the pattern for the token `s`, registering it in the current grammar
-- on first use: a rule named after the token matches its text followed by the
-- SKIP rule. With `cap` set, the pattern also produces `s` as a capture.
local function maketoken(s, cap)
  local tokref = Gtokens[s]
  if not tokref then
    tokref = l.V(s)
    -- Gtokens is both a map (token -> reference) and a list of token names
    Gtokens[s] = tokref
    Gtokens[#Gtokens+1] = s
    G[s] = l.P(s) * l.V(getopt("SKIP"))
    updatetokens(s)
  end
  if not cap then
    return tokref
  end
  return tokref * l.Cc(s)
end

-- Add the keyword pattern `kp` as one more alternative of the grammar's
-- KEYWORD rule, creating the rule on first use.
local function updatekeywords(kp)
  local current = G.KEYWORD
  if current then
    G.KEYWORD = current + kp
  else
    G.KEYWORD = kp
  end
end

-- Split the string `s` at every occurrence of `sep` and return the pieces
-- (possibly empty strings) in a table.
local function split(s,sep)
  local delimiter = l.P(sep)
  local field = l.C((1 - delimiter)^0)
  local fields = l.Ct(field * (delimiter * field)^0) -- make a table capture
  return l.match(fields, s)
end

-- Return the pattern for the keyword `s`, registering it in the current
-- grammar on first use. When the `kw` option is set it is called with `s` and
-- a truthy result replaces the keyword text. Every space separated word of the
-- keyword must not be followed by the NAME_SUFFIX rule and is followed by the
-- SKIP rule. With `cap` set, the pattern also produces `s` as a capture.
local function makekeyword(s, cap)
  local kw = getopt('kw')
  if kw ~= nil then
    s = kw(s) or s
  end
  local kwpatt = Gkeywords[s]
  if not kwpatt then
    for _, word in ipairs(split(s, " ")) do
      local wordpatt = l.P(word) * -l.V(getopt("NAME_SUFFIX")) * l.V(getopt("SKIP"))
      if kwpatt == nil then
        kwpatt = wordpatt
      else
        kwpatt = kwpatt * wordpatt
      end
    end
    Gkeywords[s] = kwpatt
    updatekeywords(kwpatt)
  end
  if not cap then
    return kwpatt
  end
  return kwpatt * l.Cc(s)
end

-- Build the definition of a node rule: returns the rule name `n` and the
-- pattern `p` wrapped in a table capture. Depending on the `pos`, `tag` and
-- `endpos` options the node table also receives the start position, the tag
-- and the end position as named fields. When the `tag` option is a function,
-- it is called with the tag and the node table instead.
local function makenode(n, tag, p)
  local tagfield = getopt('tag')
  local posfield = getopt('pos')
  local endposfield = getopt('endpos')
  local istagfunc = type(tagfield) == 'function'
  local body = p
  if tagfield and not istagfunc then
    body = l.Cg(l.Cc(tag), tagfield) * body
  end
  if posfield then
    body = l.Cg(l.Cp(), posfield) * body
  end
  if endposfield then
    body = body * l.Cg(l.Cp(), endposfield)
  end
  local node = l.Ct(body)
  if istagfunc then
    node = l.Cc(tag) * node / tagfield
  end
  return n, node
end

-- Grammar of the LPegRex pattern syntax. Matching it against a pattern string
-- produces, through captures, the equivalent LPeg pattern. The back capture
-- "G" tells whether the text being parsed is inside a grammar.
local exp = l.P{ "Exp",
  -- a grammar, or an ordered choice of sequences separated by '/'
  Exp = S * ( l.V"Grammar"
            + l.Cf(l.V"Seq" * (S * "/" * expect(S * l.V"Seq", "ExpPatt1"))^0, lmt.__add) );
  -- concatenation of prefixed items
  Seq = l.Cf(l.Cc(l.P"") * l.V"Prefix" * (S * l.V"Prefix")^0, lmt.__mul);
  -- `&p` and `!p` predicates
  Prefix = "&" * expect(S * l.V"Prefix", "ExpPatt2") / lmt.__len
         + "!" * expect(S * l.V"Prefix", "ExpPatt3") / lmt.__unm
         + l.V"Suffix";
  -- a primary followed by any number of suffix operators; each operator yields
  -- its operand plus the function that combines it with the pattern so far
  -- (the marker "lab" stands for throwing a label on failure)
  Suffix = l.Cf(l.V"Primary" *
          ( S * ( l.P"+" * l.Cc(1, lmt.__pow)
                + l.P"*" * l.Cc(0, lmt.__pow)
                + l.P"?" * l.Cc(-1, lmt.__pow)
                + l.P"~?" * l.Cc(l.Cc(false), lmt.__add)
                + "^" * expect( l.Cg(Num * l.Cc(mult))
                              + l.Cg(l.C(l.S"+-" * l.R"09"^1) * l.Cc(lmt.__pow)
                                   + Name * l.Cc"lab"
                                   ),
                              "ExpNumName")
                + "->" * expect(S * ( l.Cg((String + Num) * l.Cc(lmt.__div))
                                    + l.P"{}" * l.Cc(nil, l.Ct)
                                    + defwithfunc(lmt.__div)
                                    ),
                                "ExpCap")
                + "=>" * expect(S * defwithfunc(l.Cmt),
                                "ExpName1")
                + "~>" * S * defwithfunc(l.Cf)
                ) --* S
          )^0, function(a,b,f) if f == "lab" then return a + l.T(b) end return f(a,b) end );
  Primary = "(" * expect(l.V"Exp", "ExpPatt4") * expect(S * ")", "MisClose1")
          + String / l.P
          -- backtick quoted token or keyword
          + andP(l.P'`') * expect(
              Token / maketoken
              + Keyword / makekeyword
            , "ExpTokOrKey")
          + Class
          + Defined
          -- `%{label}`: throw a label
          + "%" * expect(l.P"{", "ExpNameOrLab")
            * expect(S * l.V"Label", "ExpLab1")
            * expect(S * "}", "MisClose7") / l.T
          -- `{:name: p :}` / `{: p :}`: group capture
          + "{:" * (Name * ":" + l.Cc(nil)) * expect(l.V"Exp", "ExpPatt5")
            * expect(S * ":}", "MisClose2")
            / function(n, p) return l.Cg(p, n) end
          -- `=name`: match the text of a previous named capture
          + "=" * expect(Name, "ExpName2")
            / function(n) return l.Cmt(l.Cb(n), equalcap) end
          -- `{}`: position capture
          + l.P"{}" / l.Cp
          -- `$value`: constant capture
          + l.P"$" * expect(
              l.P"nil" / function() return l.Cc(nil) end
              + l.P"false" / function() return l.Cc(false) end
              + l.P"true" / function() return l.Cc(true) end
              + l.P"{}" / function() return l.Cc({}) end
              + SignedNum / function(s) return l.Cc(tonumber(s)) end
              + String / function(s) return l.Cc(s) end
              + (NamedDef / getuserdef) / l.Cc,
              "ExpName4")
          -- `@item`: match the item or throw the label `Expected_<item>`
          + l.P"@" * expect(
              String / function(s) return l.P(s) + l.T('Expected_'..s) end
              + Token / function(s)
                return maketoken(s) + l.T('Expected_'..s)
              end
              + Keyword / function(s)
                return makekeyword(s) + l.T('Expected_'..s)
              end
              + Name * l.Cb("G") / function(n, b)
                return NT(n, b) + l.T('Expected_'..n)
              end,
              "ExpName5")
          -- `{~ p ~}`: substitution capture
          + "{~" * expect(l.V"Exp", "ExpPatt6") * expect(S * "~}", "MisClose3") / l.Cs
          -- `{| p |}`: table capture (must be tried before the plain `{ p }`)
          + "{|" * expect(l.V"Exp", "ExpPatt7") * expect(S * "|}", "MisClose4") / l.Ct
          -- captured token or keyword inside braces
          + "{" * andP(l.P'`') * expect(
              Token * l.Cc(true) / maketoken
              + Keyword * l.Cc(true) / makekeyword
            , "ExpTokOrKey") * expect(S * "}", "MisClose5")
          -- `{ p }`: simple capture
          + "{" * expect(l.V"Exp", "ExpPattOrClose") * expect(S * "}", "MisClose5") / l.C
          + l.P"." * l.Cc(Any)
          -- rule reference: a name not followed by a definition arrow, or `<name>`
          + (Name * -(Arrow + (S * ":" * S * Name * Arrow)) + "<" * expect(Name, "ExpName3")
             * expect(">", "MisClose6")) * l.Cb("G") / NT;
  Label = Num + Name;
  -- the four kinds of rule definitions; each yields the rule name and pattern
  RuleDefinition = Name * RuleArrow * expect(l.V"Exp", "ExpPatt8");
  TableDefinition = Name * TableArrow * expect(l.V"Exp", "ExpPatt8") /
    function(n, p) return n, l.Ct(p) end;
  NodeDefinition = Name * NodeArrow * expect(l.V"Exp", "ExpPatt8") /
    function(n, p) return makenode(n, n, p) end;
  TaggedNodeDefinition = Name * S * l.P":" * S * Name * NodeArrow * expect(l.V"Exp", "ExpPatt8") / makenode;
  Definition = l.V"TaggedNodeDefinition" + l.V"NodeDefinition" + l.V"TableDefinition" + l.V"RuleDefinition";
  -- one or more definitions folded into a grammar table and compiled
  Grammar = l.Cg(l.Cc(true), "G")
            * l.Cf(l.P"" / begindef
              * (l.V"Definition") / firstdef
              * (S * (l.Cg(l.V"Definition")))^0, adddef) / enddef;
}

-- Complete syntax pattern: start outside of a grammar (group "G" is false),
-- parse one expression or grammar and turn the result into a pattern, then
-- require that nothing but skippable text remains.
return S * l.Cg(l.Cc(false), "G") * expect(exp, "NoPatt") / l.P

* S * expect(-Any, "ExtraChars")

end

local rexpatt = mkrex()

--[[
Compiles the given `pattern` string and returns an equivalent LPeg pattern.
The given string may define either an expression or a grammar.
A `pattern` that is already a compiled pattern is returned unchanged.
The optional `defs` table provides extra Lua values to be used by the pattern.
The optional `defs.__options` table can provide the following options:
  * `tag` name of the node tag field, if `false` it's omitted (default "tag");
    when it is a function, it is called with the tag and the node table.
  * `pos` name of the node initial position field, if `false` it's omitted (default "pos").
  * `endpos` name of the node final position field, if `false` it's omitted (default "endpos").
  * `SKIP` name of the rule referenced after each token or keyword (default "SKIP").
  * `NAME_SUFFIX` name of the rule that must not match right after a keyword
    (default "NAME_SUFFIX").
  * `kw` function called with each keyword string; a truthy result replaces it.
Raises an error with the line, column and a description of the problem when
the pattern has a syntax error.
]]
function lpegrex.compile(pattern, defs)
  if lpeg.type(pattern) == 'pattern' then -- already compiled
    return pattern
  end
  -- the options are only visible while the syntax pattern is running
  rexoptions = defs and defs.__options
  local cp, errlabel, errpos = rexpatt:match(pattern, 1, defs)
  rexoptions = nil
  if not cp then
    local lineno, colno, line = lpegrex.calcline(pattern, errpos)
    -- fall back to the raw label so an unknown label still reports the
    -- position instead of failing on a nil concatenation
    local message = ErrorInfo[errlabel] or tostring(errlabel)
    local err = {
      "syntax error(s) in pattern\n",
      "L"..lineno..":C"..colno..": "..message,
      line,
      (" "):rep(colno-1)..'^', -- caret under the offending column
    }
    error(table.concat(err, "\n"), 3)
  end
  return cp
end

--[[
Matches the given `pattern` against the `subject` string.
On success, returns the captured values when the pattern captured any,
otherwise the index in `subject` of the first character after the match.
The optional number `init` is the position in `subject` where matching starts
(default 1).
Compiled patterns are cached by `pattern`.
]]
function lpegrex.match(subject, pattern, init)
  local compiled = mcache[pattern]
  if not compiled then
    compiled = lpegrex.compile(pattern)
    mcache[pattern] = compiled
  end
  local start = init or 1
  return compiled:match(subject, start)
end

--[[
Searches for the given `pattern` in the given `subject`.
When an occurrence is found, returns the index where it starts and the index
where it ends. Otherwise, returns nil.
The optional number `init` is the position in `subject` where the search
starts (default 1).
]]
function lpegrex.find(subject, pattern, init)
  local searcher = fcache[pattern]
  if not searcher then
    -- drop the captures of the pattern, bracket it with position captures and
    -- retry one character further on failure
    local body = lpegrex.compile(pattern) / 0
    searcher = lpeg.P{lpeg.Cp() * body * lpeg.Cp() + 1 * lpeg.V(1)}
    fcache[pattern] = searcher
  end
  local first, past = searcher:match(subject, init or 1)
  if not first then
    return first
  end
  return first, past - 1
end

--[[
Does a global substitution: returns `subject` with all occurrences of
`pattern` replaced by `replacement`.
Substitution patterns are cached by `pattern` and then by `replacement`.
]]
function lpegrex.gsub(subject, pattern, replacement)
  local byreplacement = gcache[pattern]
  if not byreplacement then
    byreplacement = {}
  end
  gcache[pattern] = byreplacement
  local substituter = byreplacement[replacement]
  if not substituter then
    local compiled = lpegrex.compile(pattern)
    substituter = lpeg.Cs((compiled / replacement + 1)^0)
    byreplacement[replacement] = substituter
  end
  return substituter:match(subject)
end

-- Collects the position of every new line character of its subject.
local calclinepatt = lpeg.Ct(((Any - Predef.nl)^0 * lpeg.Cp() * Predef.nl)^0)

--[[
Extract line information from `position` in `subject`.
A `position` past the end of `subject` is clamped to its length; a negative
one raises an error.
Returns line number, column number, line content, line start position and
line end position.
]]
function lpegrex.calcline(subject, position)
  if position < 0 then
    error('invalid position')
  end
  local sublen = #subject
  if position > sublen then
    position = sublen
  end
  -- new lines found up to (and including) `position`
  local newlines = calclinepatt:match(subject:sub(1,position))
  local count = #newlines
  local lastnl = newlines[count] or 0
  -- the line runs until the next new line after `position`, or to the end
  local nextnl = subject:find("\n", position+1, true)
  local lineend
  if nextnl then
    lineend = nextnl - 1
  else
    lineend = #subject
  end
  local linestart = lastnl + 1
  return count + 1, position - lastnl, subject:sub(linestart, lineend), linestart, lineend
end

-- Auxiliary function for `prettyast`.
-- Appends to the list `ss` one line for `node` (its `tag`, or '-' when it has
-- none, plus ` pos=N` when it has a `pos` field) followed by the lines of its
-- array elements, each level prefixed with one more '| ' after `indent`.
-- Child tables are formatted recursively, strings are quoted and escaped, and
-- any other value is formatted with `tostring`.
local function ast2string(node, indent, ss)
  local extra = ''
  if node.pos then extra = string.format(' pos=%d', node.pos) end
  if node.tag then
    ss[#ss+1] = indent..node.tag..extra
  else
    ss[#ss+1] = indent..'-'..extra
  end
  indent = indent..'| '
  for i=1,#node do
    local child = node[i]
    local ty = type(child)
    if ty == 'table' then
      ast2string(child, indent, ss)
    elseif ty == 'string' then
      -- Backslashes are escaped first so that the backslashes introduced by
      -- the following replacements are not escaped again.
      local escaped = child
        :gsub('\\', '\\\\')
        :gsub('"', '\\"')
        :gsub('\n', '\\n')
        :gsub('\t', '\\t')
        :gsub('\r', '\\r')
        -- everything left that is not a space, alphanumeric or punctuation
        :gsub('[^ %w%p]', function(s)
          return string.format('\\x%02x', string.byte(s))
        end)
      ss[#ss+1] = indent..'"'..escaped..'"'
    else
      ss[#ss+1] = indent..tostring(child)
    end
  end
end

-- Convert an AST into a human readable string: one line per node or leaf
-- value, with children indented below their parent.
function lpegrex.prettyast(node)
  local lines = {}
  ast2string(node, '', lines)
  local text = table.concat(lines, '\n')
  return text
end

return lpegrex

--[[

The MIT License (MIT)

Copyright (c) 2021 Eduardo Bart

Copyright (c) 2014-2020 Sérgio Medeiros

Copyright (c) 2007-2019 Lua.org, PUC-Rio.

Permission is hereby granted, free of charge, to any person obtaining a copy

of this software and associated documentation files (the "Software"), to deal

in the Software without restriction, including without limitation the rights

to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

copies of the Software, and to permit persons to whom the Software is

furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all

copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

SOFTWARE.

]]

end)

-- Modules that are already loaded when the bundle starts: host provided
-- libraries, plus an empty table standing in for 'strict'.
local modules = {
  ['bit32'] = require('bit32'),
  ['string'] = require('string'),
  ['strict'] = {},
  ['table'] = require('table'),
}

-- Bundle-local `require`: returns the module registered under `name`,
-- building it on first use by calling its builder with `myrequire` itself.
local function myrequire(name)
  local loaded = modules[name]
  if loaded == nil then
    -- placeholder stored while the builder runs, so a nested request for the
    -- same module gets `true` back instead of running the builder again
    modules[name] = true
    loaded = (builders[name])(myrequire)
    modules[name] = loaded
  end
  return loaded
end

return myrequire('llpeg.lpegrex')

end)()