Module:Wikitext Parsing/sandbox
require("strict")
--Helper functions
--Reports whether `text` begins with `subtext` (plain comparison, no patterns).
local function startswith(text, subtext)
    local prefixLength = #subtext
    local prefix = string.sub(text, 1, prefixLength)
    return prefix == subtext
end
--Reports whether `text` ends with `subtext` (plain comparison, no patterns).
--An empty `subtext` counts as a suffix of every string, matching startswith.
local function endswith(text, subtext)
    local suffixLength = #subtext
    if suffixLength == 0 then
        --string.sub(text, -0, -1) would return the whole text rather than "",
        --so the empty suffix has to be handled separately.
        return true
    end
    return string.sub(text, -suffixLength) == subtext
end
--Turns `s` into a Lua pattern that matches it case-insensitively by replacing
--every letter with a two-character class, e.g. "pre" -> "[Pp][Rr][Ee]".
--Non-letter characters are copied through unchanged (they are NOT escaped).
--Returns exactly one value: the resulting pattern string.
local function allcases(s)
    --gsub also returns the substitution count; keep only the string so the
    --count can never leak into a caller's argument list (e.g. as the `init`
    --argument of string.find when this call is the last argument).
    local pattern = string.gsub(s, "%a", function(c)
        return "[" .. c:upper() .. c:lower() .. "]"
    end)
    return pattern
end
local trimcache = {}
local whitespace = {[" "]=1, ["\n"]=1, ["\t"]=1, ["\r"]=1}
--Strips leading and trailing characters found in `whitespace` from `str`.
--Results are memoised in `trimcache`, keyed by the untrimmed input.
--(mw.text.trim is surprisingly expensive, so this scans characters by hand.)
local function cheaptrim(str)
    local cached = trimcache[str]
    if cached then
        return cached
    end
    local length = #str
    --Walk forwards to the first character that is not whitespace
    local first = 1
    while first <= length and whitespace[string.sub(str, first, first)] do
        first = first + 1
    end
    if first > length then --Empty or whitespace-only input
        trimcache[str] = ""
        return ""
    end
    --Walk backwards to the last character that is not whitespace; this is
    --guaranteed to stop at `first` at the latest
    local last = length
    while whitespace[string.sub(str, last, last)] do
        last = last - 1
    end
    local trimmed = string.sub(str, first, last)
    trimcache[str] = trimmed
    return trimmed
end
--[=[ Implementation notes
---- NORMAL HTML TAGS ----
Tags are very strict on how they want to start, but loose on how they end.
The start must strictly follow <[tAgNaMe](%s|>) with no room for whitespace in
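the tag's name, but may then flow as they want afterwards, making
<div\nclass\n=\n"error"\n> valid (where \n stands for a newline).
There's no sense of escaping < or >
E.g. <div class="a>b"> ends its opening tag at the first >, even though that >
sits inside a quoted attribute value.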
If a tag has no end, it will consume all text instead of not processing
---- NOPROCESSING TAGS (nowiki, pre, syntaxhighlight, source, etc.) ----
(In most comments, <source> will not be mentioned. This is because it is the
deprecated version of <syntaxhighlight>.)
No-Processing tags have some interesting differences to the above rules.
For example, their syntax is a lot stricter. While an opening tag appears to
follow the same set of rules, A closing tag can't have any sort of extra
formatting period. While </div a/a> is valid, </nowiki a/a> isn't - only
newlines and spaces/tabs are allowed in closing tags.
Note that, even though <pre> tags cause a visual change when the ending tag has
extra formatting, it won't cause the no-processing effects. For some reason, the
format must be strict for that to apply.
Both the content inside the tag pair and the content inside each side of the
pair is not processed. E.g. <nowiki |}}>|}}</nowiki> would have both of the |}}
escaped in practice.
When something in the code is referred to as a "Nowiki Tag", it means a tag
which causes wiki text to not be processed, which includes <nowiki>, <pre>,
and <syntaxhighlight>.
Since we only care about these tags, we can ignore the idea of an intercepting
tag preventing processing, and just go straight for the first ending we can find
If there is no ending to find, the tag will NOT consume the rest of the text in
terms of processing behaviour (though <pre> will appear to have an effect).
Even if there is no end of the tag, the content inside the opening half will
still be unprocessed, meaning {{X20|<nowiki }}>}} wouldn't end at the first }}
despite there being no ending to the tag.
Note that there are some tags, like <math>, which also function like <nowiki>
and are included in this as well. Some other tags, like <ref>, have far too
unpredictable behaviour to be handled currently (they'd have to be split and
processed as something separate - it's complicated, but maybe not impossible.)
I suspect that every tag listed in Special:Version may behave somewhat like
this, but that's far too many cases worth checking for rarely used tags that may
not even have a good reason to contain {{ or }} anyways, so we leave them alone.
---- HTML COMMENTS AND INCLUDEONLY ----
HTML Comments are about as basic as it could get for this
Start at <!--, end at -->, no extra conditions. Simple enough
If a comment has no end, it will eat all text instead of not being processed
includeonly tags function mostly like a regular nowiki tag, with the exception
that the tag will actually consume all future text if not given an ending as
opposed to simply giving up and not changing anything. Due to complications and
the fact that this is far less likely to be present on a page, as well as being
something that may not want to be escaped, includeonly tags are ignored during
our processing
--]=]
local validtags = {nowiki=1, pre=1, syntaxhighlight=1, source=1, math=1}
--Checks whether a no-processing tag (one listed in validtags) opens at
--scanPosition. This function expects the string to start with the tag, i.e.
--text must have its "<" at scanPosition.
--Returns nil when the tag name is not in validtags, when the opening tag is
--never closed by a ">", or when another "<" appears before that ">".
--Otherwise returns a table with the fields:
--  Tag     - the lowercased tag name
--  Start   - the whole opening tag, from "<" up to and including ">"
--  Content - the text between the opening and closing tag ("" if none)
--  End     - the closing tag ("" if self-closing or never closed)
--  Length  - the combined length of Start, Content and End
local function TestForNowikiTag(text, scanPosition)
    local tagName = (string.match(text, "^<([^\n />]+)", scanPosition) or ""):lower()
    if not validtags[tagName] then
        return nil
    end
    local nextOpener = string.find(text, "<", scanPosition+1) or -1
    local nextCloser = string.find(text, ">", scanPosition+1) or -1
    if nextCloser > -1 and (nextOpener == -1 or nextCloser < nextOpener) then
        local startingTag = string.sub(text, scanPosition, nextCloser)
        --We have our starting tag (E.g. '<pre style="color:red">')
        --Now find our ending...
        if endswith(startingTag, "/>") then --self-closing tag (we are our own ending)
            return {
                Tag = tagName,
                Start = startingTag,
                Content = "", End = "",
                Length = #startingTag
            }
        else
            --Closing tags are strict: "</", the name in any letter case, then
            --only spaces/tabs/newlines before ">". The "</" prefix is required,
            --otherwise the name inside the opening tag itself would match.
            --The search starts after the opening tag, where the content begins.
            local endingTagStart, endingTagEnd = string.find(text, "</"..allcases(tagName).."[ \t\n]*>", nextCloser+1)
            if endingTagStart then --Regular tag formation
                local endingTag = string.sub(text, endingTagStart, endingTagEnd)
                local tagContent = string.sub(text, nextCloser+1, endingTagStart-1)
                return {
                    Tag = tagName,
                    Start = startingTag,
                    Content = tagContent,
                    End = endingTag,
                    Length = #startingTag + #tagContent + #endingTag
                }
            else --Content inside still needs escaping (also linter error!)
                return {
                    Tag = tagName,
                    Start = startingTag,
                    Content = "", End = "",
                    Length = #startingTag
                }
            end
        end
    end
    return nil
end
local function TestForComment(text, scanPosition) --Like TestForNowikiTag but for
if string.match(text, "^
local commentEnd = string.find(text, "-->", scanPosition+4, true)
if commentEnd then
return {
Start = "",
Content = string.sub(text, scanPosition+4, commentEnd-1),
Length = commentEnd-scanPosition+3
}
else --Consumes all text if not given an ending
return {
Start = "Should see|Shouldn't see}}]=]
local out = p.PrepareText(s)
mw.logObject(out)
local s = [=[BA]=]
local out = p.TestForComment(s, 2)
mw.logObject(out); mw.log(string.sub(s, 2, out.Length))
local a = p.ParseTemplates([=[
{{User:Aidan9382/templates/dummy
|A|B|C {{{A|B}}} { } } {
|
D |
E|F
|G|=|a=|A = A=B{{Text|1==
}} }}|A B=Success}}]=])
mw.logObject(a)
]==]