-- Module:Wikitext Parsing/sandbox

require("strict")

--Helper functions

--- Reports whether `text` begins with `subtext`.
-- @param text     string to inspect
-- @param subtext  prefix to look for
-- @return true when the first #subtext characters of text equal subtext
local function startswith(text, subtext)
	local prefixLength = #subtext
	local prefix = string.sub(text, 1, prefixLength)
	return prefix == subtext
end

--- Reports whether `text` ends with `subtext`.
-- @param text     string to inspect
-- @param subtext  suffix to look for
-- @return true when the last #subtext characters of text equal subtext
local function endswith(text, subtext)
	-- An empty suffix must be handled separately: -#subtext would be 0, and
	-- string.sub(text, 0) returns the whole string rather than "".
	if subtext == "" then
		return true
	end
	return string.sub(text, -#subtext) == subtext
end

--- Builds a case-insensitive Lua pattern fragment from `s`.
-- Every letter is replaced by a two-character set holding its upper and lower
-- case forms (e.g. "pre" becomes "[Pp][Rr][Ee]"); other characters are kept as is.
-- @param s  string to convert
-- @return the converted string, and only that: gsub's replacement count is
--   dropped so it cannot leak into a caller's argument list
local function allcases(s)
	local pattern = string.gsub(s, "%a", function(c)
		return "[" .. c:upper() .. c:lower() .. "]"
	end)
	return pattern
end

-- Results of earlier trims, keyed by the untrimmed input string.
local trimcache = {}

-- The characters that cheaptrim strips from either end of a string.
local whitespace = {[" "]=1, ["\n"]=1, ["\t"]=1, ["\r"]=1}

--- Strips leading and trailing whitespace from `str`, memoising the result.
-- mw.text.trim is surprisingly expensive, so here's an alternative approach.
-- @param str  string to trim
-- @return str without leading/trailing spaces, newlines, tabs or carriage returns
local function cheaptrim(str)
	local cached = trimcache[str]
	if cached then
		return cached
	end

	local length = #str

	-- Advance past the leading whitespace.
	local first = 1
	while first <= length and whitespace[string.sub(str, first, first)] do
		first = first + 1
	end

	-- Nothing but whitespace (or nothing at all).
	if first > length then
		trimcache[str] = ""
		return ""
	end

	-- Retreat past the trailing whitespace; this stops at `first` at the latest.
	local last = length
	while whitespace[string.sub(str, last, last)] do
		last = last - 1
	end

	local trimmed = string.sub(str, first, last)
	trimcache[str] = trimmed
	return trimmed
end

--[=[ Implementation notes

---- NORMAL HTML TAGS ----

Tags are very strict on how they want to start, but loose on how they end.

The start must strictly follow <[tAgNaMe](%s|>) with no room for whitespace in

the tag's name, but may then flow as they want afterwards, making

<div\nclass="error"\n> (where \n is a literal newline) valid

There's no sense of escaping < or >

E.g.

<div class="error\>"> will end at \> despite it being inside a quote

<div class="<span class="error">error</span>"> will not process the larger div

If a tag has no end, it will consume all text instead of not processing

---- NOPROCESSING TAGS (nowiki, pre, syntaxhighlight, source, etc.) ----

(In most comments, <source> will not be mentioned. This is because it is the

deprecated version of <syntaxhighlight>)

No-Processing tags have some interesting differences to the above rules.

For example, their syntax is a lot stricter. While an opening tag appears to

follow the same set of rules, A closing tag can't have any sort of extra

formatting, period. While </div extra> is valid, </nowiki extra> isn't - only

newlines and spaces/tabs are allowed in closing tags.

Note that, even though <pre> tags cause a visual change when the ending tag has

extra formatting, it won't cause the no-processing effects. For some reason, the

format must be strict for that to apply.

Both the content inside the tag pair and the content inside each side of the

pair is not processed. E.g. <nowiki |}}>|}}</nowiki> would have both of the |}}

escaped in practice.

When something in the code is referred to as a "Nowiki Tag", it means a tag

which causes wiki text to not be processed, which includes <nowiki>, <pre>,

and <syntaxhighlight>

Since we only care about these tags, we can ignore the idea of an intercepting

tag preventing processing, and just go straight for the first ending we can find

If there is no ending to find, the tag will NOT consume the rest of the text in

terms of processing behaviour (though <pre> will appear to have an effect).

Even if there is no end of the tag, the content inside the opening half will

still be unprocessed, meaning {{X20|<nowiki }}>}} wouldn't end at the first }}

despite there being no ending to the tag.

Note that there are some tags, like <math>, which also function like <nowiki>

and which are included in this as well. Some other tags have far too

unpredictable behaviour to be handled currently (they'd have to be split and

processed as something separate - it's complicated, but maybe not impossible.)

I suspect that every tag listed in Special:Version may behave somewhat like

this, but that's far too many cases worth checking for rarely used tags that may

not even have a good reason to contain {{ or }} anyways, so we leave them alone.

---- HTML COMMENTS AND INCLUDEONLY ----

HTML Comments are about as basic as it could get for this

Start at <!--, end at -->, no extra conditions. Simple enough

If a comment has no end, it will eat all text instead of not being processed

includeonly tags function mostly like a regular nowiki tag, with the exception

that the tag will actually consume all future text if not given an ending as

opposed to simply giving up and not changing anything. Due to complications and

the fact that this is far less likely to be present on a page, as well as being

something that may not want to be escaped, includeonly tags are ignored during

our processing

--]=]

-- Lowercased names of the tags this module treats as no-processing tags.
local validtags = {nowiki=1, pre=1, syntaxhighlight=1, source=1, math=1}

--- Checks whether a recognised no-processing tag opens at scanPosition.
-- This function expects the string to start with the tag at scanPosition.
-- @param text          string being scanned
-- @param scanPosition  index in text of the "<" that may open the tag
-- @return nil when the tag name is not in validtags, or when the opening tag
--   has no ">" before the next "<"; otherwise a table with the fields
--   Tag (lowercased name), Start (opening tag text), Content (text between the
--   two tags), End (closing tag text) and Length (#Start + #Content + #End)
local function TestForNowikiTag(text, scanPosition)
	local tagName = (string.match(text, "^<([^\n />]+)", scanPosition) or ""):lower()
	if not validtags[tagName] then
		return nil
	end

	-- The opening tag must reach its ">" before any other "<" appears.
	local nextOpener = string.find(text, "<", scanPosition+1, true)
	local nextCloser = string.find(text, ">", scanPosition+1, true)
	if not nextCloser or (nextOpener and nextOpener < nextCloser) then
		return nil
	end

	--We have our starting tag (E.g. '<pre style="color:red">')
	local startingTag = string.sub(text, scanPosition, nextCloser)

	--Now find our ending...
	if endswith(startingTag, "/>") then --self-closing tag (we are our own ending)
		return {
			Tag = tagName,
			Start = startingTag,
			Content = "", End = "",
			Length = #startingTag
		}
	end

	-- Closing tags are matched case-insensitively and may only carry spaces,
	-- tabs and newlines between the tag name and ">". An empty pattern here
	-- would match at scanPosition and never locate the real closing tag.
	local closingPattern = "</" .. allcases(tagName) .. "[ \t\n]*>"
	local endingTagStart, endingTagEnd = string.find(text, closingPattern, scanPosition)
	if not endingTagStart then --Content inside still needs escaping (also linter error!)
		return {
			Tag = tagName,
			Start = startingTag,
			Content = "", End = "",
			Length = #startingTag
		}
	end

	--Regular tag formation
	local endingTag = string.sub(text, endingTagStart, endingTagEnd)
	local tagContent = string.sub(text, nextCloser+1, endingTagStart-1)
	return {
		Tag = tagName,
		Start = startingTag,
		Content = tagContent,
		End = endingTag,
		Length = #startingTag + #tagContent + #endingTag
	}
end

local function TestForComment(text, scanPosition) --Like TestForNowikiTag but for

if string.match(text, "^

local commentEnd = string.find(text, "-->", scanPosition+4, true)

if commentEnd then

return {

Start = "",

Content = string.sub(text, scanPosition+4, commentEnd-1),

Length = commentEnd-scanPosition+3

}

else --Consumes all text if not given an ending

return {

Start = "Should see|Shouldn't see}}]=]

local out = p.PrepareText(s)

mw.logObject(out)

local s = [=[BA]=]

local out = p.TestForComment(s, 2)

mw.logObject(out); mw.log(string.sub(s, 2, out.Length))

local a = p.ParseTemplates([=[

{{User:Aidan9382/templates/dummy

|A|B|C {{{A|B}}} { } } {

|D

|

E

|F

|G|=|a=|A = A=B{{Text|1==}}}}|A B=Success}}

]=])

mw.logObject(a)

]==]