-- Module:Sandbox/Peter coxhead/Tests

require('strict')

local TaxonItalics = require('Module:TaxonItalics')

local ItalicTitle = require('Module:Italic title')

-- Table of the functions this module exports; it is the value returned at the
-- end of the file.
local p = {}

local l = {} -- separate out local functions

-- Debugging helper: renders a list as "|item<len>|item<len>|...|", where <len>
-- is the value mw.ustring.len reports for that item.
-- An empty list yields just "|".
function l.show(items)
    local parts = {}
    for idx = 1, #items do
        local item = items[idx]
        parts[#parts + 1] = "|" .. item .. "" .. mw.ustring.len(item) .. ""
    end
    parts[#parts + 1] = "|"
    return table.concat(parts)
end

-- *****************************************************************************

-- ========= Constants for states =========
-- Numeric codes for the state machine in l.doSplitTaxonName. Each unit it
-- produces is tagged with one of these codes, and l.joinUnits reads the tags.

local startState = 0 -- no unit in progress (initial state, and the state after a completed entity)

local italState = 1 -- unit made of ordinary characters; the only state l.joinUnits checks for explicitly

local whiteSpState = 2 -- unit is whitespace (a run of spaces collapses to one unit)

local nonItalState = 3 -- unit is a specially treated character or a recognised connecting term

local entityState = 4 -- unit runs from '&' up to and including the next ';'

local endState = 9 -- marks positions beyond the last unit (used in l.joinUnits)

-- Split a string representing a taxon name into 'units'.
--
-- The string is scanned one Unicode character at a time by a small state
-- machine. Every unit is tagged with the state it was collected in:
--   * whiteSpState - one unit per run of spaces (only the first space is kept),
--                    or a non-breaking-space entity;
--   * nonItalState - a single specially treated character: ( ) × +, or a word
--                    recognised as a connecting term (replaced by its standard
--                    abbreviation, e.g. "ssp" -> "subsp.");
--   * entityState  - text from '&' up to and including the next ';';
--   * italState    - any other run of characters.
--
-- Parameters:
--   str - the taxon name to split.
-- Returns two parallel lists: units (strings) and states (state codes).
-- The unit still being collected when the string ends is appended as the final
-- entry, so an empty str yields one empty unit tagged startState.
--
-- NOTE(review): the final unit is not passed through the connecting-term
-- check, so a name that ends in e.g. "var." keeps italState for that word.
-- This matches the previous behaviour; confirm whether it is intended.
function l.doSplitTaxonName(str)
    -- connecting terms that are not italicized
    local cTerms = {
        --subsp.
        subspecies = "subsp.",
        ["subsp."] = "subsp.",
        subsp = "subsp.",
        ["ssp."] = "subsp.",
        ssp = "subsp.",
        --var.
        varietas = "var.",
        ["var."] = "var.",
        var = "var.",
        --subvar.
        subvarietas = "subvar.",
        ["subvar."] = "subvar.",
        subvar = "subvar.",
        --f.
        forma = "f.",
        ["f."] = "f.",
        f = "f.",
        --subf.
        subforma = "subf.",
        ["subf."] = "subf.",
        subf = "subf.",
        --subg.
        subgenus = "subg.",
        ["subg."] = "subg.",
        subg = "subg.",
        --sect.
        section = "sect.",
        ["sect."] = "sect.",
        sect = "sect.",
        --subsect.
        subsection = "subsect.",
        ["subsect."] = "subsect.",
        subsect = "subsect.",
        --ser.
        series = "ser.",
        ["ser."] = "ser.",
        ser = "ser.",
        --subser.
        subseries = "subser.",
        ["subser."] = "subser.",
        subser = "subser.",
        --cf.
        cf = "cf.",
        ["cf."] = "cf.",
        ["c.f."] = "cf."
    }
    local specialChrs = '[%(%)×%+]' -- pattern listing all specially treated characters
    local units = {}
    local states = {}
    local state = startState
    local j = 0
    local currUnit = ''
    local chr

    -- classify returns the state that a unit beginning with character c
    -- belongs to.
    local function classify(c)
        if c == ' ' then
            return whiteSpState
        elseif mw.ustring.match(c, specialChrs) then -- c is '(' or ')' or '×' or '+'
            return nonItalState
        elseif c == '&' then
            return entityState
        end
        return italState -- other kind of character
    end

    -- isNbspEntity reports whether a complete entity ("&...;") denotes a
    -- non-breaking space: the named form, or a decimal/hexadecimal numeric
    -- reference to code point 160. The earlier code compared the entity text
    -- with one-character strings, which can never equal a string that starts
    -- with '&' and ends with ';'.
    local function isNbspEntity(entity)
        if entity == '&nbsp;' then
            return true
        end
        local lowered = string.lower(entity)
        local dec = string.match(lowered, '^&#(%d+);$')
        if dec then
            return tonumber(dec) == 160
        end
        local hex = string.match(lowered, '^&#x(%x+);$')
        if hex then
            return tonumber(hex, 16) == 160
        end
        return false
    end

    -- startUnit starts a new unit to be taken from the string str
    local startUnit = function (nextState)
        currUnit = chr
        state = nextState
    end

    -- saveUnit saves the current unit taken from the string str and starts a
    -- new one
    local saveUnit = function (nextState)
        -- check for words that don't get italicized (?at this position)
        if state == italState and cTerms[currUnit] then
            currUnit = cTerms[currUnit]
            state = nonItalState
        end
        j = j + 1
        units[j] = currUnit
        states[j] = state
        currUnit = chr
        state = nextState
    end

    local n = mw.ustring.len(str)
    for i = 1, n do
        chr = mw.ustring.sub(str, i, i)
        if state == entityState then
            -- inside an entity every character is collected until ';'
            currUnit = currUnit .. chr
            if chr == ';' then
                chr = '' -- the next unit starts empty, not with the ';'
                -- nonbreaking spaces are treated as whitespace
                if isNbspEntity(currUnit) then
                    state = whiteSpState
                end
                saveUnit(startState)
            end
        else
            local nextState = classify(chr)
            if state == startState then
                startUnit(nextState)
            elseif state == whiteSpState then
                -- further spaces in a run of whitespace are ignored
                if nextState ~= whiteSpState then
                    saveUnit(nextState)
                end
            elseif state == nonItalState then
                -- a non-italic unit is always exactly one character long
                saveUnit(nextState)
            else -- state == italState
                if nextState == italState then
                    currUnit = currUnit .. chr
                else
                    saveUnit(nextState)
                end
            end
        end
    end

    -- save the last unit and its state
    j = j + 1
    units[j] = currUnit
    states[j] = state
    return units, states
end

-- Entry point: italicize the taxon name given as the first argument.
--
-- Steps:
--   1. normalise whitespace and strip existing bold/italic markup;
--   2. if the last word looks like a parenthesized disambiguating term that
--      starts with a lower-case letter, e.g. "(plant)", detach it so that it
--      is appended unchanged at the end;
--   3. split the remainder with l.doSplitTaxonName and reassemble it with
--      l.joinUnits.
--
-- Parameters:
--   frame - the invoking frame; frame.args[1] is the name (default "").
-- Returns the reassembled name followed by the detached term, if any.
function p.italicizeTaxonName(frame)
    local str = frame.args[1] or ""
    -- first clean up the name
    str = mw.ustring.gsub(str, "%s+", " ")
    str = mw.ustring.gsub(str, "'''", "")
    str = mw.ustring.gsub(str, "''", "")
    -- NOTE(review): this pattern was an empty string (a no-op substitution);
    -- presumably it removed HTML italic tags and the tag text was lost.
    -- Restored on that assumption - confirm.
    str = mw.ustring.gsub(str, "</?i>", "")
    -- remove the last 'word' if it appears to be a disambiguating term
    local last = l.lastWord(str)
    if mw.ustring.match(last, "%([a-z].*%)") then
        -- the separating space stays on str, so 'last' can simply be appended
        str = mw.ustring.sub(str, 1, -mw.ustring.len(last) - 1)
    else
        last = ''
    end
    -- now split up the name into 'units'; these must be locals, because an
    -- assignment to an undeclared global raises an error under
    -- require('strict')
    local units, states = l.doSplitTaxonName(str)
    -- finally, put everything back together, correctly italicized
    return l.joinUnits(units, states) .. last
end

-- Reassemble units produced by l.doSplitTaxonName into one string, wrapping
-- runs of italic units in italic markup.
--
-- An italic run is kept open across a single whitespace unit that is directly
-- followed by another italic unit, so "Aus bus" becomes one italic span rather
-- than two. Any other following unit closes the run.
--
-- Parameters:
--   units  - list of unit strings.
--   states - parallel list of state codes; it is only read, not modified.
-- Returns the joined string. It starts with a newline character, as it did
-- before this rewrite.
--
-- NOTE(review): the opening and closing markup strings were empty in the
-- reviewed source, so nothing was actually italicized; they are restored here
-- as <i> and </i> on the assumption that the tags were lost - confirm.
function l.joinUnits(units, states)
    local n = #units

    -- State of the unit at position k; positions past the end count as
    -- endState, so the look-ahead below needs no bounds checks.
    local function stateAt(k)
        if k > n then
            return endState
        end
        return states[k]
    end

    local parts = { '\n' }
    local inItalics = false
    for j = 1, n do
        if states[j] == italState then
            if not inItalics then
                parts[#parts + 1] = "<i>"
                inItalics = true
            end
            parts[#parts + 1] = units[j]
            -- close the run unless the next two units are "space, italic"
            if not (stateAt(j + 1) == whiteSpState and stateAt(j + 2) == italState) then
                parts[#parts + 1] = "</i>"
                inItalics = false
            end
        else
            parts[#parts + 1] = units[j]
        end
    end
    return table.concat(parts)
end

-- *****************************************************************************

-- Test helper: tries to expand Template:Taxonomy/<arg 1>, passing <arg 2> as
-- the 'machine code' parameter, and reports whether the call succeeded.
-- Returns "ok=true; >EXPANSION<" or "ok=false; >ERROR<".
function p.test(frame)
    local taxonName = frame.args[1] or ""
    local code = frame.args[2] or ""
    local templateSpec = {
        title = 'Template:Taxonomy/' .. taxonName,
        args = { ['machine code'] = code },
    }
    -- pcall keeps an expansion error from aborting the module; the second
    -- result is either the expansion or the error value
    local succeeded, outcome = pcall(frame.expandTemplate, frame, templateSpec)
    return 'ok=' .. tostring(succeeded) .. '; >' .. outcome .. '<'
end

-- Test helper: reports whether <arg 1> matches the pattern <arg 2>.
-- When no pattern is given, the default looks for a parenthesized term that
-- begins with an upper-case ASCII letter.
-- Returns 'matched' or 'not matched'.
function p.test1(frame)
    local subject = frame.args[1] or ''
    local pat = frame.args[2] or "%([A-Z].*%)"
    local found = mw.ustring.match(subject, pat)
    return found and 'matched' or 'not matched'
end

-- Test helper: stores the first argument in a table nested inside a list and
-- reads it back. Returns 'Argument 1 = ' followed by the argument.
function p.test2(frame)
    local records = {
        { a = frame.args[1] or '', e = 'Argument 1 = ' },
    }
    local first = records[1]
    return first.e .. first.a
end

-- Test helper: splits <arg 1> on the plain-text separator <arg 2> and lists
-- the pieces one per line, showing empty pieces as 'EMPTY'. It then appends
-- a few probes of a sparse table (only index 9 set) and of table.concat
-- restricted to that single index.
--
-- NOTE(review): in the reviewed source the line-break literals were quotes
-- containing a raw line break, which is not valid Lua. They are presumed to
-- have been HTML <br> tags and are restored as such - confirm.
function p.test3(frame)
    local a1 = frame.args[1] or ''
    local a2 = frame.args[2] or ''
    local lineBreak = '<br>'
    local t = mw.text.split(a1, a2, true) -- true: a2 is plain text, not a pattern
    -- collect the output locally; assigning to an undeclared global raises an
    -- error under require('strict')
    local lines = {}
    for i = 1, #t do
        local piece = t[i]
        if piece == nil then
            piece = 'NIL'
        elseif piece == '' then
            piece = 'EMPTY'
        end
        lines[i] = piece .. lineBreak
    end
    local tbl = { {} }
    tbl[1][9] = 'TEST'
    return table.concat(lines)
        .. lineBreak .. '/' .. tbl[1][9]
        .. lineBreak .. '/'
        .. lineBreak .. '/' .. table.concat(tbl[1], ',', 9, 9)
end

-- Test helper: echoes the first argument together with its first character.
-- A leading '[' is reported as '*wikilink*'.
function p.test4(frame)
    local arg = frame.args[1] or ''
    local lead = mw.ustring.sub(arg, 1, 1)
    if lead == '[' then
        lead = '*wikilink*'
    end
    return table.concat({ 'Frame arg1 = ', arg, ', first char = ', lead })
end

-- Test helper: builds a plain list and a list of records, then concatenates
-- values read from both (including a number, which '..' converts to text).
-- The frame argument is not used.
function p.test5(frame)
    local plain = { 'test' }
    local records = { { k = 'test', v = 0 } }
    local rec = records[1]
    return plain[1] .. ' ' .. ' ' .. rec.k .. '+' .. rec.v
end

-- Test helper: reports whether the page Template:Taxonomy/<arg 1> exists,
-- using the title object's 'exists' property.
--
-- Parameters:
--   frame - the invoking frame; frame.args[1] is the taxon (default 'Life').
-- Returns 'Taxonomy/<taxon> exists' or 'Taxonomy/<taxon> does not exist'.
function p.existsTest1(frame)
    local taxon = frame.args[1] or 'Life'
    local pageName = 'Taxonomy/' .. taxon
    local title = mw.title.new(pageName, 'Template')
    -- mw.title.new returns nil for a name that is not a valid title; such a
    -- page is reported as missing instead of raising an indexing error
    if title and title.exists then
        return pageName .. ' exists'
    end
    return pageName .. ' does not exist'
end

-- Test helper: reports whether Template:Taxonomy/<arg 1> exists by trying to
-- expand it with <arg 2> (default 'all') as the 'machine code' parameter.
-- A failed expansion is taken to mean the template does not exist.
-- Returns 'Template:Taxonomy/<taxon> exists' or '... does not exist'.
function p.existsTest2(frame)
    local taxonName = frame.args[1] or 'Life'
    local code = frame.args[2] or 'all'
    local templateName = 'Template:Taxonomy/' .. taxonName
    -- only the success flag matters; the expansion itself is discarded
    local expanded = pcall(frame.expandTemplate, frame, {
        title = templateName,
        args = { ['machine code'] = code },
    })
    return templateName .. (expanded and ' exists' or ' does not exist')
end

-- Entry point: returns the genus part of the name given as the first
-- argument, as found by l.genus scanning from the first character.
function p.getGenus(frame)
    local name = frame.args[1] or ''
    local startPos = 1
    return l.genus(name, startPos)
end

-- Return the first whitespace-delimited word of str, starting at character
-- position init. If that word is the hybrid sign (U+00D7), it is kept and
-- joined by a space to the word that follows it.
--
-- Parameters:
--   str  - the name to scan.
--   init - character position to start at (defaults to 1).
-- Returns the genus, e.g. "Aus" for "Aus bus" and "× Aus" for "× Aus bus".
function l.genus(str, init)
    init = init or 1
    local res = mw.ustring.match(str, '^[^%s]*', init)
    if res == mw.ustring.char(215) then
        -- skip the hybrid sign and the separator after it, relative to the
        -- current position; a fixed position of 3 would rescan the same sign
        -- forever when one hybrid sign follows another
        res = res .. ' ' .. l.genus(str, init + 2)
    end
    return res
end

-- Entry point: returns the last word of the first argument (see l.lastWord).
function p.getLastWord(frame)
    local text = frame.args[1] or ''
    return l.lastWord(text)
end

-- Return the part of str after its last whitespace character, or the whole
-- of str when it contains no whitespace.
function l.lastWord(str)
    -- the greedy '.*' runs up to the final whitespace character, so removing
    -- that match leaves only the last word; when nothing matches, gsub hands
    -- back str unchanged
    local tail = mw.ustring.gsub(str, '.*%s', '', 1)
    return tail
end

-- Test helper: abbreviates the first word of the argument to its capital
-- letter plus a full stop (e.g. "Aus bus" -> "A. bus") and appends the number
-- of substitutions made, as " (N matches)".
function p.abbreviate(frame)
    local fullName = frame.args[1] or ''
    local shortened, count = mw.ustring.gsub(fullName, '([A-Z]).- (.*)', '%1. %2')
    return table.concat({ shortened, ' (', tostring(count), ' matches)' })
end

-- Entry point: formats the page name given as the first argument with the
-- 'Taxon italics' template and passes the result to the DISPLAYTITLE parser
-- function, returning whatever that call returns.
function p.italicTaxonTitle(frame)
    local rawName = frame.args[1] or ''
    local formatted = frame:expandTemplate({ title = 'Taxon italics', args = { rawName } })
    return frame:callParserFunction({ name = 'DISPLAYTITLE', args = { formatted } })
end

-- Check that a piped link is consistent: when the link text (arg 2) differs
-- from the link target (arg 1), the text must be the title of a page that
-- redirects to the target.
--
-- Parameters:
--   frame - the invoking frame; frame.args[1] is the link target and
--           frame.args[2] the link text (both default to '').
-- Returns true when either argument is empty or both are equal; otherwise
-- true only if the page named by the link text redirects to the target.
function p.linkCheck(frame)
    local linkTarget = frame.args[1] or ''
    local linkText = frame.args[2] or ''
    -- nothing to check unless both are present and different
    if linkTarget == '' or linkText == '' or linkTarget == linkText then
        return true
    end
    local linkTargetTitle = mw.title.new(linkTarget)
    local linkTextTitle = mw.title.new(linkText)
    -- mw.title.new returns nil for text that is not a valid title; such text
    -- cannot name a redirect
    if not linkTargetTitle or not linkTextTitle then
        return false
    end
    -- redirectTarget is false for a page that is not a redirect
    return linkTextTitle.redirectTarget == linkTargetTitle
end

-- Test helper: parses the species name given as the first argument with
-- l.doParseSpeciesName and shows the three parts it returns.
function p.parseSpeciesName(frame)
    local fullName = frame.args[1] or ''
    local g, d, s = l.doParseSpeciesName(fullName)
    return 'genus =' .. g .. ', disambig=' .. d .. ', species=' .. s
end

-- Parse a species name into genus, parenthesized disambiguator and specific
-- name. Words are separated by single spaces. A hybrid sign (U+00D7) standing
-- as a word of its own is kept and joined by a space to the word after it.
--
-- Examples of the accepted shapes:
--   "Aus bus"          -> "Aus",   "",        "bus"
--   "Aus (plant) bus"  -> "Aus",   "(plant)", "bus"
--   "× Aus bus"        -> "× Aus", "",        "bus"
--   "Aus × bus"        -> "Aus",   "",        "× bus"
--
-- Parameters:
--   speciesName - the name to parse.
-- Returns genus, disambig, species; parts that are absent are ''. A name
-- consisting only of a hybrid sign yields an empty genus.
function l.doParseSpeciesName(speciesName)
    local hybridSign = mw.ustring.char(215)
    local genus, disambig, species = '', '', ''
    local words = mw.text.split(speciesName, " ", true)
    local nWords = #words
    local currWord = 1
    if currWord > nWords then return genus, disambig, species end

    genus = words[currWord]
    if genus == hybridSign then
        currWord = currWord + 1
        if currWord > nWords then return '', disambig, species end
        genus = genus .. ' ' .. words[currWord]
    end

    currWord = currWord + 1
    if currWord > nWords then return genus, disambig, species end

    species = words[currWord]
    if mw.ustring.sub(species, 1, 1) == '(' then
        -- a word starting with '(' is the disambiguator, not the species
        disambig = species
        currWord = currWord + 1
        if currWord > nWords then return genus, disambig, '' end
        species = words[currWord]
    end
    if species == hybridSign then
        currWord = currWord + 1
        if currWord > nWords then return genus, disambig, '' end
        species = species .. ' ' .. words[currWord]
    end
    return genus, disambig, species
end

-- =============================================================================

-- Entry point: unpacks seven positional arguments (each defaulting to '') and
-- passes them on to l.doinfraspeciesboxName in this order: name, genus,
-- species, connecting term, infraspecies, base page title, italic title.
function p.infraspeciesboxName(frame)
    local a = frame.args
    return l.doinfraspeciesboxName(
        a[1] or '', -- name
        a[2] or '', -- genus
        a[3] or '', -- species
        a[4] or '', -- ct (connecting term)
        a[5] or '', -- infraspecies
        a[6] or '', -- basePageTitle
        a[7] or ''  -- italicTitle
    )
end

-- Work out the name (caption) for an infraspecies taxobox and, where
-- appropriate, italicize the page title.
--
-- The taxon name is assembled as "genus species [ct] infraspecies". The page
-- is treated as italicizable only when italicTitle is not 'no' and
-- basePageTitle equals that assembled name.
--
-- Parameters:
--   name          - explicit taxobox name; '' means derive it from the page.
--   genus         - genus, possibly followed by a parenthesized disambiguator
--                   and/or a '/qualifier', both of which are stripped.
--   species       - specific name.
--   ct            - connecting term (may be '').
--   infraspecies  - infraspecific name.
--   basePageTitle - page title to compare with the assembled taxon name.
--   italicTitle   - 'no' suppresses italics; 'test' appends the formatted
--                   title to the result instead of setting DISPLAYTITLE.
-- Returns the taxobox name.
function l.doinfraspeciesboxName(name, genus, species, ct, infraspecies, basePageTitle, italicTitle)
    -- strip any disambig and qualifier; the empty replacement strings were
    -- missing from the reviewed source, which made this line a syntax error
    genus = mw.ustring.gsub(genus, '%s+%b()$', '', 1)
    genus = mw.ustring.gsub(genus, '/.*$', '', 1)

    local taxon = genus .. ' ' .. species
    if ct == '' then
        taxon = taxon .. ' ' .. infraspecies
    else
        taxon = taxon .. ' ' .. ct .. ' ' .. infraspecies
    end
    local italicizeP = italicTitle ~= 'no' and (basePageTitle == taxon) -- use basePageTitle to match taxon

    -- deal with taxobox name (i.e. its caption)
    if name == '' then
        name = basePageTitle
        if italicizeP then name = TaxonItalics.italicizeTaxonName(name, false, false) end
    end

    -- deal with page title
    if italicizeP then
        local pageTitle = mw.title.getCurrentTitle().text -- formatting the page title with DISPLAYTITLE needs the full page title
        pageTitle = TaxonItalics.italicizeTaxonName(pageTitle, false, false, true) -- format pageTitle, not italicizing any parenthesized term
        if italicTitle ~= 'test' then
            mw.getCurrentFrame():callParserFunction('DISPLAYTITLE', pageTitle)
        else
            name = name .. ' \\Italic title\\ ' .. pageTitle -- for testing and debugging
        end
    end
    return name
end

-- =============================================================================

-- Entry point: renders the positional arguments of the calling template as a
-- list in running text, with each item in bold, e.g. "a, b, or c".
--
-- Parameters:
--   frame - the invoking frame. The items are the positional arguments of its
--           parent frame; frame.args.conj is the conjunction (default "or").
-- Returns the list text: items are separated by ", ", and the last item is
-- preceded by the conjunction, which gets a leading comma only when there are
-- more than two items.
--
-- NOTE(review): the reviewed source concatenated empty strings where markup
-- evidently used to be, so no item was emphasized. Bold is restored here with
-- <b>...</b> around each item, on the assumption that those tags were lost -
-- confirm.
function p.boldList(frame)
    local items = {}
    for _, v in ipairs(frame:getParent().args) do
        items[#items + 1] = "<b>" .. v .. "</b>"
    end
    local conj = (#items > 2 and ", " or " ") .. (frame.args.conj or "or") .. " "
    return mw.text.listToText(items, ", ", conj)
end

return p