-- Module:Sandbox/Squc/Roman
-- Module to convert Roman numerals and reject invalid numerals
-- Table that collects the public entry points of this module.
local p={}
-- Markup fragments that the parser (todec) compares input tags against and
-- that the formatter (torom) and disperr emit.
-- NOTE(review): every entry except `pipe` is an empty string in this copy;
-- the field names suggest they are meant to hold HTML tags - confirm the
-- intended values before relying on tag matching.
local tags = {
overline = '', -- opens an overline group (emitted by torom around the thousands group)
doubleov = '', -- opens a double-overline group (emitted by torom around the millions group)
rn = '', -- wrapped around numerals by torom when its rndisp argument is true
rnsize = '', -- opens groups of 5e9 and above when rndisp is true
nrnsize = '', -- opens groups of 5e9 and above when rndisp is false
errs = '', -- prefix that disperr puts in front of an error message
sspan = '', -- closing fragment used for all of the above
pipe = '|', -- vertical bar; also the expansion of the "\p" escape in unesc
}
--- Build the position suffix that is appended to every error message.
-- @param cn character number to report (passed through tostring)
-- @param rp optional roman numeral index; left out of the text when nil
-- @return a string of the form " - ''[roman numeral RP, ]char CN''; "
local function atc(cn, rp)
	local numeral_part = ""
	if rp ~= nil then
		numeral_part = "roman numeral " .. tostring(rp) .. ", "
	end
	return " - ''" .. numeral_part .. "char " .. tostring(cn) .. "''; "
end
--- Expand the escape sequences "\p", "\\" and "\=" in a format string.
--
-- "\p" becomes tags.pipe, "\\" becomes a single backslash and "\=" becomes
-- "=". Any other backslash sequence (for example "\n" or "\e", which the
-- callers substitute afterwards) is left untouched.
--
-- All three escapes are handled in one left-to-right pass so that the
-- output of one expansion is never read as the start of another: an escaped
-- backslash followed by "p" yields "\p", not a pipe. The replacement table
-- is looked up literally, so "%" inside tags.pipe is safe too.
--
-- @param s string to expand
-- @return the expanded string (single return value)
local function unesc( s )
	local expansions = {
		p = tags.pipe,
		["\\"] = "\\",
		["="] = "=",
	}
	-- Parentheses drop gsub's second result (the match count).
	return (s:gsub("\\([p\\=])", expansions))
end
--- Wrap an error message in the module's error markup.
-- @param err message text
-- @return tags.errs, the message and tags.sspan joined together
local function disperr(err)
	local opening, closing = tags.errs, tags.sspan
	return opening .. err .. closing
end
-- Letter -> code. todec1 turns a code back into a value: odd codes are
-- powers of ten (I=1, X=10, C=100, M=1000) and even codes are five times a
-- power of ten (V=5, L=50, D=500).
local rn_ref = {I=1, V=2, X=3, L=4, C=5, D=6, M=7}
-- Code -> letter, the inverse of rn_ref; used to spell numerals in messages.
local ref_rn = {[1]="I", [2]="V", [3]="X", [4]="L", [5]="C", [6]="D", [7]="M"}
--- Convert a parsed sequence of numeral codes to a decimal value.
--
-- Each entry of `rns` is a code from rn_ref (I=1 .. M=7) plus 6 for every
-- overline level and 4 when the numeral is inside vertical bars. Odd codes
-- are worth 10^k and even codes 5*10^k, with k = floor((code-1)/2).
--
-- Usage problems (too many repeats, invalid subtraction, ...) do not stop
-- the conversion: they are collected in the returned message and the value
-- is computed on a best-effort basis.
--
-- @param rns array of numeral codes
-- @param ovl array with the overline level of each numeral
-- @param vbr array with 1 for numerals inside vertical bars, otherwise 0
-- @param tcc array with the input character position of each numeral
-- @return num the decimal value, or -1 on an internal inconsistency
-- @return err "" or the collected messages without the trailing "; "
local function todec1 (rns, ovl, vbr, tcc)
	local err = ""
	local num = 0        -- value accumulated so far
	local run = 0        -- how many times the previous numeral has repeated
	local prn, pfr = 0, 0 -- value and fractional marker of the previous numeral

	-- Spell the numeral at position `idx` for use in a message. With
	-- `ia == 1` the next larger numeral is spelled instead (used for
	-- suggestions). Everything here is local: the original version wrote its
	-- result into the caller's `rnc`, which made messages such as
	-- "Number of X before L" print the previous numeral twice.
	local function rncg(idx, ia)
		local rn, ov, vb = rns[idx], ovl[idx], vbr[idx]
		local rncr = rn - ov*6 - vb*4 -- bare letter code, modifiers removed
		if ia == 1 then
			if rn == 13 and rncr == 7 then
				rncr, ov, vb = 4, 1, 1
			elseif rncr == 7 then
				-- One step above M is V with one more overline (five thousand).
				-- The original wrapped to I, i.e. suggested M itself.
				rncr = 2
				ov = ov + 1
			else
				rncr = rncr + 1
			end
		end
		local bar = ""
		if vb == 1 then bar = "|" end
		return bar .. ref_rn[rncr] .. string.rep("̅", ov) .. bar
	end

	for i = 1, #rns do
		-- cex: exponent of ten, cfr: 0 for 1*10^cex, 0.5 for 5*10^cex
		local cex, cfr = math.modf((rns[i]-1)/2)
		local crn -- value of the current numeral
		if cfr == 0 then crn = 10^cex else crn = 5*10^cex end
		local tc = tcc[i]
		local rnc = rncg(i)
		if crn < prn or prn == 0 then
			num = num + prn*run
			run = 1
		elseif crn == prn then
			if cfr == 0 then
				if run > 3 then -- e.g. "XXXXX" for 50, "L" suggested
					err = err.."More than four "..rnc.." in a row, suggestion: "..rncg(i, 1).."?"..atc(tc, i)
					run = run + 1
				elseif run == 0 then -- follows a subtraction, e.g. "XCC"
					err = err.."Repeat after subtraction - " .. rncg(i-2) .. rncg(i-1) .. rnc .. atc(tc, i)
					run = 1 -- "XC" is a unit, so this "C" is counted separately
				else
					run = run + 1
				end
			elseif cfr == 0.5 then -- e.g. "VV" for 10, "X" suggested
				err = err..rncg(i-1).." cannot be with another "..rnc..", suggestion: "..rncg(i, 1).."?"..atc(tc,i)
				-- Still count the numeral so that "VV" evaluates to 10, the same
				-- way an over-long run of I/X/C/M is counted above.
				run = run + 1
			else return -1, ("Unknown error 1") end
		elseif crn > prn then
			if crn > prn * 10 then -- e.g. "XM" or "IL"
				err = err..rnc.." cannot follow "..rncg(i-1).." (Subtraction can only be within the same digit)"..atc(tc,i)
			elseif pfr == 0.5 then -- e.g. "LC" for 50
				err = err..rnc.." cannot follow "..rncg(i-1).." (Cannot subtract from " .. tostring(prn) .. ")" ..atc(tc,i)
			elseif run > 2 then -- e.g. "XXXL" for 20
				err = err .. "Number of " .. rncg(i-1) .. " before " .. rnc .. " must be at most two" .. atc(tc, i)
			end
			num = num - prn*run + crn
			run = 0
		else return -1, ("Unknown error 2") end
		prn = crn
		pfr = cfr
	end
	num = num + prn*run
	-- Every message ends with "; " (see atc); drop the last separator.
	if err ~= "" then err = err:sub(1, -3) end
	return num, err
end
--- Parse roman numeral input and convert it to a decimal number.
--
-- Walks every string in `args` character by character, keeps track of
-- markup tags, overline characters and vertical bars, and builds the
-- parallel arrays that todec1 evaluates.
--
-- @param args sequence of strings; an argument that is exactly "" toggles
--             the vertical-bar state
-- @return num the value computed by todec1, or -1 when nothing was parsed
-- @return err "" or a description of the problems that were found
local function todec( args )
	local err = ""                   -- syntax errors detected here
	local ierr = ""                  -- error text that was already embedded in the input
	local rnseq = {}                 -- numeral codes, see todec1
	local ovl, vbr, tcc = {}, {}, {} -- per numeral: overline level, bar flag, character position
	local t = {}                     -- stack of the tags that are currently open
	local ov, dv, rn = 0, 0, 0       -- nesting depth of overline, double overline and rn tags
	local tc = 0                     -- characters counted so far (reported in messages)
	local ovc = -1                   -- position of the last numeral or overline character
	local p = false                  -- true while inside a pair of vertical bars

	-- Add one overline level to the most recent numeral.
	local function overline_last()
		if ovc+1 < tc then
			err=err.."Overline character is not over a roman numeral"..atc(tc)
		end
		-- An overline that precedes every numeral has nothing to attach to;
		-- without this guard rnseq[0] was indexed and the module crashed.
		if #rnseq > 0 then
			rnseq[#rnseq] = rnseq[#rnseq] + 6
			ovl[#rnseq] = ovl[#rnseq] + 1
		end
		ovc = tc
	end

	local argn = 1
	local carg = args[argn]
	while carg ~= nil do
		if carg == "" then
			p = not p
		else
			local cplen = mw.ustring.len -- length in code points
			local len = cplen(carg)
			local n = 0
			while n < len do
				n = n + 1
				local cc = mw.ustring.sub(carg, n, n)
				if cc == "<" then
					local gt = mw.ustring.find(carg, ">", n, true)
					if gt == nil then
						tc = tc + 1
						err=err.."Unbalanced '<' found"..atc(tc)
					else
						local tag = mw.ustring.sub(carg, n, gt)
						n = n + cplen(tag) - 1 -- continue after the ">"
						if tag == tags.sspan then
							local ct = t[#t] -- innermost open tag
							if ct == "ov" then ov = ov - 1
							elseif ct == "rn" then rn = rn - 1
							elseif ct == "dv" then dv = dv - 1
							end
							if #t == 0 then err=err.."Unbalanced \""..tags.sspan.."\" tag found"..atc(tc)
							else t[#t] = nil end
						elseif tag == tags.overline then
							ov = ov + 1
							t[#t + 1] = "ov"
							if ov > 1 then err=err..ov.." nested overline tags found"..atc(tc) end
						elseif tag == tags.doubleov then
							dv = dv + 1
							t[#t + 1] = "dv"
							if dv > 1 then err=err..dv.." nested double overline tags found"..atc(tc) end
						elseif tag == tags.rn or tag == tags.rnsize then
							rn = rn + 1
							t[#t + 1] = "rn"
							if rn > 1 then err=err..rn.." nested rn tags found"..atc(tc) end
						elseif tag == tags.nrnsize then
							t[#t + 1] = "sz"
						elseif tag == tags.errs then
							-- Error text produced earlier: collect it and skip past it.
							local csp, cep = mw.ustring.find(carg, tags.sspan, n, true)
							if csp == nil then
								-- No closing tag: report it instead of failing on nil arithmetic.
								err=err.."Unbalanced \""..tags.errs.."\" tag found"..atc(tc)
							else
								ierr = ierr .. ", " .. mw.ustring.sub(carg, n+1, csp-1)
								n = cep
							end
						else
							err=err.."Unknown tag \""..tag.."\" found"..atc(tc)
							t[#t + 1] = "uk"
						end
					end
				elseif cc == " " then tc = tc + 1 -- spaces
				elseif cc == "&" then
					local sc = mw.ustring.find(carg, ";", n, true)
					if sc == nil then
						tc = tc + 1
						err=err.."Extra character '&' found"..atc(tc)
					else
						local tag = mw.ustring.sub(carg, n, sc)
						tc = tc + cplen(tag)
						n = n + cplen(tag) - 1
						-- NOTE(review): `tag` always starts with "&" and ends with ";", so
						-- the literals compared below cannot match as written; they look
						-- like character entities that were decoded in this copy - confirm.
						if tag == "|" or tag == "s" then
							p = not p
						elseif tag == "̅" or tag == "̅" then
							overline_last()
						else err=err.."Unknown tag \""..tag.."\" found"..atc(tc)
						end
					end
				elseif cc == "̅" then
					tc = tc + 1
					overline_last()
				elseif cc == "|" then -- Possible by calling from another module
					p = not p
				else
					tc = tc + 1
					local ccu = cc:upper()
					if rn_ref[ccu] == nil then
						err=err.."Unknown character \""..cc.."\" found"..atc(tc)
					else
						local vb = 0 -- vertical bar modifier
						if p then vb = 1 end
						rnseq[#rnseq + 1] = rn_ref[ccu] + ov*6 + dv*12 + vb*4
						tcc[#rnseq], ovl[#rnseq], vbr[#rnseq] = tc, ov + dv*2, vb
						ovc = tc
					end
				end
			end
		end
		argn = argn + 1
		carg = args[argn]
	end
	-- argn is still 1 only when args[1] was nil (the original compared with 0,
	-- which can never be true).
	if argn == 1 then return -1, "Input is empty" end
	if #rnseq == 0 then return -1, "No roman numerals found" end

	local num, err1 = todec1(rnseq, ovl, vbr, tcc)
	if err ~="" then err = "Syntax errors: "..mw.ustring.sub(err, 1, -3).." " end
	if err1 ~="" then err=err.."Roman numeral usage errors: "..err1.." " end
	if ierr ~="" then err=err.."Errors already in the input: "..mw.ustring.sub(ierr, 3).." " end
	-- NOTE(review): each section above ends in a single " ", yet two characters
	-- are removed here, so the last character of the message is dropped too -
	-- confirm the intended section separator.
	if err ~= "" then err = mw.ustring.sub(err, 1, -3) end
	return num, err
end
--- Template entry point: roman numerals to decimal.
--
-- Frame arguments read here:
--   d    - "0" to take the numerals from the parent frame's arguments
--          instead of this frame's own
--   mode - "0" (default) value plus formatted errors, "1" value only,
--          "2" custom format
--   disp - format string for mode "2"; "\n", "\e" and "\t" are replaced by
--          the number, the error text and os.clock(); "0" or "" selects the
--          default format
--
-- @param frame Scribunto frame object
-- @return the text to display
function p.todecimal( frame )
	local fargs = frame.args
	local args = fargs
	if fargs.d == "0" then
		args = frame:getParent().args
	end
	local mode = fargs.mode or "0"
	local disp = fargs.disp or "0"
	local num, err = todec(args)
	if mode == "0" then -- Normal mode
		if num == nil then return disperr("Unknown error 4") end
		if err == "" then
			if num ~= -1 then return num
			else return disperr("Unknown error 3") end
		else
			if num == -1 then return disperr(err)
			else return num.." "..disperr(err) end
		end
	elseif mode == "1" then -- Suppress errors
		if num == nil then num = -2 end
		return num
	elseif mode == "2" then -- Display all
		if disp == "0" or disp == "" then
			disp = "[num]\\n [err]\\e [time]\\t"
		end
		-- One pass with a lookup table: the substituted text is inserted
		-- literally, so a "%" inside the error message (it can quote input
		-- characters) no longer breaks gsub, and text that was just inserted
		-- is not scanned again for "\e" or "\t".
		local fields = { n = num, e = err, t = os.clock() }
		return (unesc(disp):gsub("\\([net])", fields))
	else return disperr("Unknown mode")
	end
end
--- Entry point for other modules: roman numerals to decimal.
-- @param roman string holding the numerals
-- @return num the decimal value (or -1), see todec
-- @return err "" or the error description
-- @return the value of os.clock() after the conversion
function p.todecimald( roman )
	-- Kept local so that repeated calls do not leave `num` and `err` behind
	-- as globals.
	local num, err = todec{ roman }
	return num, err, os.clock()
end
-- Decimal to roman numeral --
--- Spell a number below 5000 with the plain letters I, V, X, L, C, D and M.
--
-- The number is left-padded with zeros to four digits. The first digit
-- becomes that many "M"; every other digit is spelled from a template in
-- which "a" stands for the unit letter of its column, "b" for the five
-- letter and "c" for the unit letter of the next column up.
--
-- @param dec1 number (or numeric string) to spell
-- @return the roman numeral string; "" for 0
local function torom1 (dec1)
	local templates = {
		["1"] = "a",  ["2"] = "aa",  ["3"] = "aaa",
		["4"] = "ab", ["5"] = "b",   ["6"] = "ba",
		["7"] = "baa", ["8"] = "baaa", ["9"] = "ac",
	}
	-- Letters per digit position (2 = hundreds, 3 = tens, 4 = ones).
	local unit = {[2]="C", [3]="X", [4]="I"}
	local five = {[2]="D", [3]="L", [4]="V"}
	local ten  = {[2]="M", [3]="C", [4]="X"}

	local digits = tostring(dec1)
	digits = string.rep("0", 4 - #digits) .. digits

	local pieces = { string.rep("M", tonumber(digits:sub(1, 1))) }
	for pos = 2, 4 do
		local template = templates[digits:sub(pos, pos)] or ""
		local letters = { a = unit[pos], b = five[pos], c = ten[pos] }
		pieces[pos] = (template:gsub("[abc]", letters))
	end
	return table.concat(pieces)
end
--- Convert a decimal number to roman numeral markup.
--
-- Numbers of 5000 and above are split into groups that torom1 spells and
-- that are wrapped in overline, vertical-bar and double-overline markup from
-- `tags`; groups are joined with a space.
--
-- @param dec    number, or a string holding a number; error spans produced
--               earlier (tags.errs ... tags.sspan) are removed from a string
--               and reported back in the error text
-- @param rndisp when true, numerals are additionally wrapped in tags.rn
-- @return rom the markup, or -1 when the input cannot be converted
-- @return err "" or a description of the problems that were found
local function torom (dec, rndisp)
	local err, ierr = "", "" -- ierr: errors already in the input
	local rn, rc = "", ""
	if rndisp then
		rn = tags.rn
		rc = tags.sspan
	end
	local floor = math.floor
	if type(dec) == "string" then
		-- Cut out every embedded error span and remember its text.
		local errsp, errep = mw.ustring.find(dec, tags.errs, 1, true)
		while errsp do
			local endsp, endep = mw.ustring.find(dec, tags.sspan, errep, true)
			if endsp then
				ierr = ierr .. ", " .. mw.ustring.sub(dec, errep+1, endsp-1)
				dec = mw.ustring.sub(dec, 1, errsp-1)..mw.ustring.sub(dec, endep+1)
			else
				-- No closing tag: treat the rest of the string as the error
				-- text. The original left `dec` unchanged here and therefore
				-- found the same span again forever.
				ierr = ierr .. ", " .. mw.ustring.sub(dec, errep+1)
				dec = mw.ustring.sub(dec, 1, errsp-1)
			end
			errsp, errep = mw.ustring.find(dec, tags.errs, 1, true)
		end
		local ton = tonumber(dec)
		if ton == nil then
			err = err .. "Not a number; "
			local dect = dec:gsub("[^%d]", "")
			if dect=="" then return -1, "No digits" end
			err=err.."Extra characters '"..dec:gsub("%d","").."' found; "
			ton = tonumber((dec:gsub("[^%d.]", "")))
			-- Still not numeric (e.g. "1.2.3"): give up instead of comparing
			-- nil with a number below.
			if ton == nil then return -1, "Not a number" end
		end
		dec = ton
	elseif type(dec) ~= "number" then
		-- tonumber() yields nil for every value that is neither a string nor
		-- a number, so the original check always ended up here.
		return -1, "Not a number or string"
	end
	if dec < 1 then return -1, "Input ("..dec..") is less than 1" end

	local frp -- fractional part
	dec, frp = math.modf(dec)
	if frp ~= 0 then
		err=err.."Input has fractional part "..frp..", ignoring...; "
	end
	local romt = {}
	local rdec = dec -- part of dec that has not been spelled yet
	local od = tags.doubleov
	local ov = tags.overline
	local cl = tags.sspan -- close
	local vb = tags.pipe -- vertical bar
	local cdec -- value of the group being spelled
	if dec >= 5e9 then
		err = err .. "Input is 5,000,000,000 (5e9) or greater; "
		local size = tags.nrnsize
		if rndisp then size = tags.rnsize end
		local levels = floor( math.log10(dec/5)/3 ) -- overlines on the top group
		for i = levels, 3, -1 do
			cdec = floor(rdec/10^(i*3))
			rdec = rdec - cdec*10^(i*3)
			local romt2 = torom1(cdec)
			local romt1 = {}
			for j=1, #romt2 do romt1[j] = romt2:sub(j, j) end
			-- The extra empty entry makes concat put the overline characters
			-- after the last letter as well.
			romt1[#romt1+1] = ""
			romt[#romt+1] = size..table.concat(romt1, string.rep("̅",i))..cl
		end
	end
	if dec >= 5e8 then
		cdec = floor( rdec /1e6)
		rdec = rdec - cdec*1e6
		romt[#romt+1] = od..rn..torom1(cdec)..rc..cl
	end
	if dec >= 5e6 then
		cdec = floor( rdec /1e5)
		rdec = rdec - cdec*1e5
		romt[#romt+1] = vb..ov..rn..torom1(cdec)..rc..cl..vb
	end
	if dec >= 5e3 then
		cdec = floor( rdec /1e3)
		rdec = rdec - cdec*1e3
		romt[#romt+1] = ov..rn..torom1(cdec)..rc..cl
	end
	romt[#romt+1] = rn..torom1(rdec)..rc
	local rom = table.concat(romt, " ")

	if err ~= "" then err = mw.ustring.sub(err, 1, -3).." " end
	if ierr ~="" then err = err.."Errors already in the input: "..mw.ustring.sub(ierr, 3).." " end
	if err ~= "" then err = mw.ustring.sub(err, 1, -3) end
	return rom, err
end
--- Template entry point: decimal to roman numerals.
--
-- Frame arguments read here:
--   d    - "0" to take the number from the parent frame's first argument
--          instead of this frame's own
--   rn   - "1" to have torom wrap the numerals in tags.rn
--   mode - "0" (default) numerals plus formatted errors, "1" numerals only,
--          "2" custom format
--   disp - format string for mode "2"; "\r", "\e" and "\t" are replaced by
--          the numerals, the error text and os.clock(); "0" or "" selects
--          the default format
--
-- @param frame Scribunto frame object
-- @return the text to display
function p.fromdecimal( frame )
	local fargs = frame.args
	local args = fargs
	if fargs.d == "0" then
		args = frame:getParent().args
	end
	local mode = fargs.mode or "0"
	local disp = fargs.disp or "0"
	local rom, err = torom(args[1], fargs.rn == "1")
	if mode == "0" then -- Normal mode
		if rom == nil then return disperr("Unknown error 6") end
		if err == "" then
			if rom ~= -1 then return rom
			else return disperr("Unknown error 5") end
		else
			if rom == -1 then return disperr(err)
			else return rom.." "..disperr(err) end
		end
	elseif mode == "1" then -- No error mode
		if rom == nil then rom = -2 end
		return rom
	elseif mode == "2" then -- Display all
		if disp == "0" or disp == "" then
			disp = "[rom]\\r [err]\\e [time]\\t"
		end
		-- The original called disp:unesc(), which does not exist on strings,
		-- and threw away the results of its gsub calls. unesc is a local
		-- function of this module, and the substitutions are done in one
		-- pass with a lookup table so the inserted text is taken literally.
		local fields = { r = rom, e = err, t = os.clock() }
		return (unesc(disp):gsub("\\([ret])", fields))
	else return disperr("Unknown mode")
	end
end
--- Entry point for other modules: decimal to roman numerals.
-- @param dec number or numeric string to convert
-- @param rn  pass the string "1" to have the numerals wrapped in tags.rn
-- @return rom the markup (or -1), see torom
-- @return err "" or the error description
-- @return the value of os.clock() after the conversion
function p.fromdecimald( dec, rn )
	-- Kept local so that calls do not leave `rndisp`, `rom` and `err` behind
	-- as globals.
	local rom, err = torom(dec, rn == "1")
	return rom, err, os.clock()
end
-- Export the table of public functions.
return p