-- Module:Chem2

local getArgs = require('Module:Arguments').getArgs

local p = {} -- module's table

-- Elements with wiki links

-- Maps an element-style symbol to the title of the article it links to.
-- Entries are listed in periodic-table order, followed by a handful of
-- non-element abbreviations that are written like element symbols.
local am = {
	-- Period 1
	H = "Hydrogen", He = "Helium",
	-- Period 2
	Li = "Lithium", Be = "Beryllium", B = "Boron", C = "Carbon",
	N = "Nitrogen", O = "Oxygen", F = "Fluorine", Ne = "Neon",
	-- Period 3
	Na = "Sodium", Mg = "Magnesium", Al = "Aluminium", Si = "Silicon",
	P = "Phosphorus", S = "Sulfur", Cl = "Chlorine", Ar = "Argon",
	-- Period 4
	K = "Potassium", Ca = "Calcium", Sc = "Scandium", Ti = "Titanium",
	V = "Vanadium", Cr = "Chromium", Mn = "Manganese", Fe = "Iron",
	Co = "Cobalt", Ni = "Nickel", Cu = "Copper", Zn = "Zinc",
	Ga = "Gallium", Ge = "Germanium", As = "Arsenic", Se = "Selenium",
	Br = "Bromine", Kr = "Krypton",
	-- Period 5
	Rb = "Rubidium", Sr = "Strontium", Y = "Yttrium", Zr = "Zirconium",
	Nb = "Niobium", Mo = "Molybdenum", Tc = "Technetium", Ru = "Ruthenium",
	Rh = "Rhodium", Pd = "Palladium", Ag = "Silver", Cd = "Cadmium",
	In = "Indium", Sn = "Tin", Sb = "Antimony", Te = "Tellurium",
	I = "Iodine", Xe = "Xenon",
	-- Period 6
	Cs = "Caesium", Ba = "Barium", La = "Lanthanum", Ce = "Cerium",
	Pr = "Praseodymium", Nd = "Neodymium", Pm = "Promethium", Sm = "Samarium",
	Eu = "Europium", Gd = "Gadolinium", Tb = "Terbium", Dy = "Dysprosium",
	Ho = "Holmium", Er = "Erbium", Tm = "Thulium", Yb = "Ytterbium",
	Lu = "Lutetium", Hf = "Hafnium", Ta = "Tantalum", W = "Tungsten",
	Re = "Rhenium", Os = "Osmium", Ir = "Iridium", Pt = "Platinum",
	Au = "Gold", Hg = "Mercury (element)", Tl = "Thallium", Pb = "Lead",
	Bi = "Bismuth", Po = "Polonium", At = "Astatine", Rn = "Radon",
	-- Period 7
	Fr = "Francium", Ra = "Radium", Ac = "Actinium", Th = "Thorium",
	Pa = "Protactinium", U = "Uranium", Np = "Neptunium", Pu = "Plutonium",
	Am = "Americium", Cm = "Curium", Bk = "Berkelium", Cf = "Californium",
	Es = "Einsteinium", Fm = "Fermium", Md = "Mendelevium", No = "Nobelium",
	Lr = "Lawrencium", Rf = "Rutherfordium", Db = "Dubnium", Sg = "Seaborgium",
	Bh = "Bohrium", Hs = "Hassium", Mt = "Meitnerium", Ds = "Darmstadtium",
	Rg = "Roentgenium", Cn = "Copernicium", Nh = "Nihonium", Fl = "Flerovium",
	Mc = "Moscovium", Lv = "Livermorium", Ts = "Tennessine", Og = "Oganesson",

	-- Groups etc. with element-like names
	Bn = "Benzyl group",
	Bu = "Butyl group",
	Bz = "Benzoyl group",
	Cp = "Cyclopentadienyl",
	D = "Deuterium",
	Et = "Ethyl group",
	Ln = "Lanthanide",
	Me = "Methyl group",
	Nu = "Nucleophile",
	Ph = "Phenyl group",
	Pn = "Pentyl group",
	R = "Substituent",
	T = "Tritium",
	Tf = "Trifluoromethylsulfonyl group",
	X = "Halogen",
}

-- Groups which are redirected from their normal target if wikilinked; never

-- autolinked.

-- Maps a written group formula to the article title used as the link
-- target when that formula appears as the target of a [[...]] link.
local groups = {
	["CH3"]  = "Methyl group",
	["CO3"]  = "Carbonate",
	["COOH"] = "Carboxyl group",
	["ClO"]  = "Hypochlorite",
	["ClO2"] = "Chlorite",
	["ClO3"] = "Chlorate",
	["ClO4"] = "Perchlorate",
	["H2O"]  = "Water of crystallization",
	["H3O"]  = "Hydronium",
	["NH2"]  = "Amine group",
	["NH4"]  = "Ammonium",
	["NO3"]  = "Nitrate",
	["PO3"]  = "Phosphite",
	["PO4"]  = "Phosphate",
	["SH"]   = "Thiol group",
	["SO3"]  = "Sulfite",
	["SO4"]  = "Sulfate",
	["SeH"]  = "Selenol group",
}

-- Token type identifiers shared by the tokenizer and the renderer.
-- The numeric values are arbitrary tags; only their distinctness matters
-- to the code in this file.
local T_ELEM        = 0  -- element-style symbol (capital followed by lowercase letters)
local T_NUM         = 1  -- number
local T_OPEN        = 2  -- opening bracket, e.g. '('
local T_CLOSE       = 3  -- closing bracket, e.g. ')'
local T_PM_CHARGE   = 4  -- bare + or −
local T_WATER       = 6  -- .xH2O, where x is a number
local T_CRYSTAL     = 9  -- .x
local T_CHARGE      = 8  -- bracketed charge: (x+), (x-)
local T_SUF_CHARGE  = 10 -- suffix and charge, e.g. 2+ from H2+
local T_SUF_CHARGE2 = 12 -- suffix and (charge), e.g. 2(2+) from He2(2+)
local T_SPECIAL     = 14 -- backslash escape, e.g. \d for a double bond (=)
local T_SPECIAL2    = 16 -- \y{x}, e.g. \i{12} for an isotope with mass number 12
local T_ARROW_R     = 17 -- ->
local T_ARROW_EQ    = 18 -- <->
local T_UNDERSCORE  = 19 -- _{ ... }
local T_CARET       = 20 -- ^{ ... }
local T_LINKOPEN    = 21 -- start of a link, always emitted as "[[target|" even if the source wasn't
local T_NOCHANGE    = 30 -- anything else, like ☃; passed through untouched

--- Combine an "up" fragment and a "down" fragment into one string.
-- When one of the two is the empty string, only the other is formatted;
-- otherwise both are emitted, `up` first and `down` second.
-- NOTE(review): the format strings here hold nothing but %s placeholders,
-- so no wrapper markup is added around either fragment — confirm that is
-- intended.
-- @param up   string fragment emitted first (may be "")
-- @param down string fragment emitted second (may be "")
-- @return string the formatted result
function su(up, down)
	local fmt = string.format
	if up == "" then
		return fmt('%s', down)
	elseif down == "" then
		return fmt('%s', up)
	else
		return fmt('%s%s', up, down)
	end
end

--- Return the separator placed in front of a crystal / water-of-hydration part.
-- @return string the middle-dot character
function DotIt()
	local separator = '·'
	return separator
end

--- Tokenizer for a formula string.
-- Returns an iterator function. Each call yields the next token as
-- (type, text), where `type` is one of the T_* constants and `text` is the
-- matched piece of `f`. Once the whole string has been consumed it yields
-- (nil, nil), which ends a generic `for` loop.
-- @param f string formula text to tokenize
-- @return function iterator producing (type, text) pairs
function item(f) -- (iterator) returns one token (type, value) at a time from the formula 'f'
	-- Rules tried, in this order, BEFORE the bracket/link special cases.
	-- Order matters: earlier rules win.
	local head_rules = {
		{ '^%s+[%d.]+',       T_NOCHANGE },    -- coefficient (needs a space first)
		{ '^%s[+]',           T_NOCHANGE },    -- " +" as in H2O + H2O
		{ '^%&%#[%w%d]+%;',   T_NOCHANGE },    -- &#...;
		{ '^%<%-%>',          T_ARROW_EQ },    -- <->
		{ '^%-%>',            T_ARROW_R },     -- ->
		{ '^%u%l*',           T_ELEM },        -- symbols like Aaaaa
		{ '^%d+[+-]',         T_SUF_CHARGE },  -- x+, x-
		{ '^%d+%(%d*[+-]%)',  T_SUF_CHARGE2 }, -- x(y+/-), x(+/-)
		{ '^%(%d*[+-]%)',     T_CHARGE },      -- (x+) (xx+), (x-) (xx-)
		{ '^[%d.]+',          T_NUM },         -- number
	}
	-- Rules tried, in this order, AFTER the bracket/link special cases.
	local tail_rules = {
		{ '^[({%[]',          T_OPEN },        -- ({[
		{ '^[)}%]]',          T_CLOSE },       -- )}]
		{ '^[+-]',            T_PM_CHARGE },   -- + or -
		{ '^%*[%d.]*H2O',     T_WATER },       -- crystal water
		{ '^%*[%d.]*',        T_CRYSTAL },     -- crystal
		{ '^[\\].{%d+}',      T_SPECIAL2 },    -- \y{x}
		{ '^[\\].',           T_SPECIAL },     -- \x
		{ '^_{[^}]*}',        T_UNDERSCORE },  -- _{...}
		{ '^^{[^}]*}',        T_CARET },       -- ^{...}
		{ '^.',               T_NOCHANGE },    -- the rest, one character at a time
	}

	-- Return (type, text) for the first rule that matches at `pos`,
	-- or (nil, nil) when none does.
	local function first_match(rules, pos)
		for _, rule in ipairs(rules) do
			local text = f:match(rule[1], pos)
			if text then
				return rule[2], text
			end
		end
		return nil, nil
	end

	local pos = 1 -- index in `f` of the next unread character

	return function ()
		-- A number at the very start of the formula is a leading
		-- coefficient and is passed through unchanged.
		if pos == 1 and f:match('^[0-9]', pos) then
			local text = f:match('^[%d.]+', pos)
			pos = pos + #text
			return T_NOCHANGE, text
		end

		if pos > #f then
			return nil, nil -- end of input
		end

		local kind, text = first_match(head_rules, pos)

		if not text then
			-- Escape: "[[[X" or "[X" emits a single literal '[' and consumes
			-- one character (relevant to auto-linking).
			if f:match('^%[%[%[[^[]', pos) or f:match('^%[[^[]', pos) then
				pos = pos + 1
				return T_OPEN, '['
			end

			-- Wikilink opening "[[target" ...
			if f:sub(pos, pos + 1) == '[[' then
				local target = f:match('^%[%[([^]|]*)', pos)
				local skip = #target + 3 -- "[[" + target + one delimiter character
				-- Override the link target for common groups and symbols.
				local opener = '[[' .. (groups[target] or am[target] or target) .. '|'
				if f:sub(pos + skip, pos + skip) == ']' then
					-- "[[X]]" form: the link text is read twice, once as the
					-- target and once as chemical markup, so only skip "[[".
					pos = pos + 2
				else
					-- "[[X|" form: skip the target and the pipe.
					pos = pos + skip
				end
				return T_LINKOPEN, opener
			end

			kind, text = first_match(tail_rules, pos)
		end

		if not text then
			-- Not expected to happen, since the final '^.' rule matches any
			-- character. Move past the end before raising so a caller that
			-- traps the error cannot spin on the same position.
			pos = pos + 999
			error("Invalid character in formula! : " .. f)
		end

		pos = pos + #text
		return kind, text
	end
end

--- Render a formula string into output text.
-- @param args table of arguments:
--   args[1]     the formula text (defaults to '')
--   args.link   optional page title; when non-empty the whole formula is
--               wrapped in "[[link|" ... "]]"
--   args.auto   when non-empty, the first occurrence of each symbol listed
--               in `am` is wikilinked to its article
--   args[2..4]  if any is present, a preview warning is appended
-- @return string the rendered formula
function p._chem(args)
	-- Replace every '-' in s with the minus sign; the extra parentheses
	-- drop gsub's second return value (the substitution count).
	local function minus(s)
		return (s:gsub("-", "−"))
	end

	-- Output for the single-character escapes \x.
	-- Escapes not listed here produce no output.
	local special = {
		s = "−",        -- single bond
		d = "=",        -- double bond
		t = "≡",        -- triple bond
		q = "≣",        -- quadruple bond
		h = "η",        -- η, hapticity
		["*"] = "*",    -- literal *
		["-"] = "-",    -- literal -
		["\\"] = "\\",  -- literal \
		["'"] = "'",    -- literal '
	}

	local f = args[1] or ''
	f = mw.text.decode(f, true) -- handle entity input: decode right away
	f = (f:gsub("–", "-")) -- replace – (en dash) with - (hyphen)
	f = (f:gsub("−", "-")) -- replace − (minus sign) with - (hyphen)

	local link = args['link'] or ""
	local auto = args['auto'] or ""
	local seen = {}      -- symbols that have already been auto-linked
	local _debug = false -- set to true to dump the token stream instead of rendering

	-- Collect output pieces and join once at the end instead of repeatedly
	-- re-concatenating a growing string.
	local out = {}
	local function emit(s)
		out[#out + 1] = s
	end

	if link ~= '' then emit("[[" .. link .. "|") end -- wikilink start [[link|

	for t, x in item(f) do
		if _debug then
			emit(("\n* %d %s"):format(t, x))
		elseif t == T_ELEM then
			if auto == '' or not am[x] or seen[x] then
				emit(x)
			else
				-- Link only the first occurrence of each symbol, showing the
				-- symbol itself as the link text.
				emit(("[[%s|%s]]"):format(am[x], x))
				seen[x] = true
			end
		elseif t == T_NUM then
			emit(su("", x))
		elseif t == T_LINKOPEN or t == T_OPEN or t == T_CLOSE then
			emit(x) -- "[[Link|", ([{ and )]} pass through
		elseif t == T_PM_CHARGE then
			emit(su(minus(x), ""))
		elseif t == T_SUF_CHARGE then
			-- e.g. "2+": the sign goes up, the digits go down
			emit(su(minus(x:match("[+-]")), x:match("%d+")))
		elseif t == T_SUF_CHARGE2 then
			-- e.g. "2(2+)": the bracketed charge, minus its "(", goes up;
			-- the leading digits go down
			emit(su(minus(x:match("%(%d*[+-]")):sub(2, -1), x:match("%d+")))
		elseif t == T_CHARGE then
			-- e.g. "(2+)": the digits are optional, so guard against nil
			emit((x:match("%d+") or "") .. minus(x:match("[%+-]")))
		elseif t == T_CRYSTAL then
			-- drop the leading '*' and put the separator in its place
			emit(DotIt() .. (x:gsub("%*", "", 1)))
		elseif t == T_SPECIAL then
			local replacement = special[x:sub(2, 2)] -- the x of \x
			if replacement then emit(replacement) end
		elseif t == T_SPECIAL2 then -- \y{x}
			local parameter = x:sub(2, 2) -- the y of \y{x}
			if parameter == "h" then     -- hapticity
				emit("η" .. x:match('%d+') .. "-")
			elseif parameter == "m" then -- mu (bridging ligand)
				emit("μ" .. x:match('%d+') .. "-")
			end
		elseif t == T_WATER then
			if x:match("^%*[%d.]") then
				-- a numeric multiplier is present: keep it in front of H2O
				emit(DotIt() .. x:match("%f[%.%d]%d*%.?%d*%f[^%.%d%]]") .. "H2O")
			else
				emit(DotIt() .. "H2O")
			end
		elseif t == T_UNDERSCORE then
			emit(su("", minus(x):sub(3, -2))) -- x contains _{string}
		elseif t == T_CARET then
			emit(su(minus(x):sub(3, -2), "")) -- x contains ^{string}
		elseif t == T_ARROW_R then
			emit(" → ")
		elseif t == T_ARROW_EQ then
			emit(" ⇌ ")
		elseif t == T_NOCHANGE then
			emit(x) -- everything that isn't captured by the other patterns
		else
			error('unreachable - ???') -- in fact, unreachable
		end
	end

	if link ~= '' then emit("]]") end -- wikilink closing ]]

	-- NOTE(review): preprocess is called with an empty string here, which
	-- looks like markup may have been lost from this literal — confirm.
	local formula = mw.getCurrentFrame():preprocess('') .. table.concat(out)

	if args[2] or args[3] or args[4] then
		formula = formula .. require('Module:If preview')._warning{
			'{{chem2}} was called with multiple positional arguments. It should have just one, e.g. {{chem2|H2O}}.'
		}
	end

	return formula
end

--- Entry point taking a frame: extracts the arguments from `frame` with
-- getArgs and hands them to p._chem.
-- @param frame the frame object passed by the caller
-- @return whatever p._chem returns for those arguments
function p.chem(frame)
	return p._chem(getArgs(frame))
end

-- PRIVATE function to generate documentation.

--- Build wikitext documenting the `am` and `groups` tables as two
-- sortable-by-key wikitables, each preceded by a heading and a sentence of
-- explanation.
-- @param frame unused
-- @return string wikitext for the two tables
function p._autodoc(frame)
	local TableTools = require('Module:TableTools') -- we don't want to load this on articles for no reason

	-- Wikitable building blocks: "{|" opens a table, "|-" starts a row,
	-- "||" separates cells on one line, "|}" closes the table.
	local TABLE_OPEN = '{| class="wikitable"\n! Symbol !! Link target\n'
	local TABLE_ROW = '|-\n| %s || %s\n'
	local TABLE_CLOSE = '|}'

	local result = {
		'===Elements and element-style symbols===\nThese may be automatically linked or used as if they were redirects.\n',
		TABLE_OPEN,
	}
	for symbol, target in TableTools.sortedPairs(am) do
		result[#result + 1] = TABLE_ROW:format(symbol, target)
	end
	result[#result + 1] = TABLE_CLOSE
		.. '\n===Groups===\nThese must be linked manually; they work as if they were redirects.\n'
	result[#result + 1] = TABLE_OPEN
	for symbol, target in TableTools.sortedPairs(groups) do
		result[#result + 1] = TABLE_ROW:format(symbol, target)
	end
	result[#result + 1] = TABLE_CLOSE

	return table.concat(result)
end

return p