-- Module:Convert/makeunits

-- This module generates the wikitext required at Module:Convert/data

-- by reading and processing the wikitext of the master list of units

-- (see conversion_data for the page title).

--

-- Script method:

-- * Read lines, ignoring everything before "== Conversions ==".

-- * Process the following lines:

-- * Find next level-3 heading like "=== Length ===".

-- * Parse each following line starting with "|"

-- (but ignore lines starting with "|-" or "|}").

-- * Split such lines into fields (delimiter "||") and trim

-- leading/trailing whitespace from each field.

-- Remove any "colspan" at front of second field (symbol).

-- * Remove thousand separators (commas) from the scale field.

-- If the scale is a number, do not change it.

-- Otherwise, it should be an expression like "5/9", in

-- which case it is replaced by the value of the expression.

-- * Remove wiki formatting '...' from the link field.

-- * Remove redundant fields from the unit to reduce size of data table.

-- * Create alternative forms of a unit such as an alias or a combination.

-- * Stop processing on reaching the end of text or a line starting

-- with a level-2 heading ("==" but not "===").

-- * Repeat above for each heading listed at prepare_data().

-- * Output Lua source for the units table.

--

-- -- Output has the following form.

-- local all_units = {

-- ["unitcode"] = { -- standard format

-- name1 = "singular name", -- omitted if redundant

-- name1_us = "singular name sp=us", -- omitted if redundant

-- name2 = "plural name", -- omitted if redundant

-- name2_us = "plural name sp=us", -- omitted if redundant

-- symbol = "symbol",

-- sym_us = "symbol sp=us", -- omitted if redundant

-- usename = 1, -- omitted if empty

-- utype = "unit type", -- from level-3 heading

-- scale = 1, -- a value, if necessary from evaluating an expression

-- subdivs = { ["ft"] = { 5280, default = "km" }, ["yd"] = { 1760 } } -- composite input; omitted if empty

-- link = "title of article for wikilink", -- omitted if empty or redundant

-- ... -- other values

-- },

-- ["unitcode"] = { -- alternative format to generate an alias

-- target = "unit code",

-- ... -- optional values to override those of target

-- },

-- ["unitcode"] = { -- alternative format to generate a "per" unit like $/acre or BTU/h

-- per = {u1, u2}, -- numbered table of unitcodes (u1 may be a currency symbol)

-- ... -- optional values

-- },

-- ["unitcode"] = { -- alternative format to generate an error message

-- shouldbe = "message that some other unit code should be used",

-- },

-- ["unitcode"] = { -- alternative format for combination outputs (like 'm ft')

-- combination = {u1, u2, ...}, -- numbered table of unitcodes

-- utype = "unit type", -- as for standard format

-- },

-- ["unitcode"] = { -- alternative format for output multiples (like 'ftin')

-- combination = {u1, u2, ...}, -- numbered table of unitcodes

-- multiple = {f1, f2, ...}, -- numbered table of integer factors

-- utype = "unit type", -- as for standard format

-- },

-- ...

-- }

-- Shortcuts to Scribunto's mw.ustring functions (lower-casing and substring).
local ulower = mw.ustring.lower

local usub = mw.ustring.sub

-- Table of localized text used by this script: lookup() reads its SIprefixes
-- and eng_scales fields, and check_all_pers() reads its currency field.
-- It is declared here but not assigned in this part of the file.
local text_code

-- Exceptions to normal processing, keyed by unit type (utype) or by
-- unit code (ucode).
local specials = {

-- This table is used to add extra fields when defining some units which

-- require exceptions to normal processing.

-- Each key is in the local language, while each value is fixed text.

-- However, this script should NOT be edited.

-- Instead, the translation_table in Module:Convert/text can be edited,

-- and this script will replace sections of the following with localized

-- definitions from Module:Convert/text, if given.

-- Ask for assistance at :en:Module talk:Convert.

-- LATER: It would be better if this was defined in the conversion data.

-- check_all_pers() maps a unit type through this table to recognize
-- fuel efficiency "per" units and their volume/length components.
utype = {

-- ["unit type in local language"] = "name_used_in_this_script"

["fuel efficiency"] = "type_fuel_efficiency",

["length"] = "type_length",

["temperature"] = "type_temperature",

["volume"] = "type_volume",

},

-- insert_unique_unit() sets, for each subtable below, a field with the
-- subtable's name on the unit being inserted; the value is the entry for
-- that unit's code (nil if the code is not listed).
ucode = {

exception = {

-- ["unit code in local language"] = "name_used_in_module_convert"

["ft"] = "integer_more_precision",

["in"] = "subunit_more_precision",

["lb"] = "integer_more_precision",

},

istemperature = {

-- Common temperature scales (not keVT or MK).

-- ["unit code in local language"] = true

["C"] = true,

["F"] = true,

["K"] = true,

["R"] = true,

},

usesymbol = {

-- Use unit symbol not name if abbr not specified.

-- ["unit code in local language"] = 1

["C"] = 1,

["F"] = 1,

["K"] = 1,

["R"] = 1,

["C-change"] = 1,

["F-change"] = 1,

["K-change"] = 1,

},

alttype = {

-- Unit has an alternate type that is a valid conversion.

-- ["unit code in local language"] = "alternate type in local language"

["Nm"] = "energy",

["ftlb"] = "torque",

["ftlb-f"] = "torque",

["ftlbf"] = "torque",

["inlb"] = "torque",

["inlb-f"] = "torque",

["inlbf"] = "torque",

["inoz-f"] = "torque",

["inozf"] = "torque",

},

},

}

-- Module text for the local language (localization).

-- A default table of text for enwiki is provided here.

-- If needed for another wiki, wanted sections from the table can be

-- copied into translation_table in Module:Convert/text.

-- For example, copying and modifying only the titles section may give:

--

-- local translation_table = {

-- ... -- other items

-- mtext = {

-- titles = {

-- -- name_used_in_this_script = 'Title of page'

-- conversion_data = 'Modul:Convert/documentation/conversion data/dok',

-- },

-- },

-- }

-- Default (enwiki) text used by this script, in three sections:
--   section_names  titles of sections in the conversion data
--   titles         page titles
--   messages       templates for errors/warnings; message() looks a key up
--                  here and replaces '$1', '$2', ... with its parameters
local mtext = {

section_names = {

-- name_used_in_this_script = 'Section title used in conversion data'

overrides = 'Overrides',

conversions = 'Conversions',

outmultiples = 'Output multiples',

combinations = 'Combinations',

inmultiples = 'Input multiples',

defaults = 'Defaults',

links = 'Links',

perunits = 'Automatic per units',

varnames = 'Variable names',

pernames = 'Names for second unit in a per',

},

titles = {

-- name_used_in_this_script = 'Title of page'

conversion_data = 'Module:Convert/documentation/conversion data',

},

messages = {

-- name_used_in_this_script = 'Error message ($1 = first parameter, $2 = second)'

m_als_bad = 'Alias has invalid text in field "$1".',

m_als_dup = 'Alias "$1" already defined.',

m_als_link = 'Alias "$1" must include a wikilink ("...") in the symlink text.',

m_als_mul = 'Alias "$1" has multiplier "$2" which is not a number.',

m_als_same = 'Should omit "$1" for alias "$2" because it is the same as its target.',

m_als_type = 'Target of alias "$1" has wrong type.',

m_als_undef = 'Primary unit must be defined before alias "=$1"',

m_cmb_miss = 'Missing unit code for a combination.',

m_cmb_none = 'No units specified for combination "$1"',

m_cmb_one = 'Only one unit specified for combination "$1"',

m_cmb_type = 'Unit "$1" in combination "$2" has wrong type.',

m_cmb_undef = 'Unit "$1" in combination "$2" not defined.',

m_cmp_def = 'Composite "$1" must specify a default unit code.',

m_cmp_int = 'Composite "$1" has components where scale ratios are not integers.',

m_cmp_inval = 'Composite "$1" has a component with an invalid scale, "$2".',

m_cmp_many = 'Composite "$1" has too many fields.',

m_cmp_miss = 'Missing unit code for a composite.',

m_cmp_order = 'Composite "$1" has components in wrong order or with invalid scales.',

m_cmp_scale = 'Alternate unit "$1" in composite "$2" has wrong scale.',

m_cmp_two = 'Composite "$1" must specify exactly two unit codes.',

m_cmp_type = 'Unit "$1" in composite "$2" has wrong type.',

m_cmp_undef = 'Unit "$1" in composite "$2" not defined.',

m_def_cond = 'Invalid condition in default "$1" for unit "$2".',

m_def_fmt = 'Default output "$1" for unit "$2" should have 2 or 3 "!".',

m_def_rpt = 'Default output "$1" for unit "$2" is repeated.',

m_def_same = 'Default output for unit "$1" is the same unit.',

m_def_type = 'Default output "$1" for unit "$2" has wrong type.',

m_def_undef = 'Default output "$1" for unit "$2" is not defined.',

m_dfs_code = 'Defaults section: no unit code specified.',

m_dfs_dup = 'Defaults section: unit "$1" has already been specified.',

m_dfs_none = 'Defaults section: unit "$1" has no default specified.',

m_dfs_sym = 'Defaults section: unit "$1" must have a symbol.',

m_dfs_two = 'Defaults section: unit "$1" should have two fields only.',

m_dfs_undef = 'Defaults section: unit "$1" is not defined.',

m_dup_code = 'Unit code "$1" has already been defined.',

m_error = 'Error:',

m_ftl_read = 'Could not read wikitext from "$1".',

m_ftl_table = '$1 should export table "$2".',

m_ftl_type = 'Fatal error: unknown data type for "$1"',

m_hdg_lev2 = 'Level 2 heading "$1" not found.',

m_hdg_lev3 = 'No level 3 heading before: $1',

m_line_num = ' (line $1).',

m_lnk_brack = 'Link "$1" has wrong number of brackets.',

m_lnk_dup = 'Link exception "$1" is already defined.',

m_lnk_miss = 'Missing unit code for a link.',

m_lnk_none = 'No link defined for unit "$1".',

m_lnk_sym = 'Unit code "$1" for a link must have a symbol.',

m_lnk_two = 'Row for unit "$1" link should have two fields only.',

m_lnk_type = 'Link exception "$1" has wrong type.',

m_lnk_undef = 'Unit code "$1" for a link is not defined.',

m_miss_code = 'Missing unit code.',

m_miss_sym = 'Missing symbol.',

m_miss_type = 'Missing unit type.',

m_mul_int = 'Multiple "$1" has components where scale ratios are not integers.',

m_mul_miss = 'Missing unit code for a multiple.',

m_mul_none = 'No units specified for multiple "$1"',

m_mul_one = 'Only one unit specified for multiple "$1"',

m_mul_order = 'Multiple "$1" has components in wrong order or with invalid scales.',

m_mul_scale = 'Multiple "$1" has a component with an invalid scale, "$2".',

m_mul_std = 'Unit "$1" in multiple "$2" must be a standard unit.',

m_mul_type = 'Unit "$1" in multiple "$2" has wrong type.',

m_mul_undef = 'Unit "$1" in multiple "$2" not defined.',

m_no_title = 'Need title of page with unit definitions.',

m_ovr_dup = 'Override "$1" is already defined.',

m_ovr_miss = 'Missing unit code for an override.',

m_per_dup = 'Per unit "$1" already defined.',

m_per_empty = 'Unit "$1" has an empty field in the "per".',

m_per_fuel = 'Unit "$1" has invalid unit types for fuel efficiency.',

m_per_inv = 'Invalid field for a "per".',

m_per_two = 'Unit "$1" does not have exactly 2 fields in the "per".',

m_per_undef = 'Unit "$1" has undefined unit code "$2" in the "per".',

m_percent_s = 'Field "$1" must not contain "%s".',

m_pnm_cnt = 'Names for second unit in a per section: each row must have two columns.',

m_pnm_dup = 'Unit "$1" already has a per name.',

m_pnm_miss = 'Missing field for a per name.',

m_pnm_undef = 'Unit "$1" in per names is not defined.',

m_pfx_bad = 'Unknown prefix: "$1".',

m_pfx_name = 'Unit with Prefix set must include Name.',

m_scl_bad = 'Scale expression is invalid: "$1".',

m_scl_miss = 'Missing scale.',

m_scl_oflow = 'Scale expression gives an invalid value: "$1".',

m_var_cnt = 'Variable names section: each row must have the configured number of columns.',

m_var_dup = 'Unit "$1" already has a variable name.',

m_var_miss = 'Missing field for a variable name.',

m_var_undef = 'Unit "$1" in variable names is not defined.',

m_warning = 'Warning:',

m_wrn_more = ' (and more not shown)',

m_wrn_nbsp = 'Line $1 contains a nonbreaking space.',

m_wrn_nodef = 'Units with the following unit codes have no default output.',

m_wrn_ucode = ' $1',

},

}

local function message(key, ...)
    -- Return the text for a message key, with placeholders filled in.
    -- The template comes from mtext.messages (which can be localized); an
    -- unknown key is used as the template itself, and a nil key is '???'.
    -- '$1', '$2', ... are replaced with the first, second, ... extra
    -- arguments (each a string or a number); a placeholder with no matching
    -- argument is left unchanged.
    -- If the global variable is_test_run is set (by a testing program), the
    -- key is prepended to every message except 'm_line_num'.
    local substitutions = {}
    for position, argument in ipairs({...}) do
        substitutions['$' .. position] = argument
    end
    key = key or '???'
    local template = mtext.messages[key] or key
    local prefix = ''
    if is_test_run and key ~= 'm_line_num' then
        prefix = key .. ': '
    end
    -- Concatenation keeps only the first result of gsub (the new string).
    return prefix .. string.gsub(template, '$%d+', substitutions)
end

local function quit(key, ...)
    -- Abort processing: format the message for key (see message()) and
    -- raise it with error() at level 0, so no position information is added
    -- and a surrounding pcall() receives exactly the message text.
    local text = message(key, ...)
    error(text, 0)
end

local function quit_no_message()
    -- Throw an error whose text is a fixed placeholder.
    -- Callers are functions that could report a specific problem, but which
    -- are only ever run under pcall() by code that ignores the message.
    -- Any argument passed here is discarded; calling this function (rather
    -- than error() with real text) documents that the text never appears
    -- and so does not need translation.
    local placeholder = 'this message is not displayed'
    error(placeholder, 0)
end

local function collection()
    -- Return a table to hold items: a list that can also be used as a stack.
    --   n           number of items currently held (stored at [1]..[n])
    --   add(item)   append item
    --   pop()       remove and return the last item; returns nothing if empty
    --   join(sep)   concatenate the items (strings or numbers) with sep,
    --               which defaults to a newline
    return {
        n = 0,
        add = function (self, item)
            self.n = self.n + 1
            self[self.n] = item
        end,
        pop = function (self)
            if self.n > 0 then
                local top = self[self.n]
                -- Clear the slot; otherwise the popped value stays in the
                -- table and join(), '#' and ipairs() would still include it.
                self[self.n] = nil
                self.n = self.n - 1
                return top
            end
        end,
        join = function (self, sep)
            return table.concat(self, sep or '\n')
        end,
    }
end

-- Warnings collected during processing.
local warnings = collection()

local function add_warning(key, ...)
    -- Format the message for key (see message()) and queue it as a warning
    -- that will be inserted before the final result.
    local text = message(key, ...)
    warnings:add(text)
end

---Begin code to evaluate expressions-----------------------------------

-- This is needed because Lua's loadstring() is not available in Scribunto,

-- and each scale value can be specified as an expression such as "5/9".

-- More complex expressions are supported, including use of parentheses

-- and the binary operators: + - * / ^

-- Tokens recognized in a scale expression.
-- A binary operator maps to a table:
--   precedence     higher binds tighter
--   associativity  1 = left, 2 = right
--   func           function applying the operator to (lhs, rhs)
-- A parenthesis maps to itself (a string).
local operators = {
    ['+'] = {
        precedence = 1,
        associativity = 1,
        func = function (lhs, rhs) return lhs + rhs end,
    },
    ['-'] = {
        precedence = 1,
        associativity = 1,
        func = function (lhs, rhs) return lhs - rhs end,
    },
    ['*'] = {
        precedence = 2,
        associativity = 1,
        func = function (lhs, rhs) return lhs * rhs end,
    },
    ['/'] = {
        precedence = 2,
        associativity = 1,
        func = function (lhs, rhs) return lhs / rhs end,
    },
    ['^'] = {
        precedence = 3,
        associativity = 2,
        func = function (lhs, rhs) return lhs ^ rhs end,
    },
    ['('] = '(',
    [')'] = ')',
}

local function tokenizer(text)
    -- Return an object that splits text into tokens.
    -- Each call of its method 'next' returns the next token, one of:
    --   number           an unsigned number (unary operators are not supported)
    --   table            an entry from 'operators' for + - * / ^
    --   string           '(' or ')'
    --   nothing (nil)    when the end of text has been reached
    -- If the text at the current position is not a valid token, an error
    -- is thrown.
    local function next_token(self)
        if self.pos > self.maxpos then
            return
        end
        local source, start = self.text, self.pos
        local _, last, symbol = source:find('^%s*([+%-*/^()])', start)
        if symbol then
            self.pos = last + 1
            return operators[symbol]
        end
        -- Try a number with an exponent first, then a plain number.
        local digits
        _, last, digits = source:find('^%s*(%d*%.?%d*[eE][+-]?%d*)', start)
        if not digits then
            _, last, digits = source:find('^%s*(%d*%.?%d*)', start)
        end
        local number = tonumber(digits)
        if number then
            self.pos = last + 1
            return number
        end
        quit_no_message('invalid number "' .. source:sub(start) .. '"')
    end
    return {
        pos = 1,
        maxpos = #text,
        text = text,
        next = next_token,
    }
end

local function evaluate_tokens(tokens, inparens)
    -- Return the value from evaluating a tokenized expression, or throw an error.
    -- tokens: object whose method next() returns a number, an operator
    --   table (fields precedence, associativity, func), '(' or ')', or
    --   nil at the end of the text.
    -- inparens: true when called recursively for the contents of a
    --   parenthesized group; evaluation then ends at the matching ')'.
    -- Numbers and pending operators are kept on two stacks; a pending
    -- operator is applied once an operator that does not bind tighter
    -- arrives, or the expression ends.
    local numstack, opstack = collection(), collection()
    local function perform_ops(precedence, associativity)
        -- Apply stacked operators while the top one has a higher precedence
        -- than the incoming operator, or the same precedence and the
        -- incoming operator is left-associative (associativity == 1).
        while opstack.n > 0 and (opstack[opstack.n].precedence > precedence or
            (opstack[opstack.n].precedence == precedence and associativity == 1)) do
            local rhs = numstack:pop()
            local lhs = numstack:pop()
            if not (rhs and lhs) then quit_no_message('missing number') end
            local op = opstack:pop()
            numstack:add(op.func(lhs, rhs))
        end
    end
    local token_last
    local function set_state(token_type)
        -- Numbers and operators must alternate.
        if token_last == token_type then
            local missing = (token_type == 'number') and 'operator' or 'number'
            quit_no_message('missing ' .. missing)
        end
        token_last = token_type
    end
    while true do
        local token = tokens:next()
        if type(token) == 'number' then
            set_state('number')
            numstack:add(token)
        elseif type(token) == 'table' then
            set_state('operator')
            perform_ops(token.precedence, token.associativity)
            opstack:add(token)
        elseif token == '(' then
            -- A parenthesized group counts as a single number.
            set_state('number')
            numstack:add(evaluate_tokens(tokens, true))
        elseif token == ')' then
            if inparens then
                break
            end
            quit_no_message('unbalanced parentheses')
        else
            -- End of text.
            if inparens then
                -- The '(' which started this group was never closed.
                quit_no_message('unbalanced parentheses')
            end
            break
        end
    end
    -- Precedence 0 is lower than any operator, so all pending operators run.
    perform_ops(0)
    if numstack.n > 1 then quit_no_message('missing operator') end
    if numstack.n < 1 then quit_no_message('missing number') end
    return numstack:pop()
end

local function evaluate(expression)
    -- Evaluate expression (a string such as "5/9") and return its value
    -- (a number), or throw an error if the expression is invalid.
    -- The parser is not bullet proof, but should handle the expressions
    -- that are used.
    local tokens = tokenizer(expression)
    return evaluate_tokens(tokens)
end

---End code to evaluate expressions-------------------------------------

---Begin code adapted from Module:Convert-------------------------------

-- Text appended to a singular unit name to make a plural when no explicit
-- plural name is defined (see unit_mt and unit_prefixed_mt).
local plural_suffix = 's' -- may be changed from translation.plural_suffix below

local function shallow_copy(t)
    -- Return a new table with the same keys and values as t.
    -- Only the top level is copied: values which are tables are shared with
    -- the original, and the metatable is not copied.
    -- (The features and overhead of Scribunto's mw.clone() are not needed.)
    local copy = {}
    for key, value in pairs(t) do
        copy[key] = value
    end
    return copy
end

local function split(text, delimiter)
    -- Split text at each delimiter and return the fields in a numbered table.
    -- Leading/trailing whitespace is removed from each field.
    -- The delimiter is inserted into a Lua pattern without escaping, so it
    -- must not contain pattern magic characters (for example, '.' would fail).
    local fields = {}
    local pattern = '%s*(.-)%s*' .. delimiter
    -- A delimiter is appended so the last field is also matched.
    for field in (text .. delimiter):gmatch(pattern) do
        fields[#fields + 1] = field
    end
    return fields
end

local unit_mt = {

-- Metatable to get missing values for a unit that does not accept SI prefixes.

-- Warning: The boolean value 'false' is returned for any missing field

-- so __index is not called twice for the same field in a given unit.

-- Fallbacks implemented below:
--   name1, sym_us  -> symbol
--   name2          -> name1 .. plural_suffix
--   name1_us       -> name1
--   name2_us       -> name1_us .. plural_suffix if name1_us was explicitly
--                     defined, otherwise name2
--   link           -> name1
--   anything else  -> false

__index = function (self, key)

local value

if key == 'name1' or key == 'sym_us' then

value = self.symbol

elseif key == 'name2' then

value = self.name1 .. plural_suffix

elseif key == 'name1_us' then

value = self.name1

-- Side effect: also fix name2_us now, because once name1_us is cached
-- below, the name2_us branch would treat it as explicitly defined.
if not rawget(self, 'name2_us') then

-- If name1_us is 'foot', do not make name2_us by appending plural_suffix.

self.name2_us = self.name2

end

elseif key == 'name2_us' then

local raw1_us = rawget(self, 'name1_us')

if raw1_us then

value = raw1_us .. plural_suffix

else

value = self.name2

end

elseif key == 'link' then

value = self.name1

else

value = false

end

-- Cache the result in the unit so later reads of key bypass __index.
rawset(self, key, value)

return value

end

}

local function prefixed_name(unit, name, index)
    -- Return name with the unit's SI prefix name (unit.si_name) inserted.
    -- The insertion point comes from the unit's raw field prefix_position:
    --   * a number is used as is;
    --   * a string is a comma-separated list from which item number index
    --     is used: index = 1 (name1), 2 (name2), 3 (name1_us), 4 (name2_us);
    --   * if no position results, the prefix is put in front of the name.
    -- The position is a byte (not character) index, so Lua's sub() is used.
    local position = rawget(unit, 'prefix_position')
    if type(position) == 'string' then
        local choices = split(position, ',')
        position = tonumber(choices[index])
    end
    if not position then
        return unit.si_name .. name
    end
    local head = name:sub(1, position - 1)
    local tail = name:sub(position)
    return head .. unit.si_name .. tail
end

local unit_prefixed_mt = {

-- Metatable to get missing values for a unit that accepts SI prefixes.

-- Before use, fields si_name, si_prefix must be defined.

-- The unit must define _symbol, _name1 and

-- may define _sym_us, _name1_us, _name2_us

-- (_sym_us, _name2_us may be defined for a language using sp=us

-- to refer to a variant unrelated to U.S. units).

-- Each public field is built from the matching underscore-prefixed raw
-- field: symbols get si_prefix prepended, and names get si_name inserted
-- by prefixed_name(). Any other missing field gives false.

__index = function (self, key)

local value

if key == 'symbol' then

value = self.si_prefix .. self._symbol

elseif key == 'sym_us' then

value = rawget(self, '_sym_us')

if value then

value = self.si_prefix .. value

else

value = self.symbol

end

elseif key == 'name1' then

value = prefixed_name(self, self._name1, 1)

elseif key == 'name2' then

value = rawget(self, '_name2')

if value then

value = prefixed_name(self, value, 2)

else

value = self.name1 .. plural_suffix

end

elseif key == 'name1_us' then

value = rawget(self, '_name1_us')

if value then

value = prefixed_name(self, value, 3)

else

value = self.name1

end

elseif key == 'name2_us' then

value = rawget(self, '_name2_us')

if value then

value = prefixed_name(self, value, 4)

elseif rawget(self, '_name1_us') then

value = self.name1_us .. plural_suffix

else

value = self.name2

end

elseif key == 'link' then

value = self.name1

else

value = false

end

-- Cache the result in the unit so later reads of key bypass __index.
rawset(self, key, value)

return value

end

}

local function lookup(units, unitcode, sp, what)
    -- Return a copy of the unit with the given unit code, or nil if not found.
    -- The copy is given a metatable (unit_mt or unit_prefixed_mt) which
    -- supplies values for missing fields.
    -- Three forms of unit code are tried, in order:
    --   1. a code defined in units (nil if that entry is a 'shouldbe' entry);
    --   2. an SI prefix followed by the code of a unit that accepts prefixes;
    --   3. engineering notation: 'e', digits, then the code of another unit.
    -- In this cut-down code, sp is always nil, and what is ignored.
    local definition = units[unitcode]
    if definition then
        if definition.shouldbe then
            return nil
        end
        local unit = shallow_copy(definition)
        if not unit.prefixes then
            return setmetatable(unit, unit_mt)
        end
        unit.si_name = ''
        unit.si_prefix = ''
        return setmetatable(unit, unit_prefixed_mt)
    end
    -- Look for an SI prefix; should never occur with an alias.
    -- Longer prefixes are checked first ('dam' is decametre).
    -- SIprefixes[1] = prefix maximum #characters (as seen by mw.ustring.sub).
    local si_prefixes = text_code.SIprefixes
    local longest = si_prefixes[1] or 2
    for length = longest, 1, -1 do
        local prefix = usub(unitcode, 1, length)
        local si = si_prefixes[prefix]
        local base = si and units[usub(unitcode, length + 1)]
        if base and base.prefixes then
            local unit = shallow_copy(base)
            if si.name_us and (sp == 'us' or base.sp_us) then
                unit.si_name = si.name_us
            else
                unit.si_name = si.name
            end
            unit.si_prefix = si.prefix or prefix
            -- In this script, each scale is a string.
            unit.scale = tostring(tonumber(base.scale) * 10 ^ (si.exponent * base.prefixes))
            -- A prefixed unit does not take more prefixes (in this script,
            -- the returned unit may be added to the list of units).
            unit.prefixes = nil
            return setmetatable(unit, unit_prefixed_mt)
        end
    end
    -- Engineering notation such as 'e3' followed by a unit code.
    local exponent, baseunit = unitcode:match('^e(%d+)(.*)')
    local engscale = exponent and text_code.eng_scales[exponent]
    if not engscale then
        return nil
    end
    local unit = lookup(units, baseunit, sp, 'no_combination')
    if not unit then
        return nil
    end
    if unit.offset or unit.builtin or unit.engscale then
        return nil
    end
    unit.defkey = unitcode -- key to lookup default exception
    unit.engscale = engscale
    -- Do not set unit.scale as this code is called for units where that is not set.
    return unit
end

local function evaluate_condition(value, condition)
    -- Return true or false from applying a conditional expression to value,
    -- or throw an error if invalid.
    -- A very limited set of expressions is supported:
    --    v < 9
    --    v * 9 < 9
    -- where
    --    'v' is replaced with value
    --    9 is any number (as defined by Lua tonumber)
    --    '<' can also be '<=' or '>' or '>='
    -- In addition, the following form is supported:
    --    LHS and RHS
    -- where
    --    LHS, RHS = any of above expressions.
    -- Anything else between 'v' and the comparison operator (for example
    -- 'v + 2 < 9') is not supported and is rejected as invalid.
    local function compare(value, text)
        -- factor is the text between the optional '*' and the comparison.
        local arithop, factor, compop, limit = text:match('^%s*v%s*([*]?)(.-)([<>]=?)(.*)$')
        if arithop == nil then
            quit_no_message('Invalid default expression.')
        elseif arithop == '*' then
            factor = tonumber(factor)
            if factor == nil then
                quit_no_message('Invalid default expression.')
            end
            value = value * factor
        elseif factor ~= '' then
            -- Without '*', nothing may appear between 'v' and the comparison;
            -- silently ignoring it would evaluate a different condition.
            quit_no_message('Invalid default expression.')
        end
        limit = tonumber(limit)
        if limit == nil then
            quit_no_message('Invalid default expression.')
        end
        if compop == '<' then
            return value < limit
        elseif compop == '<=' then
            return value <= limit
        elseif compop == '>' then
            return value > limit
        elseif compop == '>=' then
            return value >= limit
        end
        quit_no_message('Invalid default expression.')  -- should not occur
    end
    -- Split at the first 'and' which is delimited by non-word characters.
    local lhs, rhs = condition:match('^(.-%W)and(%W.*)')
    if lhs == nil then
        return compare(value, condition)
    end
    return compare(value, lhs) and compare(value, rhs)
end

---End adapted code-----------------------------------------------------

local function strip(text)
    -- Return text after removing any whitespace at its start and end.
    local trimmed = text:match('^%s*(.-)%s*$')
    return trimmed
end

local function empty(text)
    -- Return true if text is nil or the empty string; otherwise false.
    -- (The argument is assumed to be a string when it is not nil.)
    if text == nil then
        return true
    end
    return text == ''
end

-- Tables of units: k = unit code, v = unit table.
-- units_index is the table searched by get_unit().

local units_index = {} -- all units: normal, alias, per, combination, or multiple

local alias_index = {} -- all aliases (to detect attempts to define more than once)

local per_index = {} -- all "per" units (to detect attempts to define more than once)

local function get_unit(ucode, utype)
    -- Look up a unit code in units_index (the cache of defined units).
    -- * An empty or nil ucode gives nil.
    -- * If utype is nil, the result is whatever lookup() returns (the unit
    --   should already have been defined).
    -- * Otherwise, if ucode is not a defined unit, it may be an automatically
    --   generated combination: two or more unit codes separated by '+' or
    --   by whitespace. If every component is a defined unit (and not itself
    --   a combination), a dummy unit is returned which has only the field
    --   utype; reading any other field of the dummy throws an error.
    --   The dummy's utype is the given utype if every component has that
    --   type; otherwise it is the type of the first component that differs,
    --   so the caller can detect the mismatch.
    if empty(ucode) then
        return nil
    end
    local unit = lookup(units_index, ucode)
    if unit or not utype then
        return unit
    end
    local parts = collection()
    if ucode:find('+', 1, true) then
        for code in (ucode .. '+'):gmatch('%s*(.-)%s*%+') do
            if code ~= '' then
                parts:add(code)
            end
        end
    elseif ucode:find('%s') then
        for code in ucode:gmatch('%S+') do
            parts:add(code)
        end
    end
    if parts.n <= 1 then
        -- Not a combination.
        return
    end
    local dummy = setmetatable({ utype = utype }, {
        __index = function (self, key)
            error('Bug: invalid use of automatically generated unit')
        end })
    for _, code in ipairs(parts) do
        local component = lookup(units_index, code)
        if not component or component.shouldbe or component.combination then
            return nil
        end
        if component.utype ~= utype then
            dummy.utype = component.utype  -- set wrong type which caller will detect
            break
        end
    end
    return dummy
end

-- Unit codes (read from input) that should not be checked for a duplicate.
local overrides = {}

local function insert_unique_unit(data, unit, index)
    -- Append unit to the data table and, if index is not nil, also store it
    -- in index under its unit code.
    -- An error is thrown if the unit code is already defined, unless the
    -- code is listed in overrides.
    -- Before insertion, built-in data is applied: for each subtable of
    -- specials.ucode, the unit's field with the subtable's name is set to
    -- the subtable's entry for this unit code (nil if there is none).
    local ucode = unit.unitcode
    if get_unit(ucode) and not overrides[ucode] then
        quit('m_dup_code', ucode)
    end
    for field, by_ucode in pairs(specials.ucode) do
        unit[field] = by_ucode[ucode]
    end
    if index then
        index[ucode] = unit
    end
    data[#data + 1] = unit
end

local function check_condition(condition)
    -- Return true if condition appears to be valid; otherwise return false.
    -- The condition is evaluated for a range of sample values, and it is
    -- regarded as invalid if any evaluation throws an error.
    local samples = { 0, 0.1, 1, 1.1, 10, 100, 1000, 1e4, 1e5 }
    for i = 1, #samples do
        if not pcall(evaluate_condition, samples[i], condition) then
            return false
        end
    end
    return true
end

local function check_default_expression(default, ucode)
    -- Return a numbered table of the unit names present in param default:
    -- one name (default itself) if it is a plain name, or two names if it
    -- is an expression. Throw an error if a problem occurs.
    -- An expression has three or four fields delimited by '!', with 'v' in
    -- the first field (the condition) representing the input value for the
    -- conversion. The optional fourth field is a suffix appended to both names.
    -- Example: 'v < 120 ! small ! big ! suffix'
    -- returns { 'smallsuffix', 'bigsuffix' }.
    -- ucode is only used in error messages.
    if not default:find('!', 1, true) then
        return { default }
    end
    -- Split on '!', removing leading/trailing whitespace from each field.
    local fields = {}
    for field in (default .. '!'):gmatch('%s*(.-)%s*!') do
        fields[#fields + 1] = field
    end
    local count = #fields
    if count ~= 3 and count ~= 4 then
        quit('m_def_fmt', default, ucode)
    end
    local suffix = (count == 4) and fields[4] or ''
    if not check_condition(fields[1]) then
        quit('m_def_cond', default, ucode)
    end
    return { fields[2] .. suffix, fields[3] .. suffix }
end

local function check_default(default, ucode, utype, unit_table)
    -- Check the name (or expression) given as the default output of unit
    -- ucode, which has type utype and definition unit_table.
    -- Throw an error if a problem occurs: a name is repeated, is the unit
    -- itself, is not defined, or has the wrong type.
    -- Normally a unit must not define itself as its default. However,
    -- some units are defined merely for use in per units, and they have
    -- the same ucode, utype and default.
    -- Example: unit cent which cannot be converted to anything other than
    -- a cent, but which can work, for example, in cent/km and cent/mi.
    local seen = {}
    local names = check_default_expression(default, ucode)
    for _, name in ipairs(names) do
        if seen[name] then
            quit('m_def_rpt', name, ucode)
        end
        if name == ucode and ucode ~= utype then
            quit('m_def_same', ucode)
        end
        local target = get_unit(name, utype)
        if not target then
            quit('m_def_undef', name, ucode)
        end
        if utype ~= unit_table.utype or utype ~= target.utype then
            quit('m_def_type', name, ucode)
        end
        seen[name] = true
    end
end

local function check_all_defaults(cfg, units)
    -- Check the default output of each unit, after all input data has been
    -- processed.
    -- Units defined with 'shouldbe' or 'combination' are skipped.
    -- * A unit with a default has it checked with check_default(); problems
    --   are collected (stopping after cfg.maxerrors of them) and then thrown
    --   as a single error with one message per line.
    -- * A unit with no default, and which is not an alias (for which a
    --   default is optional), is reported in a warning; at most
    --   cfg.maxerrors unit codes are listed.
    local errors = collection()
    local missing = collection()  -- unitcodes with missing defaults
    for _, unit in ipairs(units) do
        if not (unit.shouldbe or unit.combination) then
            local default, ucode = unit.default, unit.unitcode
            if not empty(default) then
                local ok, msg = pcall(check_default, default, ucode, unit.utype, unit)
                if not ok then
                    errors:add(msg)
                    if errors.n >= cfg.maxerrors then
                        break
                    end
                end
            elseif not unit.target then
                -- Not an alias, so the unit should have a default.
                missing:add(ucode)
            end
        end
    end
    if errors.n > 0 then
        error(errors:join(), 0)
    end
    if missing.n > 0 then
        add_warning('m_wrn_nodef')
        local remaining = cfg.maxerrors
        for _, ucode in ipairs(missing) do
            remaining = remaining - 1
            if remaining < 0 then
                add_warning('m_wrn_more')
                break
            end
            add_warning('m_wrn_ucode', ucode)
        end
    end
end

local function check_all_pers(cfg, units)
    -- Check each "per" unit (a unit with a 'per' field), after all input
    -- data has been processed.
    -- * The per must have exactly two items; each must be non-empty and
    --   must be either a key of text_code.currency or a defined unit code.
    -- * For a per unit whose type is fuel efficiency, one component must be
    --   a volume and the other a length; such a unit gets the extra fields
    --   iscomplex = true and invert = 1 (volume first) or -1 (length first).
    -- Problems are collected (checking stops once cfg.maxerrors have been
    -- found) and then thrown as a single error with one message per line.
    local errors = collection()
    local function errmsg(key, ...)
        errors:add(message(key, ...))
    end
    local fuel_roles = { type_volume = 1, type_length = 2 }
    for _, unit in ipairs(units) do
        local per = unit.per
        if per then
            local ucode = unit.unitcode
            if #per == 2 then
                local types = {}
                for i, code in ipairs(per) do
                    if empty(code) then
                        errmsg('m_per_empty', ucode)
                    end
                    if not text_code.currency[code] then
                        local component = get_unit(code)
                        if component then
                            types[i] = component.utype
                        else
                            errmsg('m_per_undef', ucode, code)
                        end
                    end
                end
                if specials.utype[unit.utype] == 'type_fuel_efficiency' then
                    local top_type = fuel_roles[specials.utype[types[1]]]
                    local bot_type = fuel_roles[specials.utype[types[2]]]
                    if top_type and bot_type and top_type ~= bot_type then
                        unit.iscomplex = true
                        unit.invert = (top_type == 1) and 1 or -1
                    else
                        errmsg('m_per_fuel', ucode)
                    end
                end
            else
                errmsg('m_per_two', ucode)
            end
        end
        if errors.n >= cfg.maxerrors then
            break
        end
    end
    if errors.n > 0 then
        error(errors:join(), 0)
    end
end

local function update_units(units, composites, varnames, pernames)
    -- Add extra data defined in other sections to the unit definitions,
    -- after all input data has been processed.
    -- composites, varnames and pernames are tables keyed by unit code.
    -- For each unit that has an entry:
    --   subdivs  = the composite's subdivs items joined as text '{ a, b }'
    --   varname  = entry from varnames
    --   pername  = entry from pernames
    for _, unit in ipairs(units) do
        local ucode = unit.unitcode
        local composite = composites[ucode]
        if composite then
            unit.subdivs = '{ ' .. table.concat(composite.subdivs, ', ') .. ' }'
        end
        local varname = varnames[ucode]
        if varname then
            unit.varname = varname
        end
        local pername = pernames[ucode]
        if pername then
            unit.pername = pername
        end
    end
end

local function make_override(cfg, data)
    -- Return a function which, when called with (utype, fields), records
    -- that the unit code in fields[1] is not to be checked for a duplicate.
    -- Each code is recorded as data[code] = true.
    -- The returned function throws an error if the code is missing or has
    -- already been recorded. Params cfg and utype are not used.
    local function store_override(utype, fields)
        local ucode = fields[1]
        if empty(ucode) then
            quit('m_ovr_miss')
        end
        if data[ucode] then
            quit('m_ovr_dup', ucode)
        end
        data[ucode] = true
    end
    return store_override
end

local function make_default(cfg, data)
    -- Return a function which, when called, stores a table that defines a
    -- default output unit. The table is appended to data (also a table).
    -- Param cfg is not used.
    local defaults_index = {}  -- to detect attempts to define a default twice
    return function (utype, fields)
        -- Store a table { symbol = ..., default = ... } for a unit.
        -- This is for a unit such as 'kg' that has a default output unit
        -- different from what is defined for the base unit ('g').
        -- fields must hold exactly two items: unit code, default output.
        -- utype is the unit type which the unit and its default must have.
        -- Throw an error if a problem occurs.
        local ucode = fields[1]
        local default = fields[2]
        if empty(ucode) then
            quit('m_dfs_code')
        end
        if empty(default) then
            quit('m_dfs_none', ucode)
        end
        if #fields ~= 2 then
            quit('m_dfs_two', ucode)
        end
        local unit_table = get_unit(ucode)
        if not unit_table then
            quit('m_dfs_undef', ucode)
        end
        local symbol = unit_table.defkey or unit_table.symbol
        -- A unit returned by lookup() gives false (not nil) for a missing
        -- field, so test for a falsy value rather than using empty() alone.
        if not symbol or symbol == '' then
            quit('m_dfs_sym', ucode)
        end
        check_default(default, ucode, utype, unit_table)
        if defaults_index[ucode] then
            quit('m_dfs_dup', ucode)
        end
        defaults_index[ucode] = default
        table.insert(data, { symbol = symbol, default = default })
    end
end

local function clean_link(link, name)
	-- Return link, customary where:
	--   link = the given link with any leading '+', '*' or '@' removed, any
	--       '[[...]]' wiki formatting removed, and whitespace trimmed;
	--   customary = 1 for a leading '+', 2 for '*', 3 for '@', otherwise nil
	--       (for extra "US" or "U.S." or "Imperial" customary units link).
	-- The returned link is nil if it is empty, or if a name is given and the
	-- link equals the name (ignoring the case of the first letter). In that
	-- case no link field is stored and, if a link is required, it will be
	-- set from the unit's name.
	-- Exception: if the given link is empty and the name starts with '[[',
	-- the single value '' is returned (a unit name which is always linked).
	-- A leftover single bracket at either end is an error.
	local given = link
	if empty(link) then
		if name and name:sub(1, 2) == '[[' then
			return ''
		end
		return nil
	end
	local customary = ({ ['+'] = 1, ['*'] = 2, ['@'] = 3 })[link:sub(1, 1)]
	if customary then
		link = strip(link:sub(2))
	end
	-- Remove the wikilink brackets; each end is handled independently.
	if link:sub(1, 2) == '[[' then
		link = link:sub(3)
	end
	if link:sub(-2) == ']]' then
		link = link:sub(1, -3)
	end
	link = strip(link)
	if link:sub(1, 1) == '[' or link:sub(-1) == ']' then
		quit('m_lnk_brack', given)
	end
	if link == '' then
		return nil, customary
	end
	if name then
		local function lower_first(text)
			return ulower(usub(text, 1, 1)) .. usub(text, 2)
		end
		if lower_first(link) == lower_first(name) then
			-- Redundant: link == name, ignoring case of first letter.
			return nil, customary
		end
	end
	return link, customary
end

local function make_link(cfg, data)
	-- Return a function which, when called, stores a table that defines a
	-- link exception. The table is stored in data (also a table).
	local links_index = {}  -- to detect attempts to define a link twice
	return function (utype, fields)
		-- Store { symbol = key, link = title } for a unit such as 'kg' that
		-- has a linked article different from what is defined for the base
		-- unit ('g').
		-- Exactly two fields are required: unit code, link.
		-- The unit must already be defined, with the same unit type as the
		-- level-3 heading the row appears under.
		-- Throw an error if a problem occurs.
		local ucode = fields[1]
		local link = clean_link(fields[2])
		if empty(ucode) then
			quit('m_lnk_miss')
		end
		if empty(link) then
			quit('m_lnk_none', ucode)
		end
		if #fields ~= 2 then
			quit('m_lnk_two', ucode)
		end
		local unit_table = get_unit(ucode)
		if not unit_table then
			quit('m_lnk_undef', ucode)
		end
		if utype ~= unit_table.utype then
			quit('m_lnk_type', ucode)
		end
		-- A unit defined with the '*' prefix has linkey set as its own key for
		-- link exceptions because its symbol is shared with another unit.
		-- Honour that key (as make_default does with defkey) so the exception
		-- is not stored under the shared symbol.
		local symbol = unit_table.linkey or unit_table.symbol
		if empty(symbol) then
			quit('m_lnk_sym', ucode)
		end
		if links_index[ucode] then
			quit('m_lnk_dup', ucode)
		end
		links_index[ucode] = link
		table.insert(data, { symbol = symbol, link = link })
	end
end

local function clean_scale(scale)
	-- Return the scale as a string: thousand separators (commas) are removed
	-- and, if the result is not already a number, it is evaluated as an
	-- expression (like "5/9") and the value is formatted with 17 significant
	-- digits.
	-- Retaining expressions so the server evaluates them at full resolution
	-- would be more accurate, but there are many such expressions in the
	-- table of all units and it seems pointless to require the server to
	-- evaluate all of them just to do one convert.
	-- A missing, invalid or overflowing scale is an error.
	if empty(scale) then
		quit('m_scl_miss')
	end
	assert(type(scale) == 'string', 'Bug: scale has an unexpected type')
	local cleaned = scale:gsub(',', '')  -- remove comma separators
	if tonumber(cleaned) then
		-- A plain number is returned as entered (not reformatted).
		return cleaned
	end
	local ok, value = pcall(evaluate, cleaned)
	if not ok or type(value) ~= 'number' then
		quit('m_scl_bad', cleaned)
	end
	local text = string.format('%.17g', value)
	if text:find('[#n]') then
		-- Lua can give results like "#INF" while Scribunto gives "inf". Either is an error.
		quit('m_scl_oflow', cleaned)
	end
	-- Omit redundant zeros from results like '1.2e-005'.
	-- Do not bother looking for results like '1.2e+005' as none occur in practice.
	local mantissa, padding, exponent = text:match('^(.-e%-)(0+)(.*)')
	if padding then
		text = mantissa .. exponent
	end
	return text
end

local function add_alias_optional_fields(unit, start, fields, target)
	-- Examine fields[start], fields[start+1], ... and copy each valid
	-- "key = value" definition for an alias or a "per" into unit.
	-- For an alias, target is the (valid) unit it refers to; for a "per",
	-- target is nil.
	-- Empty fields are skipped; any other field that is not an accepted
	-- definition is an error.
	local function apply(key, value)
		-- Store one definition in unit; return true if it was accepted.
		if key == 'sp' then
			if value == 'us' then
				unit.sp_us = true
				return true
			end
			return false
		elseif key == 'link' then
			local target_link
			if target then
				target_link = target.link
			end
			local link, customary = clean_link(value, target_link)
			if link then
				unit.link = link
			end
			if customary then
				unit.customary = customary
			end
			return true
		elseif key == 'symlink' then
			-- Must contain '[[' somewhere before ']]'.
			local opening = value:find('[[', 1, true)
			local closing = value:find(']]', 1, true)
			if not (opening and closing and (opening < closing)) then
				quit('m_als_link', unit.unitcode)
			end
			unit.symlink = value
			return true
		elseif key == 'multiplier' then
			if not tonumber(value) then
				quit('m_als_mul', unit.unitcode, value)
			end
			unit.multiplier = value
			return true
		elseif key == 'abbr' then
			-- Only "abbr = off" for an alias is accepted.
			if target and value == 'off' then
				unit.usename = 1
				return true
			end
			return false
		elseif key == 'default' or key == 'symbol' then
			-- Repeating what the target already defines is pointless.
			if target and value == target[key] then
				quit('m_als_same', key, unit.unitcode)
			end
			unit[key] = value
			return true
		end
		return false
	end
	for i = start, #fields do
		local field = fields[i]
		if not empty(field) then
			local key, value = field:match('^%s*(.-)%s*=%s*(.-)%s*$')
			if empty(value) or not apply(key, value) then
				quit('m_als_bad', field)
			end
		end
	end
end

local function make_alias(fields, ucode, utype, symbol)
	-- Create the alias unit ucode which refers to the unit with code symbol.
	-- Return the new unit, or nil if symbol is not already defined as a
	-- unit code. Optional definitions are read from fields[3] onwards.
	-- A duplicate alias, or a target of a different unit type, is an error.
	local target = get_unit(symbol)
	if target then
		local alias = { unitcode = ucode, utype = utype, target = symbol }
		add_alias_optional_fields(alias, 3, fields, target)
		if not alias_index[ucode] then
			alias_index[ucode] = alias
		else
			quit('m_als_dup', ucode)
		end
		if target.utype ~= utype then
			quit('m_als_type', ucode)
		end
		return alias
	end
	return nil
end

local function make_per(fields, ucode, utype, symbol)
	-- Create the "per" unit ucode from a symbol of form "x/y".
	-- Return the new unit, or nil if symbol contains no '/'.
	-- The text before the first '/' is the top code and everything after
	-- it is the bottom code; whether they are valid is checked later,
	-- after all units are defined.
	-- Optional definitions are read from fields[3] onwards.
	-- Defining the same "per" unit twice is an error.
	local top, bottom = symbol:match('^(.-)/(.*)$')
	if top then
		local per_unit = {
			unitcode = ucode,
			utype = utype,
			per = { strip(top), strip(bottom) },
		}
		add_alias_optional_fields(per_unit, 3, fields)
		if not per_index[ucode] then
			per_index[ucode] = per_unit
		else
			quit('m_per_dup', ucode)
		end
		return per_unit
	end
	return nil
end

local function make_unit(cfg, data)
	-- Return a function which, when called, stores a table that defines a
	-- single unit. The table is stored in data (also a table).
	-- Parameter cfg is not used in this function.
	local fieldnames = {
		-- Fields in the Conversions section are assumed to be in the following order.
		'unitcode',
		'symbol',
		'sym_us',
		'scale',
		'extra',
		'name1',
		'name2',
		'name1_us',
		'name2_us',
		'prefixes',
		'default',
		'link',
	}
	return function (utype, fields)
		-- Store a table defining a unit.
		-- utype is the unit type; fields is the numbered table of strings
		-- from one table row, in the order given by fieldnames.
		-- The first character(s) of the symbol field may be a magic prefix:
		--   '='  the row defines an alias of an existing unit;
		--   '==' the row defines a "per" unit;
		--   '!'  the row defines a message saying what code should be used;
		--   '~'  output should use the unit's name rather than its symbol;
		--   '*'  the unit has its own key for default/link exceptions.
		-- Throw an error if a problem occurs.
		local ucode, symbol = fields[1], fields[2]
		if empty(utype) then
			quit('m_miss_type')
		end
		if empty(ucode) then
			quit('m_miss_code')
		end
		if empty(symbol) then
			quit('m_miss_sym')
		end
		local prefix = symbol:sub(1, 1)
		if prefix == '~' or prefix == '=' or prefix == '!' or prefix == '*' then
			if symbol:sub(1, 2) == '==' then
				prefix = symbol:sub(1, 2)
			end
			symbol = strip(symbol:sub(#prefix + 1))  -- omit prefix and any following whitespace
			-- Write the cleaned symbol back so later code reading fields[2]
			-- (the fieldnames loop below) does not see the prefix.
			fields[2] = symbol
		else
			prefix = nil  -- not a valid prefix
		end
		if prefix == '=' or prefix == '==' then
			-- ucode is an alias (a fake unit code used in a convert template), or
			-- defines a "per" unit like "$/acre" or "BTU/h".
			-- For an alias, symbol is the unit code of the actual unit.
			-- For a "per", symbol is of form "x/y" where x and y are unit codes,
			-- or x is a recognized currency symbol and y is a unit code.
			-- Checking that x and y are valid is deferred until all units have
			-- been defined so, for example, "BTU/h" can be defined before "h".
			local unit
			if prefix == '=' then
				unit = make_alias(fields, ucode, utype, symbol)
			else
				unit = make_per(fields, ucode, utype, symbol)
			end
			if not unit then
				-- make_alias returns nil if symbol is not a defined unit code;
				-- make_per returns nil if symbol is not of form "x/y".
				-- Do not define an alias in terms of another alias.
				quit('m_als_undef', symbol)
			end
			insert_unique_unit(data, unit, units_index)
			return
		elseif prefix == '!' then
			-- ucode may be incorrectly entered as a unit code.
			-- symbol is a message saying what unit code should be used.
			local unit = { unitcode = ucode, shouldbe = symbol }
			-- NOTE(review): nil is passed instead of units_index here;
			-- presumably such an entry is not indexed as a real unit - confirm
			-- against insert_unique_unit.
			insert_unique_unit(data, unit, nil)
			return
		end
		-- Make the unit.
		-- Copy each non-empty field to the key named in fieldnames; fields
		-- beyond the last named column are ignored.
		local unit = { utype = utype }
		for i, name in ipairs(fieldnames) do
			if not empty(fields[i]) then
				unit[name] = fields[i]
			end
		end
		-- Remove redundancy from unit.
		-- In the following, the locals name1 and name2 hold the effective
		-- names (after applying defaults) while unit.name1 and unit.name2 hold
		-- what will be stored (nil if redundant).
		if unit.sym_us == symbol then
			unit.sym_us = nil
		end
		local prefixes = unit.prefixes
		local name1, name2 = unit.name1, unit.name2
		if name1 then
			if name1 == symbol and not prefixes then
				-- A unit which takes an SI prefix must not have a nil name because,
				-- for example, the name for "kW" = "kilo" .. "watt" (name for "W").
				-- The "not prefixes" test is needed for bnwiki where the
				-- watt unit has the same name and symbol.
				unit.name1 = nil
			end
		else
			name1 = symbol
		end
		if name2 then
			if name2 == name1 .. plural_suffix then
				unit.name2 = nil
			end
		else
			name2 = name1 .. plural_suffix
		end
		local name1_us, name2_us = unit.name1_us, unit.name2_us
		if name1_us then
			if name1_us == name1 then
				unit.name1_us = nil
			end
		end
		if name2_us then
			-- The plural US name is redundant if it is the regular plural of a
			-- stored singular US name or, when no singular US name is stored,
			-- if it is the same as the effective plural name.
			if unit.name1_us then
				if name2_us == unit.name1_us .. plural_suffix then
					unit.name2_us = nil
				end
			elseif name2_us == name2 then
				unit.name2_us = nil
			end
		end
		-- Other changes to unit.
		unit.scale = clean_scale(unit.scale)
		local extra = unit.extra
		if not empty(extra) then
			-- Set appropriate fields for a unit that needs more than a simple
			-- multiplication by a ratio of unit scales to convert values.
			-- The tests are ordered: the two fuel-efficiency keywords first,
			-- then any extra for a temperature unit is an offset, then the
			-- keyword 'invert', and anything else is the name of a builtin.
			unit.iscomplex = true
			if extra == 'volume/length' then
				unit.invert = 1
			elseif extra == 'length/volume' then
				unit.invert = -1
			elseif specials.utype[utype] == 'type_temperature' then
				unit.offset = extra
			elseif extra == 'invert' then
				unit.invert = -1
			else
				unit.builtin = extra
			end
		end
		if prefix == '~' then
			-- Magic code for units like "acre" where the symbol is not really a
			-- symbol, and output should use the singular or plural name instead.
			unit.usename = 1
		elseif prefix == '*' then
			-- Magic code for units like "pitch" which have a symbol that is the same as
			-- another unit with entries defined in the default or link exceptions tables.
			unit.defkey = ucode  -- key for default exceptions
			unit.linkey = ucode  -- key for link exceptions
		end
		local name_for_link
		if prefixes then
			-- Replace the keyword with the number that is stored in the output.
			if prefixes == 'SI' then
				unit.prefixes = 1
			elseif prefixes == 'SI2' then
				unit.prefixes = 2
			elseif prefixes == 'SI3' then
				unit.prefixes = 3
			else
				quit('m_pfx_bad', prefixes)
			end
		else
			-- Only units which do not accept SI prefixes have name_for_link set.
			-- That is because, for example, if set name_for_link = name1 for unit g,
			-- then the link is "kilogram" for kg, and "yottagram" for Yg, and so on
			-- for all prefixes. That might be desirable for some units, but not all.
			name_for_link = name1
		end
		unit.link, unit.customary = clean_link(unit.link, name_for_link)
		if prefixes then
			-- The SI prefix is always at the start (position = 1) for symbol and sym_us.
			-- However, each name (name1, name2, name1_us, name2_us) can have the SI prefix
			-- at any position, and that position can be different for each name.
			-- For enwiki, the only units with names where the prefix is not at the start
			-- are "square metre" and "cubic metre" ("square meter" and "cubic meter" for sp=us).
			-- Some other wikis want the flexibility that the prefix position can be different
			-- so the position is stored as nil (if always 1), or N (an integer, if always N),
			-- or a string of four comma-separated numbers such as "5,7,9,11" which means the
			-- prefix position for (name1, name2, name1_us, name2_us) is (5, 7, 9, 11)
			-- respectively.
			-- In a name, the position is marked by a literal "%s" which is
			-- removed from the stored name.
			local name1, name1_us = unit.name1, unit.name1_us  -- after redundancy removed
			if not name1 then
				quit('m_pfx_name')
			end
			local positions = collection()
			for i, k in ipairs({ 'name1', 'name2', 'name1_us', 'name2_us' }) do
				local name = unit[k]
				local pos
				if name then
					-- Plain (non-pattern) search for the two characters "%s".
					pos = name:find('%s', 1, true)
					if pos then
						unit[k] = name:sub(1, pos - 1) .. name:sub(pos + 2)
					end
				elseif i == 2 or i == 3 then
					-- An omitted name2 or name1_us uses the position for name1.
					pos = positions[1]
				elseif i == 4 then
					-- An omitted name2_us uses the position for name1_us if
					-- that name is stored, otherwise the position for name2.
					pos = positions[unit.name1_us and 3 or 2]
				end
				positions:add(pos or 1)
			end
			-- Collapse to a single number if all four positions are equal;
			-- otherwise store the quoted comma-separated list.
			local pos = positions[1]
			for i = 2, positions.n do
				if pos ~= positions[i] then
					pos = '"' .. positions:join(',') .. '"'
					break
				end
			end
			if pos ~= 1 then
				unit.prefix_position = pos
			end
			for _, name in ipairs({ 'symbol', 'sym_us', 'name1', 'name1_us', 'name2', 'name2_us' }) do
				unit['_' .. name] = unit[name]
				unit[name] = nil  -- force call to __index metamethod so any SI prefix can be handled
			end
		end
		for name, v in pairs(unit) do
			-- Reject if a string field includes "%s" (should not occur after above).
			if type(v) == 'string' and v:find('%s', 1, true) then
				quit('m_percent_s', name)
			end
		end
		insert_unique_unit(data, unit, units_index)
	end
end

local function make_combination(cfg, data)
	-- Build the handler for one row of the combinations section.
	-- Each accepted row is stored in data as a combination unit.
	return function (utype, fields)
		-- A combination unit specifies more than one output unit.
		-- The first field is the code of the combination; every other
		-- non-empty field is the code of an output unit which must already
		-- be defined with the same unit type.
		-- At least two output units are required.
		-- Any problem is reported via quit().
		local targets = {}
		local unit = { utype = utype, combination = targets }
		for position, code in ipairs(fields) do
			if position == 1 then
				if code == '' then
					quit('m_cmb_miss')
				end
				unit.unitcode = code
			elseif code ~= '' then  -- empty fields are ignored
				local target = get_unit(code)
				if not target then
					quit('m_cmb_undef', code, unit.unitcode)
				end
				if target.utype ~= utype then
					quit('m_cmb_type', code, unit.unitcode)
				end
				table.insert(targets, code)
			end
		end
		local found = #targets
		if found < 2 then
			quit(found == 0 and 'm_cmb_none' or 'm_cmb_one', unit.unitcode)
		end
		insert_unique_unit(data, unit, units_index)
	end
end

local function make_perunit(cfg, data)
	-- Return a function which, when called, stores a table that defines a
	-- fixup for an automatic per unit. The table is stored in data (also a table).
	local pertype_index = {}  -- to detect attempts to define a fixup twice
	return function (utype, fields)
		-- Store a table to define a fixup: { lhs = key, rhs = lua_source }.
		-- Fields are: 1 = key like "length/time" (required), 2 = unit type
		-- like "speed", 3 = link, 4 = multiplier (must be a number).
		-- At least one of fields 2 to 4 is required; any further non-empty
		-- field is an error.
		-- rhs is Lua source: a quoted string if only the unit type is given,
		-- otherwise a table constructor.
		-- Typos or other errors in the input are not detected!
		-- Parameter utype is ignored (it is nil).
		-- Throw an error if a problem occurs.
		local lhs, rhs, link, multiplier
		for i, v in ipairs(fields) do
			if v == '' then
				-- Ignore empty fields.
			elseif i == 1 then
				lhs = v  -- like "length/time"
			elseif i == 2 then
				rhs = v  -- like "speed"
			elseif i == 3 then
				link = v
			elseif i == 4 then
				if not tonumber(v) then
					quit('m_per_inv')
				end
				multiplier = v
			else
				quit('m_per_inv')
			end
		end
		if lhs and (rhs or link or multiplier) then
			-- Strings are written with %q (as done elsewhere in this script)
			-- so a quote or backslash in the input cannot produce invalid
			-- Lua source. For ordinary text the result is the same as
			-- wrapping the text in double quotes.
			if link or multiplier then
				local parts = collection()
				if rhs then
					parts:add(string.format('utype = %q', rhs))
				end
				if link then
					parts:add(string.format('link = %q', link))
				end
				if multiplier then
					-- Already checked to be a number, so it is written as entered.
					parts:add('multiplier = ' .. multiplier)
				end
				rhs = '{ ' .. parts:join(', ') .. ' }'
			else
				rhs = string.format('%q', rhs)
			end
			if pertype_index[lhs] then
				quit('m_per_dup', lhs)
			end
			pertype_index[lhs] = rhs
			table.insert(data, { lhs = lhs, rhs = rhs })
		else
			quit('m_per_inv')
		end
	end
end

local function make_varname(cfg, data)
	-- Build the handler for one row of the varnames section.
	-- Each accepted row sets data[unitcode] to the names joined with '!'.
	return function (utype, fields)
		-- Record that a unit has a variable name.
		-- This is for slwiki where a unit name depends on the value.
		-- The row must have exactly cfg.varcolumns fields, none empty:
		-- the unit code (which must already be defined) then the names.
		-- Parameter utype is ignored (it is nil).
		-- Any problem is reported via quit(), which throws.
		local total = #fields
		if total ~= cfg.varcolumns then
			quit('m_var_cnt')
		end
		local ucode
		local names = {}
		for column = 1, total do
			local value = fields[column]
			if empty(value) then
				quit('m_var_miss')
			elseif column > 1 then
				table.insert(names, value)
			else
				ucode = value
				if not get_unit(value) then
					quit('m_var_undef', value)
				end
			end
		end
		if data[ucode] then
			quit('m_var_dup', ucode)
		end
		data[ucode] = table.concat(names, '!')
	end
end

local function make_pername(cfg, data)
	-- Build the handler for one row of the pernames section.
	-- Each accepted row sets data[unitcode] = per name.
	return function (utype, fields)
		-- Record that a unit has a non-standard per name if used as the
		-- second unit in a per unit (x per y).
		-- The row must have exactly two non-empty fields: the unit code
		-- (which must already be defined) and the per name.
		-- Parameter utype is ignored (it is nil).
		-- Any problem is reported via quit(), which throws.
		if #fields ~= 2 then
			quit('m_pnm_cnt')
		end
		local ucode, pername = fields[1], fields[2]
		-- The checks are made in field order: the code is fully validated
		-- before the name is examined.
		if empty(ucode) then
			quit('m_pnm_miss')
		end
		if not get_unit(ucode) then
			quit('m_pnm_undef', ucode)
		end
		if empty(pername) then
			quit('m_pnm_miss')
		end
		if data[ucode] then
			quit('m_pnm_dup', ucode)
		end
		data[ucode] = pername
	end
end

local function reversed(t)
	-- Return a new numbered table holding the items of t in reverse order.
	-- The given table is not changed.
	local size = #t
	local flipped = {}
	for source = size, 1, -1 do
		flipped[size - source + 1] = t[source]
	end
	return flipped
end

local function make_inputmultiple(cfg, data)
	-- Build the handler for one row of the input multiples section.
	-- Each accepted row adds one subdivision (as Lua source text) to
	-- data[main unit code].subdivs.
	return function (utype, fields)
		-- Record that a unit accepts a subdivision to make a composite
		-- input unit like '|2|ft|6|in'.
		-- Fields: 1=composite, 2=ucode1, 3=ucode2, 4=default, 5=alternate, 6=name.
		-- The composite code is required for simplicity but is not used in
		-- the output. The alternate (an alternative unit code to replace
		-- convert input) and the name (a fixed name to replace the unit's
		-- normal symbol/name) are optional.
		-- The target units must be defined first.
		-- Any problem is reported via quit(), which throws.
		local unitcode, default_code, alternate_code, fixed_name
		local ucodes, scales = {}, {}
		for i, v in ipairs(fields) do
			if i == 1 then
				if v == '' then
					quit('m_cmp_miss')
				end
				unitcode = v
			elseif i == 6 then
				if v ~= '' then
					fixed_name = v
				end
			elseif i > 6 then
				quit('m_cmp_many', unitcode)
			elseif i < 5 or v ~= '' then  -- only field 5 may be empty
				-- The default may be an auto combination, so the unit type
				-- is passed when looking it up.
				local target = get_unit(v, i == 4 and utype or nil)
				if not target then
					quit('m_cmp_undef', v, unitcode)
				end
				if target.utype ~= utype then
					quit('m_cmp_type', v, unitcode)
				end
				if i == 4 then
					default_code = v
				elseif i == 5 then
					-- The alternate must have the same scale as the second unit.
					if scales[#scales] ~= target.scale then
						quit('m_cmp_scale', v, unitcode)
					end
					alternate_code = v
				else
					if not target.scale then
						quit('m_mul_std', v, unitcode)
					end
					table.insert(ucodes, v)
					table.insert(scales, target.scale)
				end
			end
		end
		if #ucodes ~= 2 then
			quit('m_cmp_two', unitcode)
		end
		if not default_code then
			quit('m_cmp_def', unitcode)
		end
		-- Component units must be specified from most-significant to least-significant,
		-- and each ratio of a pair of scales must be very close to an integer.
		-- Currently, there will be exactly two scales and one ratio.
		local count = #scales
		for i = 1, count do
			local number = tonumber(scales[i])
			if number == nil or number <= 0 then
				quit('m_cmp_inval', unitcode, scales[i])
			end
			scales[i] = number
		end
		local ratios = {}
		for i = 1, count - 1 do
			local ratio = scales[i] / scales[i + 1]
			local nearest = math.floor(ratio + 0.5)
			if nearest < 2 then
				quit('m_cmp_order', unitcode)
			end
			if math.abs(ratio - nearest)/ratio > 1e-6 then
				quit('m_cmp_int', unitcode)
			end
			ratios[i] = nearest
		end
		-- Build the Lua source for the subdivision: the ratio followed by
		-- whichever named items are defined.
		local text = { tostring(ratios[1]) }
		local named = {
			{ 'default', default_code },
			{ 'unit', alternate_code },
			{ 'name', fixed_name },
		}
		for _, pair in ipairs(named) do
			if pair[2] then
				table.insert(text, string.format('%s = %q', pair[1], pair[2]))
			end
		end
		local subdiv = string.format('["%s"] = { %s }', ucodes[2], table.concat(text, ', '))
		local main_code = ucodes[1]
		local entry = data[main_code]
		if not entry then
			data[main_code] = { subdivs = { subdiv } }
		else
			table.insert(entry.subdivs, subdiv)
		end
	end
end

local function make_outputmultiple(cfg, data)
	-- Build the handler for one row of the output multiples section.
	-- Each accepted row is stored in data as a multiple output unit.
	return function (utype, fields)
		-- This is for a multiple unit like 'ydftin' (result in yards, feet, inches).
		-- The first field is the code of the multiple; every other non-empty
		-- field is the code of a component unit which must already be
		-- defined with the same unit type and with a scale.
		-- At least two components are required.
		-- Any problem is reported via quit().
		local unit = { utype = utype }
		local ucodes, scales = {}, {}
		for position, code in ipairs(fields) do
			if position == 1 then
				if code == '' then
					quit('m_mul_miss')
				end
				unit.unitcode = code
			elseif code ~= '' then  -- empty fields are ignored
				local target = get_unit(code)
				if not target then
					quit('m_mul_undef', code, unit.unitcode)
				end
				if target.utype ~= utype then
					quit('m_mul_type', code, unit.unitcode)
				end
				if not target.scale then
					quit('m_mul_std', code, unit.unitcode)
				end
				table.insert(ucodes, code)
				table.insert(scales, target.scale)
			end
		end
		local count = #ucodes
		if count < 2 then
			quit(count == 0 and 'm_mul_none' or 'm_mul_one', unit.unitcode)
		end
		-- Component units must be specified from most-significant to least-significant
		-- (so scale values will be in descending order),
		-- and each ratio of a pair of scales must be very close to an integer.
		-- The components and ratios are stored in reverse order (least significant first).
		-- This script stores a unit scale as a string (might be an expression like "5/9"),
		-- but scales in a multiple are handled as numbers (should never be expressions).
		for i = 1, count do
			local number = tonumber(scales[i])
			if number == nil or number <= 0 then
				quit('m_mul_scale', unit.unitcode, scales[i])
			end
			scales[i] = number
		end
		local multiple = {}
		for i = 1, count - 1 do
			local ratio = scales[i] / scales[i + 1]
			local nearest = math.floor(ratio + 0.5)
			if nearest < 2 then
				quit('m_mul_order', unit.unitcode)
			end
			if math.abs(ratio - nearest)/ratio > 1e-6 then
				quit('m_mul_int', unit.unitcode)
			end
			-- Ratio i (of count - 1 ratios) goes to the mirrored position.
			multiple[count - i] = nearest
		end
		local combination = {}
		for i = 1, count do
			combination[i] = ucodes[count + 1 - i]
		end
		unit.combination = combination
		unit.multiple = multiple
		insert_unique_unit(data, unit, units_index)
	end
end

-- To make updating the data module easier, this script inserts a preamble

-- and a postamble so the result can be used to replace the whole page.

-- The following strings are fixed pieces of Lua source text for the output.
-- Each generated table has a prefix, a suffix and (for the exception and
-- fixup tables) an item template containing {placeholder} names.
-- NOTE(review): the code that joins these pieces and fills the placeholders
-- is not in this part of the file; presumably each {name} is replaced by the
-- field of the same name in a stored item - confirm.

-- Comment block for the top of the generated data module.
local data_preamble = [=[

-- Conversion data used by Module:Convert which uses mw.loadData() for

-- read-only access to this module so that it is loaded only once per page.

-- See :en:Template:Convert/Transwiki guide if copying to another wiki.

--

-- These data tables follow:

-- all_units all properties for a unit, including default output

-- default_exceptions exceptions for default output ('kg' and 'g' have different defaults)

-- link_exceptions exceptions for links ('kg' and 'g' have different links)

--

-- These tables are generated by a script which reads the wikitext of a page that

-- documents the required properties of each unit; see :en:Module:Convert/doc.

]=]

-- Return statement for the end of the generated data module; it exports the
-- four tables whose declarations open in the prefixes below.
local data_postamble = [=[

return {

all_units = all_units,

default_exceptions = default_exceptions,

link_exceptions = link_exceptions,

per_unit_fixups = per_unit_fixups,

}]=]

-- Start and end of the all_units table.
local out_unit_prefix = [[

---------------------------------------------------------------------------

-- Do not change the data in this table because it is created by running --

-- a script that reads the wikitext from a wiki page (see note above). --

---------------------------------------------------------------------------

local all_units = {]]

local out_unit_suffix = [[

}

]]

-- Start, end and item template of the default_exceptions table.
local out_default_prefix = [[

---------------------------------------------------------------------------

-- Do not change the data in this table because it is created by running --

-- a script that reads the wikitext from a wiki page (see note above). --

---------------------------------------------------------------------------

local default_exceptions = {

-- Prefixed units with a default different from that of the base unit.

-- Each key item is a prefixed symbol (unitcode for engineering notation).]]

local out_default_suffix = [[

}

]]

-- Placeholders: {symbol}, {default}.
local out_default_item = [[

["{symbol}"] = "{default}",]]

-- Start, end and item template of the link_exceptions table.
local out_link_prefix = [[

---------------------------------------------------------------------------

-- Do not change the data in this table because it is created by running --

-- a script that reads the wikitext from a wiki page (see note above). --

---------------------------------------------------------------------------

local link_exceptions = {

-- Prefixed units with a linked article different from that of the base unit.

-- Each key item is a prefixed symbol (not unitcode).]]

local out_link_suffix = [[

}

]]

-- Placeholders: {symbol}, {link}.
local out_link_item = [[

["{symbol}"] = "{link}",]]

-- Start, end and item template of the per_unit_fixups table.
local out_perunit_prefix = [[

---------------------------------------------------------------------------

-- Do not change the data in this table because it is created by running --

-- a script that reads the wikitext from a wiki page (see note above). --

---------------------------------------------------------------------------

local per_unit_fixups = {

-- Automatically created per units of form "x/y" may have their unit type

-- changed, for example, "length/time" is changed to "speed".

-- Other adjustments can also be specified.]]

local out_perunit_suffix = [[

}

]]

-- Placeholders: {lhs}, {rhs}. The rhs is not quoted here because
-- make_perunit stores it as ready-made Lua source (a quoted string or a
-- table constructor).
local out_perunit_item = [[

["{lhs}"] = {rhs},]]

-- Each *_specification table below is a list of field names for one kind of
-- unit. add_unit_lines(results, unit, spec) walks such a list in order and
-- writes one line for each named field that is set in the unit, so the order
-- here is the order of the fields in the output.
-- NOTE(review): the code that chooses which specification applies to a unit
-- is not in this part of the file - confirm the pairing implied by the names.

local combination_specification = {  -- pure combination like 'm ft', or a multiple like 'ftin'
	'combination',
	'multiple',
	'utype',
}

-- Fields for an alias unit (see make_alias and add_alias_optional_fields).
local alias_specification = {
	'target',
	'symbol',
	'sp_us',
	'usename',
	'default',
	'link',
	'symlink',
	'customary',
	'multiplier',
}

-- Fields for a "per" unit (see make_per and add_alias_optional_fields).
local per_specification = {
	'per',
	'symbol',
	'sp_us',
	'utype',
	'invert',
	'iscomplex',
	'default',
	'link',
	'symlink',
	'customary',
	'multiplier',
}

-- Field for an entry which only says what unit code should be used
-- (a row with the '!' prefix in make_unit).
local shouldbe_specification = {
	'shouldbe',
}

-- Fields for a standard unit (see make_unit). The names starting with '_'
-- hold the symbol and names of a unit that accepts SI prefixes.
local unit_specification = {
	'_name1',
	'_name1_us',
	'_name2',
	'_name2_us',
	'_symbol',
	'_sym_us',
	'prefix_position',
	'name1',
	'name1_us',
	'name2',
	'name2_us',
	'pername',
	'varname',
	'symbol',
	'sym_us',
	'usename',
	'usesymbol',
	'utype',
	'alttype',
	'builtin',
	'scale',
	'offset',
	'invert',
	'iscomplex',
	'istemperature',
	'exception',
	'prefixes',
	'default',
	'subdivs',
	'defkey',
	'linkey',
	'link',
	'customary',
	'sp_us',
}

-- Fields whose string values are written to the output as they are rather
-- than as quoted Lua strings (tested in add_unit_lines); such a value is
-- already Lua source, for example a number or a table constructor.
local no_quotes = {
	combination = true,
	customary = true,
	multiple = true,
	multiplier = true,
	offset = true,
	per = true,
	prefix_position = true,
	scale = true,
	subdivs = true,
}

local function add_unit_lines(results, unit, spec)
	-- Append the lines of Lua source that define one unit to results
	-- (a collection with an add method).
	-- spec is a numbered table of field names; a line is written for each
	-- name, in order, whose value in unit is set (not nil or false).
	-- A string value is written quoted unless its field is in no_quotes.
	-- A value that is not a boolean, number or string is an error.
	local eight_spaces = string.rep(' ', 8)
	local function emit(text)
		-- Had planned to replace sequences of spaces with 4-column tabs here
		-- (because the CodeEditor now assumes the use of such tabs).
		-- However, a module is usually viewed (with 8-column tabs) rather
		-- than edited with the CodeEditor, and some indents would be messed
		-- up unless 8-column tabs are used. Therefore, 8 spaces at the start
		-- of a line are simply replaced with a single tab, which reduces the
		-- size of the module and is correct for viewing.
		if text:sub(1, 8) == eight_spaces then
			text = '\t' .. text:sub(9)
		end
		results:add(text)
	end
	emit(' ["' .. unit.unitcode .. '"] = {')
	for _, key in ipairs(spec) do
		local value = unit[key]
		if value then
			local kind = type(value)
			local quoted = (kind == 'string' and not no_quotes[key])
			if kind == 'boolean' then
				value = tostring(value)
			elseif kind == 'number' or key == 'scale' then
				-- Replace results like '1e-006' with '1e-6'.
				value = string.gsub(tostring(value), '(e[+-])0+([1-9].*)', '%1%2', 1)
			elseif kind ~= 'string' then
				quit('m_ftl_type', unit.unitcode)
			end
			-- Make a format like '        %-9s= %q,' then apply it.
			local template = string.format('%8s%%-9s= %%%s,', '', quoted and 'q' or 's')
			emit(template:format(key, value))
		end
	end
	emit(' },')
end

local function numbered_table_as_string(data, unit)
	-- Return Lua source text for a table constructor listing the items in
	-- the numbered table data: strings in double quotes, numbers as they are.
	-- An item of any other type is an error (reported for unit.unitcode).
	local items = {}
	for index, value in ipairs(data) do
		local kind = type(value)
		if kind == 'string' then
			items[index] = '"' .. value .. '"'
		elseif kind == 'number' then
			items[index] = tostring(value)
		else
			quit('m_ftl_type', unit.unitcode)
		end
	end
	return '{ ' .. table.concat(items, ', ') .. ' }'
end

local function extract_heading(line)
	-- Return level, text for a wikitext heading line like "== Text ==" where
	-- level = number of '=' characters at the start of the line, and
	-- text = heading text (with leading/trailing whitespace removed).
	-- Nothing is returned if the line is not a heading or its text is empty.
	-- The trailing '=' run is not checked to be the same length as the first.
	local opening, title = line:match('^(==+)%s*(.-)%s*(==+)%s*$')
	if not title or title == '' then
		return
	end
	return #opening, title
end

local function fields(line)
	-- Return a numbered table of fields split from line.
	-- Items are delimited by "||".
	-- Each item has leading/trailing whitespace removed, and any encoded pipe
	-- characters ("&#124;") are decoded to "|". Encoding is the only way a
	-- field can hold "||" because that text would otherwise be a delimiter.
	-- The second field (for symbol when processing units) is adjusted to
	-- remove any "colspan" at the front of lines like:
	--   "| unitcode || colspan="11" | !Text to display for an error message".
	local t = {}
	line = line .. "||"  -- to get last field
	for item in line:gmatch("%s*(.-)%s*||") do
		-- Extra parentheses discard the replacement count returned by gsub.
		table.insert(t, (item:gsub('&#124;', '|')))
	end
	if t[2] then
		local cleaned = t[2]:match('^%s*colspan%s*=.-|%s*(.*)$')
		if cleaned then
			t[2] = cleaned
		end
	end
	return t
end

local function prepare_section(cfg, maker, lines, section, need_section, need_utype)
	-- Process the first level-two section with the given section name
	-- in the given table of lines of wikitext.
	-- maker is called with (utype, fields) for each table row in the
	-- section, where utype comes from the most recent level-three heading
	-- (in lowercase); if successful, maker stores the item in a table.
	-- Errors thrown by maker are collected (up to cfg.maxerrors) and then
	-- thrown together as one error.
	-- A missing section is an error if need_section is set, and a row before
	-- any level-three heading is an error if need_utype is set.
	local searching = true  -- true until the wanted heading is found
	local problems = collection()
	local utype  -- unit type (from level-three heading)
	local nbsp = '\194\160'  -- nonbreaking space is utf-8 encoded as hex c2 a0
	for linenumber, line in ipairs(lines) do
		if searching then
			-- Skip down to and including the starting heading.
			local level, heading = extract_heading(line)
			if level == 2 and heading == section then
				searching = false
			end
		else
			-- Accumulate unit definitions.
			local first, second = line:sub(1, 1), line:sub(2, 2)
			local is_row = (first == '|' and second ~= '-' and second ~= '}')
			if is_row then
				if need_utype and empty(utype) then
					quit('m_hdg_lev3', line)
				end
				if line:find(nbsp, 1, true) then
					-- For example, "acre ft" does not work if it contains nbsp.
					add_warning('m_wrn_nbsp', linenumber)
				end
				local ok, msg = pcall(maker, utype, fields(line:sub(2)))
				if not ok then
					-- Drop any final period so the line number can follow.
					if msg:sub(-1) == '.' then
						msg = msg:sub(1, -2)
					end
					problems:add(msg .. message('m_line_num', linenumber))
					if problems.n >= cfg.maxerrors then
						break
					end
				end
			else
				local level, heading = extract_heading(line)
				if level == 3 then
					utype = ulower(heading)
				elseif level == 2 then
					-- The next level-two heading ends the section.
					break
				end
			end
		end
	end
	if searching and need_section then
		quit('m_hdg_lev2', section)
	end
	if problems.n > 0 then
		error(problems:join(), 0)
	end
end

local function get_page_lines(page_title)
	-- Fetch the wikitext of the page with the given title and break it
	-- into lines. Leading tabs/spaces and trailing tabs/spaces/carriage
	-- returns are stripped from every line.
	-- Return a collection of the lines; quit if the title is empty or
	-- if the page content cannot be read.
	if empty(page_title) then
		quit('m_no_title')
	end
	local title = mw.title.new(page_title)
	local wikitext = title and title:getContent()
	if not wikitext then
		quit('m_ftl_read', page_title)
		return
	end
	-- Guarantee a final newline so the pattern below also yields the last line.
	if wikitext:sub(-1) ~= '\n' then
		wikitext = wikitext .. '\n'
	end
	local result = collection()
	for text in wikitext:gmatch('[\t ]*(.-)[\t\r ]*\n') do
		result:add(text)
	end
	return result
end

local function prepare_data(cfg, is_sandbox)
	-- Read the page of conversion data (cfg.data_title), and process the
	-- wikitext in each of the wanted level-two sections, in the order
	-- listed below.
	-- Return units, defaults, links, perunits (four tables).
	-- Throw an error if a problem occurs.
	local units, composites, defaults, links = {}, {}, {}, {}
	local perunits, varnames, pernames = {}, {}, {}
	-- Each entry gives: key into mtext.section_names, the factory that
	-- builds the maker, the table handed to that factory, and a code.
	-- Code 0: rows need a level-three heading (unit type), and the
	-- section itself must be present unless running on a sandbox.
	-- Code 1: neither is required.
	local wanted = {
		{ key = 'overrides'   , factory = make_override      , store = overrides , code = 0 },
		{ key = 'conversions' , factory = make_unit          , store = units     , code = 0 },
		{ key = 'outmultiples', factory = make_outputmultiple, store = units     , code = 0 },
		{ key = 'combinations', factory = make_combination   , store = units     , code = 0 },
		{ key = 'inmultiples' , factory = make_inputmultiple , store = composites, code = 0 },  -- after all units defined so default will be defined
		{ key = 'defaults'    , factory = make_default       , store = defaults  , code = 0 },
		{ key = 'links'       , factory = make_link          , store = links     , code = 0 },
		{ key = 'perunits'    , factory = make_perunit       , store = perunits  , code = 1 },
		{ key = 'varnames'    , factory = make_varname       , store = varnames  , code = 1 },
		{ key = 'pernames'    , factory = make_pername       , store = pernames  , code = 1 },
	}
	local lines = get_page_lines(cfg.data_title)
	for _, entry in ipairs(wanted) do
		local heading = mtext.section_names[entry.key]
		local maker = entry.factory(cfg, entry.store)
		local strict = (entry.code == 0)
		-- Flags are either true or nil, never false.
		local need_section = (strict and not is_sandbox) or nil
		local need_utype = strict or nil
		prepare_section(cfg, maker, lines, heading, need_section, need_utype)
	end
	check_all_defaults(cfg, units)
	check_all_pers(cfg, units)
	update_units(units, composites, varnames, pernames)
	return units, defaults, links, perunits
end

local function _makeunits(cfg, results)
	-- Load the text module (cfg.text_title), apply any translation
	-- overrides it supplies, read the wikitext for the conversion data,
	-- and append the generated output lines to the given results
	-- collection. Throw an error if a problem occurs.

	local function copy_present(dest, src, keys)
		-- Copy each listed key from src to dest when src has a value for it.
		for _, key in ipairs(keys) do
			if src[key] then
				dest[key] = src[key]
			end
		end
	end

	local function specification_for(unit)
		-- Pick the output specification for a unit; list-valued fields of
		-- the chosen kind are replaced by their string form.
		if unit.target then
			return alias_specification
		end
		if unit.per then
			unit.per = numbered_table_as_string(unit.per, unit)
			return per_specification
		end
		if unit.shouldbe then
			return shouldbe_specification
		end
		if unit.combination then
			unit.combination = numbered_table_as_string(unit.combination, unit)
			if unit.multiple then
				unit.multiple = numbered_table_as_string(unit.multiple, unit)
			end
			return combination_specification
		end
		return unit_specification
	end

	text_code = require(cfg.text_title)
	-- The text module must provide these tables.
	for _, name in ipairs({ 'SIprefixes', 'eng_scales', 'currency' }) do
		if type(text_code[name]) ~= 'table' then
			quit('m_ftl_table', cfg.text_title, name)
		end
	end

	local translation = text_code.translation_table
	if translation then
		if translation.plural_suffix then
			plural_suffix = translation.plural_suffix
		end
		if translation.specials then
			copy_present(specials, translation.specials, { 'utype', 'ucode' })
		end
		if translation.mtext then
			copy_present(mtext, translation.mtext, { 'section_names', 'titles', 'messages' })
		end
	end

	local is_sandbox
	local conversion_data_title = mtext.titles.conversion_data
	if not cfg.data_title or cfg.data_title == conversion_data_title then
		cfg.data_title = conversion_data_title
	elseif is_test_run then
		-- A different data page during a test run: treat it as a sandbox
		-- and switch to the plain output templates below.
		is_sandbox = true
		data_preamble = nil
		data_postamble = nil
		out_unit_prefix = 'local all_units = {'
		out_unit_suffix = '}'
		out_default_prefix = '\nlocal default_exceptions = {'
		out_default_suffix = '}'
		out_default_item = '\t["{symbol}"] = "{default}",'
		out_link_prefix = '\nlocal link_exceptions = {'
		out_link_suffix = '}'
		out_link_item = '\t["{symbol}"] = "{link}",'
		out_perunit_prefix = '\nlocal per_unit_fixups = {'
		out_perunit_suffix = '}'
		out_perunit_item = '\t["{lhs}"] = {rhs},'
	end

	local units, defaults, links, perunits = prepare_data(cfg, is_sandbox)

	if data_preamble then
		results:add(data_preamble)
	end

	-- The units table.
	results:add(out_unit_prefix)
	for _, unit in ipairs(units) do
		add_unit_lines(results, unit, specification_for(unit))
	end
	results:add(out_unit_suffix)

	-- The remaining tables; on a sandbox, empty ones are omitted.
	local extras = {
		{ rows = defaults, prefix = out_default_prefix, item = out_default_item, suffix = out_default_suffix },
		{ rows = links   , prefix = out_link_prefix   , item = out_link_item   , suffix = out_link_suffix    },
		{ rows = perunits, prefix = out_perunit_prefix, item = out_perunit_item, suffix = out_perunit_suffix },
	}
	for _, extra in ipairs(extras) do
		local wanted = (#extra.rows > 0) or not is_sandbox
		if wanted then
			results:add(extra.prefix)
			for _, row in ipairs(extra.rows) do
				-- Each "{name}" in the item template is looked up in row.
				local filled = extra.item:gsub('{([%w_]+)}', row)
				results:add(filled)
			end
			results:add(extra.suffix)
		end
	end

	if data_postamble then
		results:add(data_postamble)
	end
end

local function makeunits(frame)
	-- Exported entry point.
	-- frame.args[1] is the title of the page holding the conversion data.
	-- frame.args[2] is the title of the text module
	-- (default 'Module:Convert/text').
	-- frame.args.varcolumns optionally overrides the number of columns
	-- in the "Variable names" section.
	-- Return the generated text (or an error report if generation failed),
	-- preceded by any warnings, wrapped in html pre tags.
	local args = frame.args
	local config = {
		data_title = args[1],
		text_title = args[2] or 'Module:Convert/text',
		varcolumns = tonumber(args.varcolumns) or 5,  -- #columns in "Variable names" section; slwiki uses 5
		maxerrors = 20,
	}
	local results = collection()
	local ok, msg = pcall(_makeunits, config, results)
	if not ok then
		-- Report the failure in the output rather than raising it.
		results:add(message('m_error'))
		results:add('')
		results:add(msg)
	end
	local warn = ''
	if warnings.n > 0 then
		warn = message('m_warning') .. '\n\n' .. warnings:join() .. '\n\n'
	end
	-- Pre tags returned by a module are html tags, not like wikitext <pre>...</pre>.
	-- The following renders the text as is, and preserves tab characters.
	return '<pre>\n' .. mw.text.nowiki(warn .. results:join()) .. '\n</pre>\n'
end

-- Module export table: makeunits is the only function exposed to callers.
return { makeunits = makeunits }