-- Module:ISO 639 name/ISO 639 name to code/make

require('strict');

-- Accumulator filled by lang_add() and read by iso_639_name_to_code(): keyed by lowercased language name;
-- each value is a sequence of '[index]="code"' strings.
local temp = {};

--[[--------------------------< L A N G _ A D D >--------------------------------------------------------------

temp is a table of tables where the key is the lowercased language name and the value is a sequence of
'[index]="code"' strings, one for each ISO 639 code associated with that language name.

This function adds the language name (as key) and its code (as a sequence element) using an appropriate index
number.

	lang - the language name from the source data
	code - the associated ISO 639 code from the source data
	part - the ISO 639 part as a string: '1', '2', '2B', '3', or '5' for the base data; the same values prefixed
		with 'O' for the override data or with 'D' for the deprecated data

Raises an error when part is not one of the recognized part strings.

]]

local part_index = {															-- convert part string to a numeric index; built once, not on every call
	['1']=1, ['2']=2, ['2B']=3, ['3']=4, ['5']=5,								-- for the base ISO 639 parts
	['O1']=6, ['O2']=7, ['O2B']=8, ['O3']=9, ['O5']=10,							-- for the override tables
	['D1']=11, ['D2']=12, ['D2B']=13, ['D3']=14, ['D5']=15,						-- for the deprecated tables
	};

local function lang_add (lang, code, part)
	local index = part_index[part];
	if not index then															-- fail here with a useful message instead of inside string.format()
		error ('lang_add: unknown ISO 639 part: ' .. tostring (part), 2);
	end

	lang = mw.ustring.lower (lang);												-- convert to lowercase for use as table index

	local codes = temp[lang];
	if not codes then															-- when no entry for this language
		codes = {};																-- make a blank entry
		temp[lang] = codes;
	end

	table.insert (codes, string.format ('[%s]=\"%s\"', index, code));			-- add the code; codes are added in the order that this function is called in iso_639_name_to_code()
end

-- TODO: better name

--[[--------------------------< I T E R A T E _ T A B L E >----------------------------------------------------

Walks one code-to-names table and hands every name / code pair to lang_add().

	part_data - table whose keys are ISO 639 codes and whose values are sequences of language names
	part_number - part identifier string; forwarded unchanged to lang_add()

]]

local function iterate_table (part_data, part_number)
	local function add_names (iso_code, names)
		for _, name in ipairs (names) do										-- a code can have more than one name; register each of them
			lang_add (name, iso_code, part_number);
		end
	end

	-- NOTE(review): callers in this file pass mw.loadData() tables, so iteration stays on pairs() / ipairs()
	for iso_code, names in pairs (part_data) do
		add_names (iso_code, names);
	end
end

--[[--------------------------< I S O _ 6 3 9 _ N A M E _ T O _ C O D E >--------------------------------------

Reads the code-to-name source tables and converts them to a name-to-code table.

Takes no arguments.  Returns a string of Lua source text: a key comment followed by a 'return {...}' table
constructor in which each entry maps a lowercased language name to its codes.  Each code is stored at the numeric
index listed in the key comment.  Entries are sorted by language name; codes within an entry are sorted by index.

The result is built from the module-level temp table, which lang_add() fills as a side effect of this function.

]]

local function iso_639_name_to_code ()
	local base_parts = {														-- start with part 3 because it has the most codes
		{part = '3', module = 'Module:ISO 639 name/ISO 639-3'},
		{part = '5', module = 'Module:ISO 639 name/ISO 639-5'},
		{part = '2', module = 'Module:ISO 639 name/ISO 639-2'},
		{part = '2B', module = 'Module:ISO 639 name/ISO 639-2B'},
		{part = '1', module = 'Module:ISO 639 name/ISO 639-1'},
		};

	for _, base in ipairs (base_parts) do
		iterate_table (mw.loadData (base.module), base.part);					-- ISO 639-x language codes / names
	end

	local grouped_sources = {													-- each of these modules holds one sub-table per part
		{module = 'Module:ISO 639 name/ISO 639 override', key = 'override_', prefix = 'O'},
		{module = 'Module:ISO 639 name/ISO 639 deprecated', key = 'deprecated_', prefix = 'D'},
		};

	for _, source in ipairs (grouped_sources) do
		local part_data = mw.loadData (source.module);							-- has data for all parts
		for _, part in ipairs ({'1', '2', '2B', '3', '5'}) do					-- for each of the per-part tables
			iterate_table (part_data[source.key .. part], source.prefix .. part);	-- prefixed part selects the override / deprecated index in lang_add()
		end
	end

	local function comp (a, b)													-- order '[index]="code"' strings by their numeric index
		local a_index = tonumber (a:match ('(%d+)'));
		local b_index = tonumber (b:match ('(%d+)'));
		if a_index ~= b_index then
			return a_index < b_index;
		end
		return a < b;															-- same index: fall back to string order so that the output is reproducible
	end

	local out = {};
	for lang, codes in pairs (temp) do
		table.sort (codes, comp);												-- codes are added in the order that lang_add() is called above; sort to make pretty
		table.insert (out, table.concat ({'["', lang, '"] = {', table.concat (codes, ', '), '}'}));	-- reformat
	end

	table.sort (out);															-- sort in language name order

	-- line breaks inside quoted strings must be written as \n escapes; a raw line break there is a syntax error
	local key_str = table.concat ({
		'--[[--------------------------< I S O _ 6 3 9 _ N A M E _ T O _ C O D E >--------------------------------------\n\n',
		'Key:\n',
		'[1]=ISO 639-1 [6]=ISO 639-1 override [11]=ISO 639-1 deprecated\n',
		'[2]=ISO 639-2 [7]=ISO 639-2 override [12]=ISO 639-2 deprecated\n',
		'[3]=ISO 639-2B [8]=ISO 639-2B override [13]=ISO 639-2B deprecated\n',
		'[4]=ISO 639-3 [9]=ISO 639-3 override [14]=ISO 639-3 deprecated\n',
		'[5]=ISO 639-5 [10]=ISO 639-5 override [15]=ISO 639-5 deprecated',
		'\n]]\n\n'
		});

	return table.concat ({'\n\n', key_str, 'return {\n', table.concat (out, ',\n'), '\n}\n'});	-- render
end

--[[--------------------------< E X P O R T E D _ F U N C T I O N S >------------------------------------------

The module's public interface: a table with a single entry point, iso_639_name_to_code.

]]

local exports = {};
exports.iso_639_name_to_code = iso_639_name_to_code;

return exports;