-- Module:ISO 639 name/ISO 639-5/make

require('strict');

--[[--------------------------< I S O _ 5 _ N A M E _ E X T R A C T >------------------------------------------

{{#invoke:Language/data/ISO 639-5/make|ISO_5_name_extract|file-date=2013-02-11}}

reads a local copy of data from the table at https://www.loc.gov/standards/iso639-5/id.php, extracts

the ISO 639-5 codes and their associated language names

useful lines in the source table have the form:

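<code>\t<english name>\t<french name>\t<639-2>\t<hierarchy>\t<notes>\n

where:

<code> is the 639-5 language code

<english name> is the English name

<french name> is the French name (not used here)

<639-2> is language group or remainder group or blank (not used here)

<hierarchy> (not used here)

<notes> (not used here)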

for the file date use the date listed at the bottom of the source page in yyyymmdd numeric format without hyphens or spaces

]]

--[[
Builds the text of a Lua data table that maps ISO 639-5 codes to English
language names, using the unparsed content of the current page as input.

Parameters:
	frame	- Scribunto frame object; frame.args["file-date"] is required and is
			  copied verbatim into the 'File-Date:' comment of the output.

Returns:
	a string of the form

		-- File-Date: <file-date>
		return {
		["<code>"] = {"<name>"},
		...
		}

	with one entry per matching source line, sorted by code.

Raises:
	an error when |file-date= is not supplied.
]]
local function ISO_5_name_extract (frame)
	local date_arg = frame.args["file-date"];
	if date_arg == nil then
		-- fail with a readable message instead of "attempt to concatenate a nil value"
		error ('ISO_5_name_extract: |file-date= is required', 0);
	end
	local file_date = 'File-Date: ' .. date_arg;								-- the file date line

	-- getContent() yields nil when the page has no content; treat that as no source lines
	local content = mw.title.getCurrentTitle():getContent() or '';
	local entries = {};															-- formatted '["code"] = {"name"}' strings

	for _, line in ipairs (mw.text.split (content, '[\r\n]')) do				-- for each line of the source text
		-- a useful line starts with three lowercase letters, a tab, then the English name and another tab;
		-- \226\128\145 are the UTF-8 bytes of the non-breaking hyphen U+2011 (E2 80 91).
		-- the name set is byte-based, so names holding other non-ASCII bytes do not match
		local code, name = line:match ('^(%l%l%l)\t([%a %-,%(%)\226\128\145]+)\t');

		if code then
			name = name:gsub ('\226\128\145', '-');								-- replace non-breaking hyphens with hyphen-minus
			entries[#entries + 1] = string.format ('["%s"] = {"%s"}', code, name);
		end
	end

	table.sort (entries);														-- every entry begins with its code, so this orders by code

	-- Lua quoted strings cannot span physical lines, so line breaks are written as \n escapes.
	-- NOTE(review): the breaks are plain newlines; if this output is meant to be displayed on a
	-- wiki page it presumably needs a <pre>...</pre> wrapper to keep its layout - confirm.
	return table.concat ({
		'\n\n-- ', file_date, '\n',
		'return {\n',
		table.concat (entries, ',\n'),
		'\n}\n',
		'\n',
	});
end

--[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------

]]

-- Public interface of this module: the single entry point reachable through {{#invoke:}}.
local exports = {};
exports.ISO_5_name_extract = ISO_5_name_extract;

return exports;