-- Module:Sandbox/trappist the monk/bob

local p = {};	-- table of this module's functions; it is returned at the end of the file

-- crude comparison between the various ISO 639 parts data and iana 2020-09-17

-- Returns a sorted sequence holding every key of <source> that has no truthy
-- entry in <reference>.  Keys that have a truthy entry in the optional <exempt>
-- table are left out of the result.
local function keys_missing_from (source, reference, exempt)
	local missing = {};

	for key in pairs (source) do
		if not reference[key] and not (exempt and exempt[key]) then
			missing[#missing + 1] = key;
		end
	end

	table.sort (missing);											-- pairs() order is unspecified; sort so the report is stable
	return missing;
end

-- Loads the ISO 639, IANA, synonym, wp-languages and override data modules and
-- cross-checks their keys.  Takes no arguments.
--
-- Returns a single string: for each comparison a heading line followed by the
-- mw.dumpObject() rendering of the (sorted) list, with a blank line between
-- sections.
function p.comp ()
	local iana = mw.loadData ('Module:Lang/data/iana languages/sandbox');
	local iso2 = mw.loadData ('Module:ISO 639 name/ISO 639-2');
	local iso2B = mw.loadData ('Module:ISO 639 name/ISO 639-2B');
	local iso3 = mw.loadData ('Module:ISO 639 name/ISO 639-3');
	local dep3 = mw.loadData ('Module:ISO 639 name/ISO 639-3 (dep)');
	local iso5 = mw.loadData ('Module:ISO 639 name/ISO 639-5');
	local syn = mw.loadData ('Module:Lang/ISO 639 synonyms');
	local wp_lang = mw.loadData ('Module:Language/data/wp languages');
	local override = mw.loadData ('Module:Lang/data/sandbox').override;

	-- the trailing remarks are the original author's notes on what each list turned out to hold
	local iso2_not_in_iana = keys_missing_from (iso2, iana.active, syn);		-- ISO 639-1 synonymous codes not in iana
	local iso2B_not_in_iana = keys_missing_from (iso2B, iana.active);			-- none of the ISO 639-2B codes in iana
	local iso3_not_in_iana = keys_missing_from (iso3, iana.active, syn);		-- ISO 639-1 synonymous codes and hbs (Serbo Croatian) not in iana
	local dep3_not_in_iana = keys_missing_from (dep3, iana.deprecated);			-- none of the deprecated ISO 639-3 codes in iana
	local iana_dep_not_in_dep3 = keys_missing_from (iana.deprecated, dep3);
	local iso5_not_in_iana = keys_missing_from (iso5, iana.active);				-- ISO 639-1 synonymous codes not in iana (bih)

	-- three-character active iana codes that ISO 639-3 does not list; this list
	-- is only used by the alternative (commented-out) return at the bottom
	local iana_not_in_iso3 = {};
	for code in pairs (iana.active) do
		if (3 == code:len()) and not iso3[code] then
			table.insert (iana_not_in_iso3, code);
		end
	end
	table.sort (iana_not_in_iso3);

	-- wp_lang entries (key lowercased) that have no override; each is rendered
	-- with its wp_lang name and, when the code is active in iana, the iana name
	local wp_lang_not_in_override = {};
	for k, v in pairs (wp_lang) do
		local code = k:lower();
--error (mw.dumpObject (iana[code][1]))
		if not override[code] then
			local iana_entry = iana.active[code];
			if iana_entry then
				table.insert (wp_lang_not_in_override, table.concat ({
					code,
					': ',
					v[1],
					'; iana: ',
					iana_entry[1],
					}));
			else
				table.insert (wp_lang_not_in_override, table.concat ({
					code,
					': ',
					v[1],
					'; not in iana',
					}));
			end
--			table.insert (wp_lang_not_in_override, code);
		end
	end
	table.sort (wp_lang_not_in_override);

	-- every section separator is '\n\n'; the last three used to be written
	-- '\n\d…', '\n\i…' and '\n\w…', which are not valid escape sequences
	return 'iso2_not_in_iana:\n' .. mw.dumpObject (iso2_not_in_iana) ..
		'\n\niso2B_not_in_iana:\n' .. mw.dumpObject (iso2B_not_in_iana) ..
		'\n\niso3_not_in_iana:\n' .. mw.dumpObject (iso3_not_in_iana) ..
		'\n\niso5_not_in_iana:\n' .. mw.dumpObject (iso5_not_in_iana) ..
		'\n\ndep3_not_in_iana:\n' .. mw.dumpObject (dep3_not_in_iana) ..
		'\n\niana_dep_not_in_dep3:\n' .. mw.dumpObject (iana_dep_not_in_dep3) ..
		'\n\nwp_lang_not_in_override:\n' .. mw.dumpObject (wp_lang_not_in_override)

--	return #dep3_not_in_iana, 'dep3_not_in_iana:\n' .. mw.dumpObject (dep3_not_in_iana)
--	return mw.dumpObject (iana_not_in_iso3)
end

----------------------------------------------------

-- Sorts the local interwiki prefixes against the language codes known to
-- MediaWiki.  Four tables are filled:
--   lang_not_in_iw    language codes that are not interwiki prefixes (code -> language name)
--   iw_is_lang        prefixes that are also language codes (prefix -> language name)
--   iw_is_extra_lang  other prefixes flagged isExtraLanguageLink (prefix -> url)
--   iw_is_not_lang    remaining prefixes whose url contains '.wikipedia.org' (prefix -> url)
--
-- Returns the mw.dumpObject() rendering of iw_is_lang; swap in one of the
-- commented-out returns at the bottom to inspect one of the other tables.
function p.test ()
	local lang_names = mw.language.fetchLanguageNames ('en', 'all');
	local interwikis = mw.site.interwikiMap ('local');

	local lang_not_in_iw = {};
	local iw_is_lang = {};
	local iw_is_not_lang = {};
	local iw_is_extra_lang = {};

	for code, name in pairs (lang_names) do
		if not interwikis[code] then
			lang_not_in_iw[code] = name;
		end
	end

	for _, iw in pairs (interwikis) do
		local prefix = iw.prefix;
		local lang_name = lang_names[prefix];

		if lang_name then
			iw_is_lang[prefix] = lang_name;
		elseif iw.isExtraLanguageLink then
			iw_is_extra_lang[prefix] = iw.url;
		elseif iw.url:match ('%.wikipedia%.org') then
			iw_is_not_lang[prefix] = iw.url;
		end
	end

--[[
notes on prefixes that have a wikipedia.org url but are not in the language-name list

these are wikipedia language prefixes that seem to be redirects; these do not add to the languages link-list
	["cmn"] = "https://zh.wikipedia.org/wiki/$1",					-- Mandarin Chinese (ISO 639-3 code)
	["cz"] = "https://cs.wikipedia.org/wiki/$1",					-- Czech (ISO 3166 country code)
	["dk"] = "https://da.wikipedia.org/wiki/$1",					-- Danish (ISO 3166 country code)
	["epo"] = "https://eo.wikipedia.org/wiki/$1",					-- Esperanto (ISO 639-3 code)
	["jp"] = "https://ja.wikipedia.org/wiki/$1",					-- Japanese (ISO 3166 country code)
	["minnan"] = "https://zh-min-nan.wikipedia.org/wiki/$1",
	["zh-cfr"] = "https://zh-min-nan.wikipedia.org/wiki/$1",

these are not wikipedia language codes:
	["nost"] = "https://nostalgia.wikipedia.org/wiki/$1",
	["nostalgia"] = "https://nostalgia.wikipedia.org/wiki/$1",
	["sep11"] = "https://sep11.wikipedia.org/wiki/$1",
	["tenwiki"] = "https://ten.wikipedia.org/wiki/$1",
	["test2wiki"] = "https://test2.wikipedia.org/wiki/$1",
	["testwiki"] = "https://test.wikipedia.org/wiki/$1",
	["w"] = "https://en.wikipedia.org/wiki/$1",
	["wg"] = "https://wg-en.wikipedia.org/wiki/$1",
	["wikipedia"] = "https://en.wikipedia.org/wiki/$1",
	["wikipediawikipedia"] = "https://en.wikipedia.org/wiki/Wikipedia:$1",
]]

--	return mw.dumpObject (lang_not_in_iw);		-- list of lang codes that are not prefixes (there are a lot)
	return mw.dumpObject (iw_is_lang);			-- list of prefixes that match supported language codes (use this list)
--	return mw.dumpObject (iw_is_not_lang);		-- list of prefixes that have wikipedia.org url but prefix isn't found in the langs list
--	return mw.dumpObject (iw_is_extra_lang);	-- none are marked with ["isExtraLanguageLink"] = true,
end

-- Tidies the wikitext of a template call (the renames below target citation
-- parameter names).
--
-- <text> is the template wikitext (a string).  Returns the tidied string:
--   * whitespace around '=' is removed and every '|' is written as ' |'
--   * |accessdate=, |archivedate= and |<name>url= are renamed to their
--     hyphenated forms
--   * empty parameters are dropped
--   * whitespace just inside the braces is removed
--
-- The renames are anchored to '|<name>=' so that only parameter names are
-- touched; an unanchored replacement also rewrites parameter values, e.g.
-- 'https://tinyurl.com/x' would become 'https://tiny-url.com/x'.
-- NOTE(review): the spacing substitutions still apply to '=' and '|' that occur
-- inside values (query strings, piped wikilinks).
function p.format (text)
	local str = text:gsub ('%s*=%s*', '='):gsub ('%s*|%s*', ' |');		-- uniform spacing; after this every parameter reads ' |name=value'

	str = str:gsub ('|accessdate=', '|access-date='):gsub ('|archivedate=', '|archive-date=');	-- hyphenated version
	str = str:gsub ('|([%a%-]*%a)url=', '|%1-url=');					-- archive, dead, chapter, etc urls to hyphenated versions; |url= and |xxx-url= do not match
	str = str:gsub ('|[%a%-]+= +', '');									-- remove empty parameters
	str = str:gsub (' +|[%a%-]+=}', '}');								-- remove empty parameter at end of template
	str = str:gsub ('{%s+', '{'):gsub ('%s+}', '}');					-- no spacing

	return str;
end

-- Compares two strings and reports the first character position at which they
-- differ.
--
-- <frame> is a table holding the reference string at [1] and the string under
-- test at [2].
--
-- Returns 'ref == test' when the two values are equal.  Otherwise returns a
-- message that says whether the byte lengths match, the 1-based character
-- index of the first difference, and the differing character from each
-- string.  When one string ends before the other, '<end of string>' is
-- reported in place of the missing character.
function p.eq (frame)
	local ref = frame[1];
	local test = frame[2];

	if ref == test then
		return 'ref == test'
	end

	local msg;
	if ref:len() == test:len() then										-- byte lengths, not character counts
		msg = 'same length; ';
	else
		msg = 'diff length; ';
	end

	-- render a codepoint for the message; nil means the string has no character at that index
	local function show (codepoint)
		return codepoint and mw.ustring.char (codepoint) or '<end of string>';
	end

	local i = 1;
	while true do
		local r = mw.ustring.codepoint (ref, i);						-- yields nothing (nil) once i is past the end of the string
		local t = mw.ustring.codepoint (test, i);

		if r ~= t then
			return msg .. 'diff @ char: ' .. i .. '; ref: ' .. show (r) .. '; test: ' .. show (t);
		end

		if nil == r then												-- both strings exhausted without a differing codepoint; stop rather than loop forever
			return msg .. 'no differing character found';
		end

		i = i + 1;
	end
end

return p;	-- hand back the table that holds comp, test, format and eq