-- Module:Sandbox/Trappist the monk/Wiktionary/make data

-- use this module to make the data that goes in Module:Sandbox/Trappist the monk/Wiktionary/data. See main().

--[[--------------------------< M >----------------------------------------------------------------------------

This is taken from wikt:Module:languages/data/2 for comparison to en:Module:Language/data so that we might add data

to the latter in support of {{wiktionary}} which currently (2023-12-13) seems to be missing quite a few language

tags.

This table has been hacked to remove stuff that isn't necessary for the task at hand (concatenation operators,

functions, subtables, etc). If the source table is quite dynamic, then a better method might be to parse it as a

text file to get only the stuff we need (tag and language name) and ignore the rest. That way manual removal of

stuff won't be necessary.

]]

-- Stand-in tables for names that the records below still reference (c.punc, c.braille, p[n], s["..."]).

-- They are created empty and nothing in this listing assigns to them, so every such lookup evaluates to nil

-- instead of raising an "attempt to index a nil value" error.

local c = {};

local p = {};

local s = {};

-- m maps a language tag (e.g. "aa") to its record. In each record the first positional value is the

-- language name (e.g. "Afar"); the tag and the name are the two pieces the header comment says are needed.

local m = {};

m["aa"] = {

"Afar",

27811,

"cus-eas",

"Latn",

}

m["ab"] = {

"Abkhaz",

5111,

"cau-abz",

"Cyrl, Geor, Latn",

translit = {

Cyrl = "ab-translit",

Geor = "Geor-translit",

},

override_translit = true,

display_text = {Cyrl = s["cau-Cyrl-displaytext"]},

entry_name = {

Cyrl = s["cau-Cyrl-entryname"],

Latn = s["cau-Latn-entryname"],

},

sort_key = {

Cyrl = {

from = {

"х'ә", -- 3 chars

"гь", "гә", "ӷь", "ҕь", "ӷә", "ҕә", "дә", "ё", "жь", "жә", "ҙә", "ӡә", "ӡ'", "кь", "кә", "қь", "қә", "ҟь", "ҟә", "ҫә", "тә", "ҭә", "ф'", "хь", "хә", "х'", "ҳә", "ць", "цә", "ц'", "ҵә", "ҵ'", "шь", "шә", "џь", -- 2 chars

"ӷ", "ҕ", "ҙ", "ӡ", "қ", "ҟ", "ԥ", "ҧ", "ҫ", "ҭ", "ҳ", "ҵ", "ҷ", "ҽ", "ҿ", "ҩ", "џ", "ә" -- 1 char

},

},

},

}

m["ae"] = {

"Avestan",

29572,

"ira-cen",

"Avst, Gujr",

translit = {Avst = "Avst-translit"},

wikipedia_article = "Avestan",

}

m["af"] = {

"Afrikaans",

14196,

"gmw-frk",

"Latn, Arab",

ancestors = "nl",

sort_key = {

Latn = {

from = {"['ʼ]n"},

}

},

}

m["ak"] = {

"Akan",

28026,

"alv-ctn",

"Latn",

}

m["am"] = {

"Amharic",

28244,

"sem-eth",

"Ethi",

translit = "Ethi-translit",

}

m["an"] = {

"Aragonese",

8765,

"roa-ibe",

"Latn",

ancestors = "roa-oan",

}

-- Arabic record: name, numeric id, family code, script list, then named options.

m["ar"] = {

"Arabic",

13955,

"sem-arb",

"Arab, Hebr, Syrc, Brai",

translit = {Arab = "ar-translit"},

entry_name = {Arab = "ar-entryname"},

-- put Judeo-Arabic (Hebrew-script Arabic) under the category header

-- U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after Arabic script titles

-- NOTE(review): the Hebr table below is empty in this copy, so the rule described above is not actually present here.

sort_key = {

Hebr = {

},

},

}

m["as"] = {

"Assamese",

29401,

"inc-eas",

"as-Beng",

ancestors = "inc-mas",

translit = "as-translit",

}

m["av"] = {

"Avar",

29561,

"cau-ava",

"Cyrl, Latn, Arab",

ancestors = "oav",

translit = {

Cyrl = "cau-nec-translit",

Arab = "ar-translit",

},

override_translit = true,

display_text = {Cyrl = s["cau-Cyrl-displaytext"]},

entry_name = {

Cyrl = s["cau-Cyrl-entryname"],

Latn = s["cau-Latn-entryname"],

},

sort_key = {

Cyrl = {

from = {"гъ", "гь", "гӏ", "ё", "кк", "къ", "кь", "кӏ", "лъ", "лӏ", "тӏ", "хх", "хъ", "хь", "хӏ", "цӏ", "чӏ"},

},

},

}

m["ay"] = {

"Aymara",

4627,

"sai-aym",

"Latn",

}

m["az"] = {

"Azerbaijani",

9292,

"trk-ogz",

"Latn, Cyrl, fa-Arab",

ancestors = "trk-oat",

dotted_dotless_i = true,

entry_name = {["fa-Arab"] = "ar-entryname"},

sort_key = {

Latn = {

from = {

"i", -- Ensure "i" comes after "ı".

"ç", "ə", "ğ", "x", "ı", "q", "ö", "ş", "ü", "w"

},

},

Cyrl = {

from = {"ғ", "ә", "ы", "ј", "ҝ", "ө", "ү", "һ", "ҹ"},

},

},

}

m["ba"] = {

"Bashkir",

13389,

"trk-kbu",

"Cyrl",

translit = "ba-translit",

override_translit = true,

sort_key = {

from = {"ғ", "ҙ", "ё", "ҡ", "ң", "ө", "ҫ", "ү", "һ", "ә"},

},

}

m["be"] = {

"Belarusian",

9091,

"zle",

"Cyrl, Latn",

ancestors = "zle-obe",

translit = {Cyrl = "be-translit"},

entry_name = {

remove_exceptions = {"Ć", "ć", "Ń", "ń", "Ś", "ś", "Ź", "ź"},

},

sort_key = {

Cyrl = {

from = {"ґ", "ё", "і", "ў"},

},

Latn = {

from = {"ć", "č", "dz", "dź", "dž", "ch", "ł", "ń", "ś", "š", "ŭ", "ź", "ž"},

},

},

standardChars = {

Cyrl = "АаБбВвГгДдЕеЁёЖжЗзІіЙйКкЛлМмНнОоПпРрСсТтУуЎўФфХхЦцЧчШшЫыЬьЭэЮюЯя",

Latn = "AaBbCcĆćČčDdEeFfGgHhIiJjKkLlŁłMmNnŃńOoPpRrSsŚśŠšTtUuŬŭVvYyZzŹźŽž",

c.punc

},

}

m["bg"] = {

"Bulgarian",

7918,

"zls",

"Cyrl",

ancestors = "cu-bgm",

translit = "bg-translit",

entry_name = {

remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"},

},

standardChars = "АаБбВвГгДдЕеЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЬьЮюЯя"

}

m["bh"] = {

"Bihari",

135305,

"inc-eas",

"Deva",

ancestors = "pra-mag",

}

m["bi"] = {

"Bislama",

35452,

"crp",

"Latn",

ancestors = "en",

}

m["bm"] = {

"Bambara",

33243,

"dmn-emn",

"Latn",

sort_key = {

from = {"ɛ", "ɲ", "ŋ", "ɔ"},

},

}

m["bn"] = {

"Bengali",

9610,

"inc-eas",

"Beng, Newa",

ancestors = "inc-mbn",

translit = {Beng = "bn-translit"},

}

m["bo"] = {

"Tibetan",

34271,

"sit-tib",

"Tibt", -- sometimes Deva?

ancestors = "xct",

translit = "Tibt-translit",

override_translit = true,

display_text = s["Tibt-displaytext"],

entry_name = s["Tibt-entryname"],

sort_key = "Tibt-sortkey",

}

m["br"] = {

"Breton",

12107,

"cel-brs",

"Latn",

ancestors = "xbm",

sort_key = {

from = {"ch", "c['ʼ’]h"},

},

}

m["ca"] = {

"Catalan",

7026,

"roa-ocr",

"Latn",

ancestors = "roa-oca",

sort_key = {

from = {"l·l"},

},

standardChars = "AaÀàBbCcÇçDdEeÉéÈèFfGgHhIiÍíÏïJjLlMmNnOoÓóÒòPpQqRrSsTtUuÚúÜüVvXxYyZz·"

}

m["ce"] = {

"Chechen",

33350,

"cau-vay",

"Cyrl, Latn, Arab",

translit = {

Cyrl = "cau-nec-translit",

Arab = "ar-translit",

},

override_translit = true,

display_text = {Cyrl = s["cau-Cyrl-displaytext"]},

entry_name = {

Cyrl = s["cau-Cyrl-entryname"],

Latn = s["cau-Latn-entryname"],

},

sort_key = {

Cyrl = {

from = {"аь", "гӏ", "ё", "кх", "къ", "кӏ", "оь", "пӏ", "тӏ", "уь", "хь", "хӏ", "цӏ", "чӏ", "юь", "яь"},

},

},

}

m["ch"] = {

"Chamorro",

33262,

"poz-sus",

"Latn",

sort_key = {

from = {"å", "ch", "ñ", "ng"},

},

}

m["co"] = {

"Corsican",

33111,

"roa-itd",

"Latn",

sort_key = {

from = {"chj", "ghj", "sc", "sg"},

},

standardChars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìÏïJjLlMmNnOoÒòPpQqRrSsTtUuÙùÜüVvZz"

}

m["cr"] = {

"Cree",

33390,

"alg",

"Cans, Latn",

translit = {Cans = "cr-translit"},

}

m["cs"] = {

"Czech",

9056,

"zlw",

"Latn",

ancestors = "cs-ear",

sort_key = {

from = {"á", "č", "ď", "é", "ě", "ch", "í", "ň", "ó", "ř", "š", "ť", "ú", "ů", "ý", "ž"},

},

standardChars = "AaÁáBbCcČčDdĎďEeÉéĚěFfGgHhIiÍíJjKkLlMmNnŇňOoÓóPpRrŘřSsŠšTtŤťUuÚúŮůVvYyÝýZzŽž"

}

m["cu"] = {

"Old Church Slavonic",

35499,

"zls",

"Cyrs, Glag",

translit = {Cyrs = "Cyrs-translit", Glag = "Glag-translit"},

entry_name = {Cyrs = s["Cyrs-entryname"]},

sort_key = {Cyrs = s["Cyrs-sortkey"]},

}

m["cv"] = {

"Chuvash",

33348,

"trk-ogr",

"Cyrl",

ancestors = "xbo",

translit = "cv-translit",

override_translit = true,

sort_key = {

from = {"ӑ", "ё", "ӗ", "ҫ", "ӳ"},

},

}

m["cy"] = {

"Welsh",

9309,

"cel-brw",

"Latn",

ancestors = "wlm",

sort_key = {

from = {"ch", "dd", "ff", "ng", "ll", "ph", "rh", "th"},

},

standardChars = "ÂâAaBbCcDdEeÊêFfGgHhIiÎîLlMmNnOoÔôPpRrSsTtUuÛûWwŴŵYyŶŷ"

}

m["da"] = {

"Danish",

9035,

"gmq-eas",

"Latn",

ancestors = "gmq-oda",

sort_key = {

remove_exceptions = {"å"},

from = {"æ", "ø", "å"},

},

standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå"

}

m["de"] = {

"German",

188,

"gmw-hgm",

"Latn, Latf",

ancestors = "gmh",

sort_key = {

from = {"ß"},

},

standardChars = "AaÄäBbCcDdEeFfGgHhIiJjKkLlMmNnOoÖöPpQqRrSsẞßTtUuÜüVvWwXxYyZz"

}

m["dv"] = {

"Dhivehi",

32656,

"inc-ins",

"Thaa, Diak",

ancestors = "pra-hel",

translit = {

Thaa = "dv-translit",

Diak = "Diak-translit",

},

override_translit = true,

}

m["dz"] = {

"Dzongkha",

33081,

"sit-tib",

"Tibt",

ancestors = "xct",

translit = "Tibt-translit",

override_translit = true,

display_text = s["Tibt-displaytext"],

entry_name = s["Tibt-entryname"],

sort_key = "Tibt-sortkey",

}

m["ee"] = {

"Ewe",

30005,

"alv-gbe",

"Latn",

sort_key = {

from = {"ɖ", "dz", "ɛ", "ƒ", "gb", "ɣ", "kp", "ny", "ŋ", "ɔ", "ts", "ʋ"},

},

}

m["el"] = {

"Greek",

9129,

"grk",

"Grek, Polyt, Brai",

ancestors = "el-kth",

translit = {

Grek = "el-translit",

Polyt = "grc-translit",

},

override_translit = true,

entry_name = {

Polyt = {

},

},

sort_key = {

Grek = s["Grek-sortkey"],

Polyt = s["Grek-sortkey"],

},

standardChars = {

Grek = "΅·ͺ΄ΑαΆάΒβΓγΔδΕεέΈΖζΗηΉήΘθΙιΊίΪϊΐΚκΛλΜμΝνΞξΟοΌόΠπΡρΣσςΤτΥυΎύΫϋΰΦφΧχΨψΩωΏώ",

Brai = c.braille,

c.punc

},

}

m["en"] = {

"English",

1860,

"gmw-ang",

"Latn, Brai, Shaw, Dsrt", -- entries in Shaw or Dsrt might require prior discussion

wikimedia_codes = "en, simple",

ancestors = "enm",

sort_key = {

Latn = {

from = {"æ", "œ"},

},

},

standardChars = {

Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",

Brai = c.braille,

c.punc

},

}

m["eo"] = {

"Esperanto",

143,

"art",

"Latn",

sort_key = {

from = {"ĉ", "ĝ", "ĥ", "ĵ", "ŝ", "ŭ"},

},

standardChars = "AaBbCcĈĉDdEeFfGgĜĝHhĤĥIiJjĴĵKkLlMmNnOoPpRrSsŜŝTtUuŬŭVvZz"

}

m["es"] = {

"Spanish",

1321,

"roa-ibe",

"Latn, Brai",

ancestors = "osp",

sort_key = {

Latn = {

from = {"ñ"},

},

},

standardChars = {

Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxYyZz",

Brai = c.braille,

c.punc

},

}

m["et"] = {

"Estonian",

9072,

"urj-fin",

"Latn",

sort_key = {

from = {

"š", "ž", "õ", "ä", "ö", "ü", -- 2 chars

"z" -- 1 char

},

},

standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvÕõÄäÖöÜü"

}

m["eu"] = {

"Basque",

8752,

"euq",

"Latn",

sort_key = {

from = {"ç", "ñ"},

},

standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnÑñOoPpRrSsTtUuXxZz"

}

m["fa"] = {

"Persian",

9168,

"ira-swi",

"fa-Arab",

ancestors = "fa-cls",

entry_name = {

from = {"هٔ", "ٱ"}, -- character "ۂ" code U+06C2 to "ه"; hamzatu l-waṣli to a regular alif

},

}

m["ff"] = {

"Fula",

33454,

"alv-fwo",

"Latn, Adlm",

}

m["fi"] = {

"Finnish",

1412,

"urj-fin",

"Latn",

display_text = {

from = {"'"},

},

entry_name = { -- used to indicate gemination of the next consonant

from = {"’"},

},

sort_key = {

remove_exceptions = {"å"},

from = {"ø", "æ", "œ", "ß", "å", "(.)%-"},

},

standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÄäÖö"

}

m["fj"] = {

"Fijian",

33295,

"poz-occ",

"Latn",

}

m["fo"] = {

"Faroese",

25258,

"gmq-ins",

"Latn",

sort_key = {

from = {"á", "ð", "í", "ó", "ú", "ý", "æ", "ø"},

},

standardChars = "AaÁáBbDdÐðEeFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvYyÝýÆæØø"

}

m["fr"] = {

"French",

150,

"roa-oil",

"Latn, Brai",

display_text = {

from = {"'"},

},

entry_name = {

from = {"’"},

},

ancestors = "frm",

sort_key = {Latn = s["roa-oil-sortkey"]},

standardChars = {

Latn = "AaÀàÂâBbCcÇçDdEeÉéÈèÊêËëFfGgHhIiÎîÏïJjLlMmNnOoÔôŒœPpQqRrSsTtUuÙùÛûÜüVvXxYyZz",

Brai = c.braille,

c.punc

},

}

m["fy"] = {

"West Frisian",

27175,

"gmw-fri",

"Latn",

ancestors = "ofs",

sort_key = {

from = {"y"},

},

standardChars = "AaâäàÆæBbCcDdEeéêëèFfGgHhIiïìYyỳJjKkLlMmNnOoôöòPpRrSsTtUuúûüùVvWwZz"

}

m["ga"] = {

"Irish",

9142,

"cel-gae",

"Latn, Latg",

ancestors = "mga",

sort_key = {

from = {"ḃ", "ċ", "ḋ", "ḟ", "ġ", "ṁ", "ṗ", "ṡ", "ṫ"},

},

standardChars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíLlMmNnOoÓóPpRrSsTtUuÚúVv"

}

m["gd"] = {

"Scottish Gaelic",

9314,

"cel-gae",

"Latn, Latg",

ancestors = "mga",

standardChars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìLlMmNnOoÒòPpRrSsTtUuÙù"

}

m["gl"] = {

"Galician",

9307,

"roa-ibe",

"Latn",

ancestors = "roa-opt",

sort_key = {

from = {"ñ"},

},

standardChars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíÏïLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxZz"

}

m["gn"] = {

"Guaraní",

35876,

"tup-gua",

"Latn",

}

m["gu"] = {

"Gujarati",

5137,

"inc-wes",

"Arab, Gujr",

ancestors = "inc-mgu",

translit = {

Gujr = "gu-translit",

},

entry_name = {

},

}

m["gv"] = {

"Manx",

12175,

"cel-gae",

"Latn",

ancestors = "mga",

standardChars = "AaBbCcÇçDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwYy"

}

m["ha"] = {

"Hausa",

56475,

"cdc-wst",

"Latn, Arab",

}

m["he"] = {

"Hebrew",

9288,

"sem-can",

"Hebr, Phnx, Brai",

ancestors = "he-med",

}

m["hi"] = {

"Hindi",

1568,

"inc-hnd",

"Deva, Kthi, Newa",

ancestors = "inc-ohi",

translit = {Deva = "hi-translit"},

standardChars = {

Deva = "अआइईउऊएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहत्रज्ञक्षक़ख़ग़ज़झ़ड़ढ़फ़काखागाघाङाचाछाजाझाञाटाठाडाढाणाताथादाधानापाफाबाभामायारालावाशाषासाहात्राज्ञाक्षाक़ाख़ाग़ाज़ाझ़ाड़ाढ़ाफ़ाकिखिगिघिङिचिछिजिझिञिटिठिडिढिणितिथिदिधिनिपिफिबिभिमियिरिलिविशिषिसिहित्रिज्ञिक्षिक़िख़िग़िज़िझ़िड़िढ़िफ़िकीखीगीघीङीचीछीजीझीञीटीठीडीढीणीतीथीदीधीनीपीफीबीभीमीयीरीलीवीशीषीसीहीत्रीज्ञीक्षीक़ीख़ीग़ीज़ीझ़ीड़ीढ़ीफ़ीकुखुगुघुङुचुछुजुझुञुटुठुडुढुणुतुथुदुधुनुपुफुबुभुमुयुरुलुवुशुषुसुहुत्रुज्ञुक्षुक़ुख़ुग़ुज़ुझ़ुड़ुढ़ुफ़ुकूखूगूघूङूचूछूजूझूञूटूठूडूढूणूतूथूदूधूनूपूफूबूभूमूयूरूलूवूशूषूसूहूत्रूज्ञूक्षूक़ूख़ूग़ूज़ूझ़ूड़ूढ़ूफ़ूकेखेगेघेङेचेछेजेझेञेटेठेडेढेणेतेथेदेधेनेपेफेबेभेमेयेरेलेवेशेषेसेहेत्रेज्ञेक्षेक़ेख़ेग़ेज़ेझ़ेड़ेढ़ेफ़ेकैखैगैघैङैचैछैजैझैञैटैठैडैढैणैतैथैदैधैनैपैफैबैभैमैयैरैलैवैशैषैसैहैत्रैज्ञैक्षैक़ैख़ैग़ैज़ैझ़ैड़ैढ़ैफ़ैकोखोगोघोङोचोछोजोझोञोटोठोडोढोणोतोथोदोधोनोपोफोबोभोमोयोरोलोवोशोषोसोहोत्रोज्ञोक्षोक़ोख़ोग़ोज़ोझ़ोड़ोढ़ोफ़ोकौखौगौघौङौचौछौजौझौञौटौठौडौढौणौतौथौदौधौनौपौफौबौभौमौयौरौलौवौशौषौसौहौत्रौज्ञौक्षौक़ौख़ौग़ौज़ौझ़ौड़ौढ़ौफ़ौक्ख्ग्घ्ङ्च्छ्ज्झ्ञ्ट्ठ्ड्ढ्ण्त्थ्द्ध्न्प्फ्ब्भ्म्य्र्ल्व्श्ष्स्ह्त्र्ज्ञ्क्ष्क़्ख़्ग़्ज़्झ़्ड़्ढ़्फ़्।॥०१२३४५६७८९॰",

c.punc

},

}

m["ho"] = {

"Hiri Motu",

33617,

"crp",

"Latn",

ancestors = "meu",

}

m["ht"] = {

"Haitian Creole",

33491,

"crp",

"Latn",

ancestors = "ht-sdm",

sort_key = {

from = {

"oun", -- 3 chars

"an", "ch", "è", "en", "ng", "ò", "on", "ou", "ui" -- 2 chars

},

},

}

m["hu"] = {

"Hungarian",

9067,

"urj-ugr",

"Latn, Hung",

ancestors = "ohu",

sort_key = {

Latn = {

from = {

"dzs", -- 3 chars

"á", "cs", "dz", "é", "gy", "í", "ly", "ny", "ó", "ö", "ő", "sz", "ty", "ú", "ü", "ű", "zs", -- 2 chars

},

},

},

standardChars = {

Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóÖöŐőPpQqRrSsTtUuÚúÜüŰűVvWwXxYyZz",

c.punc

},

}

m["hy"] = {

"Armenian",

8785,

"hyx",

"Armn, Brai",

ancestors = "axm",

translit = {Armn = "Armn-translit"},

override_translit = true,

entry_name = {

Armn = {

from = {"եւ", "յ", "ի", "է"},

},

},

sort_key = {

Armn = {

from = {

"ու", "եւ", -- 2 chars

"և" -- 1 char

},

},

},

}

m["hz"] = {

"Herero",

33315,

"bnt-swb",

"Latn",

}

m["ia"] = {

"Interlingua",

35934,

"art",

"Latn",

}

m["id"] = {

"Indonesian",

9240,

"poz-mly",

"Latn",

ancestors = "ms",

standardChars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz"

}

m["ie"] = {

"Interlingue",

35850,

"art",

"Latn",

type = "appendix-constructed",

}

m["ig"] = {

"Igbo",

33578,

"alv-igb",

"Latn",

sort_key = {

from = {"gb", "gh", "gw", "ị", "kp", "kw", "ṅ", "nw", "ny", "ọ", "sh", "ụ"},

},

}

m["ii"] = {

"Sichuan Yi",

34235,

"tbq-lol",

"Yiii",

translit = "ii-translit",

}

m["ik"] = {

"Inupiaq",

27183,

"esx-inu",

"Latn",

sort_key = {

from = {

"ch", "ġ", "dj", "ḷ", "ł̣", "ñ", "ng", "r̂", "sr", "zr", -- 2 chars

"ł", "ŋ", "ʼ" -- 1 char

},

},

}

m["io"] = {

"Ido",

35224,

"art",

"Latn",

}

m["is"] = {

"Icelandic",

294,

"gmq-ins",

"Latn",

sort_key = {

from = {"á", "ð", "é", "í", "ó", "ú", "ý", "þ", "æ", "ö"},

},

standardChars = "AaÁáBbDdÐðEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvXxYyÝýÞþÆæÖö"

}

m["it"] = {

"Italian",

652,

"roa-itd",

"Latn",

ancestors = "it-oit",

standardChars = "AaÀàBbCcDdEeÈèÉéFfGgHhIiÌìLlMmNnOoÒòPpQqRrSsTtUuÙùVvZz"

}

m["iu"] = {

"Inuktitut",

29921,

"esx-inu",

"Cans, Latn",

translit = {Cans = "cr-translit"},

override_translit = true,

}

m["ja"] = {

"Japanese",

5287,

"jpx",

"Jpan, Latn, Brai",

ancestors = "ja-ear",

translit = s["Jpan-translit"],

link_tr = true,

sort_key = s["Jpan-sortkey"],

}

m["jv"] = {

"Javanese",

33549,

"poz-sus",

"Latn, Java",

ancestors = "kaw",

translit = {Java = "jv-translit"},

link_tr = true,

sort_key = {

Latn = {

from = {"å", "dh", "é", "è", "ng", "ny", "th"},

},

},

}

m["ka"] = {

"Georgian",

8108,

"ccs-gzn",

"Geor, Geok, Hebr", -- Hebr is used to write Judeo-Georgian

ancestors = "ka-mid",

translit = {

Geor = "Geor-translit",

Geok = "Geok-translit",

},

override_translit = true,

}

m["kg"] = {

"Kongo",

33702,

"bnt-kng",

"Latn",

}

m["ki"] = {

"Kikuyu",

33587,

"bnt-kka",

"Latn",

}

m["kj"] = {

"Kwanyama",

1405077,

"bnt-ova",

"Latn",

}

m["kk"] = {

"Kazakh",

9252,

"trk-kno",

"Cyrl, Latn, kk-Arab",

translit = {

Cyrl = {

from = {

"Ё", "ё", "Й", "й", "Нг", "нг", "Ӯ", "ӯ", -- 2 chars; are "Ӯ" and "ӯ" actually used?

"А", "а", "Ә", "ә", "Б", "б", "В", "в", "Г", "г", "Ғ", "ғ", "Д", "д", "Е", "е", "Ж", "ж", "З", "з", "И", "и", "К", "к", "Қ", "қ", "Л", "л", "М", "м", "Н", "н", "Ң", "ң", "О", "о", "Ө", "ө", "П", "п", "Р", "р", "С", "с", "Т", "т", "У", "у", "Ұ", "ұ", "Ү", "ү", "Ф", "ф", "Х", "х", "Һ", "һ", "Ц", "ц", "Ч", "ч", "Ш", "ш", "Щ", "щ", "Ъ", "ъ", "Ы", "ы", "І", "і", "Ь", "ь", "Э", "э", "Ю", "ю", "Я", "я", -- 1 char

},

}

},

-- override_translit = true,

sort_key = {

Cyrl = {

from = {"ә", "ғ", "ё", "қ", "ң", "ө", "ұ", "ү", "һ", "і"},

},

},

standardChars = {

Cyrl = "АаӘәБбВвГгҒғДдЕеЁёЖжЗзИиЙйКкҚқЛлМмНнҢңОоӨөПпРрСсТтУуҰұҮүФфХхҺһЦцЧчШшЩщЪъЫыІіЬьЭэЮюЯя",

c.punc

},

}

m["kl"] = {

"Greenlandic",

25355,

"esx-inu",

"Latn",

sort_key = {

from = {"æ", "ø", "å"},

}

}

m["km"] = {

"Khmer",

9205,

"mkh-kmr",

"Khmr",

ancestors = "xhm",

translit = "km-translit",

}

m["kn"] = {

"Kannada",

33673,

"dra-kan",

"Knda",

ancestors = "dra-mkn",

translit = "kn-translit",

}

m["ko"] = {

"Korean",

9176,

"qfa-kor",

"Kore, Brai",

ancestors = "ko-ear",

translit = {Kore = "ko-translit"},

entry_name = {Kore = s["Kore-entryname"]},

}

m["kr"] = {

"Kanuri",

36094,

"ssa-sah",

"Latn, Arab",

sort_key = {

Latn = {

from = {"ǝ", "ny", "ɍ", "sh"},

},

},

}

m["ks"] = {

"Kashmiri",

33552,

"inc-kas",

"ks-Arab, Deva, Shrd, Latn",

translit = {

["ks-Arab"] = "ks-Arab-translit",

Deva = "ks-Deva-translit",

Shrd = "Shrd-translit",

},

}

-- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT

m["kw"] = {

"Cornish",

25289,

"cel-brs",

"Latn",

ancestors = "cnx",

sort_key = {

from = {"ch"},

},

}

m["ky"] = {

"Kyrgyz",

9255,

"trk-kip",

"Cyrl, Latn, Arab",

translit = {Cyrl = "ky-translit"},

override_translit = true,

sort_key = {

Cyrl = {

from = {"ё", "ң", "ө", "ү"},

},

},

}

m["la"] = {

"Latin",

397,

"itc",

"Latn, Ital",

ancestors = "itc-ola",

sort_key = {

Latn = {

from = {"æ", "œ"},

},

},

standardChars = {

Latn = "AaBbCcDdEeFfGgHhIiLlMmNnOoPpQqRrSsTtUuVvXxZz",

c.punc

},

}

m["lb"] = {

"Luxembourgish",

9051,

"gmw-hgm",

"Latn",

ancestors = "gmw-cfr",

sort_key = {

from = {"ä", "ë", "é"},

},

}

m["lg"] = {

"Luganda",

33368,

"bnt-nyg",

"Latn",

sort_key = {

from = {"ŋ"},

},

}

m["li"] = {

"Limburgish",

102172,

"gmw-frk",

"Latn",

ancestors = "dum",

}

m["ln"] = {

"Lingala",

36217,

"bnt-bmo",

"Latn",

sort_key = {

from = {"ɛ", "gb", "mb", "mp", "nd", "ng", "nk", "ns", "nt", "ny", "nz", "ɔ"},

},

}

m["lo"] = {

"Lao",

9211,

"tai-swe",

"Laoo",

translit = "lo-translit",

sort_key = "Laoo-sortkey",

standardChars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ"

}

m["lt"] = {

"Lithuanian",

9083,

"bat-eas",

"Latn",

ancestors = "olt",

sort_key = {

from = {"ą", "č", "ę", "ė", "į", "y", "š", "ų", "ū", "ž"},

},

standardChars = "AaĄąBbCcČčDdEeĘęĖėFfGgHhIiĮįYyJjKkLlMmNnOoPpRrSsŠšTtUuŲųŪūVvZzŽž"

}

m["lu"] = {

"Luba-Katanga",

36157,

"bnt-lub",

"Latn",

}

-- Latvian record: name, numeric id, family code, script list, then named options.

m["lv"] = {

"Latvian",

9078,

"bat-eas",

"Latn",

entry_name = {

-- NOTE(review): this table is empty in this copy; the comment below describes replacement rules that are not present here.

-- This attempts to convert vowels with tone marks to vowels either with or without macrons. Specifically, there should be no macrons if the vowel is part of a diphthong (including resonant diphthongs such as pìrksts -> pirksts not #pīrksts). What we do is first convert the vowel + tone mark to a vowel + tilde in a decomposed fashion, then remove the tilde in diphthongs, then convert the remaining vowel + tilde sequences to macroned vowels, then delete any other tilde. We leave already-macroned vowels alone: Both e.g. ar and ār occur before consonants. FIXME: This still might not be sufficient.

},

sort_key = {

from = {"ā", "č", "ē", "ģ", "ī", "ķ", "ļ", "ņ", "š", "ū", "ž"},

},

standardChars = "AaĀāBbCcČčDdEeĒēFfGgĢģHhIiĪīJjKkĶķLlĻļMmNnŅņOoPpRrSsŠšTtUuŪūVvZzŽž"

}

m["mg"] = {

"Malagasy",

7930,

"poz-bre",

"Latn",

}

m["mh"] = {

"Marshallese",

36280,

"poz-mic",

"Latn",

sort_key = {

from = {"ā", "ļ", "m̧", "ņ", "n̄", "o̧", "ō", "ū"},

},

}

m["mi"] = {

"Maori",

36451,

"poz-pep",

"Latn",

sort_key = {

from = {"ng", "wh"},

},

}

m["mk"] = {

"Macedonian",

9296,

"zls",

"Cyrl",

ancestors = "cu",

translit = "mk-translit",

entry_name = {

remove_exceptions = {"Ѓ", "ѓ", "Ќ", "ќ"}

},

sort_key = {

from = {"ѓ", "ѕ", "ј", "љ", "њ", "ќ", "џ"},

},

standardChars = "АаБбВвГгДдЃѓЕеЖжЗзЅѕИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЌќУуФфХхЦцЧчЏџШш"

}

m["ml"] = {

"Malayalam",

36236,

"dra-mal",

"Mlym",

translit = "ml-translit",

override_translit = true,

}

m["mn"] = {

"Mongolian",

9246,

"xgn-cen",

"Cyrl, Mong, Latn, Brai",

ancestors = "cmg",

translit = {

Cyrl = "mn-translit",

Mong = "Mong-translit",

},

override_translit = true,

display_text = {Mong = s["Mong-displaytext"]},

entry_name = {

Mong = s["Mong-entryname"],

},

sort_key = {

Cyrl = {

from = {"ё", "ө", "ү"},

},

},

standardChars = {

Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйЛлМмНнОоӨөРрСсТтУуҮүХхЦцЧчШшЫыЬьЭэЮюЯя—",

Brai = c.braille,

c.punc

},

}

-- "mo" IS TREATED AS "ro", SEE WT:LT

m["mr"] = {

"Marathi",

1571,

"inc-sou",

"Deva, Modi",

ancestors = "omr",

translit = {

Deva = "mr-translit",

Modi = "mr-Modi-translit",

},

entry_name = {

Deva = {

from = {"च़", "ज़", "झ़"},

},

},

}

m["ms"] = {

"Malay",

9237,

"poz-mly",

"Latn, ms-Arab",

ancestors = "ms-cla",

standardChars = {

Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",

c.punc

},

}

-- Maltese record: name, numeric id, family code, script list, then named options.

m["mt"] = {

"Maltese",

9166,

"sem-arb",

"Latn",

display_text = {

from = {"'"},

},

entry_name = {

from = {"’"},

},

ancestors = "sqr",

sort_key = {

-- NOTE(review): p is declared as an empty table near the top of this file and nothing in view assigns to it,

-- so p[3], p[4] and p[5] below evaluate to nil and leave holes in this list (its length via # is then unspecified).

-- The step comments refer to replacement targets that are not present in this trimmed copy.

from = {

"ċ", "ġ", "ż", -- Convert into PUA so that decomposed form does not get caught by the next step.

"([cgz])", -- Ensure "c" comes after "ċ", "g" comes after "ġ" and "z" comes after "ż".

p[3], p[4], "ħ", "ie", p[5] -- Convert "ċ", "ġ", "ħ", "ie", "ż" into final output.

},

},

}

m["my"] = {

"Burmese",

9228,

"tbq-brm",

"Mymr",

ancestors = "obr",

translit = "my-translit",

override_translit = true,

sort_key = {

from = {"ျ", "ြ", "ွ", "ှ", "ဿ"},

},

}

m["na"] = {

"Nauruan",

13307,

"poz-mic",

"Latn",

}

m["nb"] = {

"Norwegian Bokmål",

25167,

"gmq",

"Latn",

wikimedia_codes = "no",

ancestors = "gmq-mno, da",

sort_key = s["no-sortkey"],

standardChars = s["no-standardchars"],

}

m["nd"] = {

"Northern Ndebele",

35613,

"bnt-ngu",

"Latn",

}

m["ne"] = {

"Nepali",

33823,

"inc-pah",

"Deva, Newa",

translit = {Deva = "ne-translit"},

}

m["ng"] = {

"Ndonga",

33900,

"bnt-ova",

"Latn",

}

m["nl"] = {

"Dutch",

7411,

"gmw-frk",

"Latn, Brai",

ancestors = "dum",

standardChars = {

Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",

Brai = c.braille,

c.punc

},

}

m["nn"] = {

"Norwegian Nynorsk",

25164,

"gmq-wes",

"Latn",

ancestors = "gmq-mno",

entry_name = {

},

sort_key = s["no-sortkey"],

standardChars = s["no-standardchars"],

}

m["no"] = {

"Norwegian",

9043,

"gmq-wes",

"Latn",

ancestors = "gmq-mno",

sort_key = s["no-sortkey"],

standardChars = s["no-standardchars"],

}

m["nr"] = {

"Southern Ndebele",

36785,

"bnt-ngu",

"Latn",

}

m["nv"] = {

"Navajo",

13310,

"apa",

"Latn",

sort_key = {

from = {

"chʼ", "tłʼ", "tsʼ", -- 3 chars

"ch", "dl", "dz", "gh", "hw", "kʼ", "kw", "sh", "tł", "ts", "zh", -- 2 chars

"ł", "ʼ" -- 1 char

},

},

}

m["ny"] = {

"Chichewa",

33273,

"bnt-nys",

"Latn",

sort_key = {

from = {"ng'"},

},

}

m["oc"] = {

"Occitan",

14185,

"roa-ocr",

"Latn, Hebr",

ancestors = "pro",

sort_key = {

Latn = {

from = {"([lns])·h"},

},

},

}

m["oj"] = {

"Ojibwe",

33875,

"alg",

"Cans, Latn",

sort_key = {

Latn = {

from = {"aa", "ʼ", "ii", "oo", "sh", "zh"},

},

},

}

m["om"] = {

"Oromo",

33864,

"cus-eas",

"Latn, Ethi",

}

m["or"] = {

"Odia",

33810,

"inc-eas",

"Orya",

ancestors = "inc-mor",

translit = "or-translit",

}

m["os"] = {

"Ossetian",

33968,

"xsc",

"Cyrl, Geor, Latn",

ancestors = "oos",

translit = {

Cyrl = "os-translit",

Geor = "Geor-translit",

},

override_translit = true,

display_text = {

Cyrl = {

from = {"æ"},

},

Latn = {

from = {"ӕ"},

},

},

entry_name = {

Cyrl = {

from = {"æ"},

},

Latn = {

from = {"ӕ"},

},

},

sort_key = {

Cyrl = {

from = {"ӕ", "гъ", "дж", "дз", "ё", "къ", "пъ", "тъ", "хъ", "цъ", "чъ"},

},

},

}

m["pa"] = {

"Punjabi",

58635,

"inc-pan",

"Guru, pa-Arab",

ancestors = "inc-opa",

translit = {

Guru = "Guru-translit",

["pa-Arab"] = "pa-Arab-translit",

},

entry_name = {

["pa-Arab"] = {

from = {"ݨ", "ࣇ"},

},

},

}

m["pi"] = {

"Pali",

36727,

"inc-mid",

"Latn, Brah, Deva, Beng, Sinh, Mymr, Thai, Lana, Laoo, Khmr, Cakm",

ancestors = "sa",

translit = {

Brah = "Brah-translit",

Deva = "sa-translit",

Beng = "pi-translit",

Sinh = "si-translit",

Mymr = "pi-translit",

Thai = "pi-translit",

Lana = "pi-translit",

Laoo = "pi-translit",

Khmr = "pi-translit",

Cakm = "Cakm-translit",

},

entry_name = {

Thai = {

},

},

sort_key = { -- FIXME: This needs to be converted into the current standardized format.

},

}

m["pl"] = {

"Polish",

809,

"zlw-lch",

"Latn",

ancestors = "zlw-mpl",

sort_key = {

from = {"ą", "ć", "ę", "ł", "ń", "ó", "ś", "ź", "ż"},

},

standardChars = "AaĄąBbCcĆćDdEeĘęFfGgHhIiJjKkLlŁłMmNnŃńOoÓóPpRrSsŚśTtUuWwYyZzŹźŻż"

}

m["ps"] = {

"Pashto",

58680,

"ira-pat",

"ps-Arab",

}

m["pt"] = {

"Portuguese",

5146,

"roa-ibe",

"Latn, Brai",

ancestors = "roa-opt",

standardChars = {

Latn = "AaÁáÂâÃãBbCcÇçDdEeÉéÊêFfGgHhIiÍíJjLlMmNnOoÓóÔôÕõPpQqRrSsTtUuÚúVvXxZz",

Brai = c.braille,

c.punc

},

}

m["qu"] = {

"Quechua",

5218,

"qwe",

"Latn",

}

m["rm"] = {

"Romansch",

13199,

"roa-rhe",

"Latn",

}

m["ro"] = {

"Romanian",

7913,

"roa-eas",

"Latn, Cyrl",

sort_key = {

Latn = {

from = {"ă", "â", "î", "ș", "ț"},

},

Cyrl = {

from = {"ӂ"},

},

},

standardChars = {

Latn = "AaĂăÂâBbCcDdEeFfGgHhIiÎîJjLlMmNnOoPpRrSsȘșTtȚțUuVvXxZz",

Cyrl = "АаБбВвГгДдЕеЖжӁӂЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЫыЬьЭэЮюЯя",

c.punc

},

}

m["ru"] = {

"Russian",

7737,

"zle",

"Cyrl, Cyrs, Brai",

ancestors = "zle-mru",

translit = {

Cyrl = "ru-translit",

Cyrs = "ru-translit",

},

entry_name = {

Cyrl = s["ru-Cyrl-entryname"],

Cyrs = s["ru-Cyrl-entryname"],

},

sort_key = {

Cyrl = s["ru-Cyrl-sortkey"],

Cyrs = s["ru-Cyrl-sortkey"],

},

standardChars = {

Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя—",

Brai = c.braille,

c.punc

},

}

m["rw"] = {

"Rwanda-Rundi",

3217514,

"bnt-glb",

"Latn",

}

m["sa"] = {

"Sanskrit",

11059,

"inc-old",

"Deva, Bali, as-Beng, Beng, Bhks, Brah, Gran, Gujr, Guru, Hani, Java, Kawi, Khar, Khmr, Knda, Lana, Laoo, Marc, Mlym, Modi, Mong, mnc-Mong, xwo-Mong, Mymr, Nand, Newa, Orya, Phag, Ranj, Saur, Shrd, Sidd, Sinh, Taml, Tang, Telu, Thai, Tibt, Tirh",

translit = {

Deva = "sa-translit",

["as-Beng"] = "sa-Beng-translit",

Beng = "sa-Beng-translit",

Brah = "Brah-translit",

Gujr = "sa-Gujr-translit",

Java = "sa-Java-translit",

Khmr = "pi-translit",

Knda = "sa-Knda-translit",

Lana = "pi-translit",

Laoo = "pi-translit",

Mlym = "sa-Mlym-translit",

Modi = "sa-Modi-translit",

Mong = "Mong-translit",

["mnc-Mong"] = "mnc-translit",

["xwo-Mong"] = "xal-translit",

Mymr = "pi-translit",

Orya = "sa-Orya-translit",

Sinh = "si-translit",

Thai = "pi-translit",

Tibt = "Tibt-translit",

},

display_text = {

Mong = s["Mong-displaytext"],

Tibt = s["Tibt-displaytext"],

},

entry_name = {

Mong = s["Mong-entryname"],

Tibt = s["Tibt-entryname"],

Thai = {

},

},

sort_key = {

Tibt = "Tibt-sortkey",

{ -- FIXME: This needs to be converted into the current standardized format.

},

},

}

m["sc"] = {

"Sardinian",

33976,

"roa",

"Latn",

}

m["sd"] = {

"Sindhi",

33997,

"inc-snd",

"sd-Arab, Deva, Sind, Khoj",

translit = {Sind = "Sind-translit"},

entry_name = {

["sd-Arab"] = {

from = {"ٱ"},

},

},

ancestors = "inc-vra",

}

m["se"] = {

"Northern Sami",

33947,

"smi",

"Latn",

display_text = {

from = {"'"},

},

sort_key = {

from = {"á", "č", "đ", "ŋ", "š", "ŧ", "ž"},

},

standardChars = "AaÁáBbCcČčDdĐđEeFfGgHhIiJjKkLlMmNnŊŋOoPpRrSsŠšTtŦŧUuVvZzŽž"

}

m["sg"] = {

"Sango",

33954,

"crp",

"Latn",

ancestors = "ngb",

}

m["sh"] = {

"Serbo-Croatian",

9301,

"zls",

"Latn, Cyrl, Glag",

wikimedia_codes = "sh, bs, hr, sr",

entry_name = {

Latn = {

remove_exceptions = {"Ć", "ć", "Ś", "ś", "Ź", "ź"}

},

Cyrl = {

remove_exceptions = {"З́", "з́", "С́", "с́"}

},

},

sort_key = {

Latn = {

from = {"č", "ć", "dž", "đ", "lj", "nj", "š", "ś", "ž", "ź"},

},

Cyrl = {

from = {"ђ", "з́", "ј", "љ", "њ", "с́", "ћ", "џ"},

},

},

standardChars = {

Latn = "AaBbCcČčĆćDdĐđEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž",

Cyrl = "АаБбВвГгДдЂђЕеЖжЗзИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЋћУуФфХхЦцЧчЏџШш",

c.punc

},

}

m["si"] = {

"Sinhalese",

13267,

"inc-ins",

"Sinh",

ancestors = "pra-hel",

translit = "si-translit",

override_translit = true,

}

-- Slovak record: name, numeric id, family code, script list, then named options.

m["sk"] = {

"Slovak",

9058,

"zlw",

"Latn",

ancestors = "zlw-osk",

-- The letters between "Ll" and "Mm" are L-acute (U+0139/U+013A) and L-caron (U+013D/U+013E).

-- This copy previously held a mis-encoded CJK character in place of the last three of them.

standardChars = "AaÁáÄäBbCcČčDdĎďEeFfGgHhIiÍíJjKkLlĹĺĽľMmNnŇňOoÔôPpRrŔŕSsŠšTtŤťUuÚúVvYyÝýZzŽž"

}

m["sl"] = {

"Slovene",

9063,

"zls",

"Latn",

entry_name = {

from = {"Ə", "ə", "Ł", "ł"},

},

sort_key = {

from = {"č", "š", "ž"},

},

standardChars = "AaBbCcČčDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž"

}

m["sm"] = {

"Samoan",

34011,

"poz-pnp",

"Latn",

}

m["sn"] = {

"Shona",

34004,

"bnt-sho",

"Latn",

}

m["so"] = {

"Somali",

13275,

"cus-som",

"Latn, Arab, Osma",

}

m["sq"] = {

"Albanian",

8748,

"sqj",

"Latn, Grek, ota-Arab, Elba, Vith",

entry_name = {

from = {'^i (%w)', '^të (%w)'},

},

sort_key = {

from = {'ç', 'dh', 'ë', 'gj', 'll', 'nj', 'rr', 'sh', 'th', 'xh', 'zh'},

},

standardChars = "AaBbCcÇçDdEeËëFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvXxYyZz"

}

m["ss"] = {

"Swazi",

34014,

"bnt-ngu",

"Latn",

}

m["st"] = {

"Sotho",

34340,

"bnt-sts",

"Latn",

}

m["su"] = {

"Sundanese",

34002,

"poz-msa",

"Latn, Sund",

ancestors = "osn",

translit = {Sund = "su-translit"},

}

m["sv"] = {

"Swedish",

9027,

"gmq-eas",

"Latn",

ancestors = "gmq-osw-lat",

sort_key = {

remove_exceptions = {"å"},

from = {"ø", "æ", "œ", "ß", "å"},

},

standardChars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvXxYyÅåÄäÖö"

}

m["sw"] = {

"Swahili",

7838,

"bnt-swh",

"Latn, Arab",

sort_key = {

Latn = {

from = {"ng'"},

},

},

}

m["ta"] = {

"Tamil",

5885,

"dra-tam",

"Taml",

ancestors = "ta-mid",

translit = "ta-translit",

override_translit = true,

}

m["te"] = {

"Telugu",

8097,

"dra-tel",

"Telu",

ancestors = "dra-ote",

translit = "te-translit",

override_translit = true,

}

m["tg"] = {

"Tajik",

9260,

"ira-swi",

"Cyrl, fa-Arab, Latn",

ancestors = "fa-cls",

translit = {Cyrl = "tg-translit"},

override_translit = true,

sort_key = {

Cyrl = {

from = {"ғ", "ё", "ӣ", "қ", "ӯ", "ҳ", "ҷ"},

},

},

}

m["th"] = {

"Thai",

9217,

"tai-swe",

"Thai, Brai",

translit = {Thai = "th-translit"},

sort_key = {Thai = "Thai-sortkey"},

}

m["ti"] = {

"Tigrinya",

34124,

"sem-eth",

"Ethi",

translit = "Ethi-translit",

}

m["tk"] = {
	"Turkmen", 9267, "trk-ogz", "Latn, Cyrl, Arab",
	sort_key = {
		Latn = {from = {"ç", "ä", "ž", "ň", "ö", "ş", "ü", "ý"}},
		Cyrl = {from = {"ё", "җ", "ң", "ө", "ү", "ә"}},
	},
}

m["tl"] = {
	"Tagalog", 34057, "phi", "Latn, Tglg",
	translit = {Tglg = "tl-translit"},
	override_translit = true,
	standardChars = {
		Latn = "AaBbKkDdEeGgHhIiLlMmNnOoPpRrSsTtUuWwYy",
		-- positional entry: evaluated when this file loads and stored at standardChars[1].
		-- NOTE(review): c is declared as an empty table at the top of this file, so c.punc is presumably nil
		-- here unless c is filled in somewhere between that declaration and this line -- confirm.
		c.punc,
	},
	sort_key = {Latn = "tl-sortkey"},
}

m["tn"] = {"Tswana", 34137, "bnt-sts", "Latn"}

m["to"] = {"Tongan", 34094, "poz-pol", "Latn"}

m["tr"] = {
	"Turkish", 256, "trk-ogz", "Latn",
	ancestors = "ota",
	dotted_dotless_i = true,
	sort_key = {
		from = {
			-- upstream note: circumflex is ignored, while allowing for capital Î wrongly becoming ı + circumflex
			-- because of the dotted/dotless I logic.
			-- NOTE(review): no circumflex entry is present in this list; presumably that part of the upstream
			-- rule was stripped from this copy -- confirm.
			"i",	-- listed so that "i" sorts after "ı"
			"ç", "ğ", "ı", "ö", "ş", "ü",
		},
	},
	standardChars = "AaÂâBbCcÇçDdEeFfGgĞğHhIıİiÎîJjKkLlMmNnOoÖöPpRrSsŞşTtUuÛûÜüVvYyZz",
}

m["ts"] = {"Tsonga", 34327, "bnt-tsr", "Latn"}

m["tt"] = {
	"Tatar", 25285, "trk-kbu", "Cyrl, Latn, tt-Arab",
	translit = {Cyrl = "tt-translit"},
	override_translit = true,
	dotted_dotless_i = true,
	sort_key = {
		Cyrl = {from = {"ә", "ў", "ғ", "ё", "җ", "қ", "ң", "ө", "ү", "һ"}},
		Latn = {
			from = {
				"i",	-- listed so that "i" sorts after "ı"
				"ä", "ə", "ç", "ğ", "ı", "ñ", "ŋ", "ö", "ɵ", "ş", "ü",
			},
		},
	},
}

-- "tw" IS TREATED AS "ak", SEE WT:LT

m["ty"] = {"Tahitian", 34128, "poz-pep", "Latn"}

m["ug"] = {
	"Uyghur", 13263, "trk-kar", "ug-Arab, Latn, Cyrl",
	ancestors = "chg",
	translit = {["ug-Arab"] = "ug-translit", Cyrl = "ug-translit"},
	override_translit = true,
}

m["uk"] = {
	"Ukrainian", 8798, "zle", "Cyrl",
	ancestors = "zle-ouk",
	translit = "uk-translit",
	sort_key = {
		from = {
			"ї",				-- marked upstream as 2 chars
			"ґ", "є", "і",		-- marked upstream as 1 char each
		},
	},
	standardChars = "АаБбВвГгДдЕеЄєЖжЗзИиІіЇїЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЬьЮюЯя",
}

m["ur"] = {
	"Urdu", 1617, "inc-hnd", "ur-Arab,Hebr",
	ancestors = "inc-ohi",
	translit = {["ur-Arab"] = "ur-translit"},
	entry_name = {
		-- upstream note: U+06C2 and the sequence U+0647 + U+0654 both map to U+0647; hamzatu l-waṣli maps to a
		-- regular alif.  Only the 'from' side of that mapping is present in this copy.
		from = {"هٔ", "ۂ", "ٱ"},
	},
	-- upstream note: Judeo-Urdu (Hebrew-script Urdu) is put under the category header
	-- U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after Arabic-script titles.
	-- NOTE(review): the table is empty here; presumably the rule itself was stripped from this copy -- confirm.
	sort_key = {},
}

m["uz"] = {
	"Uzbek", 9264, "trk-kar", "Latn, Cyrl, fa-Arab",
	ancestors = "chg",
	translit = {Cyrl = "uz-translit"},
	sort_key = {
		Latn = {from = {"oʻ", "gʻ", "sh", "ch", "ng"}},
		Cyrl = {from = {"ё", "ў", "қ", "ғ", "ҳ"}},
	},
}

m["ve"] = {"Venda", 32704, "bnt-bso", "Latn"}

m["vi"] = {
	"Vietnamese", 9199, "mkh-vie", "Latn, Hani",
	ancestors = "mkh-mvi",
	sort_key = {Latn = "vi-sortkey", Hani = "Hani-sortkey"},
}

m["vo"] = {"Volapük", 36986, "art", "Latn"}

m["wa"] = {
	"Walloon", 34219, "roa-oil", "Latn",
	ancestors = "fro",
	-- looked up in the file-level table s when this file loads.
	-- NOTE(review): s is declared as an empty table at the top of this file, so this is presumably nil (i.e.
	-- no sort_key field) unless s is filled in before this line -- confirm.
	sort_key = s["roa-oil-sortkey"],
}

m["wo"] = {"Wolof", 34257, "alv-fwo", "Latn, Arab"}

m["xh"] = {"Xhosa", 13218, "bnt-ngu", "Latn"}

m["yi"] = {
	"Yiddish", 8641, "gmw-hgm", "Hebr",
	ancestors = "gmh",
	translit = "yi-translit",
	sort_key = {
		from = {"א[ַָ]", "בּ", "ו[ֹּ]", "יִ", "ײַ", "פֿ"},
	},
}

m["yo"] = {
	"Yoruba", 34311, "alv-yor", "Latn, Arab",
	sort_key = {
		Latn = {from = {"ẹ", "ɛ", "gb", "ị", "kp", "ọ", "ɔ", "ṣ", "sh", "ụ"}},
	},
}

m["za"] = {
	"Zhuang", 13216, "tai", "Latn, Hani",
	sort_key = {Latn = "za-sortkey", Hani = "Hani-sortkey"},
}

m["zh"] = {
	"Chinese", 7850, "zhx", "Hani, Hant, Hans, Latn, Bopo, Nshu, Brai",
	ancestors = "ltc",
	generate_forms = "zh-generateforms",
	translit = {Hani = "zh-translit", Bopo = "zh-translit"},
	sort_key = {Hani = "Hani-sortkey"},
}

m["zu"] = {"Zulu", 10179, "bnt-ngu", "Latn"}

--[[--------------------------< M A I N >----------------------------------------------------------------------

walk the wiktionary-derived table m and, for every language tag that has no entry in the languages table of
:en:Module:Language/data, emit one line of lua source that pairs the tag with its language name (the first
positional value in the tag's table in m).

the emitted lines are sorted ascending, joined with ',\n', and returned as a single string.  When every tag is
already known the result is the empty string.

to use this module call it from the debug console:
	=p.main()

]]

local function main()
	local known_languages_t = mw.loadData ('Module:Language/data').languages;	-- tags that en.wiki already has
	local missing_t = {};														-- one formatted line per missing tag

	for tag, lang_t in pairs (m) do
		if not known_languages_t[tag] then										-- en.wiki has no entry for this tag
			missing_t[#missing_t + 1] = ('\t\t["%s"] = {["name"] = "%s"}'):format (tag, lang_t[1]);
		end
	end

	table.sort (missing_t);														-- pairs() order is unspecified; sort ascending
	return table.concat (missing_t, ',\n');										-- make a big string and done
end

--[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------

]]

-- the module's public interface: main() is the only exported function
return {main = main}