-- Module:Character info

local m_unicode = require('Module:Unicode data')

local char_to_script = require('Module:scripts').charToScript

local export = {}

-- Script codes whose characters trigger the "exotic symbols" edit notice
-- (looked up by export.exotic_symbol_warning).
local dingbat_scripts = {
	Zsym = true,
	Zmth = true,
	Zyyy = true,
}

-- Reports whether a page with the given title exists.
-- Both the construction of the title object and the read of its `exists`
-- field are wrapped in pcall, so any error raised by either step is turned
-- into a falsy result instead of propagating.
local function page_exists(title)
	local created, title_obj = pcall(mw.title.new, title)
	if not (created and title_obj) then
		return false
	end

	local readable, exists = pcall(function()
		return title_obj.exists
	end)
	return readable and exists
end

-- Returns the expanded "editnotice-exotic symbols" template when the current
-- page does not exist, its full title is exactly one character long, and that
-- character's script code is listed in dingbat_scripts. Returns "" otherwise.
function export.exotic_symbol_warning(frame)
	local current = mw.title.getCurrentTitle()

	local is_missing_single_char = not current.exists
		and mw.ustring.len(current.fullText) == 1
	if not is_missing_single_char then
		return ""
	end

	local script = char_to_script(mw.ustring.codepoint(current.fullText))
	if not dingbat_scripts[script] then
		return ""
	end

	return frame:expandTemplate { title = "editnotice-exotic symbols" }
end

-- Normalises a code point parameter.
--   codepoint:  the raw parameter value; a falsy value is returned unchanged.
--   param_name: parameter name, used only in the error message.
-- The value is trimmed, then read as a number via tonumber(); if that fails it
-- is entity-decoded and must then be exactly one character long, whose code
-- point is returned. Anything else raises an error.
local function get_codepoint(codepoint, param_name)
	if not codepoint then
		return codepoint
	end

	local trimmed = mw.text.trim(codepoint)
	local value = tonumber(trimmed) or mw.text.decode(trimmed)

	if type(value) == "number" then
		return value
	end

	if type(value) == "string" and mw.ustring.len(value) == 1 then
		return (mw.ustring.codepoint(value))
	end

	error("Unrecognised string given for the " .. param_name
		.. " parameter")
end

function export._show(args, parent_title)

local codepoint = args.codepoint or args[1] or "";

local image

local title = mw.title.getCurrentTitle()

local to_boolean = require('Module:yesno')

local namespace = mw.title.getCurrentTitle().nsText

if codepoint ~= "" then

codepoint = get_codepoint(codepoint, "codepoint")

else

if title.fullText == parent_title then

codepoint = 0xfffd

elseif mw.ustring.len(title.fullText) == 1 then

codepoint = mw.ustring.codepoint(title.fullText)

else

if title.nsText == "Template" then return "" end

error("Page title is not a single Unicode character")

end

end

args.image = args.image and mw.text.trim(args.image)

if args.image == "" then

image = nil

else

image = args.image or m_unicode.lookup_image(codepoint)

end

local table_markup = {}

table.insert(table_markup,

'

'):format(row_title, row_contents, row_category))

end

end

insert_row(parse_composition())

insert_row(parse_gardiner())

insert_row(parse_egpz())

insert_row(parse_mdc())

if rows[1] then

return ('

class="wikitable floatright" style="width:25em;"\n')

if image then

if not image:match("\127") then -- tags generate these; pass them through

if image:match("^%[?%[?[Ff]ile:") or image:match("^%[?%[?[Ii]mage:") then

image = image:gsub("^%[%[", ""):gsub("^[Ff]ile:", ""):gsub("^[Ii]mage:", ""):gsub("|.*", ""):gsub("]]", "")

end

local category = "*" .. string.format("%010d", codepoint) .. ""

image = "120x140px"

if namespace == "" then

--image = image .. category

end

end

table.insert(table_markup,

('

\n| colspan="2" style="text-align: center;" | %s
%s\n'):format(

image, args.caption or ""

)

)

elseif args.caption then

table.insert(table_markup,

('

\n| colspan="2" style="text-align: center;" | %s\n'):format(

args.caption

)

)

end

local script_code = args.sc or char_to_script(codepoint)

local script_data = mw.loadData("Module:scripts/data")[script_code]

or error("No data for script code " .. script_code .. ".")

local script_name = script_data[1]

local NAMESPACE = title.namespace

local cat_name

if not args.nocat and ((NAMESPACE == 0) or (NAMESPACE == 100)) then -- main and Appendix

if script_data.character_category ~= nil then

-- false means no category, overriding the default below

cat_name = script_data.character_category or nil

elseif script_name then

cat_name = script_name .. " script characters"

end

end

local block_name = mw.text.encode(args.block or m_unicode.lookup_block(codepoint))

local aliases

if args.aliases == "" then

aliases = nil

else

aliases = mw.loadData('Module:Unicode data/aliases')[codepoint]

end

-- Builds the alias note that is appended after the character name.
-- `aliases` is walked with ipairs as a list of pairs: alias[1] is used as the
-- alias kind and alias[2] as the alias text (HTML-encoded before use). The
-- kinds handled below are "correction", "alternate" and "abbreviation".
-- Returns "" when `aliases` is nil/false.
-- NOTE(review): the 'Category:...' literals and the quoted literal spanning
-- several lines in the final return look like they have lost surrounding
-- markup; confirm against the deployed module before relying on the output.
local function parse_aliases(aliases)

local result = {}

if aliases then

-- Group the alias texts by kind.
local classif = {}

for i, alias in ipairs(aliases) do

if not classif[alias[1]] then

classif[alias[1]] = {}

end

table.insert(classif[alias[1]], mw.text.encode(alias[2]))

end

-- One entry per corrected name.
if classif.correction then

for i, name in ipairs(classif.correction) do

-- `category` is assigned but not read anywhere in this function.
local category = 'Category:Character boxes with corrected names'

-- The category-prefixed variant is only used when `namespace` is "".
if namespace == "" then

table.insert(result,

('Category:Character boxes with corrected namesCorrected: %s'):format(

name

)

)

else

table.insert(result,

('Corrected: %s'):format(

name

)

)

end

end

end

-- One entry per alternative name.
if classif.alternate then

for i, name in ipairs(classif.alternate) do

-- `category` is assigned but not read anywhere in this function.
local category = 'Category:Character boxes with alternative names'

if namespace == "" then

table.insert(result,

('Category:Character boxes with alternative namesAlternative: %s'):format(

name

)

)

else

table.insert(result,

('Alternative: %s'):format(

name

)

)

end

end

end

-- All abbreviations are joined into a single entry.
if classif.abbreviation then

-- `category` is assigned but not read anywhere in this function.
local category = 'Category:Character boxes with abbreviations'

if namespace == "" then

table.insert(result,

('Category:Character boxes with abbreviationsAbbreviation: %s'):format(

table.concat(classif.abbreviation, ", ")

)

)

else

table.insert(result,

('Abbreviation: %s'):format(

table.concat(classif.abbreviation, ", ")

)

)

end

end

-- The collected entries are joined with ", " and wrapped in parentheses.
local parsed_result = table.concat(result, ", ")

return '

(' .. parsed_result .. ')
'

end

return ""

end

-- Jamo indices of the character; they stay nil unless the block is
-- "Hangul Syllables", in which case Module:ko-hangul supplies them.
local li, vi, ti = nil, nil, nil

if block_name == "Hangul Syllables" then

local m_Kore = require('Module:ko-hangul')

li, vi, ti = m_Kore.syllable2JamoIndices(codepoint)

end

-- Code point looked up by `li` in parse_composition; keys start at 0.
local initial_to_letter = { [0] =

0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142,

0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314A, 0x314B,

0x314C, 0x314D, 0x314E,

}

-- Code point looked up by `vi` in parse_composition; keys start at 0.
local vowel_to_letter = { [0] =

0x314F, 0x3150, 0x3151, 0x3152, 0x3153, 0x3154, 0x3155, 0x3156,

0x3157, 0x3158, 0x3159, 0x315A, 0x315B, 0x315C, 0x315D, 0x315E,

0x315F, 0x3160, 0x3161, 0x3162, 0x3163,

}

-- Code point looked up by `ti` in parse_composition. Keys start at 1: there is
-- no entry for 0, and parse_composition uses its two-part format when ti is 0.
local final_to_letter = {

0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136, 0x3137, 0x3139,

0x313A, 0x313B, 0x313C, 0x313D, 0x313E, 0x313F, 0x3140, 0x3141,

0x3142, 0x3144, 0x3145, 0x3146, 0x3147, 0x3148, 0x314A, 0x314B,

0x314C, 0x314D, 0x314E, -- KIYEOK-RIEUL = ???

}

-- Produces the "Composition" row.
-- For the "Hangul Syllables" block the row is formatted from the jamo tables
-- indexed by li / vi / ti. For any other block the character is converted with
-- mw.ustring.toNFD; if the result is not exactly one code point long, each
-- resulting code point is formatted and the pieces are joined with " + ".
-- Returns three values (row title, row contents, category name), or nil when
-- there is nothing to show.
local function parse_composition()

local result = nil

if block_name == "Hangul Syllables" then

-- Three-part format when ti is non-zero, two-part format otherwise; the
-- third argument is passed in both cases.
result = ((ti ~= 0) and

'&#%u; + &#%u; + &#%u;' or

'&#%u; + &#%u;'):format(

initial_to_letter[li],

vowel_to_letter[vi],

final_to_letter[ti]

)

else

local nfd = mw.ustring.toNFD(mw.ustring.char(codepoint))

if mw.ustring.len(nfd) ~= 1 then

local compo = {}

for nfdcp in mw.ustring.gcodepoint(nfd) do

-- Combining code points are prefixed with a dotted circle.
local dotted_circle = (m_unicode.is_combining(nfdcp) and "◌" or "")

-- The entry title is discarded unless that page exists.
local link_target = m_unicode.get_entry_title(nfdcp)

if not link_target or not page_exists(link_target) then

link_target = nil

end

local script = char_to_script(nfdcp)

-- NOTE(review): both format strings below have three specifiers but are
-- given five and four arguments respectively, so the arguments do not line
-- up with the specifiers as written; the literals have presumably lost
-- markup -- confirm against the deployed module.
local character_text =

link_target and ('%s&#%u; [U+%04X]')

:format(nfdcp, script, dotted_circle, nfdcp, nfdcp)

or ('%s&#%u; [U+%04X]')

:format(script, dotted_circle, nfdcp, nfdcp)

table.insert(compo, '' .. character_text .. ' ')

end

result = table.concat(compo, " + ")

end

end

if result then

return "Composition", result, "Category:Character boxes with compositions"

end

return nil

end

-- [[ Egyptian Hieroglyphs

-- Produces the "Gardiner number" row from args.gardiner.
-- Returns (row title, external link to the sign, category name), or nil when
-- the parameter is not set.
local function parse_gardiner()
	local sign = args.gardiner
	if not sign then
		return nil
	end

	local link = ('[http://vincent.euverte.free.fr/Rosette/Rosette_410.php?Hiero=%s&Lang=E %s]\n'):format(sign, sign)
	return "Gardiner number", link, "Category:Character boxes with additional information for Egyptian Hieroglyphs"
end

-- Produces the "Manuel de Codage" row from args.mdc.
-- Returns (row title, the parameter value unchanged, category name), or nil
-- when the parameter is not set.
local function parse_mdc()
	local code = args.mdc
	if not code then
		return nil
	end

	return "Manuel de Codage", code, "Category:Character boxes with additional information for Egyptian Hieroglyphs"
end

-- Produces the "EGPZ 1.0" row from args.egpz.
-- Returns (row title, the parameter value unchanged, category name), or nil
-- when the parameter is not set.
local function parse_egpz()
	local code = args.egpz
	if not code then
		return nil
	end

	return "EGPZ 1.0", code, "Category:Character boxes with additional information for Egyptian Hieroglyphs"
end

-- ]]

local function middle_part()

local rows = {}

local function insert_row(row_title, row_contents, row_category)

if row_contents then

table.insert(rows,

('

%s:%s%s
%s
')

:format(table.concat(rows, ""))

end

return ""

end

-- Formats one neighbouring code point (the callers pass the previous or the
-- next one) for display.
--   codepoint: numeric code point to present.
--   np:        truthy puts the arrow after the character ('... →'); falsy puts
--              it before ('← ...').
--   script:    script code used in the display; falls back to
--              char_to_script(codepoint).
--   combining, printable: optional overrides, converted with to_boolean; when
--              absent the values come from m_unicode.is_combining /
--              m_unicode.is_printable.
--   name:      optional name override; falls back to
--              m_unicode.lookup_name(codepoint).
--   title:     "self" makes the character itself the link target; "" disables
--              the m_unicode.get_entry_title fallback.
-- Returns the formatted text.
local function present_codepoint(codepoint, np, script, combining, name, printable, title)

local display

local link_target

if combining then

combining = to_boolean(combining)

else

combining = m_unicode.is_combining(codepoint)

end

if printable then

printable = to_boolean(printable)

else

printable = m_unicode.is_printable(codepoint)

end

local char = mw.ustring.char(codepoint)

-- Prefer the character's own page; otherwise (unless title is "") use the
-- entry title supplied by Module:Unicode data.
if title == "self" or page_exists(char) then

link_target = char

elseif title ~= "" then

link_target = m_unicode.get_entry_title(codepoint)

end

-- Non-printable characters get no display text at all.
-- NOTE(review): the format string has two specifiers but three arguments are
-- supplied; the literal has presumably lost markup -- confirm.
if printable then

display = ('%s&#x%04X;'):format(

script or char_to_script(codepoint),

combining and "◌" or "", codepoint

)

end

local arrow_and_maybe_char

if np then

arrow_and_maybe_char = (display or "") .. ' →'

else

arrow_and_maybe_char = '← ' .. (display or "")

end

-- NOTE(review): the quoted literal below spans several lines and has fewer
-- specifiers than the three arguments passed to format; it looks damaged.
local text = ('%s
[U+%04X]
')

:format(mw.text.encode(name or m_unicode.lookup_name(codepoint)),

arrow_and_maybe_char, codepoint)

-- NOTE(review): as written both branches yield the same text, because the
-- strings wrapped around `text` are empty; link_target is otherwise unused.
if link_target then

return ('' .. text .. '')

else

return text

end

end

-- Walks from `codepoint` in increments of `step` and returns the first code
-- point whose name (per m_unicode.lookup_name) does not begin with "<".
-- Labels beginning with "<" are used for noncharacters and reserved
-- characters (Cn), private-use characters (Co), surrogates (Cs) and control
-- characters (Cc), which are skipped.
-- The walk also stops once the candidate is no longer strictly between 0 and
-- 0x10FFFF. If there is no room to move in the requested direction (or `step`
-- is 0), `codepoint` is returned unchanged.
local function get_next(codepoint, step)
	local can_go_down = step < 0 and codepoint > 0
	local can_go_up = step > 0 and codepoint < 0x10FFFF
	if not (can_go_down or can_go_up) then
		return codepoint
	end

	local candidate = codepoint
	while true do
		candidate = candidate + step
		if m_unicode.lookup_name(candidate):sub(1, 1) ~= "<" then
			break
		end
		if candidate <= 0 or candidate >= 0x10FFFF then
			break
		end
	end
	return candidate
end

local previous_codepoint =

get_codepoint(args.previous_codepoint, "previous_codepoint")

and tonumber(args.previous_codepoint, 16) or get_next(codepoint, -1)

local next_codepoint = get_codepoint(args.next_codepoint, "next_codepoint")

and tonumber(args.next_codepoint, 16) or get_next(codepoint, 1)

local combining

if args.combining then

combining = to_boolean(args.combining)

else

combining = m_unicode.is_combining(codepoint)

end

table.insert(table_markup,

'\n| style="width: 70px;" colspan="2" | ' ..

'

' ..

'

' ..

'

' ..

'

' ..

'

' ..

'

' ..

('%s&#%u;')

:format(script_code, combining and "◌" or "", codepoint) ..

'

' ..

(' [https://util.unicode.org/UnicodeJsps/character.jsp?a=%.4X U+%.4X]'):format(codepoint, codepoint) ..

', &#' .. codepoint .. ';\n' ..

'

' ..

mw.text.encode(args.name or m_unicode.lookup_name(codepoint)) ..

'

' ..

parse_aliases(aliases) ..

'

'

)

table.insert(table_markup,

middle_part()

)

local previous_unassigned_first = previous_codepoint + 1

local previous_unassigned_last = codepoint - 1

local next_unassigned_first = codepoint + 1

local next_unassigned_last = next_codepoint - 1

local left_unassigned_text

local right_unassigned_text

if previous_codepoint == 0 then

previous_unassigned_first = 0

end

if previous_unassigned_first <= previous_unassigned_last or next_unassigned_first <= next_unassigned_last then

if previous_unassigned_first < previous_unassigned_last then

left_unassigned_text = ('[unassigned: U+%.4X–U+%.4X]'):format(previous_unassigned_first, previous_unassigned_last)

elseif previous_unassigned_first == previous_unassigned_last then

left_unassigned_text = ('[unassigned: U+%.4X]'):format(previous_unassigned_first)

end

if next_unassigned_first < next_unassigned_last then

right_unassigned_text = ('[unassigned: U+%.4X–U+%.4X]'):format(next_unassigned_first, next_unassigned_last)

elseif next_unassigned_first == next_unassigned_last then

right_unassigned_text = ('[unassigned: U+%.4X]'):format(next_unassigned_first)

end

end

local unassignedsRow =

mw.html.create('table'):css('width', '100%'):css('font-size', '80%'):css('white-space', 'nowrap')

:tag('tr')

:tag('td'):css('width', '50%'):css('text-align', 'left'):wikitext(left_unassigned_text or ''):done()

:tag('td'):css('width', '50%'):css('text-align', 'right'):wikitext(right_unassigned_text or ''):done()

:allDone()

table.insert(table_markup, tostring(unassignedsRow) ..'\n')

local previous_codepoint_text = ""

local next_codepoint_text = ('%s\n')

:format(present_codepoint(next_codepoint, true,

args.next_codepoint_sc, args.next_codepoint_combining,

args.next_codepoint_name, args.next_codepoint_printable,

args.next_codepoint_title))

if previous_codepoint > 0 then

previous_codepoint_text = ('%s\n')

:format(present_codepoint(previous_codepoint, false,

args.previous_codepoint_sc, args.previous_codepoint_combining,

args.previous_codepoint_name, args.previous_codepoint_printable,

args.previous_codepoint_title))

end

-- This assumes that "Block name (Unicode block)" exists, either as the title of the article or as a redirect to it.

-- It would be better to check whether the page "Block name (Unicode block)" actually exists, pipe the link to it if it does, and otherwise just link %s.

-- TODO: the page_exists function looks like it should work for this check, but an earlier attempt to use it here was unsuccessful.

local block_name_text = ('%s') --on wiktionary this is ('%s').

:format(block_name, block_name)

if namespace == "" then

block_name_text = block_name_text .. ('*%010d\n')

:format(block_name, codepoint)

else

block_name_text = block_name_text .. '\n'

end

local lastRow =

mw.html.create('table'):css('width', '100%'):css('text-align', 'center')

:tag('tr')

:tag('td'):css('width', '20%'):wikitext(previous_codepoint_text):done()

--:tag('td'):css('width', '15%')

-- :tag('span'):wikitext(left_unassigned_text and "..." or ""):attr('title', left_unassigned_text or ""):done():done()

:tag('td'):css('width', '60%'):css('font-size', '110%'):css('font-weight', 'bold'):wikitext(block_name_text)

--:tag('td'):css('width', '15%')

-- :tag('span'):wikitext(right_unassigned_text and "..." or ""):attr('title', right_unassigned_text or ""):done():done()

:tag('td'):css('width', '20%'):wikitext(next_codepoint_text):done()

:allDone()

table.insert(table_markup, tostring(lastRow) ..'\n')

table.insert(table_markup, '')

if cat_name and namespace == "" then

table.insert(table_markup, " " .. mw.ustring.char(codepoint) .. "")

end

table.insert(table_markup, require("Module:TemplateStyles")("User:Alexis Reggae/Character info template/styles.css"))

return table.concat(table_markup)

end

-- Template entry point: forwards the parent frame's arguments and the parent
-- frame's title to export._show and returns its result.
function export.show(frame)
	local parent = frame:getParent()
	local template_args = parent.args
	local template_title = parent:getTitle()
	return export._show(template_args, template_title)
end

return export