-- Module:Sandbox/Eievie

local p = {}

local data = mw.loadData( 'Module:Interlinear/data' )

local gloss_override = {} -- for custom gloss abbreviations

local getArgs = require('Module:Arguments').getArgs

local yesno = require('Module:Yesno')

local lang_data = mw.loadData( 'Module:Lang/data' )

local gcl = require('Module:Interlinear/sandbox2/gcl').gcl

--------------------------

-- Almost-global variables

--------------------------

-- State shared by the functions below (file-level upvalues):
--   frame               - the frame object received by p.interlinearise(); used to expand templates
--   glossing_type       - result of set_glossing_type(args.glossing); passed on to the gcl template by find_gloss()
--   displaying_messages - set to true by p.interlinearise() in preview mode unless display-messages is switched off
--   free_translation    - the last unnamed template parameter
--   msg                 - alias of the UserMessages object
--   buffer              - text of the word currently being assembled by parse()
local frame, glossing_type, displaying_messages, free_translation, msg, buffer

-------------------

-- General settings

-------------------

local conf = { -- settings
    WordSeparator = " \n\r\t", -- Don't replace with %s as this would include non-breaking spaces
    GlossAbbrPattern = "^([Ø0-9A-Z]+)$", -- this isn't a full regex, but a Lua pattern
    -- NOTE: The following characters must be formatted for use in a pattern set.
    GlossAbbrBoundary = "-.,;:<>‹›/\\~+=%?%s%[%]()%_\127'",
    GlossExcludeTable = {I = true,}, -- strings not to be treated as glossing abbreviations
    GlossExcludePattern = '^[0-9][0-9]+$', -- excludes strings made up of two or more digits and nothing else
    -- The two category values are written out verbatim as wikitext, so they
    -- must be full category links; a bare "Category:..." would show up as
    -- visible text and would not categorise the page.
    ErrorCategory = "[[Category:Pages with errors in interlinear text]]",
    AmbiguousGlossCategory = "[[Category:Articles with ambiguous glossing abbreviations]]",
    MessageGlossingError = "Error(s) in interlinear glossing",
    LowerCaseGlosses = {
        ["1sg"] = true, ["2sg"] = true, ["3sg"] = true,
        ["1du"] = true, ["2du"] = true, ["3du"] = true,
        ["1pl"] = true, ["2pl"] = true, ["3pl"] = true,
        ["Fsg"] = true, ["Fpl"] = true,
        ["Msg"] = true, ["Mpl"] = true,
    },
    ErrorHelpLocation = "Template:Interlinear",
}

---------------------
-- CSS styles and classes
---------------------

conf.class = { -- CSS classes
    GlossAbbr = "gloss-abbr",
    ErrorMessage = "error",
}

---------------------

-- Sundry small functions

---------------------

-- Collapses every run of word-separator characters (see conf.WordSeparator)
-- in `str` into a single space and returns the resulting string.
local function normalise(str)
    -- The outer parentheses discard gsub's second result (the substitution
    -- count), so callers always receive exactly one value.
    return (mw.ustring.gsub(str, "[" .. conf.WordSeparator .. "]+", " "))
end

-- Cleans up a user-supplied CSS declaration list: any quotation marks
-- wrapped around the value are removed and a terminating ";" is guaranteed.
local function tidyCss(str)
    -- strip leading/trailing runs of single or double quotes
    local css = mw.ustring.gsub(str, '^[\"\']*(.-)[\"\']*$', "%1")
    if mw.ustring.sub(css, -1) == ";" then
        return css
    end
    return css .. ";"
end

-- Wraps `text` in markup that makes it stand out in the rendered output;
-- parse() uses it to mark a stray bracket. Returns "" when `text` is nil
-- or false.
-- NOTE(review): the wrapper markup was missing from this copy of the module
-- (the function concatenated two empty strings); the red <span> below is a
-- reconstruction - confirm against the live module.
local function highlight(text)
    if not text then
        return ""
    end
    return '<span style="color:red">' .. text .. '</span>'
end

-- Superscripts tone numbers: every run of digits that directly follows a
-- character which is not punctuation, whitespace or a digit is wrapped in
-- <sup>...</sup>. Returns the results of mw.ustring.gsub (the new string,
-- followed by the number of substitutions).
local function tone_sup(str)
    -- The replacement must add the <sup> wrapper; a bare "%1%2" would put the
    -- matched text back unchanged.
    return mw.ustring.gsub(str, "([^%p%s0-9])([0-9]+)", "%1<sup>%2</sup>")
end

-- Tells whether `str` has no real content. Returns true for nil/false and for
-- strings made up solely of word separators (conf.WordSeparator); returns
-- false as soon as any other character is present.
local function is_empty(str)
    if not str then
        return true
    end
    local first_real_char = mw.ustring.find(str, "[^" .. conf.WordSeparator .. "]")
    return first_real_char == nil
end

-- Builds the " (help)" suffix appended to error messages.
-- `anchor` names the section of the help page (conf.ErrorHelpLocation) that
-- explains the error; without an anchor the result is "".
-- NOTE(review): the wikilink was missing from this copy of the module (a bare
-- " (help)" was returned whatever the anchor); the link syntax below is a
-- reconstruction - confirm against the live module.
local function help_link (anchor)
    if not anchor then
        return ""
    end
    return " ([[" .. conf.ErrorHelpLocation .. "#" .. anchor .. "|help]])"
end

---------------------

-- Apply other template

---------------------

-- Expands `template` through the current frame, passing `langarg` as the
-- first positional argument and `str` as the second, and returns the result
-- of frame:expandTemplate.
local function expand_template(str, template, langarg)
    local call = {
        title = template,
        args = { langarg, str },
    }
    return frame:expandTemplate(call)
end

-- Wraps each of the first `n` entries of `arr` in `template`.
--   arr      - array of words (modified in place); may be nil
--   n        - number of entries to process
--   template - template title; when nil/false nothing is done
--   args     - value passed as the template's first positional argument
-- Returns `arr`.
local function template_to_line(arr, n, template, args)
    if template and arr then
        -- Lua sequences start at 1. Starting at 0 would expand the template
        -- with no text at all and store the result in arr[0].
        for i = 1, n do
            if arr[i] ~= nil then -- never expand the template for a missing word
                arr[i] = expand_template(arr[i], template, args)
            end
        end
    end
    return arr
end

---------------------

-- The UserMessages object contains and processes error messages and warnings

---------------------

local UserMessages = {errors = {}, warnings = {}, gloss_messages = {}}

-- Records a message.
--   msgtype - one of "error", "non-repeating error", "warning",
--             "gloss_message" or "ambiguous gloss"
--   text    - the message text (ignored for "ambiguous gloss")
--   gloss   - key under which a "gloss_message" is stored
-- Any other msgtype raises an error attributed to the caller.
function UserMessages:add(msgtype, text, gloss)
    if msgtype == "error" then
        table.insert(self.errors, text)
        return
    end
    if msgtype == "non-repeating error" then
        -- stored under a fixed key, so a later one overwrites an earlier one
        self.errors.nre = text
        return
    end
    if msgtype == "warning" then
        table.insert(self.warnings, text)
        return
    end
    if msgtype == "gloss_message" then
        self.gloss_messages[gloss] = text
        return
    end
    if msgtype == "ambiguous gloss" then
        self.if_ambiguous_glosses = true
        return
    end
    return error("UserMessages:add(): unknown message type", 2)
end

-- Renders the collected errors as wikitext and returns it as a string.
-- When there is at least one error or warning, the errors are put into a
-- <span> carrying the error class, the error category is appended on pages
-- in non-talk namespaces other than user pages, and a page-level warning is
-- raised through mw.addWarning. The ambiguous-gloss category is appended in
-- the article namespace only.
function UserMessages:print_errors()
    local ns = mw.title.getCurrentTitle().namespace
    local rendered = ""

    local has_problems = next(self.errors) or self.warnings[1]
    if has_problems then
        local span = mw.html.create("span")
        span:addClass(conf.class.ErrorMessage)
        for _, message in pairs(self.errors) do
            span:wikitext(" " .. message .. ";")
        end
        -- non-talk namespaces, excluding user pages; if modifying please
        -- update the description on the category page
        local is_tracked = (ns % 2 == 0) and (ns ~= 2)
        if is_tracked then
            span:wikitext(conf.ErrorCategory)
        end
        rendered = tostring(span)
        mw.addWarning(conf.MessageGlossingError)
    end

    -- this category will only track articles
    if self.if_ambiguous_glosses and ns == 0 then
        rendered = rendered .. conf.AmbiguousGlossCategory
    end

    return rendered
end

-- Renders the preview-only message box and returns it as a string.
-- The box is produced only when `displaying_messages` is set and there is at
-- least one warning or gloss message; otherwise "" is returned. Warnings are
-- listed first (with the error class), followed by the gloss messages.
function UserMessages:print_warnings()
    local out = ""
    -- self.gloss_messages is always a table, so test for contents rather
    -- than for the table itself.
    local has_gloss_messages = next(self.gloss_messages) ~= nil
    -- Messages and warnings get displayed only if the page is being viewed in "preview" mode:
    if displaying_messages and (has_gloss_messages or next(self.warnings)) then
        local div = mw.html.create("div")
        div:addClass("interlinear-preview-warning")
            :wikitext("This message box is shown only in preview:")
            :newline()
        for _, warning in ipairs(self.warnings) do
            div:tag("p")
                :addClass(conf.class.ErrorMessage)
                :wikitext(warning)
        end
        if has_gloss_messages then
            -- each message goes into its own <p>; built with the html library
            -- so that no string literal has to span several lines
            div:tag("p")
                :wikitext("To change any of the following default expansions, see the template's documentation:")
        end
        for _, message in pairs(self.gloss_messages) do
            div:tag("p"):wikitext(message)
        end
        out = out .. "\n\n" .. tostring(div)
    end
    return out
end

---------------------

-- find_gloss() parses a word into morphemes, and it calls format_gloss()

-- for anything that looks like a glossing abbreviation.

---------------------

-- Translates the value of a glossing parameter into a glossing type.
--   glossing - the raw parameter value (string); nil/false raises an error
--              attributed to the caller
-- Returns "wikilink", "label", "no abbr", or nil (glossing switched off, or
-- value not recognised; the latter also records an error message).
local function set_glossing_type(glossing)
    if not glossing then
        error("set_glossing_type: 'glossing' is nil or false", 2)
    end

    glossing = mw.ustring.lower(mw.text.trim(glossing))

    -- plain (non-pattern) substring test
    local function mentions(keyword)
        return mw.ustring.find(glossing, keyword, 1, true) ~= nil
    end

    -- "no link" has to be tested before "link": every value that contains
    -- "no link" also contains "link".
    if mentions('no link') or mentions('label') then
        return 'label'
    elseif mentions('link') then
        return "wikilink"
    elseif mentions('no abbr') then
        return "no abbr"
    end

    local switch = yesno(glossing)
    if switch == false then
        return nil
    elseif switch then
        return 'label' --conf.GlossingType
    end

    -- Report through UserMessages directly: the `msg` alias has not been
    -- assigned yet when p.interlinearise() calls this function.
    UserMessages:add('error', 'Glossing type "' .. glossing .. '" not recognised')
    return nil
end

-- Splits `word` into morphemes at the characters listed in
-- conf.GlossAbbrBoundary and replaces every morpheme that looks like a
-- glossing abbreviation with the expansion of the gcl/sandbox template.
-- HTML entities in `word` are decoded first. Returns the processed word.
local function find_gloss(word)

    -- gsub callback: receives the (possibly empty) boundary character and
    -- the morpheme that follows it; returns their replacement.
    local function scan_gloss(boundary, gloss_abbr)
        local looks_like_abbr = mw.ustring.match(gloss_abbr, conf.GlossAbbrPattern)
            or conf.LowerCaseGlosses[gloss_abbr]
        local is_excluded = conf.GlossExcludeTable[gloss_abbr]
            or mw.ustring.match(gloss_abbr, conf.GlossExcludePattern)

        if not looks_like_abbr or is_excluded then
            return boundary .. gloss_abbr -- ordinary morpheme, left untouched
        end

        local gcl_args = { gloss_abbr }
        if glossing_type then
            gcl_args.glossing = glossing_type
        end
        local expanded = frame:expandTemplate{
            title = 'gcl/sandbox', args = gcl_args
        }
        return boundary .. expanded
    end

    local decoded = mw.text.decode(word, true)

    -- for the case of the English word "I", the 1SG pronoun
    if decoded == "I" then
        return decoded
    end

    local boundary_set = conf.GlossAbbrBoundary
    local morpheme_pattern = "([" .. boundary_set .. "]?)([^" .. boundary_set .. "]+)"
    local glossed = mw.ustring.gsub(decoded, morpheme_pattern, scan_gloss) -- splits into morphemes
    return glossed
end

---------------------

-- The main purpose of the bletcherous parse() is to split a line into words and and then for each eligible word

-- to call find_gloss(). The parser outputs the individual words (with any gloss abbreviation formatting applied).

-- The simple job of splitting at whitespaces has been made complicated by a) the fact that the input can contain

-- whitespaces inside the various html elements that are the result of the application of various formatting templates;

-- and b) the need to be able to recognise the output of the template that formats custom gloss abbreviations

-- (and hence skip passing it on to find_gloss). See talk for a suggestion about its future.

---------------------

-- Reads one word of a line, starting at character position `i`, and appends
-- its (formatted) text to the file-level `buffer`.
--   cline      - the line object (fields read here: whole, length, glossing, tone_sup)
--   i          - position in cline.whole at which to continue
--   tags_found - number of HTML elements currently open; a word separator
--                only ends the word when this is 0
--   ifglossing - when true, text is passed through find_gloss()
-- Returns the position of the last character of the word (the caller skips
-- the separator by adding 2), or `i` itself when `i` is past the end of the
-- line.
local function parse(cline, i, tags_found, ifglossing)

    local function issue_error(message, culprit)
        UserMessages:add("error", message .. ": " .. mw.ustring.sub(cline.whole, 1, i-1) .. "" .. culprit .. "")
    end

    if i > cline.length then return i end -- this will only be triggered if the current line has less words than line 1

    local next_step, j, _, chunk
    local probe = mw.ustring.sub(cline.whole, i, i)

    if mw.ustring.match(probe, "[" .. conf.WordSeparator .. "]") and tags_found == 0 then
        -- a separator outside of any element: the word ended on the previous character
        next_step = i - 1

    elseif probe == "[" then -- Wikilink?
        if mw.ustring.sub(cline.whole, i+1, i+1) == "[" then
            _, j, chunk = mw.ustring.find(cline.whole, "(%[%[.-%]%])", i)
        end
        if not chunk then
            -- not a wikilink, or a "[[" that is never closed: keep the bracket
            -- as literal text instead of concatenating nil
            chunk = "["; j = i
        end
        buffer = buffer .. chunk
        next_step = parse(cline, j+1, tags_found, ifglossing)

    elseif probe == "{" and tags_found == 0 then -- curly brackets enclose a sequence of words to be treated as a single unit
        _, j, chunk = mw.ustring.find(cline.whole, "(.-)(})", i+1)
        if not chunk then
            issue_error("Unclosed curly bracket", "{")
            chunk = highlight("{"); j = i
        elseif ifglossing == true then
            chunk = find_gloss(chunk)
        elseif cline.tone_sup then
            chunk = tone_sup(chunk)
        end
        buffer = buffer .. chunk
        next_step = parse(cline, j+1, tags_found, ifglossing)

    elseif probe == "<" then -- We've encountered an HTML tag. What do we do now?
        _, j, chunk = mw.ustring.find(cline.whole, "(<.->)", i)
        if not chunk then
            -- the bracket is written as an entity so that it cannot start a
            -- tag when it is fed back into the page
            issue_error("Unclosed angle bracket", "&lt;")
            chunk = highlight("&lt;"); j = i
        elseif mw.ustring.sub(cline.whole, i, i+1) == "</" then -- It's a CLOSING tag
            -- Glossing is suspended inside the output of the custom-gloss
            -- template (see below) and resumed when that element closes.
            -- NOTE(review): the tag literals in this branch were missing from
            -- this copy of the module and have been reconstructed; "</abbr>"
            -- assumes the custom-gloss template emits an <abbr> element -
            -- confirm against the live module.
            if cline.glossing
                and ifglossing == false
                and mw.ustring.match(chunk, "</abbr>")
            then
                ifglossing = true
            end
            tags_found = tags_found - 1
        elseif not mw.ustring.match(chunk, "/>$") then -- It's an OPENING tag, unless it opens a self-closing element (in which case the element is ignored)
            if ifglossing == true -- the following checks for the output of {{ggl}}:
                and mw.ustring.find(chunk, conf.class.GlossAbbr, 1, true) -- it's important that the "find" function uses literal strings and not patterns
            then
                ifglossing = false
            end
            tags_found = tags_found + 1
        end
        buffer = buffer .. chunk
        next_step = parse(cline, j+1, tags_found, ifglossing)

    else -- No HTML tags, so we only need to find where the word ends
        local k
        _, k, chunk = mw.ustring.find(cline.whole, "(..-)([ <[])", i)
        if k then -- ordinary text; k is the position of the character that stopped the match
            if ifglossing == true then
                buffer = buffer .. find_gloss(chunk)
            else
                if cline.tone_sup then chunk = tone_sup(chunk) end
                buffer = buffer .. chunk
            end
            next_step = parse(cline, k, tags_found, ifglossing)
        else -- reached end of string
            chunk = mw.ustring.sub(cline.whole, i)
            if ifglossing == true then
                chunk = find_gloss(chunk)
            elseif cline.tone_sup then
                chunk = tone_sup(chunk)
            end
            buffer = buffer .. chunk
            next_step = cline.length
        end
    end

    return next_step
end

---------------------

-- HTML stuff

---------------------

-- Builds the HTML of the whole interlinear display.
--   args            - the template arguments (fields read here: rtl, box,
--                     number, indent, top, spacing, bottom)
--   number_of_words - number of word columns to produce
--   line            - array of line objects (fields read here: whole, attr,
--                     class, words, c) plus the flag line.hasComments
-- Returns the mw.html object of the wrapping <div>. The free translation and
-- the collected error messages are read from the file-level
-- `free_translation` and `msg`.
local function build_interlinear_html(args, number_of_words, line)

    local interlinear_wrapper = mw.html.create("div")
    interlinear_wrapper:addClass("interlinear")

    -- right-to-left script
    if yesno(args.rtl) == true then
        interlinear_wrapper:addClass("right_to_left")
    end

    -- box
    if yesno(args.box) == true then
        interlinear_wrapper:addClass("box")
    end

    -- numbering and/or indent in the left margin
    local number, indent = nil, nil
    if args.number and args.number ~= "" then
        number = args.number
    end
    if args.indent and args.indent ~= "" then
        indent = args.indent
    end
    if indent or number then
        if not indent then indent = "4" end -- default value
        interlinear_wrapper:css("margin-left", indent .. 'em')
        if number then
            interlinear_wrapper:tag("div")
                :addClass("number")
                :wikitext(args.number)
        end
    end

    -- lines to display above the interlinear block
    if args.top and args.top ~= "" then
        interlinear_wrapper:tag("div")
            :addClass("top")
            :wikitext(args.top)
    end

    -- Producing the interlinear block
    local blocks_wrapper = interlinear_wrapper:tag("div")
        :addClass("block_wrapper")

    -- non-standard spacing
    local _spacing = tonumber(args.spacing)
    if _spacing and _spacing <= 20 then
        blocks_wrapper:css('column-gap', _spacing .. 'em')
    end

    for wi = 1, number_of_words do
        local block = blocks_wrapper:tag("div")
            :addClass("word_block")
        for i, _ in ipairs(line) do
            if line[i].whole ~= "" then -- skipping empty lines
                local word_p = block:tag("p")
                word_p:attr(line[i].attr)
                if line[i].class then
                    word_p:addClass(line[i].class)
                end
                local _text = line[i].words[wi]
                -- <p> elements without content mess up the interlinear
                -- display, so a missing or blank word is replaced with a
                -- non-breaking space
                if _text == nil or _text == "" or _text == " " then
                    _text = "&nbsp;"
                end
                word_p:wikitext(_text)
            end
        end
    end

    --- "comments", added at the end of each line
    if line.hasComments then
        local comment_block = blocks_wrapper:tag("div")
            :addClass("comment_block")
        for i, _ in ipairs(line) do
            local comment_p = comment_block:tag("p")
            if line[i].c then
                comment_p:wikitext(line[i].c)
            else
                comment_p:wikitext("&nbsp;") -- keeps the comments aligned with their lines
            end
        end
    end

    -- Add hidden lines containing the content of each line of interlinear text
    -- this is for accessibility
    for _, v in ipairs(line) do
        interlinear_wrapper:tag("p")
            :addClass("hidden_text")
            :wikitext(v.whole)
    end

    -- Free translation
    local ft_line = interlinear_wrapper:tag("p")
    if free_translation and free_translation ~= "" then
        ft_line:addClass("free_translation")
        ft_line:wikitext(free_translation)
    end
    ft_line:node(msg:print_errors()) -- for error messages

    -- bottom
    if args.bottom and args.bottom ~= "" then
        interlinear_wrapper:tag('p')
            :addClass('bottom')
            :wikitext(args.bottom)
    end

    return interlinear_wrapper
end

--------------------

-- The following is the function called by Template:Interlinear.

-- It processes the template arguments, then calls parse() to split the input lines into words

-- and it then builds the output html.

--------------------

-- Entry point of the module.
--   f - the frame object of the #invoke call
-- Reads the template arguments, turns every non-empty unnamed parameter into
-- a line object, treats the last unnamed parameter as the free translation,
-- splits the remaining lines into words with parse(), reports mismatches in
-- the number of words, and returns the resulting wikitext (the HTML of the
-- display, a tracking category where applicable, and the preview-only
-- messages).
function p.interlinearise(f)
    ---------------------
    -- Prepare arguments
    ---------------------
    frame = f
    -- Bind the message collector before anything below can report an error.
    msg = UserMessages

    local args = getArgs(frame, { -- configuration for Module:Arguments
        trim = true,
        removeBlanks = false,
        parentFirst = true,
        -- wrappers = {
        --     'Template:Interlinear', 'Template:Fs interlinear',
        --     'Template:Interlinear/sandbox', 'Template:Fs interlinear/sandbox'
        -- },
    })

    -- There is no parent frame when the module is not called from a
    -- template; fall back to the frame's own title in that case.
    local parent = frame:getParent()
    local template_name = parent and parent:getTitle() or frame:getTitle()
    if template_name == 'Template:Fs interlinear/sandbox' then
        args.italics1 = args.italics1 or "no"
        args.italics2 = args.italics2 or "yes"
        args.glossing3 = args.glossing3 or "yes"
        if args.lang and not args.lang2 then args.lang2 = args.lang .. "-Latn" end
        if args.transl and not args.transl2 then args.transl2 = args.transl end
    end

    -- if args.wordseparator and (args.wordseparator ~= "") then
    --     conf.WordSeparator = conf.WordSeparator .. args.wordseparator
    -- end

    if args.glossing then
        glossing_type = set_glossing_type(args.glossing)
    end

    -- messages will be displayed only in preview mode
    if frame:preprocess("{{REVISIONID}}") == "" then
        if not args['display-messages']
            or yesno(args['display-messages']) then
            displaying_messages = true
        end
    end

    local line = {}

    local function set_italics(n)
        if not line[n] then return end -- e.g. italics=5 when fewer lines were supplied
        line[n].class = "italics"
        line[n].tone_sup = true -- single digits are assumed to be tone markers and will hence be superscripted
        if args['tone-superscripting'] and not yesno(args['tone-superscripting']) then
            line[n].tone_sup = false
        end
    end

    local offset, last_line = 0, 0
    for j, v in ipairs(args) do -- iterates over the unnamed parameters from the template
        last_line = last_line + 1
        if is_empty(v) then
            offset = offset + 1
        else
            local i = j - offset
            line[i] = {}

            -- normalise
            if args.wordseparator and (args.wordseparator ~= "") then
                v = mw.ustring.gsub(v, "([" .. args.wordseparator .. "]+)", "%1 ")
            end
            v = mw.ustring.gsub(v, "[" .. conf.WordSeparator .. "]+", " ")
            line[i].whole = v
            line[i].length = mw.ustring.len(v)

            local _c = args["c" .. i]
            if _c and _c ~= "" then
                line.hasComments = true
                line[i].c = _c
            end

            ---prepare style arguments----
            line[i].class = ""
            local _style = args["style" .. i]
            if not _style then
                _style = ""
            else
                _style = tidyCss(_style)
            end
            -- line[i].attr holds the attributes for the <p> elements that enclose the words in line i
            line[i].attr = { style = _style }

            local _lang = args["lang" .. i]
            if _lang and #_lang > 1 then
                line[i].lang = _lang
            else
                _lang = args.lang
                if _lang and #_lang > 1 and i == 1 then -- if a lang= parameter is supplied, it's assumed to apply to line 1
                    line[i].lang = _lang
                end
            end
            line[i].attr.lang = line[i].lang

            if yesno(args["italics" .. i]) then
                set_italics(i)
            end

            local _gloss = yesno(args["glossing" .. i])
            if _gloss then
                line[i].glossing = _gloss
            end

            if yesno(args['ipa' .. i]) then
                line[i].ipa = true
            end

            -- NOTE(review): line[i].words is only filled in further down, so
            -- this condition looks like it can never hold here - confirm the
            -- intent before relying on line[i].wrapper.
            local _wrapper = args['wrapper' .. i]
            if _wrapper and line[i].words then
                line[i].wrapper = _wrapper
            end

            if yesno(args["smallcaps" .. i]) then
                line[i].class = line[i].class .. " smallcaps"
                line[i].glossing = false
            end

            local _transl = args["transl" .. i]
            if _transl and #_transl > 1 then
                _transl = mw.ustring.lower(_transl)
                local _lookup = lang_data.translit_title_table[_transl]
                if _lookup then
                    if _lang and _lookup[_lang] then
                        _transl = _lookup[_lang]
                    else
                        _transl = _lookup.default
                    end
                    if _transl then
                        line[i].attr.title = _transl
                    end
                else
                    msg:add("error", "Transliteration scheme '" .. _transl .. "' not recognised")
                end
            end

            local _class = args['class' .. i]
            if _class then
                line[i].class = line[i].class .. " " .. _class
            end
            if line[i].class == "" then
                line[i].class = nil
            end
        end -- ends the first if-statement in the loop
    end -- ends the FOR cycle

    local line_count = #line
    if line_count == 0 then
        msg:add("error", template_name .. ": no lines supplied.")
        return msg:print_errors()
    elseif line_count == 1 then
        msg:add("error", template_name .. ": only 1 line supplied.")
        return msg:print_errors()
    end

    -- from here on there are at least two lines
    local _italics = args.italics
    local n = tonumber(_italics)
    if n and n > 0 then
        set_italics(n)
    elseif not (_italics and not yesno(_italics))
        and not (args["italics1"] and not yesno(args["italics1"])) then
        set_italics(1) -- by default, the first line will get italicised, unless italics=no or italics1=no
    end

    -- the last unnamed parameter is assumed to be the free translation:
    free_translation = args[last_line]
    if not is_empty(free_translation) then
        line[line_count] = nil
    end --... and is thus excluded from interlinearising

    ---------------------
    -- Segment lines into words
    ---------------------
    -- NOTE(review): glossing is requested for every line here; the per-line
    -- `glossing` flag is only consulted inside parse() when an element closes.
    for i, _ in ipairs(line) do
        local wc, pos = 1, 1
        line[i].words = {}
        while pos <= line[i].length do
            buffer = ""
            pos = parse(line[i], pos, 0, true) + 2
            line[i].words[wc] = buffer
            wc = wc + 1
        end
    end

    ----Check for mismatches in number of words across lines----
    local number_of_words, mismatch_found = 0, false
    for i, _ in ipairs(line) do -- find the maximum number of words in any line
        local wc = #line[i].words
        if wc ~= number_of_words then
            if i ~= 1 and wc ~= 0 then
                mismatch_found = true
            end
            if wc > number_of_words then
                number_of_words = wc
            end
        end
    end

    ----Deal with mismatches---
    if mismatch_found then
        local error_text = "Mismatch in the number of words between lines: "
        for i, _ in ipairs(line) do
            local wc = #line[i].words
            error_text = error_text .. wc .. " word(s) in line " .. i .. ", "
            if wc ~= number_of_words then
                -- pad the shorter line so that every column has a cell
                for current_word = wc + 1, number_of_words do
                    line[i].words[current_word] = "&nbsp;"
                end
            end
        end
        if string.sub(error_text, -2) == ", " then
            error_text = string.sub(error_text, 1, #error_text - 2) .. " "
        end
        error_text = error_text .. help_link("mismatch")
        UserMessages:add("error", error_text)
    end

    -- Wrap in first line of {{Fs interlinear}} in {{Script}}
    --if template_name == 'Template:Fs interlinear/sandbox'
    if args.script and line[1].words then
        line[1].words = template_to_line(
            line[1].words, number_of_words,
            'Script', args.script)
        -- a second line is not guaranteed once the free translation has been removed
        if args.lang and line[2] and line[2].words then
            line[2].words = template_to_line(
                line[2].words, number_of_words,
                'transliteration', args.lang)
        end
    elseif args.lang and line[1].words then
        line[1].words = template_to_line(
            line[1].words, number_of_words,
            'lang', args.lang)
    end

    ---------------------
    -- Build the HTML
    ---------------------
    local divHtml = build_interlinear_html(args, number_of_words, line)

    -- Tracking categories; written as full category links so that they
    -- categorise the page instead of appearing as text.
    local temp_track = ""
    if last_line == 2 then
        temp_track = "[[Category:Pages with interlinear glosses using two unnamed parameters]]"
    end
    if last_line > 3 and template_name ~= 'Template:Fs interlinear' then
        temp_track = "[[Category:Pages with interlinear glosses using more than three unnamed parameters]]"
    end

    return tostring(divHtml) .. temp_track .. msg:print_warnings()
end

return p