-- Module:Doi

require ('strict');

local cfg = mw.loadData ('Module:Citation/CS1/Configuration');

local utilities = require ('Module:Citation/CS1/Utilities'); -- forward declarations for functions in Module:Citation/CS1/Utilities

utilities.set_selected_modules (cfg); -- so that functions in Utilities can see the selected cfg tables

local has_accept_as_written = utilities.has_accept_as_written; -- import functions from Module:Citation/CS1/Utilities

local is_set = utilities.is_set;

local make_wikilink = utilities.make_wikilink;

local set_message = utilities.set_message;

local substitute = utilities.substitute;

local this_page = mw.title.getCurrentTitle();									-- title object of the page being rendered; used to limit categorization to certain namespaces

--[[
Decide whether this page must be kept out of the error / maintenance categories.

A page is uncategorized when its namespace id is listed in cfg.uncategorized_namespaces or when its
page name matches any of the patterns in cfg.uncategorized_subpages.
]]
local function is_uncategorized_page (title_obj)
	if cfg.uncategorized_namespaces[title_obj.namespace] then					-- namespace id is one of the uncategorized namespace ids
		return true;
	end

	for _, name_pattern in ipairs (cfg.uncategorized_subpages) do				-- ipairs() rather than #: cfg comes from mw.loadData()
		if title_obj.text:match (name_pattern) then								-- first matching page-name pattern is enough
			return true;
		end
	end

	return false;
end

local no_cat = is_uncategorized_page (this_page) and "true" or nil;				-- "true" suppresses tracking categories; nil otherwise

--[[--------------------------< L I N K _ L A B E L _ M A K E >------------------------------------------------

adapted from Module:Citation/CS1/Identifiers

Chooses the wikilink target used for an identifier's label.

<handler> is a table with (optional) .redirect and .link fields.  The result is:
	handler.redirect – when cfg.use_identifier_redirects is enabled and handler.redirect is set
	handler.link – in every other case

]]

local function link_label_make (handler)
	local redirect = handler.redirect;

	if cfg.use_identifier_redirects and is_set (redirect) and redirect then		-- redirects enabled and this handler has one
		return redirect;
	end

	return handler.link;														-- fall back to the locally specified link
end

--[[--------------------------< E X T E R N A L _ L I N K _ I D >----------------------------------------------

copied from Module:Citation/CS1/Identifiers

Formats a wiki-style external link for an identifier, preceded by a wikilinked label.

<options> is a table; the fields read here are:
	id – the identifier value; used (optionally percent-encoded) in the url and (nowiki'd) as the link text
	prefix – url text placed in front of the identifier
	suffix – optional url text placed after the identifier
	encode – identifier is PATH-encoded when this is true or nil; any other value leaves it as written
	access – optional key into cfg.presentation; when set the link is wrapped in the access-signal markup
	link, redirect – wikilink target candidates for the label (see link_label_make())
	label – display text of the label wikilink
	separator – text between label and external link; defaults to a single space

Auto-linking (options.auto_link) is not supported in this module; that field is ignored.

Returns the concatenation: label wikilink, separator, external link.

]]

local function external_link_id (options)
	local url_string = options.id;

	if options.encode == true or options.encode == nil then					-- encode unless the handler explicitly says otherwise
		url_string = mw.uri.encode (url_string, 'PATH');
	end

	local ext_link = mw.ustring.format ('[%s%s%s %s]', options.prefix, url_string, options.suffix or "", mw.text.nowiki (options.id));

	if is_set (options.access) then
		local access_t = cfg.presentation[options.access];						-- presentation data for this access level
		ext_link = substitute (cfg.presentation['ext-link-access-signal'], {access_t.class, access_t.title, ext_link});	-- add the free-to-read / paywall lock
	end

	return table.concat ({
		make_wikilink (link_label_make (options), options.label),				-- redirect or locally specified link (in that order)
		options.separator or ' ',
		ext_link
		});
end

--[[--------------------------< D O I >------------------------------------------------------------------------

copied from Module:Citation/CS1/Identifiers

Formats a DOI and checks for DOI errors.

DOI names contain two parts: prefix and suffix separated by a forward slash.
	Prefix: directory indicator '10.' followed by a registrant code
	Suffix: character string of any length chosen by the registrant

This function checks a DOI name for: prefix/suffix.  If the DOI name contains spaces or endashes, or, if it ends
with a period or a comma, this function will emit a bad_doi error message.

DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash,
and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely
if ever used in DOI names.

https://www.doi.org/doi_handbook/2_Numbering.html -- 2.2 Syntax of a DOI name
https://www.doi.org/doi_handbook/2_Numbering.html#2.2.2 -- 2.2.2 DOI prefix

<options> is a table; the fields read here are:
	id – the DOI name to render
	access – value of |doi-access= (nil when not 'free')
	accept – truthy when the DOI used accept-as-written markup; validation is skipped and a maint message is set
	handler – the DOI identifier handler table (link, label, q, redirect, prefix, separator, encode)

Returns the rendered label + external link.  Error and maintenance messages are recorded through set_message().

|doi-broken-date= and COinS are not supported in this module, so the corresponding handling found in
Module:Citation/CS1/Identifiers is intentionally omitted here.

]]

local function doi (options)
	local id = options.id;
	local inactive = nil;														-- |doi-broken-date= not supported in this module
	local access = options.access;
	local ignore_invalid = options.accept;
	local handler = options.handler;
	local err_flag;

	-- check those few registrants that are mixed; free-to-read content is identifiable by how the doi suffix begins
	local function is_extended_free (registrant_code, doi_suffix)
		local incipits_t = cfg.extended_registrants_t[registrant_code];
		if incipits_t then														-- this registrant has known free-to-read extensions
			for _, incipit in ipairs (incipits_t) do							-- loop through the registrant's incipits
				if mw.ustring.find (doi_suffix, '^' .. incipit) then			-- suffix begins with this incipit
					return true;
				end
			end
		end
	end

	local registrant, suffix = mw.ustring.match (id, '^10%.([^/]+)/([^%s–]-[^%.,])$');	-- registrant and suffix set when DOI has the proper basic form

	local registrant_err_patterns = {											-- these patterns are for code ranges that are not supported
		'^[^1-3]%d%d%d%d%.%d+$',												-- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999
		'^[^1-7]%d%d%d%d$',														-- 5 digits without subcode (0xxxx, 80000+); accepts: 10000–79999
		'^[^1-9]%d%d%d%.%d+$',													-- 4 digits with subcode (0xxx); accepts: 1000–9999
		'^[^1-9]%d%d%d$',														-- 4 digits without subcode (0xxx); accepts: 1000–9999
		'^%d%d%d%d%d%d+',														-- 6 or more digits
		'^%d%d?%d?$',															-- less than 4 digits without subcode (3 digits with subcode is legitimate)
		'^%d%d?%.[%d%.]+',														-- 1 or 2 digits with subcode
		'^5555$',																-- test registrant will never resolve
		'[^%d%.]',																-- any character that isn't a digit or a dot
		}

	if ignore_invalid then														-- accept-as-written: no validation, just note it
		set_message ('maint_doi_ignore');
	elseif not registrant then
		err_flag = set_message ('err_bad_doi');									-- invalid directory or malformed
	else																		-- DOI has proper form; validate the registrant code
		for _, pattern in ipairs (registrant_err_patterns) do					-- spin through error patterns
			if registrant:match (pattern) then
				err_flag = set_message ('err_bad_doi');							-- when found, mark this DOI as bad
				break;															-- and done
			end
		end
	end

	if not err_flag and not access and (cfg.known_free_doi_registrants_t[registrant] or is_extended_free (registrant, suffix)) then	-- |doi-access=free not set and is known to be free
		set_message ('maint_doi_unflagged_free');								-- set a maint cat
	end

	local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
		prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access,
		auto_link = not (err_flag or is_set (inactive) or ignore_invalid) and 'doi' or nil	-- do not auto-link when |doi-broken-date= has a value or when there is a DOI error or (to play it safe, after all, auto-linking is not essential) when invalid DOIs are ignored
		}) .. (inactive or '');

	return text;
end

--[[--------------------------< _ M A I N >--------------------------------------------------------------------

entry point from another module

<args_t> is a table of template-style arguments:
	args_t[1] – digital object identifier; takes precedence over args_t.id; may use accept-as-written ((..)) markup
	args_t.id – digital object identifier; yields to args_t[1]
	args_t['doi-access'] – only the value 'free' is honored; anything else is treated as unset

Returns the rendered DOI.  When an error message was recorded, the first one is appended together with the error
category; otherwise, when a maintenance message was recorded, the first one is appended together with its category.
Categories are omitted when no_cat is set for the current page.

NOTE(review): utilities.z appears to be state held by Module:Citation/CS1/Utilities; confirm that a second call to
_main() in the same invocation cannot see messages left behind by the first.

]]

local function _main (args_t)
	local id, accept = has_accept_as_written (args_t[1] or args_t.id);			-- strip accept-as-written markup if present
	local empty_flag;

	if not id then																-- in case no identifier was supplied
		id = '';																-- set to empty string
		empty_flag = true;														-- and set a flag
	end

--	local inactive = args_t['doi-broken-date'];									-- |doi-broken-date= not currently supported in this module

	local access = args_t['doi-access'];										-- |doi-access=
	if 'free' ~= access then													-- 'free' is the only supported value
		access = nil;															-- something other than 'free' so unset
	end

	local handler = cfg.id_handlers.DOI;										-- handler specific to |doi=

	local rendered_doi = doi ({id=id, access=access, handler=handler, accept=accept});	-- go render the doi

	if utilities.z.error_msgs_t[1] then											-- only one error message considered
		local msg = utilities.z.error_msgs_t[1]:gsub ('Help:CS1 errors#bad_doi', 'Template:doi');	-- point the help link at this template's documentation
		if empty_flag then														-- if no identifier was supplied
			rendered_doi = rendered_doi:match ('^[^:]+:');						-- keep only the linked label from the rendering
		end

		rendered_doi = rendered_doi .. ' ' .. msg .. (no_cat and '' or '[[Category:Pages with DOI errors]]');	-- category must be a wikilink to categorize the page; limited to certain namespaces

	elseif utilities.z.maint_cats_t[1] then										-- only one maint message considered per rendering
		local msg = utilities.z.maint_cats_t[1]:gsub ('CS1 maint: ', '');		-- strip cs1-specific prefix from cat name

		rendered_doi = table.concat ({											-- assemble maint message with category
			rendered_doi,
			' ',
			no_cat and '' or substitute (cfg.messages['cat wikilink'], msg),	-- the category link; limited to certain namespaces
			substitute (cfg.presentation['hidden-maint'], msg),					-- the maint message text
			' (',																-- and the help link
			substitute (cfg.messages[':cat wikilink'], msg),					-- links to the maint cat, just as cs1|2 links to its maint cats
			')'
			});
	end

	return rendered_doi;
end

--[[--------------------------< M A I N >----------------------------------------------------------------------

entry point from an #invoke; implements {{doi}}

	{{#invoke:doi|main}}

accepted parameters are:
	{{{1}}} – digital object identifier; takes precedence over |id=; may use accept-as-written ((..)) markup
	|id= – digital object identifier; yields to {{{1}}}; may use accept-as-written ((..)) markup
	|doi-access= – accepts one value: 'free'

Returns the cs1|2 TemplateStyles tag followed by the rendering produced by _main().

]]

local function main (frame)
	local get_args = require ('Module:Arguments').getArgs;
	local args_t = get_args (frame);											-- collect the arguments for this call

	local styles_tag = frame:extensionTag ('templatestyles', '', {src='Module:Citation/CS1/styles.css'});	-- styling shared with cs1|2
	local rendering = _main (args_t);

	return styles_tag .. rendering;
end

--[[--------------------------< E X P O R T S >----------------------------------------------------------------

The functions made available to callers of this module.

]]

local exports_t = {
	_main = _main,																-- entry point from another module
	main = main,																-- entry point for an #invoke (template call)
	};

return exports_t;