-- Module:Cite IUCN

require('strict');

local getArgs = require ('Module:Arguments').getArgs;

-- Lua pattern for the "(amended version of YYYY assessment)" annotation (with any leading whitespace) that IUCN
-- appends to a title; captures the four-digit year.
local amendment_pattern = '%s*%(amended version of (%d%d%d%d) assessment%)';

-- Lua pattern for the "(errata version published in YYYY)" annotation (with any leading whitespace);
-- captures the four-digit year.
local errata_pattern = '%s*%(errata version published in (%d%d%d%d)%)';

-- Lua pattern for the "(Green Status assessment)" annotation (with any leading whitespace);
-- captures the text between the parentheses.
local green_status_pattern = '%s*%((Green Status assessment)%)';

--[[--------------------------< I U C N _ I D E N T I F I E R S _ G E T >--------------------------------------

cs1|2 templates cite single sources; when the identifiers in |doi=, |page=, and |url= differ from each other
the template is attempting to cite multiple sources.  This function extracts the taxon and assessment
identifiers from each of those parameters (when present), checks that each is properly formed and, only when
all of them are properly formed, checks that they agree with each other.

<args> is the table of template parameters; <error_msgs_t> is a sequence to which error messages are appended:
	'malformed |doi= identifier', 'malformed |page= identifier', 'malformed |url= identifier'
	'|doi= / |page= mismatch', '|doi= / |url= mismatch', '|page= / |url= mismatch'

Taxon identifiers must always agree.  Assessment identifiers must agree unless |errata= is set.

|url= is inspected only when it is a 'new-form' url (https://www.iucnredlist.org/species/...).

returns four values: doi taxon ID, doi assessment ID, page taxon ID, page assessment ID.  Each pair is nil when
its parameter is absent or when the identifier could not be extracted from it.  (A |doi= whose only fault is an
unrecognized language tag is reported as malformed but its identifiers are still returned.)

]]

local function iucn_identifiers_get (args, error_msgs_t)
	local doi_taxon_ID, doi_assesment_ID
	local page_taxon_ID, page_assesment_ID
	local url_taxon_ID, url_assesment_ID

	-- the caller may already have recorded unrelated errors; remember how many so that only errors
	-- added here decide whether the identifiers are fit to be compared
	local errors_on_entry = #error_msgs_t

	if args.doi then
		local lang_tag
		doi_taxon_ID, doi_assesment_ID, lang_tag = args.doi:match ('[Tt](%d+)[Aa](%d+)%.(%l%l)$')
		if not doi_taxon_ID or not ({['en'] = true, ['es'] = true, ['fr'] = true, ['pt'] = true})[lang_tag] then
			table.insert (error_msgs_t, 'malformed |doi= identifier');
		end
	end

	if args.page then
		page_taxon_ID, page_assesment_ID = args.page:match ('^[eE]%.[Tt](%d+)[Aa](%d+)$')
		if not page_taxon_ID then
			table.insert (error_msgs_t, 'malformed |page= identifier');
		end
	end

	if args.url then
		-- must be a 'new-form' url; plain-text search so that the dots are literal dots
		if args.url:find ('https://www.iucnredlist.org/species/', 1, true) then
			url_taxon_ID, url_assesment_ID = args.url:match ('/species/(%d+)/(%d+)')
			if not url_taxon_ID then
				table.insert (error_msgs_t, 'malformed |url= identifier');
			end
		end
	end

	if #error_msgs_t == errors_on_entry then							-- compare only when every identifier present is well formed
		if doi_taxon_ID and page_taxon_ID then
			if (doi_taxon_ID ~= page_taxon_ID or ((doi_assesment_ID ~= page_assesment_ID) and not args.errata)) then
				table.insert (error_msgs_t, '|doi= / |page= mismatch');
			end
		end

		if doi_taxon_ID and url_taxon_ID then
			if (doi_taxon_ID ~= url_taxon_ID or ((doi_assesment_ID ~= url_assesment_ID) and not args.errata)) then
				table.insert (error_msgs_t, '|doi= / |url= mismatch');
			end
		end

		if page_taxon_ID and url_taxon_ID then
			if (page_taxon_ID ~= url_taxon_ID or ((page_assesment_ID ~= url_assesment_ID) and not args.errata)) then
				table.insert (error_msgs_t, '|page= / |url= mismatch');
			end
		end
	end

	return doi_taxon_ID, doi_assesment_ID, page_taxon_ID, page_assesment_ID;
end

--[[--------------------------< I U C N _ V O L U M E _ C H E C K >--------------------------------------------

cross-checks the three places where a volume (year) can appear: |volume=, |date= (or |year= when |date= is
not set), and the four-digit year that follows 'IUCN.UK.' in |doi=.  Only values that are present are compared.

appends one maintenance message to <maint_msgs_t> for each pair that disagrees:
	'|volume= / |date= mismatch'
	'|volume= / |doi= mismatch'		(not reported when |amends= is set)
	'|date= / |doi= mismatch'		(not reported when |amends= is set)

returns nothing

]]

local function iucn_volume_check (args, maint_msgs_t)
	local volume = args.volume;
	local year = args.date or args.year;
	local doi_year = args.doi and args.doi:match ('[Ii][Uu][Cc][Nn]%.[Uu][Kk]%.(%d%d%d%d)');

	local function differ (left, right)								-- truthy only when both values are present and unequal
		return left and right and (left ~= right);
	end

	if differ (volume, year) then
		table.insert (maint_msgs_t, '|volume= / |date= mismatch');
	end

	if not args.amends then											-- doi year is not compared for amended assessments
		if differ (volume, doi_year) then
			table.insert (maint_msgs_t, '|volume= / |doi= mismatch');
		end

		if differ (year, doi_year) then
			table.insert (maint_msgs_t, '|date= / |doi= mismatch');
		end
	end
end

--[[--------------------------< C I T E >----------------------------------------------------------------------

Wraps {{cite journal}}: takes {{cite journal}} parameters, validates the IUCN-specific ones, and creates or
replaces the url using the electronic page number.

	|page= should be in the format e.T13922A45199653
	the number between T and A is the taxon ID; the number after A is the assessment ID
	the target url is https://www.iucnredlist.org/species/13922/45199653

In order, this function:
	1. flags a |title= that still carries errata / amendment annotation text
	2. extracts and cross-checks the identifiers in |doi=, |page=, |url= (iucn_identifiers_get())
	3. classifies |url= as old-form (/details/), new-form (/species/) or unknown; an old-form url is replaced
	   when identifiers are available, otherwise its scheme is switched from http: to https:
	4. creates the url from the identifiers when there is no usable |url=
	5. supplies a default |journal=, checks and defaults |volume=, and converts |errata= / |amends= to |orig-date=
	6. sets |doi-access=free for a doi of the expected form
	7. renders {{cite journal}} followed by error / maintenance messages and categories (categories in
	   namespace 0 only)

NOTE(review): several string literals appended to <out_t> below are empty or look like bare text (for example
category names without link markup).  This copy of the module appears to have lost wiki / HTML markup; the
strings are reproduced as found and should be confirmed against the deployed module.

usage: {{#invoke:iucn|cite}}
template: {{Template:Cite iucn}}

]]

local function cite (frame)
	local error_msgs_t = {};										-- holds error messages for rendering
	local maint_msgs_t = {};										-- holds hidden maint messages for rendering
	local namespace = mw.title.getCurrentTitle().namespace;			-- used for categorization

	local args = getArgs (frame);									-- local copy of template arguments

	if args.title and (args.title:match (errata_pattern) or args.title:match (amendment_pattern)) then
		table.insert (error_msgs_t, 'title has extraneous text');	-- announce that this template has errata or amendment text in |title=
	end

	-- |doi=, |page=, and |url= contain the same identifying info in slightly different forms; when any
	-- combination of these is present they must agree; problems are added to error_msgs_t
	local doi_taxon_ID, doi_assesment_ID, page_taxon_ID, page_assesment_ID = iucn_identifiers_get (args, error_msgs_t);

	args.id = nil													-- unset; not supported

	local url_taxon_ID = page_taxon_ID or doi_taxon_ID;				-- select for use in url that we will create
	local url_assesment_ID = page_assesment_ID or doi_assesment_ID;	-- taxon and assessment IDs are extracted together so both come from the same parameter

	local url = args.url;

	if url then
		if url:find ('iucnredlist.org/details/', 1, true) then		-- old-form url
			if url_taxon_ID then									-- when there is an identifier
				url = nil											-- unset; we'll create new url below
			else													-- here when old-form but no identifier that we can use to create new url
				args.url = args.url:gsub ("http:", "https:")		-- sometimes works with redirect on iucn site
			end
			table.insert (maint_msgs_t, 'old-form url')				-- announce that this template has an old-form url
		elseif url:find ('iucnredlist.org/species/', 1, true) then	-- new-form url
			-- table.insert (maint_msgs_t, 'new-form url')			--TODO: restore this line when most new-form urls have been removed from article space -- announce that this template has an new-form url
		else
			table.insert (error_msgs_t, 'unknown url')				-- emit error message
		end
	end

	if not url then													-- when no url or unset old-form url
		if url_taxon_ID then
			args.url = "https://www.iucnredlist.org/species/" .. url_taxon_ID .. '/' .. url_assesment_ID
		else
			table.insert (error_msgs_t, 'no identifier')			-- emit error message
		end
	end

	-- add journal if not provided (TODO decide if this should override provided value)
	if not args['journal'] and not args['work'] then
		args['journal'] = "IUCN Red List of Threatened Species"
	end

	iucn_volume_check (args, maint_msgs_t);							-- |volume=, |year= (|date=), |doi= must all refer to the same volume

	if not args.volume and (args.year or args.date) then
		args.volume = args.year or args.date
	end

	if args.errata then
		local assessment_year = args.year or args.date or args.volume;
		if assessment_year then										-- |errata= alone: nothing to name in |orig-date=, and concatenating nil would raise an error
			args['orig-date'] = 'errata version of ' .. assessment_year .. ' assessment';
		end
		args.date = args.errata;									-- update publication date to errata year
		args.year = nil;											-- unset these as no longer needed
		args.errata = nil;
	elseif args.amends then
		args['orig-date'] = 'amended version of ' .. args.amends .. ' assessment';
		args.amends = nil;											-- unset as no longer needed
	end

	-- add free-to-read icon to mark a correctly formed doi
	args['doi-access'] = args.doi and args.doi:match ('10%.2305/[Ii][Uu][Cc][Nn].+[Tt]%d+[Aa]%d+%.%a%a') and 'free' or nil

	local out_t = {};												-- error and maint messages and categories; appended to the rendered citation

	if error_msgs_t[1] then
		table.insert (out_t, ' {{cite iucn}}: error: ');
		table.insert (out_t, table.concat (error_msgs_t, ', '));
		table.insert (out_t, ' (help)');
		if (0 == namespace) then
			table.insert (out_t, 'Category:cite IUCN errors');
		end
		table.insert (out_t, '');
	end

	if maint_msgs_t[1] then
		-- the string literal on this line was unterminated in this copy, which prevents the module from
		-- loading; closed here as an empty string to mirror the closing insert at the end of this branch
		table.insert (out_t, '');
		if not error_msgs_t[1] then									-- maint messages are emitted only when there are no error messages
			table.insert (out_t, '{{cite iucn}}: ')
			table.insert (out_t, table.concat (maint_msgs_t, ', '));
			table.insert (out_t, ' (help)');
			if (0 == namespace) then
				table.insert (out_t, 'Category:cite IUCN maint');
			end
		end
		table.insert (out_t, '');
	end

	if (not args['doi-access']) and (0 == namespace) then
		table.insert (out_t, 'Category:cite IUCN without doi');
	end

	return frame:expandTemplate{ title = 'cite journal', args = args } ..	-- the template
		table.concat (out_t);										-- error and maint messages and categories
end

--[=[-------------------------< E T _ A L _ P A T T E R N S >--------------------------------------------------

Adapted from Module:Citation/CS1/Configuration.

Sequence of Lua patterns that recognize the phrase "et al" and its variants in a name.  The first two patterns
are anchored to the end of the name ('$'); the third ("and others") is not anchored.

]=]

local et_al_patterns = {

"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][%.;,\"']*$", -- variations on the 'et al' theme

"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][Ii][AaIi][Ee]?[%.;,\"']*$", -- variations on the 'et alia', 'et alii' and 'et aliae' themes (false positive 'et aliie' unlikely to match)

"[;,]? *%f[%a]and [Oo]thers", -- an alternative to et al.

}

--[[---------------------< N A M E _ H A S _ E T A L >--------------------------

Adapted from Module:Citation/CS1.

Tests <name> against each pattern in et_al_patterns after removing anything enclosed in <...> (html-like
markup).

returns true when <name> contains some variation of 'et al'; nil otherwise (including when <name> is nil)

]]

local function name_has_etal (name)
	if not name then												-- nothing to test
		return;
	end

	local plain_name = name:gsub ('%b<>', '');						-- strip markup; only the first gsub() result is kept

	for i = 1, #et_al_patterns do
		if plain_name:find (et_al_patterns[i]) then					-- some form of 'et al' found
			return true;
		end
	end
end

--[[--------------------------< A U T H O R _ N A M E S _ G E T >----------------------------------------------

creates a list of individual |authorn= parameters from the list of names at the start of the raw iucn citation
(everything before the first whitespace + four-digit year).  Names must have the form:
	Surname, I. (more than one 'I.' pair allowed but no spaces between I. pairs)

assumes that parenthetical text at the end of the author-name-list is a collaboration:
	Name, I.I., & Name, I.I. (Collaboration name)

assumes that et al. is the last name in a list of names; processing stops at the first name that has it.

<params_t> (optional) is a table that receives the same information as parameter/value pairs:
author1, author2, ..., display-authors, collaboration (EXPERIMENT).

returns a single string of space-separated parameters, for example:
	|author=Smith, J. |author2=Jones, A.B. |collaboration=Some Group
returns the empty string when no author-name-list can be found in <raw_iucn_cite>

]]

local function author_names_get (raw_iucn_cite, params_t)
	params_t = params_t or {};										-- tolerate callers that do not want the parameter table

	local author_names = raw_iucn_cite:match ('^([^%d]-)%s+%d%d%d%d');	-- extract author name-list from raw iucn citation
	if not author_names then
		return '';													-- no recognizable name-list; contribute no author parameters
	end

	local collaboration = author_names:match ('%s*(%b())$');		-- get collaboration name if it exists
	if collaboration then											-- when there is a collaboration
		collaboration = collaboration:gsub ('[%(%)]', '');			-- remove bounding parentheses
		author_names = author_names:gsub ('%s*(%b())$', '');		-- and remove collaboration from author-name-list
	end

	-- normalize every name separator to '.|' so that the list can be split on the pipe
	local names = author_names:gsub ('%.?,?%s+&%s+', '.|');			-- ampersand separator, with optional preceding dot and/or comma
	names = names:gsub ('%.,%s*', '.|');							-- dot-comma separator, with optional following whitespace
	names = names:gsub ('(%.%u),', '%1.|');							-- special case for when last initial is missing its trailing dot

	local list = mw.text.split (names, '|');						-- split the string on the pipes into entries in list{}

	if 0 == #list then												-- no 'names' of the proper form; return the original as a single |author= parameter
		params_t['author'] = author_names;
		-- return exactly one value: the caller passes the result straight to table.insert(), where a
		-- second return value would be taken for the value to insert and raise a bad-argument error
		return table.concat ({'|author=', author_names});
	end

	for i, name in ipairs (list) do									-- spin through the list and
		if name_has_etal (name) then								-- if this name has some form of 'et al'
			params_t['display-authors'] = 'etal';
			list[i] = '|display-authors=etal';						-- add |display-authors=etal parameter and
			break;													-- assume that the etal was the last 'name' so stop processing names
		else
			params_t['author' .. i] = name;
			list[i] = table.concat ({'|author', (i == 1) and '' or i, '=', name});	-- add |authorn= parameter names; create |author= instead of |author1=
		end
	end

	if collaboration then
		params_t['collaboration'] = collaboration;
		table.insert (list, table.concat ({'|collaboration', '=', collaboration}));	-- add |collaboration= parameter
	end

	return table.concat (list, ' ');								-- make a big string and return that
end

--[[--------------------------< T I T L E _ G E T >------------------------------------------------------------

extracts the citation title from the raw iucn citation: the text between the first 'YYYY.' and
'. The IUCN Red List of Threatened Species'.  The title is then rewritten by the first entry in the pattern
table that matches it.  Recognized title forms:
	binomen
	binomen ssp. subspecies
	binomen subsp. subspecies
	binomen var. variety
	binomen subvar. subvariety
all of the above may have trailing amended or errata text in parentheses

NOTE(review): the original comments say that this function gets the italics right, but the replacement
strings in this copy contain no italic markup; as written they only normalize the whitespace between the
parts.  Confirm against the deployed module.

TODO: are there other title forms?

returns the formatted title; the empty string when no title can be found in <raw_iucn_cite>

]]

local function title_get (raw_iucn_cite)
	local title = raw_iucn_cite:match ('%d%d%d%d%.%s+(.-)%s*%. The IUCN Red List of Threatened Species');

	if not title then
		return '';													-- not an iucn-format citation; avoid indexing nil below
	end

	local patterns = {												-- sequence of {string.match pattern, string.gsub replacement}; most specific first
		{'(.-)%sssp%.%s+(.-)%s(%b())$', "%1 ssp. %2 %3"},			-- binomen ssp. subspecies (zoology) with errata or amended text
		{'(.-)%sssp%.%s+(.+)', "%1 ssp. %2"},						-- binomen ssp. subspecies (zoology)
		{'(.-)%ssubsp%.%s+(.-)%s(%b())$', "%1 subsp. %2 %3"},		-- binomen subsp. subspecies (botany) with errata or amended text
		{'(.-)%ssubsp%.%s+(.+)', "%1 subsp. %2"},					-- binomen subsp. subspecies (botany)
		{'(.-)%svar%.%s+(.-)%s+(%b())$', "%1 var. %2 %3"},			-- binomen var. variety (botany) with errata or amended text
		{'(.-)%svar%.%s+(.+)', "%1 var. %2"},						-- binomen var. variety (botany)
		{'(.-)%ssubvar%.%s+(.-)%s(%b())$', "%1 subvar. %2 %3"},		-- binomen subvar. subvariety (botany) with errata or amended text
		{'(.-)%ssubvar%.%s+(.+)', "%1 subvar. %2"},					-- binomen subvar. subvariety (botany)
		{'(.-)%s*(%b())$', "%1 %2"},								-- binomen with errata or amended text
		{'(.+)', "%1"},												-- binomen
	}

	for _, v in ipairs (patterns) do								-- spin through the patterns
		local formatted, count = title:gsub (v[1], v[2]);
		if 0 < count then											-- when this pattern matched
			title = formatted;										-- keep the rewritten title
			break;													-- and done; only the first matching pattern is applied
		end
	end

	return title;													-- return the formatted title
end

--[[--------------------------< M A K E _ C I T E _ I U C N >--------------------------------------------------

parses apart an iucn-format citation copied from their webpage and reformats that into a {{cite iucn}} template
for substing

automatic substing by User:AnomieBOT/docs/TemplateSubster

parameters (from the frame, via getArgs()):
	{{{1}}}: the raw iucn citation text; required – a Lua error with an explanatory message is raised without it
	{{{2}}}: when set, selects the first of the three return statements below
	|ref=: when set (and {{{2}}} is not), selects the second return statement

The raw citation is expected to supply: author name-list, year, title (with optional Green Status, errata, or
amendment annotation), volume and electronic page ('YYYY: e.T<taxon>A<assessment>.'), doi, and an
'Accessed on <date>' (formerly 'Downloaded on <date>') suffix.  Pieces that cannot be found are emitted as
empty parameters.

<params_t> collects the same parameter/value pairs as the assembled template text (EXPERIMENT); it is filled
in here and by author_names_get() but is not otherwise used by this function.

NOTE(review): the three return statements wrap the assembled template in strings that are empty in this copy
of the module, so all three currently produce the same output.  The nearby comments mention nowiki and ref
tags; markup appears to have been lost.  The strings are reproduced as found – confirm against the deployed
module.

returns the preprocessed {{cite iucn}} template text

]]

local function make_cite_iucn (frame)
	local args_t = getArgs (frame);
	local raw_iucn_cite = args_t[1];

	if not raw_iucn_cite then										-- nothing to parse; fail with a message that names the problem
		error ('make_cite_iucn: no IUCN citation text supplied in positional parameter 1', 0);
	end

	local template_t = {'{{cite iucn '};							-- sequence that holds the {{cite iucn}} template as it is being assembled; for nowiki'd output
	local params_t = {};											-- table of parameter/value pairs for substing

	local year, volume, page, doi, accessdate;

	year = raw_iucn_cite:match ('^%D+(%d%d%d%d)');
	volume, page = raw_iucn_cite:match ('(%d%d%d%d):%s+(e%.T%d+A+%d+)%.%s?');
	doi = raw_iucn_cite:match ('10%.2305/IUCN%.UK%.[%d%-]+%.RLTS%.T%d+A%d+%.%a%a');

	accessdate = raw_iucn_cite:match ('Accessed on (.-)%.?$') or raw_iucn_cite:match ('Downloaded on (.-)%.?$');	-- 'Downloaded' → 'Accessed' change occurred December 2021
	if accessdate then												-- citation may have been copied without its access-date suffix
		accessdate = accessdate:gsub ('^0', '');					-- strips leading 0 in day: 01 January 2020 -> 1 January 2020
	end

	table.insert (template_t, author_names_get (raw_iucn_cite, params_t));	-- add author name parameters; as a single string to template_t; as individual entries to params_t

	table.insert (template_t, table.concat ({' |year=', year}));	-- add formatted year
	params_t.year = year;

	local title = title_get (raw_iucn_cite);

	local type_p = title:match (green_status_pattern);				-- nil unless title has the Green Status annotation
	if type_p then
		title = title:match ('^([^%(]+)%s*%(');						-- keep only the text that precedes the first parenthesis
		table.insert (template_t, table.concat ({' |type=', type_p}));	-- add formatted type
		params_t.type = type_p;
	end

	local errata = title:match (errata_pattern);					-- nil unless IUCN citation has errata annotation; else year that this errata published (|date=)
	if errata then
		table.insert (template_t, table.concat ({' |errata=', errata}));	-- add formatted errata
		params_t.errata = errata;
		title = title:gsub (errata_pattern, '');					-- remove errata annotation
	end

	local amends = title:match (amendment_pattern);					-- nil unless IUCN citation has amendment annotation; else year that this assessment amends (|orig-date=)
	if amends then
		table.insert (template_t, table.concat ({' |amends=', amends}));	-- add year of assessment that this assessment amends
		params_t.amends = amends;
		title = title:gsub (amendment_pattern, '');					-- remove amendment annotation
	end

	table.insert (template_t, table.concat ({' |title=', title}));	-- add formatted title
	params_t.title = title;

	table.insert (template_t, table.concat ({' |volume=', volume}));	-- add formatted volume
	params_t.volume = volume;

	table.insert (template_t, table.concat ({' |page=', page}));	-- add formatted page
	params_t.page = page;

	table.insert (template_t, table.concat ({' |doi=', doi}));		-- add formatted doi
	params_t.doi = doi;

	table.insert (template_t, table.concat ({' |access-date=', accessdate}));	-- add formatted access-date
	params_t['access-date'] = accessdate;

	table.insert (template_t, '}}');								-- close the template

	if args_t[2] then												-- if anything in args_t[2], write a nowiki'd version that editors can copy
		return frame:preprocess (table.concat ({'', table.concat (template_t), ''}));	-- caveat lector: if left long enough anomiebot will subst this
	end

	if args_t['ref'] then											-- enable subst of ref tags with name
		return frame:preprocess ('' .. table.concat (template_t) .. '')
	end

	return frame:preprocess (table.concat (template_t));			-- render {{cite iucn}} template; substable
end

--[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------

the functions that may be called with {{#invoke:}}

]]

local exports_t = {};

exports_t.cite = cite;												-- renders a {{cite iucn}} citation by wrapping {{cite journal}}
exports_t.make_cite_iucn = make_cite_iucn;							-- converts raw iucn citation text to a {{cite iucn}} template

return exports_t;