-- Module:Sandbox/Innesw/PopulationFromWikidata-upgrade

---------------- Wikidata identifiers and module-level state --------------------

-- Wikidata property IDs
local Pop_P       = "P1082" -- population
local Applies_P   = "P518"  -- applies to part
local Pointin_P   = "P585"  -- point in time
local DetMeth_P   = "P459"  -- determination method
local RefURL_P    = "P854"  -- reference URL
local RefTitle_P  = "P1476" -- reference title
local RefPubIn_P  = "P1433" -- reference "published in"
local DatePub_P   = "P577"  -- date of publication
local Publisher_P = "P123"  -- publisher
local Retrieved_P = "P813"  -- retrieved (access date)
local Instof_P    = "P31"   -- instance of
local ShortN_P    = "P1813" -- short name

-- Wikidata item IDs
local CensusAU_I = "Q5058971"   -- Australian census
local SAL_I      = "Q33112019"  -- state suburb (includes SSC and SAL)
--local GCCSA_I  = "Q112762887" -- Greater Capital City Statistical Area
local LGA_I      = "Q33127844"  -- Local Government Area
local UCL_I      = "Q33127891"  -- Urban Centre or Locality
--local SA2_I    = "Q33128776"  -- SA2
--local SA3_I    = "Q118313924" -- SA3
local ILOC_I     = "Q112729549" -- Indigenous Location

-- The Wikidata entity being reported on. It is assigned by the exported p.* functions
-- and read by GetRefsForClaim (for the "edit at Wikidata" link).
local item

-- The table that collects the exported functions; it is returned at the end of the module.
local p = {}

--------------- Function LastURLSection returns the last section of a url, ie: the text after the last '/' ----------------------
-- If the url contains no '/', the whole url is returned; if it ends with '/', the result is "".
local function LastURLSection(url)
    -- "[^/]*$" is anchored at the end of the string, so it captures the longest
    -- trailing run of characters that contains no '/'.
    -- The extra parentheses make sure exactly one value is returned.
    return (string.match(url, "[^/]*$"))
end

--------------- Function SplitDoubleClaims splits any claim with multiple points-in-time into one claim per point-in-time. ----------------------
-- parameters:
-- claims: a sequence of population claims. Every claim must have a qualifiers table holding point-in-time
--         and determination-method qualifiers, and a references table (the callers filter for this).
-- The claims table is modified in place and nothing is returned: each claim with more than one point-in-time
-- is removed, and one copy per point-in-time is appended to the end of the table. In each copy:
--   * only that point-in-time is kept
--   * only the first determination method whose English label contains the year is kept
--     (if no label contains the year, the determination methods are left as they were)
--   * only the references whose reference URL contains the year are kept (possibly none)
local function SplitDoubleClaims(claims)
    local splitindexes = {} -- positions, in ascending order, of the claims that have been split
    local newclaims = {}    -- the single-point-in-time claims that replace them

    for index, claim in ipairs(claims) do
        local pits = claim.qualifiers[Pointin_P]
        if #pits > 1 then
            for _, pit in ipairs(pits) do
                local copy = mw.clone(claim) -- deep copy, so each new claim can be trimmed independently
                local year = string.sub(pit.datavalue.value.time, 2, 5) -- time looks like "+2021-08-10T00:00:00Z"

                -- point in time: keep just this one
                copy.qualifiers[Pointin_P] = { mw.clone(pit) }

                -- determination methods: keep the first one whose label mentions the year
                for _, detmet in ipairs(copy.qualifiers[DetMeth_P]) do
                    local entity = mw.wikibase.getEntity(detmet.datavalue.value.id)
                    local label = entity and entity.labels and entity.labels.en and entity.labels.en.value
                    if label and string.find(label, year, 1, true) then
                        copy.qualifiers[DetMeth_P] = { detmet }
                        break
                    end
                end
                -- (no match: the list is deliberately left untouched, so that code reading
                --  qualifiers[DetMeth_P][1] still finds a usable qualifier rather than an empty table)

                -- references: keep every one whose URL mentions the year
                local keptrefs = {}
                for _, ref in ipairs(copy.references) do
                    local urlsnaks = ref.snaks[RefURL_P]
                    if urlsnaks ~= nil and string.find(urlsnaks[1].datavalue.value, year, 1, true) then
                        keptrefs[#keptrefs + 1] = ref
                    end
                end
                copy.references = keptrefs

                newclaims[#newclaims + 1] = copy
            end
            splitindexes[#splitindexes + 1] = index
        end
    end

    -- remove the original splittable claims, highest index first: table.remove shifts the later
    -- elements down, so removing in ascending order would delete the wrong claims
    for i = #splitindexes, 1, -1 do
        table.remove(claims, splitindexes[i])
    end

    -- and add the separate ones they were split into
    for _, claim in ipairs(newclaims) do
        claims[#claims + 1] = claim
    end
end

--------------- Function IdForGeog returns the wikidata item ID for the specified geography abbreviation. ----------------------
-- Recognised abbreviations are the lower-case strings 'ucl', 'sal', 'lga' and 'iloc'.
-- Returns nil for anything else, including a blank or missing abbreviation.
local function IdForGeog(geog)
    local idForAbbreviation = {
        ucl  = UCL_I,
        sal  = SAL_I, -- includes SSC and SAL
        lga  = LGA_I,
        iloc = ILOC_I,
    }
    return idForAbbreviation[geog] -- reading a missing (or nil) key simply gives nil
end

--------------- Function GeogIdsForType returns a table of geography IDs that could be shown for the specified type. ----------------------
-- parameters:
-- placetype: the place type as used by the Infobox, in any letter case
-- returns: a sequence of geography item IDs, in the order they should be shown.
--          An empty table is returned for a missing or unrecognised type, so callers can always iterate the result.
local function GeogIdsForType(placetype)
    if type(placetype) ~= "string" then
        return {}
    end

    local idsForType = {
        town            = {UCL_I, ILOC_I, SAL_I},
        suburb          = {SAL_I},
        city            = {UCL_I},
        settlement      = {SAL_I, ILOC_I},
        locality        = {SAL_I, ILOC_I},
        townandlocality = {UCL_I, ILOC_I, SAL_I},
        townlocality    = {UCL_I, ILOC_I, SAL_I}, -- the spelling that ListForInfobox accepts as a valid type
        lga             = {LGA_I},
        region          = {LGA_I}, -- for now saying region uses LGA_I, but unclear what is the most appropriate ABS geography type. Can revise.
    }

    return idsForType[string.lower(placetype)] or {}
end

--------------- Function GetRefsForClaim to check, collate and format all the reference components ----------------------
-- parameters:
-- claim: a population claim that has applies-to-part, point-in-time and determination-method qualifiers
-- defaulttitle: the title to use for any reference that has no title of its own
-- returns: one <ref> tag per reference in the claim, concatenated; "" if the claim has no references.
-- Reads the module-level 'item' (for the Edit At Wikidata link), so an exported function must have set it first.

-- the English label of a loaded entity, or "" when the entity or its English label is missing
local function EnglishLabel(entity)
    local label = entity and entity.labels and entity.labels.en
    return (label and label.value) or ""
end

local function GetRefsForClaim(claim, defaulttitle)
    local references = claim.references
    if references == nil or next(references) == nil then
        return "" -- nothing to cite, so don't load any of the entities below
    end

    local frame = mw.getCurrentFrame()
    local lang = mw.language.getContentLanguage()

    -- values that are the same for every reference of the claim are gathered once, before the loop
    local detmet = mw.wikibase.getEntity(claim.qualifiers[DetMeth_P][1].datavalue.value.id) -- the claim determination method item
    local detmetclaims = (detmet and detmet.claims) or {}
    local instof = detmetclaims[Instof_P]
    local iscensus = instof ~= nil and instof[1].mainsnak.datavalue.value.id == CensusAU_I -- is determination method an instance of an australian census?

    local appliespart = EnglishLabel(mw.wikibase.getEntity(claim.qualifiers[Applies_P][1].datavalue.value.id)) -- the label of the item of the applies to part of the claim
    local year = string.sub(claim.qualifiers[Pointin_P][1].datavalue.value.time, 2, 5) -- the population point in time as a year string

    local refpublisher = "" -- only used by the non-census (cite web) citation
    if not iscensus and detmetclaims[Publisher_P] ~= nil then -- if determination method has a publisher, use its item's label
        refpublisher = EnglishLabel(mw.wikibase.getEntity(detmetclaims[Publisher_P][1].mainsnak.datavalue.value.id))
    end

    local wdeditpencil = frame:expandTemplate{title = 'EditAtWikidata', args = {qid = item.id, pid = claim.id, nbsp = 1}} -- the Edit At Wikidata icon & link

    local refs = {}
    for r, ref in ipairs(references) do -- each reference in the wikidata will produce a citation reference for the claim
        local snaks = ref.snaks

        local refurl = ""
        if snaks[RefURL_P] ~= nil then -- if reference has a reference url, use it
            refurl = snaks[RefURL_P][1].datavalue.value
        end

        local reftitle = defaulttitle
        if snaks[RefTitle_P] ~= nil then -- if reference has a title, use it
            reftitle = snaks[RefTitle_P][1].datavalue.value.text
        end

        local pubinlabel = ""
        if snaks[RefPubIn_P] ~= nil then -- if reference has a published in (it should for all references), use its item's label
            pubinlabel = EnglishLabel(mw.wikibase.getEntity(snaks[RefPubIn_P][1].datavalue.value.id))
        end

        local refaccessdate = ""
        if snaks[Retrieved_P] ~= nil then -- if reference has an access date, use it
            refaccessdate = lang:formatDate('j F Y', snaks[Retrieved_P][1].datavalue.value.time)
        end

        local refwork, reference
        if iscensus then
            refwork = EnglishLabel(detmet) .. " " .. pubinlabel -- reference work is determination method label + published in
            -- the reference is built using the specific template for the census year, which ensures the link format is correct
            local geogid = LastURLSection(refurl) -- the id for the specific ABS reference is easiest to get from the ref URL. It may be the only place it is available.
            reference = frame:expandTemplate{title = 'Census_' .. year .. '_AUS', args = {id = geogid, name = reftitle .. " (" .. appliespart .. ")", ["access-date"] = refaccessdate, quick = 'on'}}
        else
            refwork = pubinlabel
            local pubdate = ""
            if snaks[DatePub_P] ~= nil then -- if reference has a date published, use it
                pubdate = lang:formatDate('j F Y', snaks[DatePub_P][1].datavalue.value.time)
            end
            -- use the provided reference url, and whatever other citation data is available
            reference = frame:expandTemplate{title = 'cite web', args = {
                url = refurl,
                title = reftitle .. " (" .. appliespart .. ")",
                date = pubdate,
                work = refwork,
                -- author is used to match existing population references.
                -- NOTE(review): the source wrapped the publisher in two empty strings, which may be
                -- wiki markup lost from this copy of the module - confirm against the live module.
                author = refpublisher,
                accessdate = refaccessdate
            }}
        end
        reference = reference .. wdeditpencil

        -- The name of the citation reference will be the same for each wikidata claim reference.
        -- This will allow references to the same data to be combined into a single citation reference.
        local refname = refwork .. "_" .. year .. "_" .. appliespart .. "_" .. reftitle
        if r > 1 then -- 2nd and later references in the same wikidata claim have their number appended, to keep them unique
            refname = refname .. "_" .. r
        end

        refs[#refs + 1] = frame:extensionTag{name = 'ref', content = reference, args = {name = refname}}
    end

    return table.concat(refs)
end

--------------- Function GetAbbrLabel gets the population geography abbreviation ---------------
-- parameters:
-- returnclaim: a claim with an applies-to-part qualifier
-- returns: the short name of the applies-to-part item wrapped in the Abbr template (the full English label is
--          its second argument), or just the full English label when the item has no short name.
-- NOTE(review): the empty strings concatenated around the result look like markup lost from this copy of
-- the module - confirm against the live module.
local function GetAbbrLabel(returnclaim)
    local geogitem = mw.wikibase.getEntity(returnclaim.qualifiers[Applies_P][1].datavalue.value.id) -- the applies to part item
    local fullname = geogitem.labels.en.value
    local shortnames = geogitem.claims[ShortN_P]

    if shortnames == nil then
        -- no short name in Wikidata: fall back to the full label
        return '' .. fullname .. ''
    end

    local abbr = mw.getCurrentFrame():expandTemplate{title = 'Abbr', args = {shortnames[1].mainsnak.datavalue.value.text, fullname}}
    return '' .. abbr .. ''
end

--------------- Function GetYearLink gets the Wikipedia article link for the population year ---------------
-- parameters:
-- returnclaim: a claim with point-in-time and determination-method qualifiers
-- returns: the year of the claim's point in time, as wikitext:
--   * linked to the determination method's enwiki article, if it has one
--   * otherwise, for an australian census, linked to the year's section of the Census in Australia article
--   * otherwise the plain year
-- NOTE(review): in the reviewed copy both branches produced the bare year (the wikilink markup had been
-- stripped from the string literals). The link targets below are rebuilt from the accompanying comments;
-- confirm them, in particular the '#year' section anchor, against the live module.
local function GetYearLink(returnclaim)
    local year = string.sub(returnclaim.qualifiers[Pointin_P][1].datavalue.value.time, 2, 5) -- the population point in time as a year string

    local detmetitem = mw.wikibase.getEntity(returnclaim.qualifiers[DetMeth_P][1].datavalue.value.id) -- the claim determination method item
    if detmetitem == nil then
        return year
    end

    local enwiki = detmetitem.sitelinks and detmetitem.sitelinks.enwiki
    if enwiki ~= nil and enwiki.title ~= nil then
        -- the determination method has its own enwiki article: link the year to it
        return "[[" .. enwiki.title .. "|" .. year .. "]]"
    end

    local instof = detmetitem.claims and detmetitem.claims[Instof_P]
    if instof ~= nil and instof[1].mainsnak.datavalue.value.id == CensusAU_I then
        -- an australian census without its own article: link to the section of the general article
        -- (assumes the article has a section named after the census year - TODO confirm)
        return "[[Census in Australia#" .. year .. "|" .. year .. "]]"
    end

    return year -- no article describes the determination method
end

---------------- Function HistoricPopulations returns a wikitable of all census population values for all geography types, or a specified one ---------------
-- parameters:
-- required (unless geog is given): type= the type value as for the Infobox
-- optional: wikidata= the wikidata item to be used instead of the one in the current page
-- optional: geog= a single geography type to return pop values for. Valid are 'ucl', 'sal', 'lga', 'iloc'. If left blank, all geographies for the type will be returned.
-- returns: wikitext for a table with one row per census year and one column per geography that has data.
--          In the article namespace the tracking category is appended.
--          "" is returned when the item has no usable census population claims.
-- NOTE(review): the table delimiters and the category link were missing from the reviewed copy of this
-- function (their markup had been stripped, leaving invalid Lua); they have been restored.
function p.HistoricPopulations( frame )
    local args = frame.args

    if args.wikidata ~= nil and args.wikidata ~= "" then -- if there's a Wikidata item specified, use it
        item = mw.wikibase.getEntity(args.wikidata)
    else
        item = mw.wikibase.getEntity() -- if there's a Wikidata item connected to the article it will find it here.
    end

    -- if there are no population claims in the item, return an empty string
    if not (item and item.claims and item.claims[Pop_P]) then
        return ""
    end

    -- Find claims with:
    -- (1) point in time is not nil
    -- (2) applies to part is not nil
    -- (3) determination method is not nil
    -- (4) References table is not nil
    -- (5) The determination method for the claim is an australian census
    local validpopclaims = {}
    for _, s in ipairs(item.claims[Pop_P]) do
        local quals = s.qualifiers
        -- the qualifiers are checked before the determination method item is loaded,
        -- so a claim without qualifiers is skipped instead of raising an error
        if quals ~= nil
            and quals[Pointin_P] ~= nil
            and quals[Applies_P] ~= nil
            and quals[DetMeth_P] ~= nil
            and s.references ~= nil
        then
            local detmetitem = mw.wikibase.getEntity(quals[DetMeth_P][1].datavalue.value.id) -- the claim determination method item
            local instof = detmetitem and detmetitem.claims and detmetitem.claims[Instof_P]
            if instof ~= nil and instof[1].mainsnak.datavalue.value.id == CensusAU_I then
                validpopclaims[#validpopclaims + 1] = s
            end
        end
    end

    -- if there are no valid claims, return an empty string
    if #validpopclaims < 1 then
        return ""
    end

    SplitDoubleClaims(validpopclaims) -- any claims with multiple points-in-time are split into separate claims

    -- the geographies to show, in column order
    local showGeogIds
    if args.geog ~= nil and args.geog ~= "" then -- if geog is specified, only claims for its id are returned
        showGeogIds = { IdForGeog(string.lower(args.geog)) } -- empty when geog is not a valid abbreviation
    else
        showGeogIds = GeogIdsForType(args.type or "") or {} -- empty when type is missing or not recognised
    end
    local isShown = {}
    for _, id in ipairs(showGeogIds) do
        isShown[id] = true
    end

    -- the item's English label is the default reference title; an item without one gets an empty title
    local defaulttitle = (item.labels and item.labels.en and item.labels.en.value) or ""
    local lang = mw.language.getContentLanguage()

    -- collect the table cells: history[year][geography id] = formatted population followed by its references
    local history = {}
    local years = {}
    local hasData = {} -- geography ids that have at least one value, so need a column
    for _, q in ipairs(validpopclaims) do
        local claimgeogid = q.qualifiers[Applies_P][1].datavalue.value.id -- the ID of the applies_to_part item in the claim
        if isShown[claimgeogid] then
            local claimyear = string.sub(q.qualifiers[Pointin_P][1].datavalue.value.time, 2, 5) -- the population point in time as a year string
            if not history[claimyear] then
                history[claimyear] = {}
                years[#years + 1] = claimyear
            end
            history[claimyear][claimgeogid] = lang:formatNum(tonumber(q.mainsnak.datavalue.value.amount)) .. GetRefsForClaim(q, defaulttitle)
            hasData[claimgeogid] = true
        end
    end

    table.sort(years)

    local geogNames = {[UCL_I] = 'UCL', [SAL_I] = 'SAL', [ILOC_I] = 'ILOC', [LGA_I] = 'LGA'}

    -- build the wikitable contents from the history table
    local wt = {}

    -- header cells, one per geography with data
    for _, id in ipairs(showGeogIds) do
        if hasData[id] then
            wt[#wt + 1] = ' !! ' .. geogNames[id]
        end
    end

    -- data rows
    for _, year in ipairs(years) do
        wt[#wt + 1] = '\n|-\n! | ' .. year -- first column, contains years
        for _, id in ipairs(showGeogIds) do
            if hasData[id] then
                wt[#wt + 1] = '\n| ' .. (history[year][id] or '') -- an empty cell when there's no value for the year
            end
        end
    end

    local wts = table.concat(wt)

    -- if there are table contents, add the start and end of the table
    if #wts > 0 then
        wts = '{| class="wikitable"\n|-\n!' .. wts -- start of table and empty top-left cell
        wts = wts .. '\n|}' -- end of table
    end

    local cat = ''
    if mw.title.getCurrentTitle().namespace == 0 then
        -- category not added except in article namespace
        cat = '[[Category:Australian place articles using Wikidata population values]]'
    end

    return wts .. cat
end

---------------- Function LatestPopulation returns the most recent population value for a specified geography ---------------
-- parameters:
-- required: geog= a single geography type to return pop value for. Valid are 'ucl', 'sal', 'lga', 'iloc'.
-- optional: wikidata= the wikidata item to be used instead of the one in the current page
-- optional: year= any value (except 'no') requests the year to be shown after the population figure
-- optional: punc= any value will be inserted into the output before the reference number
-- returns: the formatted population, optional year link, optional punctuation and the references.
--          In the article namespace the tracking category is appended.
--          "" is returned when geog is missing or invalid, or when the item has no usable population claims.
-- NOTE(review): the test for the year parameter and the category link were damaged in the reviewed copy of
-- this function (empty-string literals and link markup had been stripped); they have been restored.
function p.LatestPopulation( frame )
    local args = frame.args

    if args.geog == nil then
        return ""
    end
    local geogID = IdForGeog(string.lower(args.geog))
    if geogID == nil then
        return ""
    end

    if args.wikidata ~= nil and args.wikidata ~= "" then -- if there's a Wikidata item specified, use it
        item = mw.wikibase.getEntity(args.wikidata)
    else
        item = mw.wikibase.getEntity() -- if there's a Wikidata item connected to the article it will find it here.
    end

    -- if there are no population claims in the item, return an empty string
    if not (item and item.claims and item.claims[Pop_P]) then
        return ""
    end

    ------------ PART 1: Find claims that meet minimum criteria
    -- (1) point in time is not nil
    -- (2) applies to part is not nil
    -- (3) determination method is not nil
    -- (4) References table is not nil
    local validpopclaims = {}
    for _, s in ipairs(item.claims[Pop_P]) do
        local quals = s.qualifiers
        if quals ~= nil
            and quals[Pointin_P] ~= nil
            and quals[Applies_P] ~= nil
            and quals[DetMeth_P] ~= nil
            and s.references ~= nil
        then
            validpopclaims[#validpopclaims + 1] = s
        end
    end

    -- if there are no valid claims, return an empty string
    if #validpopclaims < 1 then
        return ""
    end

    SplitDoubleClaims(validpopclaims) -- any claims with multiple points-in-time are split into separate claims

    --------------- PART 2: Find the latest claim for the specified geography
    -- The point-in-time strings are compared as text. With '>=', a later claim in the list replaces
    -- an earlier one that has the same date.
    local latest = nil
    for _, q in ipairs(validpopclaims) do
        if q.qualifiers[Applies_P][1].datavalue.value.id == geogID then
            local claimdate = q.qualifiers[Pointin_P][1].datavalue.value.time
            if latest == nil or claimdate >= latest.qualifiers[Pointin_P][1].datavalue.value.time then
                latest = q
            end
        end
    end

    --------------- PART 3: Compile the module output from that claim
    local wikitext = ""
    if latest ~= nil then
        local yearlink = ""
        local showyear = args.year or ''
        if showyear ~= '' and string.lower(showyear) ~= 'no' then -- year is only shown on request
            yearlink = " (" .. GetYearLink(latest) .. ")"
        end

        -- the item's English label is the default reference title; an item without one gets an empty title
        local defaulttitle = (item.labels and item.labels.en and item.labels.en.value) or ""
        local refs = GetRefsForClaim(latest, defaulttitle) -- the references for the claim

        wikitext = mw.language.getContentLanguage():formatNum(tonumber(latest.mainsnak.datavalue.value.amount))
            .. yearlink .. (args.punc or '') .. refs
    end

    local cat = ''
    if mw.title.getCurrentTitle().namespace == 0 then
        -- category not added except in article namespace
        cat = '[[Category:Australian place articles using Wikidata population values]]'
    end

    return wikitext .. cat
end

---------------- Function ListForInfobox returns the most recent population values ---------------
-- parameters:
-- required: type= the type value as for the Infobox
-- optional: wikidata= the wikidata item to be used instead of the one in the current page
-- optional: geog= a single geography type to return pop values for. Valid are 'ucl', 'sal', 'lga', 'iloc'. If left blank, all geographies for the type will be returned.
-- returns: the latest population of each geography, each followed by its geography abbreviation, year and
--          references. A single value is returned as is; several values are returned as a bulleted list.
--          In the article namespace the tracking category is appended.
--          "" is returned for a missing or unsupported type (including 'cadastral' and 'protected', which
--          don't have ABS populations), or when the item has no usable population claims.
-- NOTE(review): the category link markup was missing from the reviewed copy of this function and has been restored.
function p.ListForInfobox( frame )
    local args = frame.args

    if args.type == nil then
        return ""
    end
    local articleplacetype = string.lower(args.type) -- for the place type supplied, change to a lower case string

    -- Check for valid place types. Anything else has no ABS population, or is unrecognised.
    local populatedtypes = {
        town = true, suburb = true, city = true, settlement = true, locality = true,
        townlocality = true, townandlocality = true, lga = true, region = true,
    }
    if not populatedtypes[articleplacetype] then
        return ""
    end
    if articleplacetype == "townlocality" then
        articleplacetype = "townandlocality" -- the spelling GeogIdsForType knows
    end

    if args.wikidata ~= nil and args.wikidata ~= "" then -- if there's a Wikidata item specified, use it
        item = mw.wikibase.getEntity(args.wikidata)
    else
        item = mw.wikibase.getEntity() -- if there's a Wikidata item connected to the article it will find it here.
    end

    -- if there are no population claims in the item, return an empty string
    if not (item and item.claims and item.claims[Pop_P]) then
        return ""
    end

    ------------ PART 1: Find claims that meet minimum criteria
    -- (1) point in time is not nil
    -- (2) applies to part is not nil
    -- (3) determination method is not nil
    -- (4) References table is not nil
    local validpopclaims = {}
    for _, s in ipairs(item.claims[Pop_P]) do
        local quals = s.qualifiers
        if quals ~= nil
            and quals[Pointin_P] ~= nil
            and quals[Applies_P] ~= nil
            and quals[DetMeth_P] ~= nil
            and s.references ~= nil
        then
            validpopclaims[#validpopclaims + 1] = s
        end
    end

    -- if there are no valid claims, return an empty string
    if #validpopclaims < 1 then
        return ""
    end

    SplitDoubleClaims(validpopclaims) -- any claims with multiple points-in-time are split into separate claims

    --------------- PART 2: Find the latest claim for each geography found
    -- The point-in-time strings are compared as text. With '>=', a later claim in the list replaces
    -- an earlier one that has the same date.
    local latestclaim = {}
    for _, q in ipairs(validpopclaims) do
        local claimdate = q.qualifiers[Pointin_P][1].datavalue.value.time
        local claimgeog = q.qualifiers[Applies_P][1].datavalue.value.id
        local current = latestclaim[claimgeog]
        if current == nil or claimdate >= current.qualifiers[Pointin_P][1].datavalue.value.time then
            latestclaim[claimgeog] = q
        end
    end

    --------------- PART 3: specify the geography types that can be returned for each place type
    local showGeogIds
    if args.geog ~= nil and args.geog ~= "" then -- if geog is specified, only claims for its id are returned
        showGeogIds = { IdForGeog(string.lower(args.geog)) } -- empty when geog is not a valid abbreviation
    else
        showGeogIds = GeogIdsForType(articleplacetype) or {}
    end

    --------------- PART 4: Compile the module output, using only latest claims in specified geographies
    -- the item's English label is the default reference title; an item without one gets an empty title
    local defaulttitle = (item.labels and item.labels.en and item.labels.en.value) or ""
    local lang = mw.language.getContentLanguage()

    local returnlist = {}
    for _, geogid in ipairs(showGeogIds) do
        local claim = latestclaim[geogid]
        if claim ~= nil then
            local refs = GetRefsForClaim(claim, defaulttitle) -- the references for the max date claim
            returnlist[#returnlist + 1] = lang:formatNum(tonumber(claim.mainsnak.datavalue.value.amount))
                .. " (" .. GetAbbrLabel(claim) .. " " .. GetYearLink(claim) .. ")" .. refs
        end
    end

    local wikitext = ""
    if #returnlist == 1 then
        -- if there is only one entry in returnlist, return it without a bullet point
        wikitext = returnlist[1]
    elseif #returnlist > 1 then
        -- if there are multiple entries in returnlist, return all the rows with new line and bullet points between them
        wikitext = "\n*" .. table.concat(returnlist, "\n*")
    end
    -- (with no entries the text stays empty, rather than being a lone bullet point)

    local cat = ''
    if mw.title.getCurrentTitle().namespace == 0 then
        -- category not added except in article namespace
        cat = '[[Category:Australian place articles using Wikidata population values]]'
    end

    return wikitext .. cat
end

-- ###### this function is just for testing of the upgrade during development
-- It returns the output of the three public functions for one fixed Wikidata item, joined together.
-- NOTE(review): the separator before the ListForInfobox output was a string literal containing a raw line
-- break in the reviewed copy (not valid Lua); it is assumed to have been an HTML line break - confirm.
function p.main()
    local wdata = 'Q649969' -- Ulladulla

    local pieces = {
        p.HistoricPopulations( { args = {type = 'town', wikidata = wdata} } ),
        '\n\n',
        p.HistoricPopulations( { args = {type = 'town', wikidata = wdata, geog = 'sal'} } ),
        p.LatestPopulation( { args = {geog = 'sal', wikidata = wdata} } ),
        '<br>',
        p.ListForInfobox( { args = {type = 'town', wikidata = wdata} } ),
    }
    return table.concat(pieces)
end

return p -- the module's value: the table holding HistoricPopulations, LatestPopulation, ListForInfobox and main