-- Module:Sandbox/genewiki/geneboxdev

-- This code is originally from https://en.wikipedia.org/wiki/Module:Sandbox/RexxS/AA and was copied here (9/17/2015) for modification and reusability

-- Testing for arbitrary access

-- Intended to be:

-- Use : {{#invoke:Wikidata|getValueFromID|<ID>|<Property>|FETCH_WIKIDATA}}

-- E.g.: {{#invoke:Wikidata|getValueFromID|Q151973|P26|FETCH_WIKIDATA}} - to fetch value of 'spouse' (P26) from 'Richard Burton' (Q151973)

-- While in sandbox:

-- Use : {{#invoke:Sandbox/RexxS/AA|getValueFromID|<ID>|<Property>|FETCH_WIKIDATA}}

-- E.g.: {{#invoke:Sandbox/RexxS/AA|getValueFromID|Q151973|P26|FETCH_WIKIDATA}} - to fetch value of 'spouse' (P26) from 'Richard Burton' (Q151973)

-- Added extra parameters for choosing the rank (preferred, normal, truth, deprecated) and a "random" value (just the first in the list)

-- E.g.: {{#invoke:Sandbox/RexxS/AA|getValueFromID|Q151973|P26|FETCH_WIKIDATA|RANK_PREFERRED|RANDOM}}

-- Export table: every public entry point below is attached to p, and p is
-- returned at the end of the module.
local p = {}

-- This is used to get a value, or a comma separated list of them if multiple values exist

-- Returns the value(s) of one property of an arbitrary Wikidata item.
--
-- Arguments (frame.args):
--   1  itemID      entity ID to read from, e.g. "Q151973"
--   2  propertyID  property to read, e.g. "P26"
--   3  input_parm  "FETCH_WIKIDATA" to query Wikidata; any other value is
--                  returned unchanged (acts as a local override)
--   4  input_rank  optional key of mw.wikibase.entity.claimRanks, e.g.
--                  "RANK_PREFERRED"; empty accepts every rank
--   5  random      optional; any non-empty value returns only the first
--                  comma-separated value
--
-- Returns a string:
--   * for item-valued properties, the labels (or QIDs when a label is
--     missing) of all values joined with ", " (rank/random are not applied
--     on this path);
--   * for other properties, the text produced by formatPropertyValues;
--   * "" when the entity or the property is missing.
p.getValueFromID = function(frame)
    local itemID = mw.text.trim(frame.args[1] or "")
    local propertyID = mw.text.trim(frame.args[2] or "")
    local input_parm = mw.text.trim(frame.args[3] or "")
    local input_rank = mw.text.trim(frame.args[4] or "")
    local random = mw.text.trim(frame.args[5] or "")

    if input_parm ~= "FETCH_WIKIDATA" then
        return input_parm
    end

    -- getEntity can return nil and an entity without statements has no
    -- claims table, so guard every step before indexing.
    local entity = mw.wikibase.getEntity(itemID)
    local claims = entity and entity.claims and entity.claims[propertyID]
    if not claims then
        return ""
    end

    local first = claims[1]
    if first and first.mainsnak.snaktype == "value"
        and first.mainsnak.datavalue.type == "wikibase-entityid" then
        local out = {}
        -- ipairs keeps the statements in their stored order.
        for _, claim in ipairs(claims) do
            local snak = claim.mainsnak
            -- "novalue"/"somevalue" snaks carry no datavalue; skip them
            -- instead of failing on a nil index.
            if snak.snaktype == "value" and snak.datavalue.type == "wikibase-entityid" then
                local qid = "Q" .. snak.datavalue.value["numeric-id"]
                -- NOTE(review): the previous code also looked up a sitelink
                -- for each value but emitted the bare label whether or not
                -- one existed; link markup may have been intended - confirm.
                out[#out + 1] = mw.wikibase.label(qid) or qid
            end
        end
        return table.concat(out, ", ")
    end

    -- Translate the rank name into the table formatPropertyValues expects.
    -- An unknown rank name leaves the argument nil, which is what the old
    -- code always passed for a non-empty rank.
    local all_ranks = mw.wikibase.entity.claimRanks
    local acceptable_ranks
    if input_rank == "" then
        acceptable_ranks = all_ranks
    else
        local rank = all_ranks[input_rank]
        if rank ~= nil then
            acceptable_ranks = { rank }
        end
    end

    local results = entity:formatPropertyValues(propertyID, acceptable_ranks).value
    if random == "" then
        return results
    end

    -- "Random" simply means the first entry of the comma-separated list.
    local results_split = mw.text.split(results, ",")
    return results_split[1]
end

-- A function to return the QID of a property value, rather than its text label

-- May be useful for constructing chains of calls to get properties of properties, etc.

-- It returns the QID of only the first property value if more than one

-- Use like this: {{#invoke:Sandbox/RexxS/AA|getQIDFromID|Q151973|P26|FETCH_WIKIDATA}}

-- That will fetch the QID of the first value for the spouse (P26) of Richard Burton (Q151973)

-- Returns an empty string if the value doesn't exist or has no QID.

-- Returns the QID of the first value of a property, rather than its label.
--
-- Arguments (frame.args):
--   1  itemID      entity ID to read from, e.g. "Q151973"
--   2  propertyID  property to read, e.g. "P26"
--   3  (accepted for call-site symmetry with the other functions, but not
--       inspected: the lookup always runs)
--
-- Returns "Q<number>" for the first statement when it is an item value, and
-- "" when the entity, the property or an item value is missing.
p.getQIDFromID = function(frame)
    local itemID = mw.text.trim(frame.args[1] or "")
    local propertyID = mw.text.trim(frame.args[2] or "")

    -- Guard against a missing entity or an entity without any statements.
    local entity = mw.wikibase.getEntity(itemID)
    local claims = entity and entity.claims and entity.claims[propertyID]
    local first = claims and claims[1]
    if not first then
        return ""
    end

    local snak = first.mainsnak
    if snak.snaktype == "value" and snak.datavalue.type == "wikibase-entityid" then
        return "Q" .. snak.datavalue.value["numeric-id"]
    end
    return ""
end

-- Returns the formatted value of a property with thousands separators
-- stripped from quantities.
--
-- Arguments (frame.args):
--   1  itemID      entity ID to read from
--   2  propertyID  property to read
--   3  input_parm  "FETCH_WIKIDATA" to query Wikidata; any other value is
--                  returned unchanged
--
-- Returns the formatPropertyValues text (all ranks accepted), or "" when the
-- entity or the property is missing.
p.getRawValueFromID = function(frame)
    local itemID = mw.text.trim(frame.args[1] or "")
    local propertyID = mw.text.trim(frame.args[2] or "")
    local input_parm = mw.text.trim(frame.args[3] or "")

    if input_parm ~= "FETCH_WIKIDATA" then
        return input_parm
    end

    -- Guard against a missing entity or an entity without any statements.
    local entity = mw.wikibase.getEntity(itemID)
    local claims = entity and entity.claims and entity.claims[propertyID]
    if not claims then
        return ""
    end

    local result = entity:formatPropertyValues(propertyID, mw.wikibase.entity.claimRanks).value

    -- Quantities: drop a comma that sits between two digits ("1,234" -> "1234").
    local first = claims[1]
    if first and first.mainsnak.snaktype == "value"
        and first.mainsnak.datavalue.type == "quantity" then
        -- gsub also returns a match count; the parentheses keep only the text.
        result = (mw.ustring.gsub(result, "(%d),(%d)", "%1%2"))
    end

    return result
end

--This could be expanded into more levels taking a list in. But the deeper one abstracts the more likely one level won't return a value and error checking could be tedious.

--{{#invoke:Sandbox/RexxS/AA|getQIDFromID_two_levels|Q151973|P26|P35|FETCH_WIKIDATA}}

-- Follows two properties in a row and returns the QID found at the end:
-- item --parent property--> first value --child property--> first value.
--
-- Arguments (frame.args):
--   1  parent_itemID      starting entity ID
--   2  parent_propertyID  property read on the starting entity
--   3  child_propertyID   property read on the entity reached in step one
--   4  (accepted but not inspected: the lookup always runs)
--
-- Returns "Q<number>", or "" as soon as any step is missing or is not an
-- item value.
p.getQIDFromID_two_levels = function(frame)
    local parent_itemID = mw.text.trim(frame.args[1] or "")
    local parent_propertyID = mw.text.trim(frame.args[2] or "")
    local child_propertyID = mw.text.trim(frame.args[3] or "")

    -- Returns the QID held by the first statement of `property_id` on
    -- `item_id`, or nil when the entity, the property or an item value is
    -- missing.
    local function first_item_value(item_id, property_id)
        local entity = mw.wikibase.getEntity(item_id)
        local claims = entity and entity.claims and entity.claims[property_id]
        local first = claims and claims[1]
        if not first then
            return nil
        end
        local snak = first.mainsnak
        if snak.snaktype == "value" and snak.datavalue.type == "wikibase-entityid" then
            return "Q" .. snak.datavalue.value["numeric-id"]
        end
        return nil
    end

    local child_itemID = first_item_value(parent_itemID, parent_propertyID)
    if not child_itemID then
        return ""
    end

    -- The intermediate entity may lack the child property entirely; that
    -- case used to raise an error and now yields "".
    return first_item_value(child_itemID, child_propertyID) or ""
end

-- Collects the QIDs stored in one qualifier across every statement of a
-- property.
--
-- Arguments (frame.args):
--   1  propertyID   property whose statements are scanned
--   2  qualifierID  qualifier property to read on each statement
--   3  input_parm   "FETCH_WIKIDATA" to query Wikidata; any other value is
--                   returned unchanged
--   4  itemID       optional entity ID; when empty the entity connected to
--                   the current page is used
--
-- Returns the QIDs joined with ", ", or "" when the entity or property is
-- missing. Statements without the qualifier, and qualifier snaks that do not
-- hold an item, are skipped.
p.getQualifierID = function(frame)
    local propertyID = mw.text.trim(frame.args[1] or "")
    local qualifierID = mw.text.trim(frame.args[2] or "")
    local input_parm = mw.text.trim(frame.args[3] or "")
    local itemID = mw.text.trim(frame.args[4] or "")

    if input_parm ~= "FETCH_WIKIDATA" then
        return input_parm
    end

    local entity
    if itemID ~= "" then
        entity = mw.wikibase.getEntityObject(itemID)
    else
        entity = mw.wikibase.getEntityObject()
    end

    local claims = entity and entity.claims and entity.claims[propertyID]
    if not claims then
        return ""
    end

    local out = {}
    for _, claim in ipairs(claims) do
        -- A statement may have no qualifiers at all, or none for this
        -- property; treat both as an empty list.
        local quals = claim.qualifiers and claim.qualifiers[qualifierID]
        for _, qual in ipairs(quals or {}) do
            if qual.snaktype == 'value' and qual.datavalue.type == "wikibase-entityid" then
                out[#out + 1] = "Q" .. qual.datavalue.value["numeric-id"]
            end
        end
    end

    return table.concat(out, ", ")
end

-- Returns the newest "hg" genome-build alias referenced by the P659
-- qualifiers of a property's statements.
--
-- Arguments (frame.args):
--   1  propertyID  property whose statements are scanned (the callers in
--                  this file's examples pass P644 or P645)
--   2  qualifier property; accepted but ignored, P659 is hard-coded
--   3  accepted but ignored (no FETCH_WIKIDATA check is made here)
--   4  itemID      entity ID of the item to read; an ID unknown to Wikidata
--                  raises a Lua error inside mw.wikibase
--
-- Each P659 qualifier points at an item; the English aliases of that item
-- that start with "hg" followed by digits are treated as build names, and
-- the one with the highest number wins.
--
-- Returns "hg" .. <build>, "hg0" when statements exist but no build alias
-- was found, and "" when the entity or the property has no statements.
p.getAliasFromGenomeAssembly_hs = function(frame)
    local propertyID = mw.text.trim(frame.args[1] or "")
    local itemID = mw.text.trim(frame.args[4] or "")

    local entity = mw.wikibase.getEntityObject(itemID)
    local claims = entity and entity.claims and entity.claims[propertyID]
    if not (claims and claims[1]) then
        return ""
    end

    -- newest_build is the text that gets returned; newest_number is its
    -- numeric form so builds are ranked numerically, not lexicographically.
    local newest_build = "0"
    local newest_number = 0

    for _, claim in ipairs(claims) do
        -- Statements without qualifiers simply contribute nothing.
        local quals = claim.qualifiers and claim.qualifiers.P659
        for _, qual in ipairs(quals or {}) do
            local datavalue = qual.datavalue
            if datavalue and datavalue.type == "wikibase-entityid" then
                local build_item = mw.wikibase.getEntityObject("Q" .. datavalue.value["numeric-id"])
                local aliases = build_item and build_item.aliases and build_item.aliases.en
                for _, alias in ipairs(aliases or {}) do
                    -- First capture: everything after "hg"; second: its
                    -- leading digits.
                    local build_no, digits = string.match(alias.value, "^hg((%d+).*)$")
                    local build_number = digits and tonumber(digits)
                    if build_number and build_number > newest_number then
                        newest_build = build_no
                        newest_number = build_number
                    end
                end
            end
        end
    end

    return "hg" .. newest_build
end

--in future this could be combined with getChromosomeLoc once everything lives in one code base

-- Mouse counterpart of the genome-build lookup: hops from the given item to
-- the item named by its first P684 statement, then returns the newest "mm"
-- genome-build alias referenced by the P659 qualifiers of that item's
-- statements.
--
-- Arguments (frame.args):
--   1  propertyID  property scanned on the item reached through P684
--   2  qualifier property; accepted but ignored, P659 is hard-coded
--   3  accepted but ignored (no FETCH_WIKIDATA check is made here)
--   4  itemID      entity ID of the starting item
--
-- NOTE(review): the first P684 value is assumed to be the mouse gene;
-- confirm that holds for items with several P684 statements.
--
-- Returns "mm" .. <build>, "mm0" when statements exist but no build alias
-- was found, and "" whenever an entity, property or item value is missing
-- (including the case with no P684 statement, which used to return nil).
p.getAliasFromGenomeAssembly_mm = function(frame)
    local propertyID = mw.text.trim(frame.args[1] or "")
    local itemID = mw.text.trim(frame.args[4] or "")
    local mouse_propertyID = "P684"

    -- Step 1: find the item the starting item points to.
    local entity_gene = mw.wikibase.getEntity(itemID)
    local claims_gene = entity_gene and entity_gene.claims and entity_gene.claims[mouse_propertyID]
    local link = claims_gene and claims_gene[1]
    if not (link and link.mainsnak.snaktype == "value"
        and link.mainsnak.datavalue.type == "wikibase-entityid") then
        return ""
    end
    local mouse_itemID = "Q" .. link.mainsnak.datavalue.value["numeric-id"]

    -- Step 2: read the requested property on that item.
    local entity_mouse = mw.wikibase.getEntity(mouse_itemID)
    local claims_mouse = entity_mouse and entity_mouse.claims and entity_mouse.claims[propertyID]
    if not (claims_mouse and claims_mouse[1]) then
        return ""
    end

    -- newest_build is the text that gets returned; newest_number is its
    -- numeric form so that build 10 outranks build 9 (a plain string
    -- comparison ranks "9" above "10").
    local newest_build = "0"
    local newest_number = 0

    for _, claim in ipairs(claims_mouse) do
        -- Statements without qualifiers simply contribute nothing.
        local quals = claim.qualifiers and claim.qualifiers.P659
        for _, qual in ipairs(quals or {}) do
            local datavalue = qual.datavalue
            if datavalue and datavalue.type == "wikibase-entityid" then
                local build_item = mw.wikibase.getEntityObject("Q" .. datavalue.value["numeric-id"])
                local aliases = build_item and build_item.aliases and build_item.aliases.en
                for _, alias in ipairs(aliases or {}) do
                    -- First capture: everything after "mm"; second: its
                    -- leading digits.
                    local build_no, digits = string.match(alias.value, "^mm((%d+).*)$")
                    local build_number = digits and tonumber(digits)
                    if build_number and build_number > newest_number then
                        newest_build = build_no
                        newest_number = build_number
                    end
                end
            end
        end
    end

    return "mm" .. newest_build
end

---getChromosomeLoc

---input propertyID ie(Genomic start) P644

--- qualifierID (ie GenLoc Assembly) P659

--- input_parm (ie FETCH_WIKIDATA)

--- {{#invoke:Sandbox/genewiki/geneboxdev|getChromosomeLoc|P644|P659|FETCH_WIKIDATA}}

---output preferred chromosome location start value in this case it would be 49893092

--for debug window -- Q14865053

--frame = mw.getCurrentFrame()

--frame.args = {"P644","P659","FETCH_WIKIDATA","Q14865053"}

--print(p.getChromosomeLoc(frame))

-- Returns the coordinate stored in a property for the newest "hg" genome
-- build.
--
-- Arguments (frame.args):
--   1  propertyID  property holding the coordinate (the examples in this
--                  file use P644, genomic start)
--   2  qualifier property; accepted but ignored, P659 is hard-coded
--   3  accepted but ignored (no FETCH_WIKIDATA check is made here)
--   4  itemID      entity ID of the item to read; an ID unknown to Wikidata
--                  raises a Lua error inside mw.wikibase
--
-- Every statement's P659 qualifier points at an item whose English aliases
-- starting with "hg" + digits name a build. The value of the statement with
-- the highest build number is returned. A statement without P659 qualifiers
-- is used only as a fallback when no build-qualified value was found
-- (previously it could overwrite a build-selected value depending on
-- statement order).
--
-- Returns the coordinate value, or "" when nothing usable exists.
p.getChromosomeLoc = function(frame)
    local propertyID = mw.text.trim(frame.args[1] or "")
    local itemID = mw.text.trim(frame.args[4] or "")

    local entity = mw.wikibase.getEntityObject(itemID)
    local claims = entity and entity.claims and entity.claims[propertyID]
    if not (claims and claims[1]) then
        return ""
    end

    local output = ""           -- value tied to the newest build seen so far
    local unqualified = ""      -- last value that carried no P659 qualifier
    local newest_number = 0     -- builds are ranked numerically, not as text

    for _, claim in ipairs(claims) do
        -- "novalue"/"somevalue" statements have no datavalue; skip them.
        local datavalue = claim.mainsnak.datavalue
        local location = datavalue and datavalue.value
        if location then
            local quals = claim.qualifiers and claim.qualifiers.P659
            if quals then
                for _, qual in ipairs(quals) do
                    local qual_value = qual.datavalue
                    if qual_value and qual_value.type == "wikibase-entityid" then
                        local build_item = mw.wikibase.getEntityObject("Q" .. qual_value.value["numeric-id"])
                        local aliases = build_item and build_item.aliases and build_item.aliases.en
                        for _, alias in ipairs(aliases or {}) do
                            local digits = string.match(alias.value, "^hg(%d+)")
                            local build_number = digits and tonumber(digits)
                            -- If several values share a build, the first one
                            -- encountered is kept.
                            if build_number and build_number > newest_number then
                                output = location
                                newest_number = build_number
                            end
                        end
                    end
                end
            else
                unqualified = location
            end
        end
    end

    if newest_number > 0 then
        return output
    end
    return unqualified
end

-- Mouse counterpart of the coordinate lookup: hops from the given item to
-- the item named by its first P684 statement, then returns that item's
-- coordinate for the newest "mm" genome build.
--
-- Arguments (frame.args):
--   1  propertyID  property holding the coordinate on the item reached
--                  through P684
--   2  qualifier property; accepted but ignored, P659 is hard-coded
--   3  accepted but ignored (no FETCH_WIKIDATA check is made here)
--   4  itemID      entity ID of the starting item
--
-- NOTE(review): the first P684 value is assumed to be the mouse gene;
-- confirm that holds for items with several P684 statements.
--
-- Every statement's P659 qualifier points at an item whose English aliases
-- starting with "mm" + digits name a build. The value of the statement with
-- the highest build number is returned. A statement without P659 qualifiers
-- is used only as a fallback when no build-qualified value was found
-- (previously it could overwrite a build-selected value depending on
-- statement order).
--
-- Returns the coordinate value, or "" when nothing usable exists (including
-- the case with no P684 statement, which used to return nil).
p.getChromosomeLoc_mm = function(frame)
    local propertyID = mw.text.trim(frame.args[1] or "")
    local itemID = mw.text.trim(frame.args[4] or "")
    local mouse_propertyID = "P684"

    -- Step 1: find the item the starting item points to.
    local entity_gene = mw.wikibase.getEntity(itemID)
    local claims_gene = entity_gene and entity_gene.claims and entity_gene.claims[mouse_propertyID]
    local link = claims_gene and claims_gene[1]
    if not (link and link.mainsnak.snaktype == "value"
        and link.mainsnak.datavalue.type == "wikibase-entityid") then
        return ""
    end
    local mouse_itemID = "Q" .. link.mainsnak.datavalue.value["numeric-id"]

    -- Step 2: read the requested property on that item.
    local entity_mouse = mw.wikibase.getEntity(mouse_itemID)
    local claims_mouse = entity_mouse and entity_mouse.claims and entity_mouse.claims[propertyID]
    if not (claims_mouse and claims_mouse[1]) then
        return ""
    end

    local output = ""           -- value tied to the newest build seen so far
    local unqualified = ""      -- last value that carried no P659 qualifier
    local newest_number = 0     -- numeric, so that build 10 outranks build 9

    for _, claim in ipairs(claims_mouse) do
        -- "novalue"/"somevalue" statements have no datavalue; skip them.
        local datavalue = claim.mainsnak.datavalue
        local location = datavalue and datavalue.value
        if location then
            local quals = claim.qualifiers and claim.qualifiers.P659
            if quals then
                for _, qual in ipairs(quals) do
                    local qual_value = qual.datavalue
                    if qual_value and qual_value.type == "wikibase-entityid" then
                        local build_item = mw.wikibase.getEntityObject("Q" .. qual_value.value["numeric-id"])
                        local aliases = build_item and build_item.aliases and build_item.aliases.en
                        for _, alias in ipairs(aliases or {}) do
                            local digits = string.match(alias.value, "^mm(%d+)")
                            local build_number = digits and tonumber(digits)
                            -- If several values share a build, the first one
                            -- encountered is kept.
                            if build_number and build_number > newest_number then
                                output = location
                                newest_number = build_number
                            end
                        end
                    end
                end
            else
                unqualified = location
            end
        end
    end

    if newest_number > 0 then
        return output
    end
    return unqualified
end

--eg:{{#invoke:Sandbox/genewiki/geneboxdev|getChromosomeLoc|P644|FETCH_WIKIDATA}}

--This function is used to generate a list of aliases

--To Do: exclude gene symbol that is already displayed

-- Builds a list of the English aliases of an entity.
--
-- Arguments (frame.args):
--   from  entity ID to read; when absent, getEntityObject is called with nil
--
-- Returns every alias prefixed with ", " and concatenated, e.g.
-- ", A, B" (the leading separator is kept as-is for existing callers), or ""
-- when the entity or its English aliases are missing.
function p.get_aliases(frame)
    -- Everything is local: the previous version leaked `entity`, `a` and
    -- `test` into the global environment.
    local entity = mw.wikibase.getEntityObject(frame.args['from'])
    local aliases = entity and entity['aliases'] and entity['aliases']['en']
    if not aliases then
        return ''
    end

    local parts = {}
    for _, alias in ipairs(aliases) do
        parts[#parts + 1] = ', ' .. alias['value']
    end
    return table.concat(parts)
end

-- Strips the "chromosome " / "mouse chromosome " wording from a label.
--
-- Arguments (frame.args):
--   1  the text to shorten (surrounding whitespace is trimmed first)
--
-- Returns the text with every occurrence of the matching phrase removed.
-- When the text contains neither phrase the result is the empty string, not
-- the input.
p.trimChromosome = function(frame)
    local raw = mw.text.trim(frame.args[1] or "")

    -- The mouse phrase is tested first because any text matching it also
    -- matches the shorter pattern, and the mouse replacement must win.
    local phrase
    if raw:find('mouse.chromosome.') then
        phrase = "mouse chromosome "
    elseif raw:find('chromosome.') then
        phrase = "chromosome "
    else
        return ''
    end

    -- gsub returns (text, count); only the text is handed back.
    local shortened = raw:gsub(phrase, "")
    return shortened
end

--EG: {{#invoke:Sandbox/genewiki/geneboxdev|getGO|Q14864805|P680|P686|FETCH_WIKIDATA}}

-- Renders the item values of a property as a wikitext bullet list of
-- external links to AmiGO.
--
-- Arguments (frame.args):
--   1  itemID_parent      entity ID to read from
--   2  propertyID_parent  property whose item values are listed
--   3  propertyID_child   property read on each value; its formatted text is
--                         appended to the AmiGO URL
--   4  input_parm         "FETCH_WIKIDATA" to query Wikidata; any other
--                         value is returned unchanged
--
-- Returns:
--   * one line per item value: "*[<url><child value> <label>]\n", or
--     "*<label>\n" when the value lacks the child property (previously one
--     such value blanked the entire list);
--   * the formatPropertyValues text of the parent property when its first
--     statement is not an item value;
--   * "" when the entity or the parent property is missing.
p.getGO = function(frame)
    local itemID_parent = mw.text.trim(frame.args[1] or "")
    local propertyID_parent = mw.text.trim(frame.args[2] or "")
    local propertyID_child = mw.text.trim(frame.args[3] or "")
    local input_parm = mw.text.trim(frame.args[4] or "")

    if input_parm ~= "FETCH_WIKIDATA" then
        return input_parm
    end

    local entity = mw.wikibase.getEntity(itemID_parent)
    local claims = entity and entity.claims and entity.claims[propertyID_parent]
    if not claims then
        return ""
    end

    local first = claims[1]
    if not (first and first.mainsnak.snaktype == "value"
        and first.mainsnak.datavalue.type == "wikibase-entityid") then
        -- The old code passed an undefined variable here, which made this
        -- branch fail; the parent property is what is being displayed.
        return entity:formatPropertyValues(propertyID_parent, mw.wikibase.entity.claimRanks).value
    end

    local amigo_url = "http://amigo.geneontology.org/amigo/term/GO:"
    local out = {}

    for _, claim in ipairs(claims) do
        local snak = claim.mainsnak
        -- Skip statements that carry no item value.
        if snak.snaktype == "value" and snak.datavalue.type == "wikibase-entityid" then
            local term_id = "Q" .. snak.datavalue.value["numeric-id"]
            local term = mw.wikibase.getEntityObject(term_id)
            local label = mw.wikibase.label(term_id) or term_id

            if term and term.claims and term.claims[propertyID_child] then
                local go_id = term:formatPropertyValues(propertyID_child, mw.wikibase.entity.claimRanks).value
                out[#out + 1] = "*[" .. amigo_url .. go_id .. " " .. label .. "]\n"
            else
                out[#out + 1] = "*" .. label .. "\n"
            end
        end
    end

    return table.concat(out, "")
end

-- Renders the string values of a property as external links to RCSB PDB.
--
-- Arguments (frame.args):
--   1  itemID      entity ID to read from
--   2  propertyID  property holding the identifiers
--   3  input_parm  "FETCH_WIKIDATA" to query Wikidata; any other value is
--                  returned unchanged
--
-- Returns:
--   * "[<url><id> <id>]" for every string value, joined with ", ";
--   * the formatPropertyValues text when the first statement has no value;
--   * "" when the entity or the property is missing.
p.getPDB = function(frame)
    local itemID = mw.text.trim(frame.args[1] or "")
    local propertyID = mw.text.trim(frame.args[2] or "")
    local input_parm = mw.text.trim(frame.args[3] or "")

    if input_parm ~= "FETCH_WIKIDATA" then
        return input_parm
    end

    local entity = mw.wikibase.getEntity(itemID)
    local claims = entity and entity.claims and entity.claims[propertyID]
    if not claims then
        return ""
    end

    local first = claims[1]
    if not (first and first.mainsnak.snaktype == "value") then
        return entity:formatPropertyValues(propertyID, mw.wikibase.entity.claimRanks).value
    end

    local pdb_url = "http://www.rcsb.org/pdb/explore/explore.do?pdbId="
    local out = {}

    for _, claim in ipairs(claims) do
        local snak = claim.mainsnak
        local pdb_id = snak.snaktype == "value" and snak.datavalue.value
        -- The identifier itself is the link text. The old code first asked
        -- mw.wikibase.label for a label of the identifier, but that function
        -- takes an entity ID, so the lookup could not contribute anything
        -- for an external identifier. Non-string values are skipped rather
        -- than breaking the concatenation.
        if type(pdb_id) == "string" then
            out[#out + 1] = "[" .. pdb_url .. pdb_id .. " " .. pdb_id .. "]"
        end
    end

    return table.concat(out, ", ")
end

-- Debug helper: dumps an entity, or one sub-tree of it, as text.
--
-- Arguments are taken from the #invoke frame when it has a first positional
-- argument, otherwise from the parent (template) frame:
--   1      entity ID; when empty the entity connected to the current page
--          is used
--   2..n   path into the entity table, one key per argument, e.g.
--          claims | P31 | 1 | mainsnak. Each key is tried as given and then
--          as a number.
--
-- Returns pretty-printed JSON when the selected value is a table,
-- tostring(value) otherwise, and nil when the entity or any path step does
-- not exist.
--
-- The path used to start at argument 1, which is the entity ID itself, so
-- the entity was indexed by its own ID and the function never produced
-- output.
function p.ViewSomething(frame)
    local f = frame.args[1] and frame or frame:getParent()
    if not f then
        return nil
    end

    local itemID = mw.text.trim(f.args[1] or "")
    local data
    if itemID ~= "" then
        data = mw.wikibase.getEntityObject(itemID)
    else
        data = mw.wikibase.getEntityObject()
    end
    if not data then
        return nil
    end

    local i = 2
    while true do
        local index = f.args[i]
        if not index then
            -- End of the path: render whatever was reached.
            if type(data) == "table" then
                return mw.text.jsonEncode(data, mw.text.JSON_PRESERVE_KEYS + mw.text.JSON_PRETTY)
            end
            return tostring(data)
        end

        -- A scalar cannot be descended into any further.
        if type(data) ~= "table" then
            return nil
        end

        data = data[index] or data[tonumber(index)]
        if not data then
            return nil
        end
        i = i + 1
    end
end

-- Hand the export table to the caller that loaded this module.
return p