// User:Magicpiano/NRBot/UpdateNRHPProgress.js

/* jshint maxerr: 3000 */

/*

The following script places a button at the top of the NRHP Progress page WP:NRHPPROGRESS. When the button is clicked,

the script begins to load each county list linked from the Progress page in the background, extract statistics about

sites in each list, and updates the Progress page with the fetched data.

*/

// ---- Shared state for one statistics-update run ----

// Placeholder value; presumably replaced once the Progress page wikitext has been fetched.
var wikitext = 'error';

var ProgressStructure=[]; // ProgressStructure[table][row].StatName

var TotalToQuery=0; // number of rows that carry a Link (i.e. lists that must be queried)

var TotalQueried=0; // number of lists handled so far; totals are calculated once it reaches TotalToQuery

var ErrorCount=0;

// List of [status, count] pairs, seeded with one empty entry.
// Within each pair: 0=status, 1=count
var WarningCount=[["",0]];

var InitialTime=0; // replaced by a Date when querying starts

var ProgressDivTimer=0; // timer for updating ProgressDiv

var DefaultQueryPause=1; // number of milliseconds to wait between each API query; increased by code if rate limit reached

var RestrictedImageCount=0;

var bIgnoreTableCount = false; // set to true on the tester page so the table-count check is skipped

// Users authorized to run this script

var AuthorizedUsers = [ "Magicpiano" ]; // nb this is for use only when g_disabled is true

var CurrentMaintainer = "Magicpiano"; // User who is currently maintaining the script

/**
 * Adds the "Update Statistics" button directly above the page content.
 *
 * The button is only added on the NRHP Progress page or on the tester page
 * (where the table-count check is additionally switched off), and never when
 * the URL contains "action" (e.g. edit mode). Nothing happens if the button
 * already exists.
 */
function ProgressButton() {
    var pageName = mw.config.get('wgPageName');
    var isProgressPage = (pageName == "Wikipedia:WikiProject_National_Register_of_Historic_Places/Progress");
    var isTesterPage = (pageName == "User:Magicpiano/NRBot/UpdateNRHPProgressTester");

    // The tester page does not have the full set of tables.
    if (isTesterPage) {
        bIgnoreTableCount = true;
    }

    // Any URL carrying "action" (edit, history, ...) is ignored, whatever the page.
    var hasActionInUrl = (location.href.indexOf('action') != -1);
    if (hasActionInUrl || !(isProgressPage || isTesterPage)) {
        return;
    }

    // Do not add a second button.
    if (document.getElementById("NRHPProgressUpdateButton") !== null) {
        return;
    }

    var updateButton = document.createElement("input");
    updateButton.setAttribute("type", "button");
    updateButton.setAttribute("value", "Update Statistics");
    updateButton.setAttribute("onclick", "ClickUpdateNRHPProgress()");
    updateButton.id = "NRHPProgressUpdateButton";

    var pageContent = document.getElementById('mw-content-text');
    pageContent.parentNode.insertBefore(updateButton, pageContent);
}

// Maintenance switch: while true, only the maintainer and the authorized users may run the script.
var g_disabled = false;

/**
 * Decides whether the current user may run the update.
 *
 * The current maintainer and everyone in AuthorizedUsers are always allowed.
 * Everybody else is allowed unless g_disabled is set, in which case an alert
 * is shown and the run is refused.
 *
 * @returns {boolean} true if the script may proceed, false otherwise
 */
function CheckPermission() {
    var currentUser = mw.user.getName();
    console.log("UpdateNRHPProgress: run by user "+currentUser);

    var isMaintainer = (currentUser == CurrentMaintainer);
    var isAuthorized = isMaintainer || AuthorizedUsers.some(function (name) {
        return currentUser == name;
    });

    if (isAuthorized || !g_disabled) {
        return true;
    }

    alert("Script is currently disabled for maintenance.");
    return false;
}

/**
 * Click handler of the "Update Statistics" button.
 *
 * After the permission check passes, the button is disabled, a status box
 * ("ProgressDiv") is created above the button if it does not exist yet, and
 * the wikitext of the current page is requested.
 */
function ClickUpdateNRHPProgress() {
    if (!CheckPermission()) {
        return;
    }

    // Prevent a second run while this one is in progress.
    var updateButton = document.getElementById("NRHPProgressUpdateButton");
    updateButton.disabled = true;

    // Reuse the status box if present, otherwise build it and place it above the button.
    function obtainStatusBox() {
        var existing = document.getElementById("ProgressDiv");
        if (existing !== null) {
            return existing;
        }
        var created = document.createElement("div");
        created.setAttribute("id", "ProgressDiv");
        created.setAttribute("style", "width:500px; border:1px solid black; padding:5px; background:#ffffff");
        updateButton.parentNode.insertBefore(created, updateButton);
        return created;
    }

    var statusBox = obtainStatusBox();
    statusBox.innerHTML = "Initializing...";

    // after wikitext fetched, SetupTables() is called
    getWikitext(mw.config.get('wgPageName'));
}

/**
 * Creates the array of table structure (ProgressStructure) to be populated later.
 *
 * Reads every "wikitable sortable" table on the page. Table 0 is the national
 * table; tables 1..n are the per-state tables, and table i is labelled with
 * the ID of national row i-1. For every table the layout
 * ProgressStructure[table][row] is:
 *   - one entry per data row (index = table row index - 1),
 *   - for the national table only: a hardcoded "Tangier, Morocco" entry,
 *   - a "Duplicates" entry copied from the second-to-last table row,
 *   - a zeroed "Totals" entry for the last table row.
 *
 * State rows keep the numbers currently shown in the table (fallback in case
 * a query fails) and receive Link/ArticleQueried/TalkQueried when their
 * second cell links to a list page. TotalToQuery is set to the number of such
 * rows, the status box is updated, and querying is started with LoadList(1,0).
 *
 * Shows an alert and aborts when the number of tables is not the expected 61
 * (unless bIgnoreTableCount is set).
 */
function SetupTables() {
    // Number shown in cells[index]; every thousands separator is removed ("1,234" -> 1234).
    function cellNumber(cells, index) {
        return parseFloat(cells[index].innerHTML.replace(/,/g,""));
    }

    // Statistics record for `id` with all counters at zero.
    function zeroedStats(id) {
        return {
            ID: id,
            Total: 0,
            Illustrated: 0,
            Articled: 0,
            Stubs: 0,
            NRISonly: 0,
            StartPlus: 0,
            Unassessed: 0,
            Untagged: 0
        };
    }

    // Statistics record read from a row; `first` is the index of the Total cell.
    // The skipped offsets (2, 4, 8) are cells that are not stored.
    function statsFromCells(id, cells, first) {
        return {
            ID: id,
            Total: cellNumber(cells, first),
            Illustrated: cellNumber(cells, first+1),
            Articled: cellNumber(cells, first+3),
            Stubs: cellNumber(cells, first+5),
            NRISonly: cellNumber(cells, first+6),
            StartPlus: cellNumber(cells, first+7),
            Unassessed: cellNumber(cells, first+9),
            Untagged: cellNumber(cells, first+10)
        };
    }

    var table=document.getElementsByClassName('wikitable sortable');

    // Expected table count:
    // There should be 61 tables, as follows:
    // The national table, 50 states, and the following 10 non-state entities:
    // District of Columbia, Puerto Rico, Virgin Islands, Guam, American Samoa,
    // Northern Mariana Islands, Federated States of Micronesia, Palau, Marshall Islands
    // Minor Outlying Islands
    // This number should only need adjustment if the number of states or non-state entities having
    // separate tables is changed. An alert here is more likely caused by some sort of formatting change
    // or error in the table.
    //
    // Ideally, the above search should be restrictable (by class in some way?) to only the specific tables of interest.
    //
    if ((!bIgnoreTableCount) && table.length !== 61) {
        alert('Incorrect table count in progress page: expected 61, saw '+table.length+'.');
        return;
    }

    // set up national totals
    var tr=table[0].getElementsByTagName("tr");
    var i, j, td;

    ProgressStructure[0]=[];

    // Row 0 is the header; the last three rows (Tangier, duplicates, totals) are handled below.
    for (j=1; j<tr.length-3; j++) {
        td=tr[j].getElementsByTagName("td");
        ProgressStructure[0][j-1]=zeroedStats(td[0].innerHTML);
    }

    // special row for Tangier, Morocco
    // The values for the American Legation in Tangier are hardcoded rather than copied from
    // the table, because copying is prone to error if the table is modified. The article is
    // an illustrated Start+ article, so these settings are unlikely to change.
    // American Legation: Illustrated, Articled, not stub, not NRIS, Start+, Assessed, Tagged
    ProgressStructure[0][tr.length-4]={
        ID: "Tangier, Morocco",
        Total: 1,
        Illustrated: 1,
        Articled: 1,
        Stubs: 0,
        NRISonly: 0,
        StartPlus: 1,
        Unassessed: 0,
        Untagged: 0
    };

    // duplicates row
    td=tr[tr.length-2].getElementsByTagName("td");
    ProgressStructure[0][tr.length-3]=statsFromCells("National Duplicates", td, 0);

    // national totals
    ProgressStructure[0][tr.length-2]=zeroedStats("National Totals");

    // now data for each state
    for (i=1; i<table.length; i++) {
        tr=table[i].getElementsByTagName("tr");
        ProgressStructure[i]=[];

        // Row 0 is the header; the last two rows (duplicates, totals) are handled below.
        for (j=1; j<tr.length-2; j++) {
            td=tr[j].getElementsByTagName("td");

            // fill in existing data in case error
            var row=statsFromCells(td[0].innerHTML.substr(0,5), td, 3);

            var link=td[1].getElementsByTagName("a");
            if (link.length!==0 && link[0].href.search("#")==-1) {
                // The list title is the last path segment of the link, with underscores as spaces.
                var parts=decodeURI(link[0].href).split("/");
                row.Link=parts[parts.length-1].replace(/_/g," ");
                row.ArticleQueried=0; // for querying later
                row.TalkQueried=0;
            } else if (row.ID!="ddddd") {
                // if no link and not duplicate, must be totals row, so we can zero it
                row=zeroedStats(row.ID);
            }

            ProgressStructure[i][j-1]=row;
        }

        // duplicates row
        td=tr[tr.length-2].getElementsByTagName("td");
        ProgressStructure[i][tr.length-3]=statsFromCells(ProgressStructure[0][i-1].ID+" Duplicates", td, 0);

        // state totals
        ProgressStructure[i][tr.length-2]=zeroedStats(ProgressStructure[0][i-1].ID+" Totals");
    }

    // Count the lists to query; reset first so a repeated call cannot accumulate.
    TotalToQuery=0;
    for (i=1; i<ProgressStructure.length; i++) {
        for (j=0; j<ProgressStructure[i].length; j++) {
            if (typeof ProgressStructure[i][j].Link!="undefined") TotalToQuery++; // don't count duplicates and total rows
        }
    }

    TotalQueried=0;

    var ProgressDiv=document.getElementById("ProgressDiv");
    ProgressDiv.innerHTML+=" Done!<br>";

    var ProgressSpan=document.createElement("span");
    ProgressSpan.setAttribute("id", "ProgressSpan");
    ProgressDiv.appendChild(ProgressSpan);
    ProgressSpan.innerHTML = "Querying county data... 0 (0%) of "+TotalToQuery+" lists checked.";

    var TimeSpan=document.createElement("span");
    TimeSpan.setAttribute("id", "TimeSpan");
    ProgressDiv.appendChild(TimeSpan);
    TimeSpan.innerHTML = "";

    InitialTime=new Date(); // record starting time

    UpdateProgressDiv();

    LoadList(1,0); // begin querying first page
}

/**
 * Loads the next list to query, starting at ProgressStructure[currentTable][currentRow].
 *
 * Rows without a Link (duplicate and total rows) are skipped. For a row with
 * a Link, the query is issued after DefaultQueryPause milliseconds and the
 * following row is scheduled from within the same timer callback, so the
 * requests are spaced out rather than fired all at once. The walk ends when
 * the last table has been passed.
 *
 * @param {number} currentTable index into ProgressStructure
 * @param {number} currentRow   index into ProgressStructure[currentTable]
 */
function LoadList(currentTable,currentRow) {
    var tableIndex = currentTable;
    var rowIndex = currentRow;

    // The last two entries of each table are the duplicates and totals rows,
    // so anything beyond length-3 means this table is finished.
    var lastListRow = ProgressStructure[tableIndex].length-3;
    if (rowIndex > lastListRow) {
        tableIndex = tableIndex + 1;
        rowIndex = 0;
    }

    // no more tables: done
    if (ProgressStructure.length-1 < tableIndex) {
        return;
    }

    var hasLink = (typeof ProgressStructure[tableIndex][rowIndex].Link != "undefined");
    if (!hasLink) {
        // skip duplicate and total rows
        LoadList(tableIndex, rowIndex+1);
        return;
    }

    var listTitle = ProgressStructure[tableIndex][rowIndex].Link;

    // short delay to prevent API overload
    setTimeout(function queryThenAdvance() {
        getProgressListWikitext(listTitle, tableIndex, rowIndex);
        LoadList(tableIndex, rowIndex+1);
    }, DefaultQueryPause);
}

function WikitextFetched(ajaxResponse,status,title,currentTable,currentRow) {

// console.log("WikitextFetched: table="+currentTable+" row="+currentRow+" title="+title+" status="+status);

if (status!="success") {

NewWarning("Wikitext "+ajaxResponse.errorThrown);

setTimeout(function(){ // try again after delay if rate limit reached

getProgressListWikitext(title,currentTable,currentRow);

}, 250);

return;

}

// won't get here unless successful

var responseText, pagetext;

var tabletext, regex, i, j;

try {

responseText=JSON.parse(ajaxResponse.responseText);

pagetext=responseText.query.pages[responseText.query.pageids[0]].revisions[0]["*"];

}

catch (e) {

console.log("WikiTextFetched: Exception parsing "+title+": "+e);

ProgressFatalError(0,title,currentTable,currentRow);

return;

}

var StartIndex;

// console.log("WikiTextFetched: Parsing out "+title+" to find relevant table");

try {

if (responseText.query.redirects) { // if redirect, find section

var SectionName="Undefined";

for (var r in responseText.query.redirects) {

if (typeof responseText.query.redirects[r].tofragment!="undefined") SectionName=responseText.query.redirects[r].tofragment.replace(/.27/g,"'");

}

regex = new RegExp("=[ ]*(\\[\\[(.*?\\|)?[ ]*)?"+SectionName+"([ ]*\\]\\])?[ ]*=", "g");

var sectionheader=pagetext.match(regex);

if (sectionheader === null || sectionheader === undefined) { // if no section found, check if one of known empty counties

var ID = ProgressStructure[currentTable][currentRow].ID;

//console.log("WikiTextFetched: List appears to be empty: title="+title+" id="+ID);

// list last check date: 2023-04-03

var EmptyCounties=["01061", // Geneva County AL

"02270", // Kusilvak Census Area AK (first)

"02158", // Kusilvak Census Area AK (second)

"08014", // Broomfield County CO

"12067", // Lafayette County FL

"20081", // Haskell County KS

"20175", // Seward County KS

"20187", // Stanton County KS

"20189", // Stevens County KS

"26051", // Gladwin County MI

"26079", // Kalkaska County MI

"26119", // Montmorency County MI

"26129", // Ogemaw County MI

"26133", // Osceola County MI

"31009", // Blaine County NE

"31113", // Logan County NE

"31117", // McPherson County NE

"38085", // Sioux County ND

"48017", // Bailey County TX

"48023", // Baylor County TX

"48033", // Borden County TX

"48069", // Castro County TX

"48079", // Cochran County TX

"48103", // Crane County TX

"48107", // Crosby County TX

"48119", // Delta County TX

"48131", // Duval County TX

"48155", // Foard County TX

"48165", // Gaines County TX

"48207", // Haskell County TX

"48219", // Hockley County TX

"48247", // Jim Hogg County TX

"48269", // King County TX

"48279", // Lamb County TX

"48341", // Moore County TX

"48389", // Reeves County TX

"48415", // Scurry County TX

"48421", // Sherman County TX

"48433", // Stonewall County TX

"48437", // Swisher County TX

"48445", // Terry County TX

"48461", // Upton County TX

"48475", // Ward County TX

"48501", // Yoakum County TX

"51735" // Poquoson VA

];

var errorcode = 0;

for (var k=0; k

if (ID==EmptyCounties[k]) {errorcode=-1;}

}

if (errorcode!==0) { // must be an empty county

ProgressStructure[currentTable][currentRow].Total=0;

ProgressStructure[currentTable][currentRow].Illustrated=0;

ProgressStructure[currentTable][currentRow].Articled=0;

ProgressStructure[currentTable][currentRow].Stubs=0;

ProgressStructure[currentTable][currentRow].NRISonly=0;

ProgressStructure[currentTable][currentRow].StartPlus=0;

ProgressStructure[currentTable][currentRow].Unassessed=0;

ProgressStructure[currentTable][currentRow].Untagged=0;

ProgressStructure[currentTable][currentRow].Link=title;

TotalQueried++;

if (TotalQueried==TotalToQuery) CalculateProgressTotals();

return;

}

// if we're here, must have been a redirect with no section, and not a known empty county

sectionheader=pagetext.match(/{{NRHP header/g); // then look for tables without a section

if (sectionheader===null||sectionheader.length>1) { // if still can't find a table or find multiple tables, fatal error

ProgressFatalError(0,title,currentTable,currentRow);

}

}

StartIndex=pagetext.indexOf(sectionheader[0]);

var sectiontext=pagetext.substr(StartIndex,pagetext.indexOf("\n==",StartIndex)-StartIndex); // only look at relevant section

StartIndex=sectiontext.indexOf("{{NRHP header");

if (StartIndex==-1) {

if (sectiontext.indexOf("{{NRHP row")!=-1) {

ProgressFatalError(2,title,currentTable,currentRow); // incorrectly formatted table

} else { // must be an empty county

console.log("WikiTextFetched: county appears to be empty: "+title);

ProgressStructure[currentTable][currentRow].Total=0;

ProgressStructure[currentTable][currentRow].Illustrated=0;

ProgressStructure[currentTable][currentRow].Articled=0;

ProgressStructure[currentTable][currentRow].Stubs=0;

ProgressStructure[currentTable][currentRow].NRISonly=0;

ProgressStructure[currentTable][currentRow].StartPlus=0;

ProgressStructure[currentTable][currentRow].Unassessed=0;

ProgressStructure[currentTable][currentRow].Untagged=0;

ProgressStructure[currentTable][currentRow].Link=title;

TotalQueried++;

if (TotalQueried==TotalToQuery) CalculateProgressTotals();

return;

}

}

var EndIndex = sectiontext.indexOf("\n|}",StartIndex);

// EndIndex should exist, but a malformed file may not have it

if (EndIndex === -1) {

console.log("WikiTextFetched: missing end of table in "+title);

ProgressFatalError(2,title,currentTable,currentRow); // list is malformed

return;

}

tabletext=sectiontext.substr(StartIndex,EndIndex-StartIndex);

} else { // if not a redirect, default to first table on page

StartIndex=pagetext.indexOf("{{NRHP header");

if (StartIndex==-1) {

ProgressFatalError(1,title,currentTable,currentRow); // no list found

return;

}

var EndIndex = pagetext.indexOf("\n|}",StartIndex);

// EndIndex should exist, but a malformed file may not have it

if (EndIndex === -1) {

console.log("WikiTextFetched: missing end of table in "+title);

ProgressFatalError(2,title,currentTable,currentRow); // list is malformed

return;

}

tabletext=pagetext.substr(StartIndex,EndIndex-StartIndex);

}

}

catch (e) {

console.log("WikiTextFetched: Exception searching for table in "+title+": "+e);

ProgressFatalError(0,title,currentTable,currentRow);

return;

}

// now that tabletext has only relevant table, extract rows

var Rows=[];

var str = "{{";

var start=0;

var commentstart=0;

//console.log("WikiTextFetched: Extracting rows from "+title);

try {

while (true) {

commentstart=tabletext.indexOf("",commentstart);

commentstart=tabletext.indexOf("