// User:Magicpiano/NRBot/UpdateNRHPProgress.js
/* jshint maxerr: 3000 */
/*
The following script places a button at the top of the NRHP Progress page WP:NRHPPROGRESS. When the button is clicked,
the script begins to load each county list linked from the Progress page in the background, extract statistics about
sites in each list, and updates the Progress page with the fetched data.
*/
// ---- Global state shared by the functions in this script ----
var wikitext = 'error';
var ProgressStructure = []; // ProgressStructure[table][row].StatName
var TotalToQuery = 0;
var TotalQueried = 0;
var ErrorCount = 0;
// List of [status, count] pairs (index 0=status, 1=count), seeded with one empty entry.
// The nested-array brackets had been lost, which left an initializer that does not parse.
var WarningCount = [["", 0]];
var InitialTime = 0;
var ProgressDivTimer = 0; // timer for updating ProgressDiv
var DefaultQueryPause = 1; // number of milliseconds to wait between each API query; increased by code if rate limit reached
var RestrictedImageCount = 0;
var bIgnoreTableCount = false; // when true, the expected-table-count check is skipped
// Users authorized to run this script
var AuthorizedUsers = [ "Magicpiano" ]; // nb this is for use only when g_disabled is true
var CurrentMaintainer = "Magicpiano"; // User who is currently maintaining the script
/**
 * Adds the "Update Statistics" button directly above the page content.
 *
 * Acts only on the NRHP Progress page or the tester page, and never when the
 * URL contains "action" (edit mode and the like). On the tester page the
 * bIgnoreTableCount flag is switched on. Does nothing if the button is
 * already present.
 */
function ProgressButton() {
    var buttonId = "NRHPProgressUpdateButton";
    var pageName = mw.config.get('wgPageName');
    var onProgressPage = pageName === "Wikipedia:WikiProject_National_Register_of_Historic_Places/Progress";
    var onTesterPage = pageName === "User:Magicpiano/NRBot/UpdateNRHPProgressTester";

    // The tester page is exempt from the table-count check; the flag is set
    // even when one of the guards below stops us from adding the button.
    if (onTesterPage) {
        bIgnoreTableCount = true;
    }

    // Wrong page, or a URL carrying "action": no button.
    var urlHasAction = location.href.indexOf('action') !== -1;
    if (urlHasAction || !(onProgressPage || onTesterPage)) {
        return;
    }

    // Never add a second button.
    if (document.getElementById(buttonId) !== null) {
        return;
    }

    var updateButton = document.createElement("input");
    updateButton.setAttribute("type", "button");
    updateButton.setAttribute("value", "Update Statistics");
    updateButton.setAttribute("onclick", "ClickUpdateNRHPProgress()");
    updateButton.id = buttonId;

    var pageContent = document.getElementById('mw-content-text');
    pageContent.parentNode.insertBefore(updateButton, pageContent);
}
// Maintenance switch: while true, only the maintainer and AuthorizedUsers may run the script.
var g_disabled = false;

/**
 * Decides whether the current user may run the update.
 *
 * The current maintainer and anyone in AuthorizedUsers are always allowed.
 * Everyone else is allowed unless g_disabled is set, in which case an alert
 * is shown and the run is refused.
 *
 * @returns {boolean} true if the script may proceed, false otherwise
 */
function CheckPermission() {
    var currentUser = mw.user.getName();
    console.log("UpdateNRHPProgress: run by user "+currentUser);

    var isPrivileged = currentUser === CurrentMaintainer ||
        AuthorizedUsers.indexOf(currentUser) !== -1;
    if (isPrivileged || !g_disabled) {
        return true;
    }

    alert("Script is currently disabled for maintenance.");
    return false;
}
/**
 * Click handler for the "Update Statistics" button.
 *
 * After the permission check passes, disables the button, makes sure the
 * ProgressDiv status box exists (creating it just before the button if
 * needed), shows "Initializing..." and requests the wikitext of the current
 * page. Once the wikitext is fetched, SetupTables() is called.
 */
function ClickUpdateNRHPProgress() {
    var allowed = CheckPermission();
    if (!allowed) {
        return;
    }

    // prevent a second run while this one is in progress
    var updateButton = document.getElementById("NRHPProgressUpdateButton");
    updateButton.disabled = true;

    // reuse the status box from an earlier run if there is one
    var statusBox = document.getElementById("ProgressDiv");
    var needsStatusBox = (statusBox === null);
    if (needsStatusBox) {
        statusBox = document.createElement("div");
        statusBox.setAttribute("id", "ProgressDiv");
        statusBox.setAttribute("style", "width:500px; border:1px solid black; padding:5px; background:#ffffff");
        updateButton.parentNode.insertBefore(statusBox, updateButton);
    }
    statusBox.innerHTML = "Initializing...";

    var currentPage = mw.config.get('wgPageName');
    getWikitext(currentPage); // after wikitext fetched, SetupTables() is called
}
// create array of table structure to be populated later
function SetupTables() {
var table=document.getElementsByClassName('wikitable sortable');
// Expected table count:
// There should be 61 tables, as follows:
// The national table, 50 states, and the following 10 non-state entities:
// District of Columbia, Puerto Rico, Virgin Islands, Guam, American Samoa,
// Northern Mariana Islands, Federated States of Micronesia, Palau, Marshall Islands,
// Minor Outlying Islands
// This number should only need adjustment if the number of states or non-state entities having
// separate tables is changed. An alert here is more likely caused by some sort of formatting change
// or error in the table.
//
// Ideally, the above search should be restrictable (by class in some way?) to only the specific tables of interest.
//
if ((!bIgnoreTableCount) && table.length !== 61) {
alert('Incorrect table count in progress page: expected 61, saw '+table.length+'.');
return;
}
// set up national totals
var tr=table[0].getElementsByTagName("tr");
var i,j, td;
ProgressStructure[0]=[];
for (j=1; j
td=tr[j].getElementsByTagName("td");
ProgressStructure[0][j-1]={
ID: td[0].innerHTML,
Total: 0,
Illustrated: 0,
Articled: 0,
Stubs: 0,
NRISonly: 0,
StartPlus: 0,
Unassessed: 0,
Untagged: 0
};
}
// special row for Tangier, Morocco
td=tr[tr.length-3].getElementsByTagName("td");
/* This code copies the existing settings for the American Legation in Tangier into the table.
This process is prone to error if it is modified. Because the article is an illustrated Start+ article,
the settings hardcoded further down are unlikely to change.
*/
/* // To copy previous contents instead of hardcoding:
ProgressStructure[0][tr.length-4]={};
ProgressStructure[0][tr.length-4].ID="Tangier, Morocco";
ProgressStructure[0][tr.length-4].Total=parseFloat(td[1].innerHTML.replace(",",""));
ProgressStructure[0][tr.length-4].Illustrated=parseFloat(td[2].innerHTML.replace(",",""));
ProgressStructure[0][tr.length-4].Articled=parseFloat(td[4].innerHTML.replace(",",""));
ProgressStructure[0][tr.length-4].Stubs=parseFloat(td[6].innerHTML.replace(",",""));
ProgressStructure[0][tr.length-4].NRISonly=parseFloat(td[7].innerHTML.replace(",",""));
ProgressStructure[0][tr.length-4].StartPlus=parseFloat(td[8].innerHTML.replace(",",""));
ProgressStructure[0][tr.length-4].Unassessed=parseFloat(td[10].innerHTML.replace(",",""));
ProgressStructure[0][tr.length-4].Untagged=parseFloat(td[11].innerHTML.replace(",",""));
*/
/* American Legation: Illustrated, Articled, not stub, not NRIS, Start+, Assessed, Tagged */
ProgressStructure[0][tr.length-4]={
ID: "Tangier, Morocco",
Total: 1,
Illustrated: 1,
Articled: 1,
Stubs: 0,
NRISonly: 0,
StartPlus: 1,
Unassessed: 0,
Untagged: 0
};
// duplicates row
td=tr[tr.length-2].getElementsByTagName("td");
ProgressStructure[0][tr.length-3]={
ID: "National Duplicates",
Total: parseFloat(td[0].innerHTML.replace(",","")),
Illustrated: parseFloat(td[1].innerHTML.replace(",","")),
Articled: parseFloat(td[3].innerHTML.replace(",","")),
Stubs: parseFloat(td[5].innerHTML.replace(",","")),
NRISonly: parseFloat(td[6].innerHTML.replace(",","")),
StartPlus: parseFloat(td[7].innerHTML.replace(",","")),
Unassessed: parseFloat(td[9].innerHTML.replace(",","")),
Untagged: parseFloat(td[10].innerHTML.replace(",",""))
};
// national totals
ProgressStructure[0][tr.length-2]={
ID: "National Totals",
Total: 0,
Illustrated: 0,
Articled:0,
Stubs: 0,
NRISonly: 0,
StartPlus: 0,
Unassessed: 0,
Untagged: 0
};
// now data for each state
for (i=1; i
tr=table[i].getElementsByTagName("tr");
ProgressStructure[i]=[];
for (j=1; j
td=tr[j].getElementsByTagName("td");// fill in existing data in case error
ProgressStructure[i][j-1]={};
ProgressStructure[i][j-1].ID=td[0].innerHTML.substr(0,5);
ProgressStructure[i][j-1].Total=parseFloat(td[3].innerHTML.replace(",",""));
ProgressStructure[i][j-1].Illustrated=parseFloat(td[4].innerHTML.replace(",",""));
ProgressStructure[i][j-1].Articled=parseFloat(td[6].innerHTML.replace(",",""));
ProgressStructure[i][j-1].Stubs=parseFloat(td[8].innerHTML.replace(",",""));
ProgressStructure[i][j-1].NRISonly=parseFloat(td[9].innerHTML.replace(",",""));
ProgressStructure[i][j-1].StartPlus=parseFloat(td[10].innerHTML.replace(",",""));
ProgressStructure[i][j-1].Unassessed=parseFloat(td[12].innerHTML.replace(",",""));
ProgressStructure[i][j-1].Untagged=parseFloat(td[13].innerHTML.replace(",",""));
var link=td[1].getElementsByTagName("a");
if (link.length!==0 && link[0].href.search("#")==-1) {
link=decodeURI(link[0].href).split("/");
link=link[link.length-1].replace(/_/g," ");
ProgressStructure[i][j-1].Link=link;
ProgressStructure[i][j-1].ArticleQueried=0; // for querying later
ProgressStructure[i][j-1].TalkQueried=0;
} else {
if (ProgressStructure[i][j-1].ID!="ddddd") { // if no link and not duplicate, must be totals row, so we can zero it
ProgressStructure[i][j-1].Total=0;
ProgressStructure[i][j-1].Illustrated=0;
ProgressStructure[i][j-1].Articled=0;
ProgressStructure[i][j-1].Stubs=0;
ProgressStructure[i][j-1].NRISonly=0;
ProgressStructure[i][j-1].StartPlus=0;
ProgressStructure[i][j-1].Unassessed=0;
ProgressStructure[i][j-1].Untagged=0;
}
}
}
// duplicates row
td=tr[tr.length-2].getElementsByTagName("td");
ProgressStructure[i][tr.length-3]={
ID: ProgressStructure[0][i-1].ID+" Duplicates",
Total: parseFloat(td[0].innerHTML.replace(",","")),
Illustrated: parseFloat(td[1].innerHTML.replace(",","")),
Articled: parseFloat(td[3].innerHTML.replace(",","")),
Stubs: parseFloat(td[5].innerHTML.replace(",","")),
NRISonly: parseFloat(td[6].innerHTML.replace(",","")),
StartPlus: parseFloat(td[7].innerHTML.replace(",","")),
Unassessed: parseFloat(td[9].innerHTML.replace(",","")),
Untagged: parseFloat(td[10].innerHTML.replace(",",""))
};
// state totals
ProgressStructure[i][tr.length-2]={
ID: ProgressStructure[0][i-1].ID+" Totals",
Total: 0,
Illustrated: 0,
Articled:0,
Stubs: 0,
NRISonly: 0,
StartPlus: 0,
Unassessed: 0,
Untagged: 0
};
}
for (i=1; i for (j=0; j if (typeof ProgressStructure[i][j].Link!="undefined") TotalToQuery++; // don't count duplicates and total rows } } TotalQueried=0; var ProgressDiv=document.getElementById("ProgressDiv"); ProgressDiv.innerHTML+=" Done! var ProgressSpan=document.createElement("span"); ProgressSpan.setAttribute("id", "ProgressSpan"); ProgressDiv.appendChild(ProgressSpan); ProgressSpan.innerHTML = "Querying county data... 0 (0%) of "+TotalToQuery+" lists checked."; var TimeSpan=document.createElement("span"); TimeSpan.setAttribute("id", "TimeSpan"); ProgressDiv.appendChild(TimeSpan); TimeSpan.innerHTML = ""; InitialTime=new Date(); // record starting time UpdateProgressDiv(); LoadList(1,0); // begin querying first page } // load next list to query function LoadList(currentTable,currentRow) { // check if we need to go to the next table if (currentRow>ProgressStructure[currentTable].length-3) { currentRow=0; currentTable++; } // check if there are no more tables if (currentTable>ProgressStructure.length-1) return; if (typeof ProgressStructure[currentTable][currentRow].Link=="undefined") { // skip duplicate and total rows LoadList(currentTable,currentRow+1); return; } var title=ProgressStructure[currentTable][currentRow].Link; setTimeout(function(){ // short delay to prevent API overload getProgressListWikitext(title,currentTable,currentRow); LoadList(currentTable,currentRow+1); }, DefaultQueryPause); return; } function WikitextFetched(ajaxResponse,status,title,currentTable,currentRow) { // console.log("WikitextFetched: table="+currentTable+" row="+currentRow+" title="+title+" status="+status); if (status!="success") { NewWarning("Wikitext "+ajaxResponse.errorThrown); setTimeout(function(){ // try again after delay if rate limit reached getProgressListWikitext(title,currentTable,currentRow); }, 250); return; } // won't get here unless successful var responseText, pagetext; var tabletext, regex, i, j; try { responseText=JSON.parse(ajaxResponse.responseText); 
pagetext=responseText.query.pages[responseText.query.pageids[0]].revisions[0]["*"]; } catch (e) { console.log("WikiTextFetched: Exception parsing "+title+": "+e); ProgressFatalError(0,title,currentTable,currentRow); return; } var StartIndex; // console.log("WikiTextFetched: Parsing out "+title+" to find relevant table"); try { if (responseText.query.redirects) { // if redirect, find section var SectionName="Undefined"; for (var r in responseText.query.redirects) { if (typeof responseText.query.redirects[r].tofragment!="undefined") SectionName=responseText.query.redirects[r].tofragment.replace(/.27/g,"'"); } regex = new RegExp("=[ ]*(\\[\\[(.*?\\|)?[ ]*)?"+SectionName+"([ ]*\\]\\])?[ ]*=", "g"); var sectionheader=pagetext.match(regex); if (sectionheader === null || sectionheader === undefined) { // if no section found, check if one of known empty counties var ID = ProgressStructure[currentTable][currentRow].ID; //console.log("WikiTextFetched: List appears to be empty: title="+title+" id="+ID); // list last check date: 2023-04-03 var EmptyCounties=["01061", // Geneva County AL "02270", // Kusilvak Census Area AK (first) "02158", // Kusilvak Census Area AK (second) "08014", // Broomfield County CO "12067", // Lafayette County FL "20081", // Haskell County KS "20175", // Seward County KS "20187", // Stanton County KS "20189", // Stevens County KS "26051", // Gladwin County MI "26079", // Kalkaska County MI "26119", // Montmorency County MI "26129", // Ogemaw County MI "26133", // Osceola County MI "31009", // Blaine County NE "31113", // Logan County NE "31117", // McPherson County NE "38085", // Sioux County ND "48017", // Bailey County TX "48023", // Baylor County TX "48033", // Borden County TX "48069", // Castro County TX "48079", // Cochran County TX "48103", // Crane County TX "48107", // Crosby County TX "48119", // Delta County TX "48131", // Duval County TX "48155", // Foard County TX "48165", // Gaines County TX "48207", // Haskell County TX "48219", // 
Hockley County TX "48247", // Jim Hogg County TX "48269", // King County TX "48279", // Lamb County TX "48341", // Moore County TX "48389", // Reeves County TX "48415", // Scurry County TX "48421", // Sherman County TX "48433", // Stonewall County TX "48437", // Swisher County TX "48445", // Terry County TX "48461", // Upton County TX "48475", // Ward County TX "48501", // Yoakum County TX "51735" // Poquoson VA ]; var errorcode = 0; for (var k=0; k if (ID==EmptyCounties[k]) {errorcode=-1;} } if (errorcode!==0) { // must be an empty county ProgressStructure[currentTable][currentRow].Total=0; ProgressStructure[currentTable][currentRow].Illustrated=0; ProgressStructure[currentTable][currentRow].Articled=0; ProgressStructure[currentTable][currentRow].Stubs=0; ProgressStructure[currentTable][currentRow].NRISonly=0; ProgressStructure[currentTable][currentRow].StartPlus=0; ProgressStructure[currentTable][currentRow].Unassessed=0; ProgressStructure[currentTable][currentRow].Untagged=0; ProgressStructure[currentTable][currentRow].Link=title; TotalQueried++; if (TotalQueried==TotalToQuery) CalculateProgressTotals(); return; } // if we're here, must have been a redirect with no section, and not a known empty county sectionheader=pagetext.match(/{{NRHP header/g); // then look for tables without a section if (sectionheader===null||sectionheader.length>1) { // if still can't find a table or find multiple tables, fatal error ProgressFatalError(0,title,currentTable,currentRow); } } StartIndex=pagetext.indexOf(sectionheader[0]); var sectiontext=pagetext.substr(StartIndex,pagetext.indexOf("\n==",StartIndex)-StartIndex); // only look at relevant section StartIndex=sectiontext.indexOf("{{NRHP header"); if (StartIndex==-1) { if (sectiontext.indexOf("{{NRHP row")!=-1) { ProgressFatalError(2,title,currentTable,currentRow); // incorrectly formatted table } else { // must be an empty county console.log("WikiTextFetched: county appears to be empty: "+title); 
ProgressStructure[currentTable][currentRow].Total=0; ProgressStructure[currentTable][currentRow].Illustrated=0; ProgressStructure[currentTable][currentRow].Articled=0; ProgressStructure[currentTable][currentRow].Stubs=0; ProgressStructure[currentTable][currentRow].NRISonly=0; ProgressStructure[currentTable][currentRow].StartPlus=0; ProgressStructure[currentTable][currentRow].Unassessed=0; ProgressStructure[currentTable][currentRow].Untagged=0; ProgressStructure[currentTable][currentRow].Link=title; TotalQueried++; if (TotalQueried==TotalToQuery) CalculateProgressTotals(); return; } } var EndIndex = sectiontext.indexOf("\n|}",StartIndex); // EndIndex should exist, but a malformed file may not have it if (EndIndex === -1) { console.log("WikiTextFetched: missing end of table in "+title); ProgressFatalError(2,title,currentTable,currentRow); // list is malformed return; } tabletext=sectiontext.substr(StartIndex,EndIndex-StartIndex); } else { // if not a redirect, default to first table on page StartIndex=pagetext.indexOf("{{NRHP header"); if (StartIndex==-1) { ProgressFatalError(1,title,currentTable,currentRow); // no list found return; } var EndIndex = pagetext.indexOf("\n|}",StartIndex); // EndIndex should exist, but a malformed file may not have it if (EndIndex === -1) { console.log("WikiTextFetched: missing end of table in "+title); ProgressFatalError(2,title,currentTable,currentRow); // list is malformed return; } tabletext=pagetext.substr(StartIndex,EndIndex-StartIndex); } } catch (e) { console.log("WikiTextFetched: Exception searching for table in "+title+": "+e); ProgressFatalError(0,title,currentTable,currentRow); return; } // now that tabletext has only relevant table, extract rows var Rows=[]; var str = "{{"; var start=0; var commentstart=0; //console.log("WikiTextFetched: Extracting rows from "+title); try { while (true) { commentstart=tabletext.indexOf("",commentstart); commentstart=tabletext.indexOf("
";