// User:Phlsph7/ListUnreferencedParagraphs.js

(function(){

const scriptName = 'List Unreferenced Paragraphs';

$.when(mw.loader.using('mediawiki.util'), $.ready).then(function(){

const listPortletlink = mw.util.addPortletLink('p-tb', '#', scriptName, scriptName + 'Id');

listPortletlink.onclick = function(e) {

e.preventDefault();

listUnreferencedParagraphs();

};

const highlightPortletlinkName = 'Highlight Unreferenced Paragraphs';

const highlightPortletlink = mw.util.addPortletLink('p-tb', '#', highlightPortletlinkName, highlightPortletlinkName + 'Id');

highlightPortletlink.onclick = function(e) {

e.preventDefault();

highlightUnreferencedParagraphs();

};

});

function listUnreferencedParagraphs(){

// Delay in milliseconds handed to recursivelyProcessArticles, which uses it
// as the setTimeout interval between two consecutive articles.
const timeout = 50;

// Cancellation flag: set to true by the Stop handler and checked before each
// article is processed.
let stopProcessing = false;

// The regular page content is hidden while the script interface is shown;
// the Close handler resets its display style again.
const content = document.getElementById('content');

const contentContainer = content.parentElement;

content.style.display = 'none';

// Placeholder element appended next to the content and then replaced by the
// script's own markup through outerHTML.
let scriptContainer = document.createElement('div');

contentContainer.appendChild(scriptContainer);

// NOTE(review): the handlers further down look up #btStart, #btStop, #btCopy,
// #btClose, #taList, #tbodyCounter and #scriptContainer, none of which occur
// in this literal - presumably its HTML markup was lost in this copy of the
// script; confirm against the original before relying on it.
scriptContainer.outerHTML = `

Unreferenced Paragraph Counter

Article title Paragraphs without references Maintenance tags

`;

const btStart = $('#btStart');

// Start button: reads the list of article titles (one per line) from #taList,
// rebuilds the result table and starts processing the articles one by one.
btStart.click(function(){
    stopProcessing = false;
    btStart.prop("disabled", true);
    btStop.prop("disabled", false);

    // Strip carriage returns, split into lines, trim every line and drop
    // blank ones so that empty lines never turn into empty article titles.
    const enteredTitles = $('#taList').val()
        .split('\r').join('')
        .split('\n')
        .map(function(title){ return title.trim(); })
        .filter(function(title){ return title.length > 0; });

    // remove duplicates (Set keeps the first occurrence and the input order)
    const articleTitles = [...new Set(enteredTitles)];

    // populate table
    const tableBody = $("#tbodyCounter");
    tableBody.empty();
    for(const articleTitle of articleTitles){
        const linkHTML = getLinkHTML(articleTitle);
        // NOTE(review): processArticle writes to #td_unref_<index> and
        // #td_tags_<index>; this row literal contains no such cells, so its
        // markup was presumably lost in this copy - confirm.
        const row = `${linkHTML}-`;
        tableBody.append(row);
    }

    recursivelyProcessArticles(articleTitles, 0, timeout);

    // Returns the HTML of an <a> element that links to the article on
    // en.wikipedia.org. The title is assigned through textContent, so it is
    // escaped in the returned markup.
    function getLinkHTML(articleTitle) {
        const link = document.createElement('a');
        link.href = 'https://en.wikipedia.org/wiki/' + encodeURIComponent(articleTitle);
        link.textContent = articleTitle;
        return link.outerHTML;
    }
});

const btStop = $('#btStop');

// Stop button: raises the cancellation flag read by
// recursivelyProcessArticles and swaps which of the two buttons is enabled.
btStop.click(() => {
    stopProcessing = true;
    btStop.prop("disabled", true);
    btStart.prop("disabled", false);
});

const btCopy = $('#btCopy');

// Copy button: copies the text of the result table body to the clipboard and
// reports whether the copy command succeeded.
btCopy.click(function(){
    const tableText = getTextViaSelection();

    if(copyToClipboard(tableText)){
        mw.notify("The table was copied to the clipboard.");
    }
    else{
        mw.notify("The table could not be copied to the clipboard.", {type: 'error'});
    }

    // Selects the contents of #tbodyCounter and returns the selection's
    // string form. The selection is left in place afterwards.
    function getTextViaSelection(){
        const tbodyCounter = $('#tbodyCounter')[0];
        const range = document.createRange();
        range.selectNodeContents(tbodyCounter);

        const selection = window.getSelection();
        selection.removeAllRanges();
        selection.addRange(range);
        return selection.toString();
    }

    // Copies `text` through a temporary textarea and the 'copy' command.
    // Returns true when execCommand reports success and false when it
    // reports failure or throws; the textarea is removed in either case.
    function copyToClipboard(text) {
        const textarea = document.createElement('textarea');
        textarea.value = text;
        document.body.appendChild(textarea);
        try{
            textarea.select();
            return document.execCommand('copy');
        }
        catch(error){
            console.error(error);
            return false;
        }
        finally{
            document.body.removeChild(textarea);
        }
    }
});

const btClose = $('#btClose');

// Close button: stops a running job, removes the element with the id
// 'scriptContainer' from the page and makes the regular content visible again.
btClose.click(function(){
    btStop.trigger('click');

    const panel = document.getElementById('scriptContainer');
    panel.parentElement.removeChild(panel);

    content.style.display = '';
});

// Processes articleTitles[index] and schedules the next index after
// `timeout` milliseconds. Once the list is exhausted or stopProcessing is
// set, the Stop button label is reset and its click handler is triggered,
// which re-enables the Start button.
function recursivelyProcessArticles(articleTitles, index, timeout){
    const isFinished = index >= articleTitles.length;
    if(stopProcessing || isFinished){
        btStop.text(`Stop`);
        btStop.trigger('click');
        return;
    }

    // show the progress on the Stop button
    btStop.text(`Stop (${index}/${articleTitles.length})`);
    processArticle(articleTitles, index);

    // The request started by processArticle is not awaited; the next article
    // is started purely on the timer.
    setTimeout(function(){
        recursivelyProcessArticles(articleTitles, index + 1, timeout);
    }, timeout);
}

// Fetches the parsed HTML of articleTitles[index] from the English Wikipedia
// API, counts its unreferenced paragraphs and collects its maintenance tags,
// then writes both results into the cells #td_unref_<index> and
// #td_tags_<index>. Both cells show 'error' when the response contains no
// parsed text, or when the request or the analysis fails.
function processArticle(articleTitles, index){
    const articleTitle = articleTitles[index];
    const articleSearchTerm = encodeURIComponent(articleTitle);
    const wikiApiUrl = `https://en.wikipedia.org/w/api.php?action=parse&page=${articleSearchTerm}&format=json`;

    const cellUnrefId = `td_unref_${index}`;
    const cellTagsId = `td_tags_${index}`;

    // Results are plain text, so they are written with .text() rather than
    // being interpreted as HTML.
    function showResult(unreferencedText, tagsText){
        $('#' + cellUnrefId).text(unreferencedText);
        $('#' + cellTagsId).text(tagsText);
    }

    fetch(wikiApiUrl).then(async function(response) { // jshint ignore:line
        const data = await response.json();
        if (!(data && data.parse && data.parse.text && data.parse.text['*'])) {
            showResult('error', 'error');
            return;
        }

        const articleHTML = data.parse.text['*'];
        const doc = new DOMParser().parseFromString(articleHTML, 'text/html');
        const paragraphContainer = $(doc).find('.mw-parser-output').eq(0);

        // getParagraphInfo runs first, as before: it rearranges the parsed
        // DOM that getMaintenanceTagString then searches.
        const paragraphInfo = getParagraphInfo(paragraphContainer);
        const maintenanceTagString = getMaintenanceTagString(paragraphContainer);

        showResult(`${paragraphInfo.unreferencedParagraphs.length}`, maintenanceTagString);
    }).catch(function(error){
        // network failure, invalid JSON or an exception during the analysis
        console.error(error);
        showResult('error', 'error');
    });
}

// Builds a summary such as "1x More citations needed, 2x citation needed"
// of the maintenance templates found inside `element` (a jQuery object):
// first the '.ambox' banners, then the '.Inline-Template' tags, each type in
// the order of its first occurrence. Returns '' when none are found.
function getMaintenanceTagString(element){
    // Map from template type to its number of occurrences; a Map keeps the
    // insertion order and cannot clash with inherited object properties.
    const templateOverview = new Map();

    for(const ambox of getAmboxes(element)){
        updateOverview(templateOverview, getAmboxType(ambox));
    }

    for(const inlineTemplate of getInlineTemplates(element)){
        updateOverview(templateOverview, getInlineTemplateType(inlineTemplate));
    }

    return getOverviewString(templateOverview);

    function getInlineTemplates(element){
        return element.find('.Inline-Template').toArray();
    }

    // The type of an inline template is its text without the first and the
    // last character (presumably the surrounding square brackets).
    function getInlineTemplateType(inlineTemplate){
        const innerText = inlineTemplate.innerText;
        return innerText.substring(1, innerText.length - 1);
    }

    function getAmboxes(element){
        return element.find('.ambox').toArray();
    }

    // The type of a banner is taken from its first 'box-...' class, with
    // underscores turned into spaces. A banner without such a class falls
    // back to its own trimmed text.
    function getAmboxType(ambox){
        for(const entry of ambox.classList){
            if(entry.startsWith('box-')){
                return entry.substring(4).split('_').join(' ');
            }
        }
        return ambox.innerText.trim();
    }

    function updateOverview(overview, entry){
        overview.set(entry, (overview.get(entry) || 0) + 1);
    }

    function getOverviewString(overview){
        return Array.from(overview, function(pair){
            return `${pair[1]}x ${pair[0]}`;
        }).join(', ');
    }
}

}

// Colours every analysed paragraph of the current page: '#faa' when it has
// no reference, '#afa' otherwise. The unreferenced paragraphs are also logged
// to the console and their number is shown in a notification.
function highlightUnreferencedParagraphs(){
    const container = $('#mw-content-text').find('.mw-parser-output').eq(0);
    const { includedParagraphs, unreferencedParagraphs } = getParagraphInfo(container);

    includedParagraphs.forEach(function(paragraph){
        const lacksReference = unreferencedParagraphs.includes(paragraph);
        paragraph.style.background = lacksReference ? '#faa' : '#afa';
    });

    console.log(unreferencedParagraphs);
    mw.notify(`${unreferencedParagraphs.length} unreferenced paragraphs found`);
}

function getParagraphInfo(paragraphContainer){

// Paragraphs whose innerText has fewer characters than this are dropped by
// removeShortParagraphs.
const minimalParagraphLength = 100;

// References and inline templates are set to display:none while paragraphs
// are merged and restored afterwards - presumably so that they are not
// treated as the last characters of a paragraph; the merge steps decide
// based on innerText.
hideRefs(paragraphContainer[0]);

// Each step moves elements into neighbouring elements, so the child list is
// read again before every step. The order of the three steps matters.
combineMathBlocks(paragraphContainer.children().toArray());

addElementsFollowingParagraphs(paragraphContainer.children().toArray());

addElementsPrecedingParagraphs(paragraphContainer.children().toArray());

showRefs(paragraphContainer[0]);

const children = paragraphContainer.children();

// Only <p> elements and elements with the class 'mw-heading2' (used as
// section delimiters by convertToObject) are kept.
const releventChildren = [];

for(let child of children){

if(child.tagName.toLowerCase() === 'p'){

releventChildren.push(child);

}

else if(child.classList.contains('mw-heading2')){

releventChildren.push(child);

}

}

// Group the paragraphs by section, drop the excluded sections, flatten the
// rest back into one array, discard short paragraphs and finally pick the
// paragraphs without any reference.
const articleObject = convertToObject(releventChildren);

removeIrrelevantSections(articleObject);

const paragraphsInRelevantSections = convertToSimpleArray(articleObject);

const includedParagraphs = removeShortParagraphs(paragraphsInRelevantSections);

const unreferencedParagraphs = getUnreferencedParagraphs(includedParagraphs);

// includedParagraphs: all paragraphs that were analysed;
// unreferencedParagraphs: the subset for which isUnreferenced returned true.
return {

'includedParagraphs': includedParagraphs,

'unreferencedParagraphs': unreferencedParagraphs

};

// Sets display:none on every '.reference' and '.Inline-Template' element
// inside `element`.
function hideRefs(element){
    const markers = element.querySelectorAll('.reference, .Inline-Template');
    markers.forEach(function(marker){
        marker.style.display = 'none';
    });
}

// Clears the inline display style of every '.reference' and
// '.Inline-Template' element inside `element`.
function showRefs(element){
    const hiddenMarkers = Array.from(element.querySelectorAll('.reference, .Inline-Template'));
    for(let position = 0; position < hiddenMarkers.length; position++){
        hiddenMarkers[position].style.display = '';
    }
}

// Merges the elements around a formula-only element into one element: the
// formula element and the element after it are both appended to the element
// before it. The idea is that such a formula artificially splits a single
// paragraph into parts. The first and the last entry of `elements` are never
// treated as formula elements.
// NOTE(review): the type of the preceding element is not checked, so a
// formula directly after a heading would be moved into that heading - confirm
// whether this can occur.
function combineMathBlocks(elements){
    // An element counts as a formula element when its first child carries the
    // class 'mwe-math-element' and accounts for the element's whole text.
    const consistsOnlyOfMath = function(candidate){
        const first = candidate.firstChild;
        if(!first || !first.classList){
            return false;
        }
        if(!first.classList.contains('mwe-math-element')){
            return false;
        }
        return candidate.innerText === first.innerText;
    };

    for(let position = 1; position + 1 < elements.length; position++){
        if(!consistsOnlyOfMath(elements[position])){
            continue;
        }
        const host = elements[position - 1];
        host.appendChild(elements[position]);
        host.appendChild(elements[position + 1]);
    }
}

// If the meaning of a passage does not end with the HTML paragraph, the next
// element is added to it: a <p> whose text (ignoring <style> content) ends
// with ',', ':' or a letter receives the following element as a child. When
// that following element is a STYLE or LINK element, the element after it is
// added as well. The last entry of `elements` is never used as a start.
function addElementsFollowingParagraphs(elements){
    const nonEndingCharacters = [',', ':'];

    for(let i = 0; i < elements.length - 1; i++){
        const element = elements[i];
        if(element.tagName !== 'P'){
            continue;
        }

        // work on a copy so that removing <style> elements does not change
        // the page itself
        const clone = element.cloneNode(true);
        removeStyleElements(clone);
        const innerText = clone.innerText.trim();
        if(innerText.length === 0){
            continue;
        }

        const lastCharacter = innerText[innerText.length - 1];
        if(!nonEndingCharacters.includes(lastCharacter) && !isLetter(lastCharacter)){
            continue;
        }

        const nextElement = elements[i + 1];
        element.appendChild(nextElement);

        const isStyleCarrier = nextElement.tagName === 'STYLE' || nextElement.tagName === 'LINK';
        // i + 2 is a valid index up to and including the last element
        if(isStyleCarrier && i + 2 < elements.length){
            element.appendChild(elements[i + 2]);
        }
    }

    // true for characters that have distinct lower and upper case forms
    function isLetter(character){
        return character.toLowerCase() !== character.toUpperCase();
    }

    function removeStyleElements(element){
        // getElementsByTagName returns a live collection that shrinks while
        // elements are removed; iterate over a snapshot so none is skipped.
        const styleElements = Array.from(element.getElementsByTagName('style'));
        for(const styleElement of styleElements){
            styleElement.remove();
        }
    }
}

// If a paragraph starts in the middle of a sentence, the previous element is
// added to it: every <p> (except the first entry of `elements`) whose trimmed
// text begins with a lower-case letter receives the preceding element as its
// new first child.
function addElementsPrecedingParagraphs(elements){
    // true for a character that has distinct case forms and is the lower one
    const isLowerCaseLetter = function(character){
        const lower = character.toLowerCase();
        return lower !== character.toUpperCase() && character === lower;
    };

    for(let position = 1; position < elements.length; position++){
        const current = elements[position];
        const text = current.innerText.trim();
        if(current.tagName !== 'P' || text.length === 0){
            continue;
        }
        if(!isLowerCaseLetter(text[0])){
            continue;
        }
        current.insertBefore(elements[position - 1], current.firstChild);
    }
}

// Groups the given elements by section. Elements with the class
// 'mw-heading2' start a new section named after their text without
// '[edit]'; all other elements are appended to the current section.
// Elements before the first heading go to the section "Lead". A repeated
// section name starts over with an empty list.
function convertToObject(elementArray){
    const sections = { "Lead": [] };
    let activeSection = "Lead";

    elementArray.forEach(function(node){
        const isHeading = node.classList.contains('mw-heading2');
        if(!isHeading){
            sections[activeSection].push(node);
            return;
        }
        activeSection = node.innerText.split('[edit]').join('');
        sections[activeSection] = [];
    });

    return sections;
}

// Deletes, in place, every section of `articleObject` whose name exactly
// matches one of the excluded section names (case-sensitive).
function removeIrrelevantSections(articleObject){
    const excludedSections = ['Lead', 'Plot', 'Plots', 'Plot summary', 'Plot synopsis', 'Synopsis', 'Storylines', 'Appearances', 'Further reading', 'See also', 'External links', 'References', 'Bibliography', 'Notes', 'Selected publications', 'Selected works', 'Cited sources', 'Sources', 'Footnotes'];

    Object.keys(articleObject)
        .filter(function(sectionName){
            return excludedSections.includes(sectionName);
        })
        .forEach(function(sectionName){
            delete articleObject[sectionName];
        });
}

// Concatenates the element lists of all sections of `articleObject`, in
// property order, into one new flat array.
function convertToSimpleArray(articleObject){
    const sectionLists = [];
    for(const sectionName in articleObject){
        sectionLists.push(articleObject[sectionName]);
    }
    return [].concat(...sectionLists);
}

// Returns a new array with the paragraphs whose innerText has at least
// minimalParagraphLength characters; the input array is left unchanged.
function removeShortParagraphs(paragraphArray){
    return paragraphArray.filter(function(candidate){
        return candidate.innerText.length >= minimalParagraphLength;
    });
}

// Returns a new array with those paragraphs for which isUnreferenced
// reports that no reference was found.
function getUnreferencedParagraphs(paragraphArray){
    return paragraphArray.filter(function(candidate){
        return isUnreferenced(candidate);
    });
}

// A paragraph counts as unreferenced when it contains neither an element
// with the class 'reference' nor a link whose href starts with '#CITEREF'.
function isUnreferenced(paragraph){
    const $paragraph = $(paragraph);

    const hasRegularRef = $paragraph.find('.reference').length > 0;

    const hasHarvRef = $paragraph.find('a').toArray().some(function(anchor){
        const target = anchor.getAttribute('href');
        return Boolean(target) && target.startsWith('#CITEREF');
    });

    return !hasRegularRef && !hasHarvRef;
}

}

})();