// User:Polygnotus/DuplicateReferences.js

//Testpage: https://en.wikipedia.org/wiki/User:Polygnotus/DuplicateReferencesTest

//

mw.loader.using(['mediawiki.util'], function () {

$(document).ready(function () {

// Master switch for console diagnostics; while false, debug() does nothing.
const DEBUG = false;

/**
 * Writes the given values to the console, prefixed with
 * '[DuplicateReferences]', but only when DEBUG is enabled.
 *
 * @param {...*} args Values forwarded to console.log after the prefix.
 */
function debug(...args) {
    if (!DEBUG) {
        return;
    }
    console.log('[DuplicateReferences]', ...args);
}

if (

mw.config.get('wgAction') !== 'view' ||

mw.config.get('wgDiffNewId') ||

mw.config.get('wgDiffOldId') ||

(mw.config.get('wgNamespaceNumber') !== 0 && mw.config.get('wgPageName') !== 'User:Polygnotus/DuplicateReferencesTest')

) {

debug("Not the correct page or action, script terminated");

return;

}

debug("Page title:", document.title);

debug("URL:", window.location.href);

/**
 * Walks forward through the siblings of `element` and returns the first <div>
 * carrying the class 'reflist' or 'mw-references-wrap'.
 *
 * @param {Element} element Element whose following siblings are searched.
 * @returns {Element|null} The matching div, or null when no sibling matches.
 */
function findNextReflistDiv(element) {
    for (let sibling = element.nextElementSibling; sibling; sibling = sibling.nextElementSibling) {
        const isDiv = sibling.tagName.toLowerCase() === 'div';
        if (isDiv && (sibling.classList.contains('reflist') || sibling.classList.contains('mw-references-wrap'))) {
            return sibling;
        }
    }
    return null;
}

const referencesHeader = document.querySelector("h2#References");

if (!referencesHeader) {

debug("References heading not found, script terminated");

return;

}

const containerDiv = referencesHeader.closest("div");

if (!containerDiv) {

debug("Container div not found, script terminated");

return;

}

const reflistDiv = findNextReflistDiv(containerDiv);

if (!reflistDiv) {

debug("Reflist div not found, script terminated");

return;

}

const referencesList = reflistDiv.querySelector('ol.references');

if (!referencesList) {

debug("ol.references not found within reflist div");

return;

}

// Rules for the highlight classes toggled on reference items, the targeted
// list item, the collapsible toggle and the results table (which is hidden on
// viewports up to 768px wide).
const duplicateReferencesCss = `

li:target { border: 1px dotted red; padding: 2px; background-color: #ffcccc !important;}

.duplicate-citation-highlight { background-color: #e1eeff; }

.duplicate-citation-hover { background-color: #cce0ff; border: 1px dotted blue; }

.duplicate-citation-clicked { border: 1px dotted red; padding: 2px; background-color: #ffe6e6; }

.mw-collapsible-toggle { font-weight: normal; float: right; }

.duplicate-references-table { width: 100%; }

@media only screen and (max-width: 768px) {

.duplicate-references-table { display: none; }

}

`;

// Attach the rules to the document through a dedicated <style> element.
const style = document.createElement('style');
style.textContent = duplicateReferencesCss;
document.head.appendChild(style);

/**
 * Inserts a {{Duplicated citations}} tag into the current page's wikitext and
 * saves the page through the MediaWiki API.
 *
 * The tag's `reason` parameter lists every duplicated URL reported by
 * getDuplicateInfo() together with the numbers of the references using it,
 * and a `date` parameter carries the current month and year. The tag is
 * placed after the last leading line that starts with one of the templates in
 * `templatesToCheck`. On success the page is reloaded; on failure the error
 * is logged and a notification is shown.
 *
 * @param {Element} linkElement Link whose label is updated to show progress.
 * @returns {Promise} Promise-like object that settles once the attempt has
 *     finished; failures are reported to the user, not rethrown.
 */
function addDuplicateCitationsTemplate(linkElement) {
    debug("Adding duplicate citations template");
    showLoading(linkElement);

    const pageTitle = mw.config.get('wgPageName');
    const duplicateInfo = getDuplicateInfo();

    // Build the |date=Month Year parameter from the current date.
    const monthNames = ["January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December"
    ];
    const currentDate = new Date();
    const currentMonth = monthNames[currentDate.getMonth()];
    const currentYear = currentDate.getFullYear();
    const dateParam = `|date=${currentMonth} ${currentYear}`;

    // Leading templates the new tag must come after; lower-cased once because
    // each wikitext line is lower-cased before comparison.
    const templatesToCheck = [
        '{{short description',
        '{{DISPLAYTITLE',
        '{{Lowercase title',
        '{{Italic title',
        '{{about',
        '{{redirect',
        '{{Distinguish',
        '{{for'
    ].map(template => template.toLowerCase());

    let api;

    // mw.Api is provided by the 'mediawiki.api' module, while the surrounding
    // script only declares a dependency on 'mediawiki.util'; load it
    // explicitly so the constructor is guaranteed to exist.
    return mw.loader.using(['mediawiki.api']).then(function () {
        api = new mw.Api();
        return api.get({
            action: 'query',
            prop: 'revisions',
            titles: pageTitle,
            rvprop: 'content',
            rvslots: 'main',
            formatversion: 2
        });
    }).then(function (data) {
        const page = data.query.pages[0];
        const content = page.revisions[0].slots.main.content;

        // Find the position to insert the new template: just below the last
        // leading line that starts with one of the listed templates. Scanning
        // stops at the first non-empty line that is neither a template
        // ('{{...') nor a magic word ('__...').
        const lines = content.split('\n');
        let insertPosition = 0;
        for (let i = 0; i < lines.length; i++) {
            const line = lines[i].trim().toLowerCase();
            if (templatesToCheck.some(template => line.startsWith(template))) {
                insertPosition = i + 1;
            } else if (line && !line.startsWith('{{') && !line.startsWith('__')) {
                break;
            }
        }

        // Create the reason string: one bullet per duplicated URL. Each entry
        // ends with an explicit <br> followed by a newline.
        let reason = 'DuplicateReferences detected:<br>\n';
        duplicateInfo.forEach((info) => {
            reason += `* ${info.url} (refs: ${info.refs.map(r => r.number).join(', ')})<br>\n`;
        });

        // Insert the new template with the reason parameter.
        const template = `{{Duplicated citations|reason=${reason}${dateParam}}}`;
        lines.splice(insertPosition, 0, template);
        const newContent = lines.join('\n');
        const summary = `DuplicateReferences +${template}`;

        return api.postWithToken('csrf', {
            action: 'edit',
            title: pageTitle,
            text: newContent,
            summary: summary
        });
    }).then(function () {
        showSuccess(linkElement);
        setTimeout(function () {
            location.reload();
        }, 100); // Reload after 0.1 second
    }).catch(function (error) {
        console.error('Error:', error);
        showError(linkElement);
        mw.notify('Failed to add the template. See console for details.', {type: 'error'});
    });
}

/**
 * Replaces the element's content with the "in progress" label.
 *
 * @param {Element} element Element whose content is overwritten.
 */
function showLoading(element) {
    const busyLabel = '[ Working... ]';
    element.innerHTML = busyLabel;
}

/**
 * Replaces the element's content with the "finished" label.
 *
 * @param {Element} element Element whose content is overwritten.
 */
function showSuccess(element) {
    const doneLabel = '[ Done ]';
    element.innerHTML = doneLabel;
}

/**
 * Replaces the element's content with the "failed" label.
 *
 * @param {Element} element Element whose content is overwritten.
 */
function showError(element) {
    const errorLabel = '[ Error ]';
    element.innerHTML = errorLabel;
}

/**
 * Collects the text of `element` and its descendants, leaving out any element
 * whose computed style has `display: none` or `visibility: hidden`.
 *
 * Each text node is trimmed, the pieces are separated by single spaces and
 * the combined result is trimmed again.
 *
 * @param {Node} element Root node whose child nodes are traversed.
 * @returns {string} The visible text found below `element`.
 */
function getVisibleText(element) {
    const pieces = [];

    for (const child of element.childNodes) {
        switch (child.nodeType) {
            case Node.TEXT_NODE:
                pieces.push(child.textContent.trim());
                break;
            case Node.ELEMENT_NODE: {
                const { display, visibility } = window.getComputedStyle(child);
                const isHidden = display === 'none' || visibility === 'hidden';
                if (!isHidden) {
                    pieces.push(getVisibleText(child));
                }
                break;
            }
            default:
                // Comments and other node types contribute no text.
                break;
        }
    }

    return pieces.join(' ').trim();
}

/**
 * Computes the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one into the other.
 *
 * @param {string} a First text.
 * @param {string} b Second text.
 * @returns {number} The edit distance between `a` and `b`.
 */
function calculateLevenshteinDistance(a, b) {
    debug("Comparing:");
    debug("Text 1:", a);
    debug("Text 2:", b);

    if (a.length === 0) return b.length;
    if (b.length === 0) return a.length;

    // Only the previous row of the distance table is needed to compute the
    // next one. Row 0 holds the cost of building each prefix of `a` from an
    // empty string.
    let previousRow = Array.from({ length: a.length + 1 }, (_, col) => col);

    for (let row = 1; row <= b.length; row++) {
        const bChar = b.charAt(row - 1);
        // Column 0 is the cost of deleting the first `row` characters of `b`.
        const currentRow = [row];

        for (let col = 1; col <= a.length; col++) {
            if (bChar === a.charAt(col - 1)) {
                currentRow[col] = previousRow[col - 1];
            } else {
                currentRow[col] = 1 + Math.min(
                    previousRow[col - 1], // substitution
                    currentRow[col - 1], // insertion
                    previousRow[col] // deletion
                );
            }
        }

        previousRow = currentRow;
    }

    const distance = previousRow[a.length];
    debug("Levenshtein distance:", distance);
    return distance;
}

/**
 * Converts an edit distance into a rounded similarity percentage string.
 *
 * @param {number} distance Edit distance between the two texts.
 * @param {number} maxLength Length of the longer of the two texts.
 * @returns {string} Similarity rounded to a whole number, followed by '%'.
 */
function calculateSimilarityPercentage(distance, maxLength) {
    // Two empty texts have maxLength 0; they are identical, so report 100%
    // instead of letting 0 / 0 produce "NaN%".
    const similarity = maxLength > 0
        ? ((maxLength - distance) / maxLength) * 100
        : 100;

    debug("Similarity percentage:", similarity.toFixed(2) + "%");

    return `${Math.round(similarity)}%`;
}

/**
 * Scans the children of `referencesList` and groups references by the first
 * eligible external link each one contains.
 *
 * A link is eligible unless its text is "Archived" or its URL matches one of
 * the ignore rules below. A reference is skipped entirely when a link whose
 * URL does not contain 'http' is met before an eligible link is found.
 *
 * @returns {Array<{url: string, refs: Array<Object>}>} One entry per URL used
 *     by more than one reference. Each ref has `id`, `number` (1-based
 *     position among the list's children) and `text`; every ref except the
 *     last in a set also has `similarity`, mapping the id of each later ref
 *     in the set to a percentage string.
 */
function getDuplicateInfo() {
    debug("Getting duplicate info");

    // URLs containing any of these are never treated as citation targets.
    const ignoredFragments = [
        "wikipedia.org",
        "_(identifier)" // Templates like ISBN and ISSN and OCLC and S2CID contain (identifier)
    ];

    // URLs starting with any of these are never treated as citation targets.
    const ignoredPrefixes = [
        "https://search.worldcat.org/", // |issn= parameter in cite news
        "https://www.bbc.co.uk/news/live/", // live articles get frequent updates
        "https://www.aljazeera.com/news/liveblog/",
        "https://www.nbcnews.com/news/world/live-blog/",
        "https://www.theguardian.com/world/live/",
        "https://www.nytimes.com/live/",
        "https://edition.cnn.com/world/live-news/",
        "https://www.timesofisrael.com/liveblog",
        "https://www.france24.com/en/live-news/",
        "https://books.google.com/", // may be 2 different pages of the same book
        "https://archive.org/details/isbn_"
    ];

    const isEligibleLink = (url, linkText) =>
        linkText !== "Archived" &&
        !ignoredFragments.some(fragment => url.includes(fragment)) &&
        !ignoredPrefixes.some(prefix => url.startsWith(prefix));

    const refsByUrl = new Map();
    const referenceItems = Array.from(referencesList.children);
    debug("Number of reference items:", referenceItems.length);

    // Pass 1: bucket every reference under its first eligible URL.
    nextItem:
    for (const [index, item] of referenceItems.entries()) {
        if (item.tagName.toLowerCase() !== 'li') {
            continue;
        }

        const refId = item.id;
        const refNumber = index + 1;
        debug(`Processing reference item ${refNumber} (${refId})`);

        // Visible text of the whole reference, used later for similarity.
        const refText = getVisibleText(item);
        debug(` Reference text: ${refText}`);

        let chosenUrl = null;
        for (const link of item.querySelectorAll('a')) {
            const url = link.href;

            // A link without 'http' in its URL disqualifies the whole reference.
            if (!url.includes('http')) {
                debug(` Skipping reference ${refNumber} - URL does not contain 'http'`);
                continue nextItem;
            }

            if (isEligibleLink(url, link.textContent.trim())) {
                chosenUrl = url;
                debug(` Valid link found: ${url}`);
                break;
            }
        }

        if (chosenUrl === null) {
            debug(` No valid link found in this item`);
            continue;
        }

        const entry = {id: refId, number: refNumber, text: refText};
        const bucket = refsByUrl.get(chosenUrl);
        if (bucket) {
            bucket.push(entry);
            debug(` Duplicate found for URL: ${chosenUrl}`);
        } else {
            refsByUrl.set(chosenUrl, [entry]);
            debug(` New URL added to map: ${chosenUrl}`);
        }
    }

    // Pass 2: keep URLs shared by several references and record how similar
    // each pair of references in a set is.
    const duplicates = [];
    for (const [url, refs] of refsByUrl) {
        if (refs.length < 2) {
            continue;
        }

        refs.forEach((earlier, position) => {
            refs.slice(position + 1).forEach((later) => {
                debug(`Comparing references ${earlier.number} and ${later.number}:`);
                const distance = calculateLevenshteinDistance(earlier.text, later.text);
                const maxLength = Math.max(earlier.text.length, later.text.length);
                const similarity = calculateSimilarityPercentage(distance, maxLength);
                earlier.similarity = earlier.similarity || {};
                earlier.similarity[later.id] = similarity;
            });
        });

        duplicates.push({url, refs});
    }

    debug("Number of duplicate sets found:", duplicates.length);
    debug("Duplicate sets:", duplicates);

    return duplicates;
}

/**
 * Builds the "Duplicate References" table.
 *
 * The header row holds the title, a "[hide]" toggle link and, unless the page
 * already shows a {{Duplicated citations}} box, a link that adds the template.
 * Every following row describes one duplicated URL: a link for reporting a
 * false positive, the URL itself, and a jump link per reference (with its
 * similarity to the first reference of the set). Hovering or clicking a jump
 * link highlights the reference items of that set.
 *
 * @param {Array<{url: string, refs: Array<Object>}>} duplicateInfo Duplicate
 *     sets as returned by getDuplicateInfo(); each ref needs `id`, `number`
 *     and `text`.
 * @returns {HTMLTableElement} The assembled table, not yet attached to the page.
 */
function createCollapsibleTable(duplicateInfo) {
    const table = document.createElement('table');
    table.className = 'wikitable mw-collapsible duplicate-references-table';
    table.setAttribute('role', 'presentation');

    const tbody = document.createElement('tbody');
    table.appendChild(tbody);

    const headerRow = document.createElement('tr');
    const headerCell = document.createElement('td');
    headerCell.textContent = 'Duplicate References';

    // The toggle must contain an <a>: the collapse logic looks the control up
    // with the selector '.mw-collapsible-toggle a' and rewrites its label.
    const toggleSpan = document.createElement('span');
    toggleSpan.className = 'mw-collapsible-toggle';
    const toggleLink = document.createElement('a');
    toggleLink.href = '#';
    toggleLink.textContent = 'hide';
    toggleSpan.appendChild(document.createTextNode('['));
    toggleSpan.appendChild(toggleLink);
    toggleSpan.appendChild(document.createTextNode(']'));
    headerCell.appendChild(toggleSpan);

    // Check if the {{Duplicated citations}} template is already present
    const duplicatedCitationsTemplate = document.querySelector('table.box-Duplicated_citations');

    // Only add the link if the template is not present
    if (!duplicatedCitationsTemplate) {
        const addTemplateLink = document.createElement('a');
        addTemplateLink.textContent = ' add {{duplicated citations}} ';
        addTemplateLink.href = '#';
        addTemplateLink.addEventListener('click', function (e) {
            e.preventDefault();
            addDuplicateCitationsTemplate(this);
        });
        headerCell.appendChild(addTemplateLink);
    }

    headerRow.appendChild(headerCell);
    tbody.appendChild(headerRow);

    const pageTitle = mw.config.get('wgPageName').replace(/_/g, ' ');

    duplicateInfo.forEach(({url, refs}) => {
        const row = document.createElement('tr');
        const cell = document.createElement('td');

        // Link that opens a new talk-page section pre-filled with the page
        // title and URL. The query string must read "&section=new"; a mangled
        // "§ion=new" would drop the parameter.
        const reportIcon = document.createElement('a');
        reportIcon.href = `https://en.wikipedia.org/wiki/User_talk:Polygnotus?action=edit&section=new&preloadtitle=Reporting%20%5B%5BUser%3APolygnotus%2FDuplicateReferences%7CDuplicateReferences%5D%5D%20false-positive&preload=User:Polygnotus/$1&preloadparams%5b%5d=${encodeURIComponent(`${pageTitle} ${url}`)}%20~~~~`;
        reportIcon.textContent = 'Report false positive';
        reportIcon.style.marginRight = '5px';
        cell.appendChild(reportIcon);

        const urlLink = document.createElement('a');
        urlLink.href = url;
        urlLink.textContent = url;
        urlLink.target = "_blank";
        urlLink.rel = "noopener noreferrer";
        cell.appendChild(urlLink);

        cell.appendChild(document.createTextNode(' in refs: '));

        // Similarity is always shown relative to the first reference of the set.
        const originalRef = refs[0];

        refs.forEach((ref, index) => {
            const link = document.createElement('a');
            link.href = `#${ref.id}`;
            link.textContent = ref.number;
            cell.appendChild(link);

            // Add similarity information
            if (index > 0) {
                const similarity = calculateSimilarityPercentage(
                    calculateLevenshteinDistance(originalRef.text, ref.text),
                    Math.max(originalRef.text.length, ref.text.length)
                );
                const similarityInfo = document.createElement('span');
                similarityInfo.textContent = ` (${similarity})`;
                cell.appendChild(similarityInfo);
            }

            // Hover: mark the hovered reference and softly highlight its siblings.
            link.addEventListener('mouseover', () => {
                refs.forEach(r => {
                    const citationElement = document.getElementById(r.id);
                    if (citationElement) {
                        if (r.id === ref.id) {
                            citationElement.classList.add('duplicate-citation-hover');
                        } else {
                            citationElement.classList.add('duplicate-citation-highlight');
                        }
                    }
                });
            });

            link.addEventListener('mouseout', () => {
                refs.forEach(r => {
                    const citationElement = document.getElementById(r.id);
                    if (citationElement) {
                        citationElement.classList.remove('duplicate-citation-hover');
                        citationElement.classList.remove('duplicate-citation-highlight');
                    }
                });
            });

            // Click: move the persistent "clicked" marking to this set.
            link.addEventListener('click', () => {
                document.querySelectorAll('.duplicate-citation-clicked').forEach(el => {
                    el.classList.remove('duplicate-citation-clicked');
                });
                refs.forEach(r => {
                    const citationElement = document.getElementById(r.id);
                    if (citationElement) {
                        citationElement.classList.add('duplicate-citation-clicked');
                    }
                });
            });

            if (index < refs.length - 1) {
                cell.appendChild(document.createTextNode(', '));
            }
        });

        row.appendChild(cell);
        tbody.appendChild(row);
    });

    return table;
}

/**
 * Entry point of the scan: looks for duplicate reference links and, when any
 * are found, inserts the results table right after `containerDiv` and wires
 * up its show/hide toggle. The collapsed state is remembered in localStorage
 * under 'duplicateReferencesTableState'.
 */
function checkDuplicateReferenceLinks() {
    debug("Checking for duplicate reference links");

    const duplicateInfo = getDuplicateInfo();
    if (duplicateInfo.length === 0) {
        debug("No duplicates found");
        return;
    }

    debug("Duplicates found, creating collapsible table");
    const table = createCollapsibleTable(duplicateInfo);
    containerDiv.after(table);

    // Set up collapsible functionality. The toggle control is expected to be
    // an <a> inside the '.mw-collapsible-toggle' span; if the table was built
    // without one, create it here instead of failing on a null reference.
    let toggleLink = table.querySelector('.mw-collapsible-toggle a');
    if (!toggleLink) {
        const toggleSpan = table.querySelector('.mw-collapsible-toggle');
        if (!toggleSpan) {
            debug("Toggle element not found, table left expanded");
            return;
        }
        toggleLink = document.createElement('a');
        toggleLink.href = '#';
        toggleSpan.textContent = '[';
        toggleSpan.appendChild(toggleLink);
        toggleSpan.appendChild(document.createTextNode(']'));
    }

    // Every row except the header row is shown or hidden by the toggle.
    const tableBody = $(table).find('tr:not(:first-child)');
    const storageKey = 'duplicateReferencesTableState';

    // Storage access can throw (for example when it is disabled), so the
    // preference is treated as best-effort.
    const readStoredState = () => {
        try {
            return localStorage.getItem(storageKey) === 'true';
        } catch (error) {
            debug("Could not read stored table state:", error);
            return false;
        }
    };

    const setTableState = (isCollapsed) => {
        if (isCollapsed) {
            tableBody.hide();
            toggleLink.textContent = 'show';
        } else {
            tableBody.show();
            toggleLink.textContent = 'hide';
        }
        try {
            localStorage.setItem(storageKey, isCollapsed);
        } catch (error) {
            debug("Could not store table state:", error);
        }
    };

    // Initialize state from localStorage
    setTableState(readStoredState());

    toggleLink.addEventListener('click', function (e) {
        e.preventDefault();
        const isCurrentlyCollapsed = tableBody.is(':hidden');
        setTableState(!isCurrentlyCollapsed);
    });
}

// Run the duplicate scan once, now that every guard above has passed and the
// helper functions are defined.
checkDuplicateReferenceLinks();

debug("Script execution completed");

});

});

//