// User:SuperHamster/rsp-to-json.js

/**
 * Walks the body rows of the perennial sources table and turns each one into
 * a plain source object.
 *
 * Rows that contain fewer than six <td> cells are skipped. When no element
 * matches the class, an error is logged and an empty array is returned.
 *
 * @param {string} tableClass - Class name (without the leading dot) of the table to parse.
 * @returns {Array} One source object per parsed row, in table order.
 */
function parsePerennialSourcesTable(tableClass) {
  const sources = [];
  const table = document.querySelector(`.${tableClass}`);

  if (!table) {
    console.error(`[RSP-to-JSON] Table with class ${tableClass} not found`);
    return sources;
  }

  for (const row of table.querySelectorAll("tbody > tr")) {
    const cells = row.querySelectorAll("td");

    if (cells.length >= 6) {
      // Column layout: name | status | discussions | last discussed | summary | domains
      const nameCell = cells[0];
      const statusCell = cells[1];

      sources.push({
        name: findSourceName(nameCell),
        link: findSourceLink(nameCell),
        shortcuts: findShortcuts(nameCell),
        status: findStatus(statusCell),
        blacklisted: isBlacklisted(statusCell),
        discussions: parseDiscussions(cells[2]),
        lastDiscussed: cells[3].textContent.trim(),
        summary: cells[4].textContent.trim(),
        summary_wikitext: convertHtmlToWikiMarkup(cells[4]),
        domains: findDomains(cells[5]),
      });
    }
  }

  return sources;
}

/**
 * Reports whether the given element, or any element reached by following
 * parentElement upwards, carries the given class.
 *
 * @param {Element} element - Element to start from; a falsy value yields false.
 * @param {string} className - Class name to look for.
 * @returns {boolean} True when the class is found on the element or an ancestor.
 */
function hasAncestorWithClass(element, className) {
  for (let current = element; current; current = current.parentElement) {
    const { classList } = current;

    // Objects without a classList are stepped over rather than treated as a miss.
    if (classList && classList.contains(className)) {
      return true;
    }
  }

  return false;
}

/**
 * Extracts the source name from a table cell.
 *
 * Only text nodes and <a>/<i> elements (searched recursively) contribute to
 * the name; every other element is ignored. Each text fragment is trimmed,
 * empty fragments are dropped at every nesting level, and the remaining
 * fragments are joined with single spaces.
 *
 * @param {Element} cell - The table cell element.
 * @returns {string} The extracted source name, or an empty string if none was found.
 */
function findSourceName(cell) {
  function extractTextFromNode(node) {
    if (node.nodeType === Node.TEXT_NODE) {
      return node.textContent.trim();
    }

    const isNameElement =
      node.nodeType === Node.ELEMENT_NODE && (node.tagName === "A" || node.tagName === "I");

    return isNameElement ? joinFragments(node.childNodes) : "";
  }

  // Dropping empty fragments before joining keeps ignored children (e.g. a
  // <sup> inside the link) from leaving doubled spaces in the name.
  function joinFragments(nodes) {
    return Array.from(nodes)
      .map(extractTextFromNode)
      .filter(Boolean)
      .join(" ");
  }

  return joinFragments(cell.childNodes);
}

/**
 * Returns the href of the first anchor in the cell that is not located inside
 * a shortcut container (an element with class "wp-rsp-sc").
 *
 * @param {Element} cell - The table cell element.
 * @returns {string} The href of the first non-shortcut link, or an empty string if there is none.
 */
function findSourceLink(cell) {
  for (const anchor of cell.querySelectorAll("a")) {
    const isShortcut = hasAncestorWithClass(anchor, "wp-rsp-sc");

    if (!isShortcut) {
      return anchor.href;
    }
  }

  return "";
}

/**
 * Collects the trimmed text of every anchor found inside a shortcut container
 * (".wp-rsp-sc") within the cell.
 *
 * @param {Element} cell - The table cell element.
 * @returns {Array} Shortcut labels in document order; empty when there are none.
 */
function findShortcuts(cell) {
  const labels = [];

  for (const anchor of cell.querySelectorAll(".wp-rsp-sc a")) {
    labels.push(anchor.textContent.trim());
  }

  return labels;
}

/**
 * Determines the status of a source from a table cell.
 *
 * The lower-cased title attribute of every anchor in the cell is collected,
 * and the first status from the precedence list below that appears among
 * those titles is returned.
 *
 * @param {Element} cell - The table cell element.
 * @returns {string} One of 'deprecated', 'generally reliable',
 *   'generally unreliable', 'no consensus', 'blacklisted', or 'unknown'
 *   when no anchor title matches.
 */
function findStatus(cell) {
  // Checked in this order, so e.g. a cell tagged both "Blacklisted" and
  // "Generally unreliable" reports "generally unreliable".
  const precedence = [
    "deprecated",
    "generally reliable",
    "generally unreliable",
    "no consensus",
    "blacklisted",
  ];

  // Locally scoped on purpose: assigning undeclared names would leak globals
  // in sloppy mode and throw a ReferenceError in strict mode / ES modules.
  const titles = new Set(
    Array.from(cell.querySelectorAll("a"), (anchor) => anchor.title.toLowerCase())
  );

  return precedence.find((status) => titles.has(status)) ?? "unknown";
}

/**
 * Tells whether the cell contains an anchor whose title attribute is exactly
 * "Blacklisted".
 *
 * @param {Element} cell - The table cell element.
 * @returns {boolean} True when such an anchor exists, false otherwise.
 */
function isBlacklisted(cell) {
  const marker = cell.querySelector("a[title='Blacklisted']");

  return Boolean(marker);
}

/**
 * Parses the discussions cell to extract discussion links and metadata.
 *
 * For every anchor in the cell:
 *  - an href starting with "#cite_note-" is resolved through
 *    parseCitationLinks and all links found in that note are appended;
 *  - otherwise, an anchor with text content becomes one "inline" discussion
 *    entry (anchors without text are treated as icons and skipped).
 *
 * The discussion type is the alt text of an image found inside the anchor's
 * previous element sibling, or "General" when there is none. Hrefs starting
 * with "/" are prefixed with "https://en.wikipedia.org".
 *
 * @param {Element} cell - The table cell element.
 * @returns {Array} Array of discussion objects ({ link, type, display, label }).
 */
function parseDiscussions(cell) {
  const discussions = [];

  for (const link of cell.querySelectorAll("a")) {
    const discussionLink = link.getAttribute("href");

    // getAttribute returns null for an anchor without an href. Such an anchor
    // has no target to record, and calling startsWith on null would throw and
    // abort the whole parse, so it is skipped.
    if (discussionLink == null) {
      continue;
    }

    // If cite-note, fetch the links from the corresponding citation note
    if (discussionLink.startsWith("#cite_note-")) {
      const noteId = discussionLink.replace("#", "");
      discussions.push(...parseCitationLinks(noteId));
      continue;
    }

    // A link without text content is likely an icon and can be skipped
    if (!link.textContent.length) {
      continue;
    }

    const typeIcon = link.previousElementSibling?.querySelector("img[alt]");

    discussions.push({
      link: discussionLink.startsWith("/") ? `https://en.wikipedia.org${discussionLink}` : discussionLink,
      type: typeIcon ? typeIcon.getAttribute("alt") : "General",
      display: "inline",
      label: link.textContent.trim(),
    });
  }

  return discussions;
}

/**
 * Converts the HTML content of a cell to Wikipedia wikitext markup.
 *
 * Only the cell's direct child nodes are inspected:
 *  - <i> becomes ''text'' and <b> becomes '''text''';
 *  - <a> with an href starting with "/wiki/" becomes [[Target]] (or
 *    [[Target|label]] when the label differs from the decoded target);
 *  - <a> with an absolute or protocol-relative http(s) href becomes
 *    [url label];
 *  - any other node, and any anchor without a label or recognised href,
 *    contributes its plain text content.
 *
 * @param {Element} cell - The table cell element.
 * @returns {string} The trimmed wikitext representation of the cell's content.
 */
function convertHtmlToWikiMarkup(cell) {
  // Renders a single anchor as a wikitext link, falling back to plain text.
  const linkToWikitext = (anchor) => {
    const label = anchor.textContent;
    const href = anchor.getAttribute("href");

    if (!label || !href) {
      return label;
    }

    if (href.startsWith("/wiki/")) {
      let target = href.slice("/wiki/".length);
      try {
        target = decodeURIComponent(target);
      } catch {
        // Malformed percent-encoding: keep the raw path segment.
      }
      target = target.replace(/_/g, " ");

      // Without a leading colon these would categorise / embed instead of link.
      if (/^(?:category|file|image):/i.test(target)) {
        target = `:${target}`;
      }

      return target === label ? `[[${label}]]` : `[[${target}|${label}]]`;
    }

    if (/^(?:https?:)?\/\//.test(href)) {
      return `[${href} ${label}]`;
    }

    return label;
  };

  const wikiMarkup = Array.from(cell.childNodes, (node) => {
    if (node.nodeType === Node.ELEMENT_NODE) {
      if (node.tagName === "A") return linkToWikitext(node);
      if (node.tagName === "I") return `''${node.textContent}''`;
      if (node.tagName === "B") return `'''${node.textContent}'''`;
    }
    return node.textContent;
  }).join("");

  return wikiMarkup.trim();
}

/**
 * Pulls domain strings out of the links in a domains cell.
 *
 * Each anchor's href is searched for the pattern insource:%22...%22 and the
 * text between the two encoded quotes is taken as the domain. Anchors whose
 * href does not match are ignored.
 *
 * @param {Element} cell - The table cell element.
 * @returns {Array} Array of domain strings, in document order.
 */
function findDomains(cell) {
  const found = [];

  for (const anchor of cell.querySelectorAll("a")) {
    const hit = anchor.href.match(/insource:%22([^"]+)%22/);

    if (hit) {
      found.push(hit[1]);
    }
  }

  return found;
}

/**
 * Parses a citation note to extract discussion links and their context.
 *
 * The note element is looked up by ID and its ".reference-text" child is
 * scanned twice:
 *  1. its child nodes are walked to collect "contexts" - the text between
 *     " of " and the following ":" (possibly spread over several sibling
 *     text nodes and <i> elements);
 *  2. every anchor with text content and an href becomes one "footnote"
 *     discussion entry. When more than one context was found, the context
 *     in effect at the link's position is appended to the label in
 *     parentheses.
 *
 * A warning is logged when no element has the given ID.
 *
 * @param {string} noteId - The ID of the citation note element.
 * @returns {Array} Array of discussion objects ({ link, type, display, label })
 *   from the citation note; empty when the note or its reference text is missing.
 */
function parseCitationLinks(noteId) {
  const citationLinks = [];
  const noteElement = document.getElementById(noteId);

  if (!noteElement) {
    console.warn(`[RSP-to-JSON] No element found for citation note ID: ${noteId}`);
    return citationLinks;
  }

  const referenceText = noteElement.querySelector(".reference-text");
  if (!referenceText) {
    return citationLinks;
  }

  const links = Array.from(referenceText.querySelectorAll("a"));
  const contextMatches = [];
  let currentContext = "";
  let accumulatingContext = false;

  referenceText.childNodes.forEach(node => {
    // Most citation notes have a structure like "See these discussions of <source>:",
    // from which we want to extract those links to discussions,
    // so we check for the existence of " of ":
    if (node.nodeType !== Node.TEXT_NODE || !node.textContent.includes(" of ")) {
      return;
    }

    currentContext = "";
    accumulatingContext = true;

    const textAfterOf = node.textContent.split(" of ")[1] || "";

    // Extract the content before the colon, if the colon is in this text node
    if (textAfterOf) {
      const colonIndex = textAfterOf.indexOf(":");
      if (colonIndex !== -1) {
        currentContext = textAfterOf.slice(0, colonIndex).trim();
        contextMatches.push({ context: currentContext.trim(), node });
        accumulatingContext = false;
      } else {
        currentContext = textAfterOf.trim();
      }
    }

    // Some citation notes spread the context over multiple nodes
    // (e.g. an italicised name followed by ":"), and some cover
    // multiple contexts, e.g. arXiv and bioRxiv. Keep consuming
    // siblings until the closing colon is found.
    if (accumulatingContext) {
      let nextNode = node.nextSibling;
      while (nextNode && accumulatingContext) {
        if (nextNode.nodeType === Node.TEXT_NODE) {
          const colonIndex = nextNode.textContent.indexOf(":");
          if (colonIndex !== -1) {
            currentContext += " " + nextNode.textContent.slice(0, colonIndex).trim();
            contextMatches.push({ context: currentContext.trim(), node: nextNode });
            accumulatingContext = false;
          } else {
            currentContext += " " + nextNode.textContent.trim();
          }
        } else if (nextNode.nodeType === Node.ELEMENT_NODE && nextNode.tagName === "I") {
          currentContext += " " + nextNode.textContent.trim();
        }
        nextNode = nextNode.nextSibling;
      }
    }
  });

  const multipleContexts = contextMatches.length > 1;
  let currentContextIndex = 0;
  currentContext = contextMatches[currentContextIndex]?.context.trim() || "";

  links.forEach(link => {
    // Check that the link has text content
    // otherwise, it is likely an icon and can be skipped
    if (!link.textContent.length) {
      return;
    }

    // Advance to the next context once its closing node precedes this link.
    const nextContextNode = contextMatches[currentContextIndex + 1]?.node;
    if (nextContextNode && (link.compareDocumentPosition(nextContextNode) & Node.DOCUMENT_POSITION_PRECEDING)) {
      currentContextIndex++;
      currentContext = contextMatches[currentContextIndex].context.trim();
    }

    const discussionLink = link.getAttribute("href");

    // getAttribute returns null for an anchor without an href; calling
    // startsWith on it would throw and discard every link of the note.
    // Checked after the context bookkeeping so later links are unaffected.
    if (discussionLink == null) {
      return;
    }

    let label = link.textContent.trim();
    if (multipleContexts && currentContext) {
      label += ` (${currentContext})`;
    }

    const typeIcon = link.previousElementSibling?.querySelector("img[alt]");
    const type = typeIcon ? typeIcon.getAttribute("alt") : "General";

    citationLinks.push({
      link: discussionLink.startsWith("/") ? `https://en.wikipedia.org${discussionLink}` : discussionLink,
      type: type,
      display: "footnote",
      label: label
    });
  });

  return citationLinks;
}

/**
 * Produces copies of the given source objects with the 'discussions'
 * property left out. The input objects are not modified.
 *
 * @param {Array} sources - Array of source objects.
 * @returns {Array} New array of shallow copies without 'discussions'.
 */
function filterOutDiscussions(sources) {
  const withoutDiscussions = ({ discussions, ...remaining }) => remaining;

  return sources.map((source) => withoutDiscussions(source));
}

/**
 * Builds the "Copy JSON..." dropdown and a documentation link, inserts them
 * right before the '.perennial-sources' table, and wires the dropdown so that
 * picking an option parses the table and copies the resulting JSON to the
 * clipboard. Does nothing when the table is not on the page.
 */
function init() {
  const table = document.querySelector('.perennial-sources');
  if (!table) {
    return;
  }

  // Right-floated wrapper that holds the dropdown and the documentation link
  const container = document.createElement('div');
  Object.assign(container.style, {
    float: 'right',
    marginBottom: '10px',
    marginTop: '10px',
  });

  // Dropdown itself
  const select = document.createElement('select');
  select.classList = 'cdx-select';
  Object.assign(select.style, {
    padding: '8px',
    borderRadius: '2px',
  });

  // Disabled placeholder entry shown while nothing is selected
  const placeholder = document.createElement('option');
  Object.assign(placeholder, {
    value: '',
    textContent: 'Copy JSON...',
    disabled: true,
    selected: true,
  });
  select.appendChild(placeholder);

  // The two copy actions
  const choices = [
    ['with-discussions', 'Copy with discussions'],
    ['without-discussions', 'Copy without discussions'],
  ];
  for (const [value, text] of choices) {
    const entry = document.createElement('option');
    entry.value = value;
    entry.textContent = text;
    select.appendChild(entry);
  }

  container.appendChild(select);

  // Documentation link rendered below the dropdown
  const docLink = document.createElement('a');
  Object.assign(docLink, {
    href: 'https://en.wikipedia.org/wiki/User:SuperHamster/RSP-to-JSON',
    textContent: 'RSP-to-JSON Documentation',
    target: '_blank',
  });
  Object.assign(docLink.style, {
    display: 'block',
    fontSize: '11px',
    marginTop: '2px',
    color: '#3366cc',
    textDecoration: 'underline',
    textAlign: 'right',
  });
  container.appendChild(docLink);

  // Keep the table from wrapping around the floated container
  table.style.clear = 'both';
  table.parentNode.insertBefore(container, table);

  select.addEventListener('change', async () => {
    try {
      const parsed = parsePerennialSourcesTable('perennial-sources');

      if (parsed && parsed.length !== 0) {
        const payload = select.value === 'without-discussions'
          ? filterOutDiscussions(parsed)
          : parsed;

        await navigator.clipboard.writeText(JSON.stringify(payload));
        // Green tint signals success
        select.style.backgroundColor = '#dbf3ec';
      } else {
        console.error(`[RSP-to-JSON] Failed to produce JSON`);
        // Red tint signals failure
        select.style.backgroundColor = '#f9dde9';
      }
    } catch (error) {
      console.error('Failed to copy JSON to clipboard:', error);
      select.style.backgroundColor = '#f9dde9';
    }

    // After 2 seconds, clear the tint and fall back to the placeholder
    setTimeout(() => {
      select.style.backgroundColor = '';
      select.value = '';
    }, 2000);
  });
}

// Run init right away unless the document is still loading, in which case
// it is deferred until DOMContentLoaded fires.
if (document.readyState !== 'loading') {
  init();
} else {
  document.addEventListener('DOMContentLoaded', init);
}