// User:SuperHamster/rsp-to-json.js
/**
 * Reads the perennial sources table on the current page and builds one plain
 * object per data row.
 *
 * Rows with fewer than six `<td>` cells are skipped. When no table matches,
 * an error is logged and an empty array is returned.
 *
 * @param {string} tableClass - Class name (without the leading dot) of the table to parse.
 * @returns {Array<Object>} One object per parsed row with the keys `name`, `link`,
 *   `shortcuts`, `status`, `blacklisted`, `discussions`, `lastDiscussed`,
 *   `summary`, `summary_wikitext` and `domains`.
 */
function parsePerennialSourcesTable(tableClass) {
  const table = document.querySelector(`.${tableClass}`);
  if (!table) {
    console.error(`[RSP-to-JSON] Table with class ${tableClass} not found`);
    return [];
  }

  const parsedRows = [];
  for (const row of table.querySelectorAll("tbody > tr")) {
    const cells = row.querySelectorAll("td");
    if (cells.length >= 6) {
      // Column order: name, status, discussions, last discussed, summary, domains.
      const nameCell = cells[0];
      const statusCell = cells[1];
      const discussionCell = cells[2];
      const lastCell = cells[3];
      const summaryCell = cells[4];
      const domainsCell = cells[5];

      parsedRows.push({
        name: findSourceName(nameCell),
        link: findSourceLink(nameCell),
        shortcuts: findShortcuts(nameCell),
        status: findStatus(statusCell),
        blacklisted: isBlacklisted(statusCell),
        discussions: parseDiscussions(discussionCell),
        lastDiscussed: lastCell.textContent.trim(),
        summary: summaryCell.textContent.trim(),
        summary_wikitext: convertHtmlToWikiMarkup(summaryCell),
        domains: findDomains(domainsCell),
      });
    }
  }
  return parsedRows;
}
/**
 * Tells whether the given element, or any element above it in the
 * `parentElement` chain, carries a particular class.
 *
 * @param {Element} element - Element at which the upward walk starts.
 * @param {string} className - Class name to search for.
 * @returns {boolean} True as soon as a match is found; false once the chain is exhausted.
 */
function hasAncestorWithClass(element, className) {
  for (let current = element; current; current = current.parentElement) {
    const classes = current.classList;
    if (classes && classes.contains(className)) {
      return true;
    }
  }
  return false;
}
/**
 * Builds the source name from the text found in a table cell.
 *
 * Text is taken from the cell's direct text nodes and, recursively, from
 * `<a>` and `<i>` elements. Any other element contributes nothing.
 *
 * @param {Element} cell - The table cell holding the source name.
 * @returns {string} The collected pieces joined by single spaces, or "" if there is no text.
 */
function findSourceName(cell) {
  const collectText = (node) => {
    switch (node.nodeType) {
      case Node.TEXT_NODE:
        return node.textContent.trim();
      case Node.ELEMENT_NODE: {
        const isNameTag = node.tagName === "A" || node.tagName === "I";
        if (!isNameTag) {
          return "";
        }
        // Nested pieces are joined as-is (empty pieces are not dropped at this level).
        const inner = [];
        for (const child of Array.from(node.childNodes)) {
          inner.push(collectText(child));
        }
        return inner.join(" ").trim();
      }
      default:
        return "";
    }
  };

  // Only non-empty top-level pieces take part in the final join.
  const pieces = [];
  for (const child of Array.from(cell.childNodes)) {
    const piece = collectText(child);
    if (piece) {
      pieces.push(piece);
    }
  }
  const combined = pieces.join(" ").trim();
  return combined || "";
}
/**
 * Returns the target of the first link in a cell that is not a shortcut link,
 * i.e. the first `<a>` that has no `wp-rsp-sc` class on itself or an ancestor.
 *
 * @param {Element} cell - The table cell to search.
 * @returns {string} The `href` of that link, or "" when every link is a shortcut or there are none.
 */
function findSourceLink(cell) {
  for (const anchor of Array.from(cell.querySelectorAll("a"))) {
    const isShortcut = hasAncestorWithClass(anchor, "wp-rsp-sc");
    if (!isShortcut) {
      return anchor.href;
    }
  }
  return "";
}
/**
 * Collects the labels of all shortcut links in a cell, i.e. the trimmed text
 * of every `<a>` inside an element with the `wp-rsp-sc` class.
 *
 * @param {Element} cell - The table cell to search.
 * @returns {Array<string>} Shortcut labels in document order (empty if there are none).
 */
function findShortcuts(cell) {
  const shortcutAnchors = cell.querySelectorAll(".wp-rsp-sc a");
  return Array.from(shortcutAnchors, (anchor) => anchor.textContent.trim());
}
/**
 * Determines the status of a source from the links in its status cell.
 *
 * The lower-cased `title` of every `<a>` in the cell is compared against the
 * known statuses. When several are present, the first match in this order
 * wins: deprecated, generally reliable, generally unreliable, no consensus,
 * blacklisted.
 *
 * @param {Element} cell - The status table cell.
 * @returns {string} The matched status, or "unknown" when no link title matches.
 */
function findStatus(cell) {
  // Highest-priority status first.
  const statusPriority = [
    "deprecated",
    "generally reliable",
    "generally unreliable",
    "no consensus",
    "blacklisted",
  ];

  // Declared locally: the previous undeclared assignments leaked globals and
  // threw a ReferenceError when the script ran in strict mode.
  const titles = new Set(
    Array.from(cell.querySelectorAll("a"), (anchor) => (anchor.title ?? "").toLowerCase())
  );

  return statusPriority.find((status) => titles.has(status)) ?? "unknown";
}
/**
 * Reports whether a status cell marks the source as blacklisted, which is
 * signalled by a link whose `title` attribute is exactly "Blacklisted".
 *
 * @param {Element} cell - The status table cell.
 * @returns {boolean} True when such a link exists in the cell, false otherwise.
 */
function isBlacklisted(cell) {
  const marker = cell.querySelector("a[title='Blacklisted']");
  return Boolean(marker);
}
/**
 * Parses the discussions cell into a flat list of discussion entries.
 *
 * Every `<a>` in the cell is handled as follows:
 * - no `href` (or an empty one): skipped, since there is nothing to link to;
 * - `href` starting with "#cite_note-": replaced by the entries that
 *   `parseCitationLinks` extracts from the referenced footnote;
 * - otherwise: emitted as an "inline" entry, unless the link has no text
 *   (such links are most likely icons).
 *
 * @param {Element} cell - The discussions table cell.
 * @returns {Array<{link: string, type: string, display: string, label: string}>}
 *   Discussion entries in document order. `type` is the `alt` text of an
 *   image found in the element directly preceding the link, or "General".
 */
function parseDiscussions(cell) {
  const discussions = [];

  for (const link of Array.from(cell.querySelectorAll("a"))) {
    const discussionLink = link.getAttribute("href");

    // getAttribute returns null for anchors without an href; calling
    // startsWith on that used to throw and abort the whole export.
    if (!discussionLink) {
      continue;
    }

    // Footnote reference: pull the discussion links out of the citation note.
    if (discussionLink.startsWith("#cite_note-")) {
      const noteId = discussionLink.replace("#", "");
      discussions.push(...parseCitationLinks(noteId));
      continue;
    }

    // A link without text content is likely an icon and can be skipped.
    if (!link.textContent.length) {
      continue;
    }

    const typeIcon = link.previousElementSibling?.querySelector("img[alt]");
    discussions.push({
      // Site-relative links are made absolute.
      link: discussionLink.startsWith("/") ? `https://en.wikipedia.org${discussionLink}` : discussionLink,
      type: typeIcon ? typeIcon.getAttribute("alt") : "General",
      display: "inline",
      label: link.textContent.trim(),
    });
  }

  return discussions;
}
/**
 * Converts the content of a cell to a simple wikitext representation.
 *
 * Only the cell's direct child nodes are inspected:
 * - `<a>` becomes `[[text]]`
 * - `<i>` becomes `''text''`
 * - `<b>` becomes `'''text'''`
 * - every other node contributes its plain text content.
 *
 * NOTE: the link target is taken from the link text, so piped or external
 * links are not reproduced exactly, and markup nested inside a child element
 * is flattened to text.
 *
 * @param {Element} cell - The table cell element.
 * @returns {string} The trimmed wikitext for the cell's content.
 */
function convertHtmlToWikiMarkup(cell) {
  // Opening/closing wikitext delimiters per supported tag.
  const delimiters = new Map([
    ["A", ["[[", "]]"]],
    ["I", ["''", "''"]],
    ["B", ["'''", "'''"]],
  ]);

  const parts = Array.from(cell.childNodes, (node) => {
    const text = node.textContent;
    if (node.nodeType !== Node.ELEMENT_NODE) {
      return text;
    }
    const pair = delimiters.get(node.tagName);
    // Previously every branch returned the bare text, so the result was
    // indistinguishable from the plain-text summary.
    return pair ? `${pair[0]}${text}${pair[1]}` : text;
  });

  return parts.join("").trim();
}
/**
 * Extracts domain strings from the links in a domains cell.
 *
 * A link contributes a domain when its `href` contains
 * `insource:%22...%22`; the text between the two `%22` markers is returned
 * as found in the URL (it is not decoded). Links without that pattern are ignored.
 *
 * @param {Element} cell - The domains table cell.
 * @returns {Array<string>} The extracted strings in document order.
 */
function findDomains(cell) {
  const found = [];
  for (const anchor of Array.from(cell.querySelectorAll("a"))) {
    const hit = anchor.href.match(/insource:%22([^"]+)%22/);
    if (hit && hit[1]) {
      found.push(hit[1]);
    }
  }
  return found;
}
/**
 * Parses a citation note (footnote) and extracts the discussion links in it.
 *
 * The note's `.reference-text` element is scanned twice:
 * 1. its child text nodes are searched for "... of <context>:" phrases to
 *    learn which context (source name) each group of links belongs to;
 * 2. its links are turned into entries, and when the note covers more than
 *    one context the context is appended to each label as " (<context>)".
 *
 * Links without text (likely icons) and links without an `href` are skipped.
 * A warning is logged when no element with the given ID exists.
 *
 * @param {string} noteId - The ID of the citation note element.
 * @returns {Array<{link: string, type: string, display: string, label: string}>}
 *   Entries with `display` set to "footnote"; empty when the note or its
 *   reference text cannot be found.
 */
function parseCitationLinks(noteId) {
  const citationLinks = [];

  const noteElement = document.getElementById(noteId);
  if (!noteElement) {
    console.warn(`[RSP-to-JSON] No element found for citation note ID: ${noteId}`);
    return citationLinks;
  }

  const referenceText = noteElement.querySelector(".reference-text");
  if (!referenceText) {
    return citationLinks;
  }

  const links = Array.from(referenceText.querySelectorAll("a"));
  // Each entry records a context label and the text node in which its
  // terminating colon was found.
  const contextMatches = [];

  referenceText.childNodes.forEach(node => {
    // Most citation notes have a structure like
    // "See these discussions of <source>: <links>", from which we want to
    // extract those links to discussions, so we check for the existence of " of ":
    if (node.nodeType !== Node.TEXT_NODE || !node.textContent.includes(" of ")) {
      return;
    }

    let context = "";
    let accumulatingContext = true;
    const textAfterOf = node.textContent.split(" of ")[1] || "";

    // The context is whatever precedes the colon, if this node contains one.
    if (textAfterOf) {
      const colonIndex = textAfterOf.indexOf(":");
      if (colonIndex !== -1) {
        context = textAfterOf.slice(0, colonIndex).trim();
        contextMatches.push({ context: context.trim(), node });
        accumulatingContext = false;
      } else {
        context = textAfterOf.trim();
      }
    }

    // No colon yet: the context continues in the following siblings
    // (text nodes and <i> elements) until a text node contains a colon.
    // Some citation notes cover multiple contexts this way,
    // e.g. arXiv and bioRxiv.
    let nextNode = node.nextSibling;
    while (nextNode && accumulatingContext) {
      if (nextNode.nodeType === Node.TEXT_NODE) {
        const colonIndex = nextNode.textContent.indexOf(":");
        if (colonIndex !== -1) {
          context += " " + nextNode.textContent.slice(0, colonIndex).trim();
          contextMatches.push({ context: context.trim(), node: nextNode });
          accumulatingContext = false;
        } else {
          context += " " + nextNode.textContent.trim();
        }
      } else if (nextNode.nodeType === Node.ELEMENT_NODE && nextNode.tagName === "I") {
        context += " " + nextNode.textContent.trim();
      }
      nextNode = nextNode.nextSibling;
    }
  });

  const multipleContexts = contextMatches.length > 1;
  let currentContextIndex = 0;
  let currentContext = contextMatches[currentContextIndex]?.context.trim() || "";

  links.forEach(link => {
    // A link without text content is likely an icon and can be skipped.
    if (!link.textContent.length) {
      return;
    }

    // Move on to the next context once its marker node lies before this link.
    const nextContextNode = contextMatches[currentContextIndex + 1]?.node;
    if (nextContextNode && (link.compareDocumentPosition(nextContextNode) & Node.DOCUMENT_POSITION_PRECEDING)) {
      currentContextIndex++;
      currentContext = contextMatches[currentContextIndex].context.trim();
    }

    // getAttribute returns null for anchors without an href; calling
    // startsWith on that used to throw and abort the whole export.
    const discussionLink = link.getAttribute("href");
    if (!discussionLink) {
      return;
    }

    let label = link.textContent.trim();
    if (multipleContexts && currentContext) {
      label += ` (${currentContext})`;
    }

    const typeIcon = link.previousElementSibling?.querySelector("img[alt]");
    citationLinks.push({
      // Site-relative links are made absolute.
      link: discussionLink.startsWith("/") ? `https://en.wikipedia.org${discussionLink}` : discussionLink,
      type: typeIcon ? typeIcon.getAttribute("alt") : "General",
      display: "footnote",
      label: label
    });
  });

  return citationLinks;
}
/**
 * Produces copies of the given source objects that lack the `discussions`
 * field. The input objects themselves are left untouched.
 *
 * @param {Array<Object>} sources - Source objects, e.g. as built from the table.
 * @returns {Array<Object>} New objects with every property except `discussions`.
 */
function filterOutDiscussions(sources) {
  return sources.map(({ discussions: _dropped, ...remaining }) => remaining);
}
/**
 * Adds the "Copy JSON..." dropdown (plus a documentation link) above the
 * perennial sources table and wires up the copy-to-clipboard behaviour.
 *
 * Does nothing when the page has no element with the `perennial-sources`
 * class. Choosing an option parses the table, optionally strips the
 * discussions, writes the JSON to the clipboard and tints the dropdown to
 * signal success or failure; two seconds later the dropdown is reset.
 */
function init() {
  const table = document.querySelector('.perennial-sources');
  if (!table) {
    return;
  }

  // Right-floated wrapper that holds the dropdown and the documentation link.
  const container = document.createElement('div');
  Object.assign(container.style, {
    float: 'right',
    marginBottom: '10px',
    marginTop: '10px',
  });

  // The dropdown itself.
  const select = document.createElement('select');
  select.classList = 'cdx-select';
  Object.assign(select.style, {
    padding: '8px',
    borderRadius: '2px',
  });

  // First entry is a disabled, preselected placeholder; the rest are actions.
  const entries = [
    { value: '', text: 'Copy JSON...', placeholder: true },
    { value: 'with-discussions', text: 'Copy with discussions' },
    { value: 'without-discussions', text: 'Copy without discussions' },
  ];
  for (const entry of entries) {
    const optionElement = document.createElement('option');
    optionElement.value = entry.value;
    optionElement.textContent = entry.text;
    if (entry.placeholder) {
      optionElement.disabled = true;
      optionElement.selected = true;
    }
    select.appendChild(optionElement);
  }
  container.appendChild(select);

  // Documentation link shown below the dropdown.
  const docLink = document.createElement('a');
  docLink.href = 'https://en.wikipedia.org/wiki/User:SuperHamster/RSP-to-JSON';
  docLink.textContent = 'RSP-to-JSON Documentation';
  docLink.target = '_blank';
  Object.assign(docLink.style, {
    display: 'block',
    fontSize: '11px',
    marginTop: '2px',
    color: '#3366cc',
    textDecoration: 'underline',
    textAlign: 'right',
  });
  container.appendChild(docLink);

  // Keep the table below the floated container, then place the container before it.
  table.style.clear = 'both';
  table.parentNode.insertBefore(container, table);

  const showFailure = () => {
    select.style.backgroundColor = '#f9dde9';
  };

  select.addEventListener('change', async () => {
    try {
      const parsed = parsePerennialSourcesTable('perennial-sources');
      if (parsed && parsed.length !== 0) {
        const payload = select.value === 'without-discussions'
          ? filterOutDiscussions(parsed)
          : parsed;
        await navigator.clipboard.writeText(JSON.stringify(payload));
        select.style.backgroundColor = '#dbf3ec';
      } else {
        console.error(`[RSP-to-JSON] Failed to produce JSON`);
        showFailure();
      }
    } catch (error) {
      console.error('Failed to copy JSON to clipboard:', error);
      showFailure();
    }

    // Reset select to default after 2 seconds
    setTimeout(() => {
      select.style.backgroundColor = '';
      select.value = '';
    }, 2000);
  });
}
// Start right away when the document has already been parsed; otherwise
// defer until DOMContentLoaded fires.
if (document.readyState !== 'loading') {
  init();
} else {
  document.addEventListener('DOMContentLoaded', init);
}