// User:Andrybak/Scripts/Archiver.js#L-80

/*

*

* This script is a fork of https://en.wikipedia.org/w/index.php?title=User:Enterprisey/archiver.js&oldid=1113588553

* which was forked from https://en.wikipedia.org/w/index.php?title=User:%CE%A3/Testing_facility/Archiver.js&oldid=1003561411

*/

/*

* Documentation of CSS classes.

*

* .arky-span is the main custom class of the script.

* Inside a .arky-span is an archive link, which triggers selection.

* .arky-span tags also store data (not visible in the UI) associated with

* corresponding sections: the index of the section and heading level

* (i.e. ==2==, ===3===, etc)

* Tags with .arky-span class are also called "archive spans".

*

* .arky-selected-section is put onto the whole semantic heading

* of a section, selected by user for archiving.

* During selection the class is used for styling (a light-blue background).

* After clicking "archive ... selected threads" the class is used to

* find all the archive spans, which live inside the semantic heading.

*/

const USERSCRIPT_NAME = "Archiver";

/*
 * Shows a notification bubble that is titled with the name of the script
 * and does not hide automatically.
 *
 * messageText - the text to show.
 * important   - when truthy, the text is wrapped in a <span> and prefixed
 *               with a 20x20 "Ambox important" icon.
 */
function notifyUser(messageText, important) {
	const notificationOptions = {
		title: USERSCRIPT_NAME,
		autoHide: false
	};
	if (!important) {
		mw.notify(messageText, notificationOptions);
		return;
	}
	const warningIcon = document.createElement('img');
	warningIcon.width = 20;
	warningIcon.height = 20;
	// File:Ambox important.svg
	warningIcon.src = 'https://upload.wikimedia.org/wikipedia/en/thumb/b/b4/Ambox_important.svg/40px-Ambox_important.svg.png';
	const wrapper = document.createElement('span');
	wrapper.append(warningIcon, ' ', messageText);
	mw.notify(wrapper, notificationOptions);
}

const LOG_PREFIX = `[${USERSCRIPT_NAME}]:`;

/*
 * Logs the given values with console.error, prefixed with LOG_PREFIX.
 */
function error(...details) {
	const consoleArguments = [LOG_PREFIX, ...details];
	console.error(...consoleArguments);
}

/*
 * Logs the given values with console.warn, prefixed with LOG_PREFIX.
 */
function warn(...details) {
	const consoleArguments = [LOG_PREFIX, ...details];
	console.warn(...consoleArguments);
}

/*
 * Logs the given values with console.info, prefixed with LOG_PREFIX.
 */
function info(...details) {
	const consoleArguments = [LOG_PREFIX, ...details];
	console.info(...consoleArguments);
}

/*
 * Logs the given values with console.debug, prefixed with LOG_PREFIX.
 */
function debug(...details) {
	const consoleArguments = [LOG_PREFIX, ...details];
	console.debug(...consoleArguments);
}

/*
 * Returns the short "using <script name>" text that is embedded
 * into edit summaries.
 */
function constructAd() {
	// TODO maybe also introduce versions + include version in the ad?
	const advertisement = `using ${USERSCRIPT_NAME}`;
	return advertisement;
}

/*
 * Returns the given edit summary followed by the script's ad
 * (see constructAd) in parentheses.
 */
function constructEditSummary(mainEditSummary) {
	const advertisement = constructAd();
	return `${mainEditSummary} (${advertisement})`;
}

$.when( mw.loader.using(['mediawiki.util','mediawiki.api']), $.ready).done( function () {

/*

* Reference documentation about keys and values in mw.config:

* https://www.mediawiki.org/wiki/Manual:Interface/JavaScript#mw.config

*/

if (!mw.config.get('wgIsArticle')) { // This variable is badly named -- it is not related to a page being a main namespace "article".

info('Not a wiki page. Aborting.');

return;

}

if (mw.config.get('wgArticleId') === 0 || mw.config.get('wgRevisionId') === 0) {

info('Either the page does not exist yet or it is a diffonly=yes view. Aborting.');

return;

}

if (mw.config.get("wgNamespaceNumber") % 2 == 0 && mw.config.get("wgNamespaceNumber") != 4) {

// not a talk page and not project namespace

info('Not a discussion namespace. Aborting.');

return;

}

if (mw.config.get("wgNamespaceNumber") == -1) {

// is a special page

info('This is a "Special:" page. Aborting.');

return;

}

const parserOutput = document.querySelector('#mw-content-text .mw-parser-output');

if (!parserOutput || $(parserOutput).find(':header').length === 0) {

info('Nothing to archive here. Aborting.');

return;

}

if (mw.config.get('wgDiffNewId') != null || mw.config.get('wgDiffOldId') != null) {

info('Detected diff view. Aborting.');

return;

}

// Styling: selected section headings get a highlighted background,
// their [archive] link is shown in bold.
mw.util.addCSS([
	".arky-selected-section { background-color: color-mix(in srgb, var(--background-color-progressive-subtle, #D9E9FF) 90%, var(--background-color-progressive--hover, #D9E9FF)); }",
	".arky-selected-section .arky-span a { font-weight:bold }"
].join(""));

// State shared between the functions below.
// Section number => { start, end } offsets, populated by extractValidSections().
const sectionCodepointOffsets = {};
// Wikitext of the page, filled in by addArchiveLinks().
let wikiText = "";
let revStamp; // The timestamp when we originally got the page contents - we pass it to the "edit" API call for edit conflict detection
const portletLink = mw.util.addPortletLink("p-cactions", "#", "ⵙCA", "ca-oecaAndrybak", "Enter/exit the archival process", null, null);
const archiveButton = $(document.createElement("button"));
// Set on the first click on the portlet link, see findHighestArchiveSubpage().
let highestArchiveSubpagePromise = null;

/*
 * Clicking the portlet link enters or exits the archival process:
 * any previous selection is dropped, the [archive] links and
 * the big archive button are shown or hidden.
 */
$(portletLink).click(function(e) {
	// removeClass() expects a bare class name. With a leading dot (a selector)
	// no class was ever removed and stale selections survived the toggle.
	$(".arky-selected-section").removeClass('arky-selected-section');
	$(".arky-span").toggle();
	archiveButton.toggle();
	if (highestArchiveSubpagePromise == null) {
		/*
		 * Start searching for the archive subpage with highest number immediately.
		 * Then the click listener on `archiveButton` will wait for this `Promise`.
		 */
		highestArchiveSubpagePromise = findHighestArchiveSubpage();
	} else {
		// TODO: if "Loading..." was already shown to the user via the button, we need to reset the text here.
	}
});

// The archive button sticks to the bottom of the page and spans its whole width.
archiveButton
	.html("archive all the selected threads")
	.attr("id", 'arky-archive-button')
	.css({
		'position': 'sticky',
		'bottom': 0,
		'width': '100%',
		'font-size': '200%'
	});
archiveButton.appendTo(document.body);
// Start out hidden: the button is toggled again by the portlet link.
archiveButton.toggle();

/*
 * Clicking the archive button collects the section numbers of all selected
 * threads, waits for the search for the latest archive subpage to finish
 * and hands both over to doArchive().
 */
archiveButton.click(function(e) {
	const selectedSections = $(".arky-selected-section .arky-span").map(function() {
		return $(this).data("section");
	}).toArray();
	if (selectedSections.length === 0) {
		return alert("No threads selected, aborting");
	}

	const originalButtonText = archiveButton.text();
	const timeoutId = setTimeout(() => {
		/*
		 * In case highestArchiveSubpagePromise is taking a long time,
		 * show to the user that stuff is happening.
		 */
		archiveButton.text("Loading...");
	}, 1000);
	// Must run on success AND on failure: otherwise a late timer (or an already
	// shown "Loading...") leaves the button stuck with the wrong text,
	// e.g. when the user cancels the prompt of doArchive().
	const stopLoadingIndicator = () => {
		clearTimeout(timeoutId);
		archiveButton.text(originalButtonText);
	};

	highestArchiveSubpagePromise.then(result => {
		stopLoadingIndicator();
		info("Successful highestArchiveSubpagePromise:", result);
		doArchive(selectedSections, result);
	}, rejection => {
		stopLoadingIndicator();
		info("Failed highestArchiveSubpagePromise:", rejection);
		const currentPageName = mw.config.get("wgPageName");
		// The search failed: suggest a placeholder, which the user has to fix in the prompt.
		doArchive(selectedSections, archiveSpacedSubpageName(currentPageName, "???"));
	});
}); // end of archiveButton click handler

addArchiveLinks();

/*
 * Returns the integer half-way between `lower` and `upper`, rounded down.
 */
function midPoint(lower, upper) {
	const halfDistance = (upper - lower) / 2;
	return Math.floor(lower + halfDistance);
}

/*
 * Finds the highest number for which `testFunc` resolves to a truthy value.
 * While no failing number is known (`upper` is falsy), the probed number is
 * doubled. Afterwards the search bisects between the highest known success
 * (`lower`) and the lowest known failure (`upper`).
 *
 * testFunc - function taking a number, may be async
 * i        - the first number to probe
 * lower    - a number for which testFunc is assumed to succeed
 * upper    - a number for which testFunc is known to fail, or a falsy value
 *
 * Based on https://en.wikipedia.org/wiki/Module:Exponential_search
 */
async function exponentialSearch(testFunc, i, lower, upper) {
	let probe = i;
	let highestSuccess = lower;
	let lowestFailure = upper;
	for (;;) {
		const succeeded = await testFunc(probe);
		if (!succeeded) {
			lowestFailure = probe;
			probe = midPoint(highestSuccess, lowestFailure);
			continue;
		}
		if (probe + 1 == lowestFailure) {
			// success directly below a known failure: this is the answer
			return probe;
		}
		highestSuccess = probe;
		probe = lowestFailure ? midPoint(highestSuccess, lowestFailure) : probe * 2;
	}
}

/*
 * Returns the title of an archive subpage in the "<page>/Archive <number>" form.
 */
function archiveSpacedSubpageName(pageName, archiveNumber) {
	return `${pageName}/Archive ${archiveNumber}`;
}

/*
 * Returns the title of an archive subpage in the "<page>/Archive/<number>" form.
 */
function archiveSlashedSubpageName(pageName, archiveNumber) {
	return `${pageName}/Archive/${archiveNumber}`;
}

/*
 * Asks the API about the page with the given title. Resolves to false
 * when the first page of the response carries the "missing" key,
 * and to true otherwise.
 *
 * Based on https://en.wikipedia.org/wiki/Wikipedia_talk:User_scripts/Archive_7#nocreate-missing
 */
async function pageExists(title) {
	const queryParameters = {
		"action": "query",
		"format": "json",
		"titles": title
	};
	const { query } = await new mw.Api().get(queryParameters);
	// only one title was requested, so only the first page is of interest
	const [firstPage] = Object.values(query.pages);
	return !("missing" in firstPage);
}

/*
 * Find the subpage of this page, which will be used as destination/target of archiving.
 * It is just "Archive 1" by default, but can be increased by exponentialSearch.
 *
 * Returns a Promise of the title of the archive subpage.
 * When any of the lookups fails, the user is notified, the error is logged
 * and the Promise is rejected with the original error.
 */
async function findHighestArchiveSubpage() {
	info("findHighestArchiveSubpage: start");
	try {
		const currentPageName = mw.config.get("wgPageName");
		const currentYear = new Date().getUTCFullYear();
		/*
		 * Check if "current year" subpage is a good candidate for
		 * pages with https://en.wikipedia.org/wiki/Template:Archived_annually
		 * TODO: maybe implement checking if {{Archived annually}} is transcluded.
		 */
		if (await pageExists(archiveSpacedSubpageName(currentPageName, currentYear - 1)) && !await pageExists(archiveSpacedSubpageName(currentPageName, currentYear + 1))) {
			return archiveSpacedSubpageName(currentPageName, currentYear);
		}

		// Detect which naming scheme is used for numbered archives.
		let subpageFunc;
		if (await pageExists(archiveSpacedSubpageName(currentPageName, 1))) {
			subpageFunc = archiveSpacedSubpageName;
		} else if (await pageExists(archiveSlashedSubpageName(currentPageName, 1))) {
			subpageFunc = archiveSlashedSubpageName;
		} else {
			notifyUser("Cannot find the first archive subpage", true);
			info('Assuming zero archive subpages.');
			return archiveSpacedSubpageName(currentPageName, 1);
		}

		const checkArchiveSubpageExists = (archiveNumber) => pageExists(subpageFunc(currentPageName, archiveNumber));
		// see also https://en.wikipedia.org/wiki/Module:Highest_archive_number
		const highestNumber = await exponentialSearch(checkArchiveSubpageExists, 10, 1, null);
		return subpageFunc(currentPageName, highestNumber);
	} catch (e) {
		const msg = "Cannot find archive subpage with the highest number";
		error(msg, e);
		notifyUser(msg, true);
		// rethrow, so that the returned Promise is rejected and callers can fall back
		throw e;
	}
}

/*
 * Moves the selected sections from the current page to an archive page,
 * which the user confirms (or changes) in a prompt.
 *
 * First the sections are removed from the current page. Only when that edit
 * succeeds, they are appended to the archive page. The page is reloaded afterwards.
 *
 * selectedSections      - array of section numbers, in the order of the page
 * highestArchiveSubpage - suggestion for the archive page title, shown in the prompt
 *
 * Relies on `wikiText`, `revStamp` and `sectionCodepointOffsets`,
 * which are filled in by addArchiveLinks() and extractValidSections().
 */
function doArchive(selectedSections, highestArchiveSubpage) {
	// returns `s` without the substring starting at `start` and ending at `end`
	function cut(s, start, end) {
		return s.substring(0, start) + s.substring(end);
	}

	if (!wikiText || !revStamp) {
		/*
		 * The wikitext is downloaded in the background. If the download
		 * hasn't finished or has failed, then every offset below would map
		 * to 0 and the page would get saved EMPTY, without any edit conflict detection.
		 */
		return alert("The wikitext of this page has not been loaded (yet), so nothing can be archived safely. Please try again in a moment or reload the page.");
	}

	const archivePageName = prompt("Archiving " + selectedSections.length + " threads: where should we move them to? The latest archive number seems to be:", highestArchiveSubpage);
	if (!archivePageName || archivePageName == mw.config.get("wgPageName")) {
		return alert("No archive target selected, aborting");
	}

	// codepointToUtf16Idx maps codepoint idx (i.e. MediaWiki index into page text) to utf-16 idx (i.e. JavaScript index into wikiText)
	const codepointToUtf16Idx = {};

	// Initialize "important" (= either a section start or end) values to 0
	selectedSections.forEach(function(n) {
		codepointToUtf16Idx[sectionCodepointOffsets[n].start] = 0;
		codepointToUtf16Idx[sectionCodepointOffsets[n].end] = 0;
	});
	codepointToUtf16Idx[Infinity] = Infinity; // Because sometimes we'll have Infinity as an "end" value

	// fill in our mapping from codepoints (MediaWiki indices) to utf-16 (i.e. JavaScript).
	// yes, this loops through every character in the wikitext. very unfortunate.
	let codepointPos = 0;
	for (let utf16Pos = 0; utf16Pos < wikiText.length; utf16Pos++, codepointPos++) {
		if (Object.prototype.hasOwnProperty.call(codepointToUtf16Idx, codepointPos)) {
			codepointToUtf16Idx[codepointPos] = utf16Pos;
		}

		const codeUnit = wikiText.charCodeAt(utf16Pos);
		if ((0xD800 <= codeUnit) && (codeUnit <= 0xDBFF)) {
			// high surrogate! utf16Pos goes up by 2, but codepointPos goes up by only 1.
			utf16Pos++; // skip the low surrogate
		}
	}

	const newTextForArchivePage = selectedSections.map(function(n) {
		return wikiText.substring(
			codepointToUtf16Idx[sectionCodepointOffsets[n].start],
			codepointToUtf16Idx[sectionCodepointOffsets[n].end]
		);
	}).join("");

	// go in reverse order so that we don't invalidate the offsets of earlier sections
	// (on a copy, to leave the array of the caller alone)
	const sectionsLastToFirst = selectedSections.slice().reverse();
	let newWikiText = wikiText;
	sectionsLastToFirst.forEach(function(n) {
		newWikiText = cut(
			newWikiText,
			codepointToUtf16Idx[sectionCodepointOffsets[n].start],
			codepointToUtf16Idx[sectionCodepointOffsets[n].end]
		);
	});

	info("archive this:" + newTextForArchivePage);
	info("revised page:" + newWikiText);
	const pluralizedThreads = selectedSections.length + ' thread' + ((selectedSections.length === 1) ? '' : 's');
	new mw.Api().postWithToken("csrf", {
		action: 'edit',
		title: mw.config.get("wgPageName"),
		text: newWikiText,
		summary: constructEditSummary(`Removing ${pluralizedThreads}, will be on ${archivePageName}`),
		basetimestamp: revStamp,
		starttimestamp: revStamp
	})
	.done(function(res1) {
		alert("Successfully removed threads from talk page");
		info(res1);
		// the threads are gone from the talk page, now put them onto the archive page
		new mw.Api().postWithToken("csrf", {
			action: 'edit',
			title: archivePageName,
			appendtext: "\n" + newTextForArchivePage,
			summary: constructEditSummary(`Adding ${pluralizedThreads} from ${mw.config.get("wgPageName")}`)
		})
		.done(() => alert("Successfully added threads to archive page"))
		.fail(() => alert("Failed to add threads to archive page. Manual inspection needed."))
		.always(function(res2) {
			info(res2);
			window.location.reload();
		});
	})
	.fail(function(res1) {
		alert("Failed to remove threads from talk page. Aborting archive process.");
		error(res1);
		window.location.reload();
	});
} // end of doArchive()

/*
 * Filters the result of the API query: only sections with a numeric
 * index are kept. Returns them as an object, keyed by section index.
 * Plus, importantly, populates the global variable `sectionCodepointOffsets`.
 */
function extractValidSections(apiResultSections) {
	const validSections = {};

	for (const section of $(apiResultSections).toArray()) {
		// For sections transcluded from other pages, section.index will look
		// like T-1 instead of just 1. Skip those.
		if (section.index == parseInt(section.index)) {
			validSections[section.index] = section;
		}
	}

	// record the offsets in the global variable
	Object.keys(validSections).forEach((key) => {
		const sectionIndex = parseInt(key);
		const hasNextSection = validSections.hasOwnProperty(sectionIndex + 1);
		// What MediaWiki calls "byteoffset" is actually a codepoint offset!! Drat!!
		sectionCodepointOffsets[sectionIndex] = {
			start: validSections[sectionIndex].byteoffset,
			// a section without a valid successor extends to the end of the page
			end: hasNextSection ? validSections[sectionIndex + 1].byteoffset : Infinity
		};
	});

	return validSections;
}

/*
 * The convoluted way of "header" vs "headerContainer" is needed, because
 * there are different HTML layouts for "headings" in different skins.
 * In Vector 2022, layout of ==Second level== versus ===Third level===
 * headings is different even for a _single_ skin.
 *
 * The HTML layout is either
 *     <div class="mw-heading"> <h*>...</h*> <span class="mw-editsection">...</span> </div>
 * or
 *     <h*> ... <span class="mw-editsection">...</span> </h*>
 *
 * "headerContainer" is always the outer of the tags, it always contains the <span class="mw-editsection"> tags.
 * "header" is always one of the <h*> tags.
 * Meaning that in some cases "header" and "headerContainer" is the same HTML element.
 *
 * arky-span, aka archiveSpans are put inside the "<span class="mw-editsection">".
 *
 * For details, see:
 * - https://www.mediawiki.org/w/index.php?title=Heading_HTML_changes&oldid=6538029
 * - https://en.wikipedia.org/wiki/Wikipedia:Village_pump_(technical)/Archive_213#Tech_News_%E2%80%93_User%3AEnterprisey%2Farchiver.js
 */

/*
 * Finds the element with class "mw-editsection" (the bracketed [edit]
 * section link), which belongs to the given heading tag.
 *
 * Returns a plain HTMLElement, or null when there is no such element.
 */
function findEditSectionForHeader(header) {
	// in Vector, the bracketed [edit] section link is a direct child element/node
	const maybeVectorEditSection = header.querySelector('.mw-editsection');
	if (maybeVectorEditSection) {
		return maybeVectorEditSection;
	}
	// in other skins, the bracketed [edit] section link is a sibling of
	// the <h2>, <h3>, etc tag: both live inside a tag with class "mw-heading"
	const parent = header.parentElement;
	if (parent && parent.classList.contains('mw-heading')) {
		const maybeEditSection = parent.querySelector('.mw-editsection');
		if (maybeEditSection) {
			return maybeEditSection;
		}
	}
	return null;
}

/*
 * Finds the header container of an archive span: its ancestors with
 * class "mw-heading" are preferred, the ancestor heading tags are the fallback.
 *
 * Returns a jQuery object. If neither kind of ancestor exists,
 * then the user is notified and null is returned.
 */
function findHeaderContainerForArchiveSpan(archiveSpan) {
	const wrappedSpan = $(archiveSpan);
	for (const ancestorSelector of ['.mw-heading', ':header']) {
		const ancestors = wrappedSpan.parents(ancestorSelector);
		if (ancestors.length > 0) {
			return ancestors;
		}
	}
	notifyUser("findHeaderContainerForArchiveSpan: Cannot parse section headings in this skin. Aborting.", true);
	error("findHeaderContainerForArchiveSpan: Tags for bug report:", archiveSpan, archiveSpan.parentElement);
	return null;
}

/*
 * We need to get the top-level element of the whole header.
 * In some cases it's the parent tag with class "mw-heading".
 * In other cases it's just the <h2>, <h3>, etc tag itself.
 *
 * Returns a plain HTML element.
 */
function getHeaderContainer(header) {
	const parent = header.parentElement;
	return parent.classList.contains('mw-heading') ? parent : header;
}

/*
 * Create the bracketed [archive] links next to the [edit] section links.
 * These [archive] links are used by a user to select sections for archival.
 *
 * Also downloads the wikitext and the timestamp of the current revision
 * into `wikiText` and `revStamp`, which are needed later by doArchive().
 */
function addArchiveLinks() {
	// grab page sections and wikitext so we can add the "archive" links to appropriate sections
	new mw.Api().get({action: 'parse', page: mw.config.get("wgPageName")}).done(function(parseApiResult) {
		new mw.Api().get({action: 'query', pageids: mw.config.get("wgArticleId"), prop: ['revisions'], rvprop: ['content', 'timestamp']}).done(function(revisionsApiResult) {
			const rv = revisionsApiResult.query.pages[mw.config.get("wgArticleId")].revisions[0];
			wikiText = rv["*"];
			revStamp = rv['timestamp'];
		})
		// don't fail silently: without the wikitext nothing can be archived
		.fail(() => warn('addArchiveLinks: Cannot download wikitext of current page.'));

		const validSections = extractValidSections(parseApiResult.parse.sections);

		/*
		 * The search for all section headings starts with
		 * finding all <h*> tags, which aren't for the table of contents.
		 * From the <h*> tags, we find the "[edit] section links" and
		 * "header containers" (see getHeaderContainer).
		 */
		const allHeaders = $("#mw-content-text .mw-parser-output").find(":header").filter(':not(#mw-toc-heading)');
		if (allHeaders.length == 0) {
			warn('Nothing to archive here. The script should have aborted earlier. Aborting.');
			return;
		}
		allHeaders.each(function(i, header) {
			let sectionNumber = undefined;
			// tagName is "H2", "H3", etc.
			const headerLevel = Number(header.tagName.slice(1));
			const editSection = findEditSectionForHeader(header);
			if (!editSection) {
				// we're either in an archived page ([edit] links are hidden with magic word __NOEDITSECTION__)
				return;
			}
			const editSectionLink = editSection.querySelector('a');
			if (editSectionLink) {
				// Note: href may not be set.
				const sectionNumberMatch = editSectionLink.href && editSectionLink.href.match(/&section=(\d+)/);
				if (sectionNumberMatch) {
					sectionNumber = sectionNumberMatch[1];
				}
			}
			// if the if statement fails, it might be something like <h2>not a real section</h2>
			if (validSections.hasOwnProperty(sectionNumber)) {
				const archiveLink = $('<a>')
					.text('archive')
					.click(function() {
						const correspondingHeaderContainer = $(getHeaderContainer(header));
						correspondingHeaderContainer.toggleClass('arky-selected-section');

						// now, click all sub-sections of this section
						// i.e. mark all needed header containers with our CSS class .arky-selected-section
						const isThisSectionSelected = correspondingHeaderContainer.hasClass('arky-selected-section');
						const thisArchiveSpan = archiveLink.parents('.arky-span');
						const thisHeaderLevel = thisArchiveSpan.data('header-level');

						// starting from the current section, loop through each section
						const allArchiveSpans = $('.arky-span');
						const currSectionIdx = allArchiveSpans.index(thisArchiveSpan);
						for (let i = currSectionIdx + 1; i < allArchiveSpans.length; i++) {
							if ($(allArchiveSpans[i]).data('header-level') <= thisHeaderLevel) {
								// if this isn't a subsection, quit
								break;
							}
							const closestHeaderContainer = findHeaderContainerForArchiveSpan(allArchiveSpans[i]);
							if (!closestHeaderContainer) {
								// the user has already been notified by findHeaderContainerForArchiveSpan
								break;
							}
							if (closestHeaderContainer.hasClass('arky-selected-section') != isThisSectionSelected) {
								// if this section needs toggling, toggle it
								closestHeaderContainer.toggleClass('arky-selected-section');
							}
						}

						// finally, update button
						const selectedSectionCount = $('.arky-selected-section').length;
						archiveButton
							.prop('disabled', selectedSectionCount === 0)
							.text('archive ' + selectedSectionCount + ' selected thread' +
								((selectedSectionCount === 1) ? '' : 's'));
					});

				// hidden until the user enters the archival process via the portlet link
				const arkySpan = $("<span>", { "class": "arky-span" })
					.css({'display':'none'})
					.data({'header-level': headerLevel, 'section': sectionNumber})
					.append(
						$('<span>', { 'class': 'mw-editsection-bracket' }).text('['),
						archiveLink,
						$('<span>', { 'class': 'mw-editsection-bracket' }).text(']')
					);

				$(editSection).append(" ", arkySpan);
			}
		});
	})
	.fail(() => warn('addArchiveLinks: Cannot download current page. Aborting.'));
}

}); //