// User:Novem Linguae/Scripts/DraftCleaner.js

// <nowiki>

// === Compiled with Novem Linguae's publish.php script ======================

// === modules/DraftCleaner.js ======================================================

/**
 * A collection of regex-based clean-up passes for the wikitext of a Wikipedia
 * draft. Every public method takes wikitext and returns the modified wikitext;
 * cleanDraft() chains all of them in a fixed order.
 *
 * Several methods rely on the StringFilter class defined elsewhere in this file.
 *
 * NOTE(review): a number of string/regex literals in this class had lost their
 * HTML tags, ''' markers and [[ ]] brackets. They were reconstructed from the
 * surrounding comments and code; the ones that could not be fully recovered are
 * marked individually below.
 */
class DraftCleaner {
	/**
	 * Runs every clean-up pass over the draft.
	 *
	 * @param {string} wikicode wikitext of the page
	 * @param {number} namespaceNumber namespace of the page (0 = mainspace, 118 = draftspace)
	 * @param {string} titleWithNamespaceAndSpaces e.g. "Draft:Menna Shahin"
	 * @return {string} cleaned wikitext
	 */
	cleanDraft( wikicode, namespaceNumber, titleWithNamespaceAndSpaces ) {
		// run before other stuff
		wikicode = this.deleteSomeHTMLTags( wikicode );
		wikicode = this.deleteNonAFCDraftTags( wikicode );
		// Use the namespace that was passed in rather than reading mw.config here,
		// so the class has no dependency on the browser globals.
		wikicode = this.deleteAFCDraftTagsIfMainspace( wikicode, namespaceNumber );
		wikicode = this.fixWikilinksContainingURL( wikicode );
		wikicode = this.fixExternalLinksToWikipediaArticles( wikicode );
		wikicode = this.deleteWeirdUnicodeCharacters( wikicode );
		wikicode = this.trimEveryLine( wikicode );
		wikicode = this.convertH1ToH2( wikicode );
		wikicode = this.convertVeryLongHeadingToParagraph( wikicode );
		wikicode = this.deleteHeadingsWithTitle( wikicode, titleWithNamespaceAndSpaces );
		wikicode = this.unlinkWikilinksToThisTitle( wikicode, titleWithNamespaceAndSpaces );
		wikicode = this.capitalizeCOVID19( wikicode );
		wikicode = this.removeBoldFromHeadings( wikicode );
		wikicode = this.convertReferenceToReferences( wikicode );
		wikicode = this.deleteMultipleReferenceTags( wikicode );
		wikicode = this.addReferencesSectionIfMissing( wikicode );
		wikicode = this.fixEmptyReferencesSection( wikicode );
		wikicode = this.deleteWhitespaceAtEndOfLines( wikicode );
		wikicode = this.convertSmartQuotesToRegularQuotes( wikicode );
		// fixWordEmphasizedWithSingleQuotes() is deliberately not run: most of these appear in
		// citations as names of newspaper articles, arguably should keep these single quotes
		wikicode = this.convertDoubleSpacesToSingleSpaces( wikicode );
		wikicode = this.deleteBlankHeadings( wikicode );
		wikicode = this.changeYearRangeDashToNDash( wikicode );
		wikicode = this.disableCategoriesInDraftspace( wikicode, namespaceNumber );
		// deleteBRTagsOutsideInfoboxes() is deliberately not run: edge case in image captions, and probably other places
		// rightAlignImages() is deliberately not run: too many false positives in featured articles
		wikicode = this.correctCapitalizationOfEndMatterHeaders( wikicode );
		wikicode = this.ifNoLeadSectionDeleteFirstHeading( wikicode );
		wikicode = this.deleteCopyPastedEditAndEditSource( wikicode );
		wikicode = this.replaceUnicodeBulletsWithAsterisks( wikicode );
		wikicode = this.deleteEmptySections( wikicode );
		wikicode = this.fixHeadingsInAllCaps( wikicode );
		wikicode = this.deleteDuplicateReferencesSection( wikicode );
		wikicode = this.deleteBlankLinesBetweenBullets( wikicode );
		wikicode = this.removeUnderscoresFromWikilinks( wikicode );
		wikicode = this.fixPipedWikilinksWithIdenticalParameters( wikicode );
		wikicode = this.removeBorderFromImagesInInfoboxes( wikicode );
		wikicode = this.removeExtraAFCSubmissionTemplates( wikicode );
		wikicode = this.moveAFCSubmissionTemplatesToTop( wikicode );

		// Ideas that are not implemented yet:
		// all ==sections== should start with a capital letter
		// after swap, if citation has no spaces on either side, and is not touching two other citations, add a space on the right
		// strip [[File: from infobox's image field
		// example 1: | image = [[File:SAMIR 1626.png]]
		// example 2: | image = File:SAMIR 1626.png
		// trim whitespace inside refs, e.g. <ref> abc </ref>
		// replace unreliable sources with {{cn}}.
		// if adjacent to other sources, just delete
		// if ref is used multiple times, account for that
		// duplicate citation fixer
		// move refs that are below {{Reflist}}, to above {{Reflist}}
		// move refs out of headings
		// delete AFC submission templates located mid-article, they end up self-hiding then appear as inexplicable whitespace. example: {{AfC submission|t||ts=20211212134609|u=Doezdemir|ns=118|demo=}}
		// fix redundant wikilinks, e.g. [[Spotify|Spotify]]
		// change youtu.be to youtube.com, to avoid the blacklist. test: https://youtu.be/bnWHeRNIPiA
		// delete ©®™
		// convert all <references /> to {{Reflist}}. <references /> doesn't use two column format and looks weird with a large # of references
		// remove px from images, should use default

		// convert refs toward the end. we want deleteSomeHTMLTags() to run first, to get rid of tags around URLs
		wikicode = this.bareURLToRef( wikicode );
		wikicode = this.refShortLinkToLongLink( wikicode );
		wikicode = this.inlineExternalLinksToRefs( wikicode );
		wikicode = this.moveRefsOutsideOfItalics( wikicode );
		wikicode = this.deleteSpacesInFrontOfRefs( wikicode );
		wikicode = this.deleteNewLinesBetweenRefs( wikicode );
		wikicode = this.swapRefPeriodWithPeriodRef( wikicode );
		wikicode = this.swapRefCommaWithCommaRef( wikicode );

		// stuff we want to run at the end
		wikicode = this.fixDoublePeriod( wikicode ); // need test cases. I've seen this one not work.
		wikicode = this.boldArticleTitle( wikicode, titleWithNamespaceAndSpaces );
		wikicode = this.trimEmptyLines( wikicode );
		wikicode = this.deleteMoreThanTwoEntersInARow( wikicode );
		return wikicode;
	}

	/**
	 * Surrounds bare URLs that sit alone on a line with <ref></ref>.
	 * Useful for seeing all URLs in the reflist section, and for CiteHighlighter ref quality highlighting.
	 */
	bareURLToRef( wikicode ) {
		return wikicode.replace( /^(http[^\n ]*) {0,}$/gm, '<ref>$1</ref>' );
	}

	/**
	 * In refs, turns [short links] into long links, so you can see the domain.
	 * Also fixes link issues with extra spaces in the ref.
	 */
	refShortLinkToLongLink( wikicode ) {
		// <ref>[https://test.com''Test'']</ref>
		wikicode = wikicode.replace( /(<ref[^>]*>) {0,}\[ {0,}([^'\]]*)(''[^\]]*)\] {0,}(<\/ref>)/gm, '$1$2 $3$4' );
		// <ref>[https://test.com Test]</ref>
		wikicode = wikicode.replace( /(<ref[^>]*>) {0,}\[ {0,}([^\]]*) {0,}\] {0,}(<\/ref>)/gm, '$1$2$3' );
		return wikicode;
	}

	/**
	 * Converts inline external links to references. Everything from the first
	 * "External link", "Further reading" or "Links" heading onwards is left alone.
	 */
	inlineExternalLinksToRefs( wikicode ) {
		const sectionsToSkip = [ 'External link', 'Further reading', 'Links' ];
		const regExString = '== ?(?:' + sectionsToSkip.join( '|' ) + ')';
		const sf = new StringFilter();
		// NOTE(review): the list of opening tags was lost; '<ref' and '{{' are inferred
		// from the closing tags ('</ref>', '/>', '}}') that survived.
		const convert = ( text ) => sf.surgicalReplaceOutsideTags(
			/(?<!>|> )\[(http[^ \]]+) ?(.*?)\](?!<\/ref>| <\/ref>)/gm,
			'$2<ref>$1</ref>',
			text,
			[ '<ref', '{{' ],
			[ '</ref>', '/>', '}}' ]
		);

		const regExToSplitArticle = new RegExp( '((' + regExString + ').*$)', 'is' );
		const bottomMatch = wikicode.match( regExToSplitArticle );
		if ( !bottomMatch ) {
			return convert( wikicode );
		}
		const topHalf = wikicode.replace( regExToSplitArticle, '' );
		return convert( topHalf ) + bottomMatch[ 1 ];
	}

	/**
	 * ''Test<ref>Test</ref>'' => ''Test''<ref>Test</ref>
	 */
	moveRefsOutsideOfItalics( wikicode ) {
		return wikicode.replace( /''([^']+)(<ref>[^<]+<\/ref>)''/gm, '\'\'$1\'\'$2' );
	}

	/**
	 * Gets rid of spaces in front of <ref.
	 */
	deleteSpacesInFrontOfRefs( wikicode ) {
		// NOTE(review): only "/(?<" of this regex survived. The negative lookbehind is
		// reconstructed so that "| <ref" and "= <ref" (table cells, infobox parameters)
		// are left alone - confirm against the upstream source.
		return wikicode.replace( /(?<![|=]) {1,}<ref/gm, '<ref' );
	}

	/**
	 * Gets rid of any level 2 heading that contains the article's title.
	 * This takes care of 2 common cases: heading at the bottom next to the
	 * {{AFC Submission}} template, and heading at the top above the lead.
	 */
	deleteHeadingsWithTitle( wikicode, titleWithNamespaceAndSpaces ) {
		const withoutNamespace = titleWithNamespaceAndSpaces.replace( /^Draft:/, '' );
		// first the bare title, then the title with "Draft:" in front of it
		for ( const headingName of [ withoutNamespace, titleWithNamespaceAndSpaces ] ) {
			const regEx = new RegExp( '^== ?' + this._escapeRegEx( headingName ) + ' ?==\n', 'gmi' );
			wikicode = wikicode.replace( regEx, '' );
		}
		return wikicode;
	}

	/**
	 * Removes wikilinks to the article's own name.
	 * Example: if title is Draft:Menna Shahin, change [[Menna Shahin]] to Menna Shahin
	 */
	unlinkWikilinksToThisTitle( wikicode, titleWithNamespaceAndSpaces ) {
		let wikilinkToLookFor = titleWithNamespaceAndSpaces.replace( /^Draft:/, '' );
		wikilinkToLookFor = this._escapeRegEx( wikilinkToLookFor );
		const regEx = new RegExp( '\\[\\[(' + wikilinkToLookFor + ')\\]\\]', 'gm' );
		return wikicode.replace( regEx, '$1' );
	}

	/**
	 * Bolds the first occurrence of the article title that starts a line.
	 * A trailing " (disambiguator)" in the title is ignored.
	 */
	boldArticleTitle( wikicode, titleWithNamespaceAndSpaces ) {
		let titleToLookFor = titleWithNamespaceAndSpaces;
		titleToLookFor = titleToLookFor.replace( /^Draft:/, '' );
		titleToLookFor = titleToLookFor.replace( / \(.*?\)$/, '' );
		titleToLookFor = this._escapeRegEx( titleToLookFor );

		// Don't bold the title if it's already bolded. Fixes a "bold twice" bug.
		const hasBoldedTitle = wikicode.match( new RegExp( `'''${ titleToLookFor }'''`, 'i' ) );
		if ( hasBoldedTitle ) {
			return wikicode;
		}

		// Be pretty strict, to avoid adding ''' to image paths and infoboxes, which messes up the image. Also, only replace first match.
		const regEx = new RegExp( '^(The )?(' + titleToLookFor + ')([ <,])', 'mi' );
		return wikicode.replace( regEx, "$1'''$2'''$3" );
	}

	/**
	 * /covid-19/i -> COVID-19
	 * Careful of this string in URLs: templates and wikilinks are skipped.
	 */
	capitalizeCOVID19( wikicode ) {
		const sf = new StringFilter();
		wikicode = sf.surgicalReplaceOutsideTags( / covid-19/gmi, ' COVID-19', wikicode, [ '{{', '[[' ], [ '}}', ']]' ] );
		wikicode = sf.surgicalReplaceOutsideTags( /\ncovid-19/gmi, '\nCOVID-19', wikicode, [ '{{', '[[' ], [ '}}', ']]' ] );
		return wikicode;
	}

	/** Removes ''' bold markup from headings. */
	removeBoldFromHeadings( wikicode ) {
		return wikicode.replace( /^(=.*)'''(.*)'''(.*=)$/gm, '$1$2$3' );
	}

	/** Removes enter characters between <ref>s. */
	deleteNewLinesBetweenRefs( wikicode ) {
		return wikicode.replace( /<\/ref>\n{1,}<ref>/gm, '</ref><ref>' );
	}

	/** Converts ==Reference== to ==References==. */
	convertReferenceToReferences( wikicode ) {
		return wikicode.replace( /^== ?Reference ?==$/gmi, '== References ==' );
	}

	/** Swaps "<ref>…</ref>." with ".<ref>…</ref>". */
	swapRefPeriodWithPeriodRef( wikicode ) {
		wikicode = wikicode.replace( /((?:<ref[^>]*?>[^>]*?<\/ref>){1,})\. /gm, '.$1 ' );
		wikicode = wikicode.replace( /((?:<ref[^>]*?>[^>]*?<\/ref>){1,})\.\n/gm, '.$1\n' );
		return wikicode;
	}

	/** Swaps "<ref>…</ref>," with ",<ref>…</ref>". */
	swapRefCommaWithCommaRef( wikicode ) {
		wikicode = wikicode.replace( /((?:<ref[^>]*?>[^>]*?<\/ref>){1,}), /gm, ',$1 ' );
		wikicode = wikicode.replace( /((?:<ref[^>]*?>[^>]*?<\/ref>){1,}),\n/gm, ',$1\n' );
		return wikicode;
	}

	/**
	 * Fixes errant spaces at beginning of lines, which makes a blockquote looking thing (AFCH does it).
	 */
	trimEveryLine( wikicode ) {
		return wikicode
			.split( '\n' )
			.map( ( line ) => {
				const trimmed = line.trim();
				// don't trim lines that start with | or }. It is common in FAs to indent these a bit.
				const keepAsIs = trimmed.startsWith( '|' ) || trimmed.startsWith( '}' );
				return keepAsIs ? line : trimmed;
			} )
			.join( '\n' );
	}

	/**
	 * Adds a references section if the page has neither a References heading nor a reflist.
	 * The section goes above a trailing {{AfC submission}} template if there is one.
	 */
	addReferencesSectionIfMissing( wikicode ) {
		const hasRefSection = wikicode.match( /^== ?References ?==$/mi );
		const hasReflist = wikicode.match( /(?:{{Reflist|<references)/i );
		if ( hasRefSection || hasReflist ) {
			return wikicode;
		}
		const bottomAFCTemplate = /(\n{{AfC submission[^}]*}}\s*)$/;
		if ( wikicode.match( bottomAFCTemplate ) ) {
			return wikicode.replace( bottomAFCTemplate, '\n\n== References ==\n{{Reflist}}$1' );
		}
		return wikicode.replace( /$/, '\n\n== References ==\n{{Reflist}}' );
	}

	/** Adds {{Reflist}} under a References heading when the page has no reflist at all. */
	fixEmptyReferencesSection( wikicode ) {
		const hasRefSection = wikicode.match( /^== ?References ?==$/mi );
		const hasReflist = wikicode.match( /(?:{{Reflist|<references)/i );
		if ( !hasReflist && hasRefSection ) {
			wikicode = wikicode.replace( /(?<=== ?References ?==)/gmi, '\n{{Reflist}}' );
		}
		return wikicode;
	}

	/**
	 * Deletes whitespace at the end of lines.
	 * (?!\|)(?!\}\}) is to stop this from deleting spaces after = in infoboxes
	 */
	deleteWhitespaceAtEndOfLines( wikicode ) {
		return wikicode.replace( /[ \t]+\n(?!\|)(?!\}\})/g, '\n' );
	}

	/** Converts smart quotes and the ellipsis character to their ASCII forms. Skips text inside of [[File: */
	convertSmartQuotesToRegularQuotes( wikicode ) {
		const sf = new StringFilter();
		wikicode = sf.surgicalReplaceOutsideTags( /”/g, '"', wikicode, [ '[[File:' ], [ ']]' ] );
		wikicode = sf.surgicalReplaceOutsideTags( /“/g, '"', wikicode, [ '[[File:' ], [ ']]' ] );
		wikicode = sf.surgicalReplaceOutsideTags( /‘/g, "'", wikicode, [ '[[File:' ], [ ']]' ] );
		wikicode = sf.surgicalReplaceOutsideTags( /’/g, "'", wikicode, [ '[[File:' ], [ ']]' ] );
		wikicode = sf.surgicalReplaceOutsideTags( /…/g, '...', wikicode, [ '[[File:' ], [ ']]' ] );
		return wikicode;
	}

	/** Converts double spaces after a period to single spaces. */
	convertDoubleSpacesToSingleSpaces( wikicode ) {
		return wikicode.replace( /\. {2,}/g, '. ' );
	}

	/** Removes blank headings. */
	deleteBlankHeadings( wikicode ) {
		return wikicode.replace( /\n={2,} {0,}={2,}\n/g, '\n' );
	}

	/** Changes year range dash to ndash. Skips text inside of [[File: */
	changeYearRangeDashToNDash( wikicode ) {
		const sf = new StringFilter();
		// (1111-1111)
		wikicode = sf.surgicalReplaceOutsideTags( /(\(\d{4}) ?- ?(\d{4}\))/gm, '$1–$2', wikicode, [ '[[File:' ], [ ']]' ] );
		// 1839 - 1926)
		wikicode = sf.surgicalReplaceOutsideTags( /( \d{4}) ?- ?(\d{4}\))/gm, '$1–$2', wikicode, [ '[[File:' ], [ ']]' ] );
		return wikicode;
	}

	/**
	 * If in draftspace, and draft has categories, disables the categories by turning
	 * [[Category:X]] into [[:Category:X]]. The preloaddraft tracking category is kept enabled.
	 */
	disableCategoriesInDraftspace( wikicode, namespace ) {
		const isDraft = Number( namespace ) === 118;
		if ( isDraft ) {
			wikicode = wikicode.replace( /:?(\[\[)(Category:[^\]]*\]\])/gm, '$1:$2' );
		}
		wikicode = wikicode.replace( /\[\[:Category:Created via preloaddraft\]\]/gi, '[[Category:Created via preloaddraft]]' );
		return wikicode;
	}

	/**
	 * Deletes <br /> tags. In drafts, these are usually poorly placed.
	 * Not called by cleanDraft() at the moment.
	 */
	deleteBRTagsOutsideInfoboxes( wikicode ) {
		return wikicode
			.split( '\n' )
			.map( ( line ) => {
				// Skip lines that start with { or |. This is the easiest way to detect infoboxes
				const isInfoboxLine = line.startsWith( '{' ) || line.startsWith( '|' );
				return isInfoboxLine ? line : line.replace( /<br ?\/?>/gm, '' );
			} )
			.join( '\n' );
	}

	/** Right aligns images. Not called by cleanDraft() at the moment. */
	rightAlignImages( wikicode ) {
		return wikicode.replace( /(\[\[File:[^\]]*\|)left(\|[^\]]*\]\])/gm, '$1right$2' );
	}

	/** Corrects capitalization of see also, references, further reading, external links. */
	correctCapitalizationOfEndMatterHeaders( wikicode ) {
		wikicode = wikicode.replace( /^(== ?)References( ?==)$/gmi, '$1References$2' );
		wikicode = wikicode.replace( /^(== ?)External links( ?==)$/gmi, '$1External links$2' );
		wikicode = wikicode.replace( /^(== ?)Further reading( ?==)$/gmi, '$1Further reading$2' );
		wikicode = wikicode.replace( /^(== ?)See also( ?==)$/gmi, '$1See also$2' );
		return wikicode;
	}

	/**
	 * If article has headings but no lead, removes the heading lines that come before the first line of text.
	 * Empty lines, lines with templates, or lines with images do not count as text.
	 */
	ifNoLeadSectionDeleteFirstHeading( wikicode ) {
		const lines = wikicode.split( '\n' );
		const lastIndex = lines.length - 1;
		let output = '';
		let sawText = false;
		lines.forEach( ( line, index ) => {
			const isIgnorable = line.startsWith( '{' ) || line.length === 0 || line.startsWith( '[[File:' );
			if ( !isIgnorable ) {
				if ( line.startsWith( '==' ) && !sawText ) {
					return; // delete this line (and its line break) by not putting it in the output string
				}
				sawText = true;
			}
			output += line;
			if ( index !== lastIndex ) {
				output += '\n';
			}
		} );
		return output;
	}

	/** Deletes [edit], [edit source], and [editar] from headings. */
	deleteCopyPastedEditAndEditSource( wikicode ) {
		wikicode = wikicode.replace( /\[edit\]( ?={2,})$/gm, '$1' );
		wikicode = wikicode.replace( /\[edit source\]( ?={2,})$/gm, '$1' );
		wikicode = wikicode.replace( /\[editar\]( ?={2,})$/gm, '$1' );
		return wikicode;
	}

	/** At beginning of lines, replaces unicode bullets with asterisks. */
	replaceUnicodeBulletsWithAsterisks( wikicode ) {
		return wikicode.replace( /^\s{0,}[·•●]\s{0,}/gm, '* ' );
	}

	/** Removes whitespace if that is the only character on a line. */
	trimEmptyLines( wikicode ) {
		return wikicode.replace( /^\s*$/gm, '' );
	}

	/**
	 * No more than 2 newlines (1 blank line) in a row.
	 * Note: AFCH does this too
	 */
	deleteMoreThanTwoEntersInARow( wikicode ) {
		return wikicode.replace( /\n{3,}/gm, '\n\n' );
	}

	/** Converts =TitleHeading= to ==H2Heading==. */
	convertH1ToH2( wikicode ) {
		return wikicode.replace( /^= ?([^=]*?) ?=$/gm, '== $1 ==' );
	}

	/** Turns headings longer than 150 characters (that contain no <ref) into plain paragraphs. */
	convertVeryLongHeadingToParagraph( wikicode ) {
		return wikicode
			.split( '\n' )
			.map( ( line ) => {
				const isVeryLongHeading = line.length > 150 && line.match( /^==.*==$/gm ) && !line.match( /<ref/ );
				return isVeryLongHeading ? line.replace( /^={1,}\s*(.*?)\s*={1,}$/m, '$1' ) : line;
			} )
			.join( '\n' );
	}

	/** Turns 'word' into "word". Not called by cleanDraft() at the moment. */
	fixWordEmphasizedWithSingleQuotes( wikicode ) {
		return wikicode.replace( / '(\w+)' /g, ' "$1" ' );
	}

	/** Collapses ".." after a letter or ] into a single period. */
	fixDoublePeriod( wikicode ) {
		// NOTE(review): everything after "(?=" was lost; the lookahead alternatives
		// (<ref, space, newline) are a reconstruction - confirm against the upstream source.
		return wikicode.replace( /(?<=[A-Za-z\]])\.\.(?=<ref| |\n)/g, '.' );
	}

	/** Fixes [[ ]] links whose target is a URL instead of a page name. */
	fixWikilinksContainingURL( wikicode ) {
		// non-piped wikilink
		wikicode = wikicode.replace( /\[\[https?:\/\/en\.(?:m\.)?wikipedia\.org\/wiki\/([^|]*)\]\]/g, '[[$1]]' );
		// piped wikilink
		wikicode = wikicode.replace( /\[\[https?:\/\/en\.(?:m\.)?wikipedia\.org\/wiki\/([^|]*)\|([^\]]*)\]\]/g, '[[$1|$2]]' );
		// non-piped external link
		wikicode = wikicode.replace( /\[\[(http[^|]*)\]\]/g, '[$1]' );
		// piped external link
		wikicode = wikicode.replace( /\[\[(http[^|]*)\|([^\]]*)\]\]/g, '[$1 $2]' );
		return wikicode;
	}

	/**
	 * Turns external links that point at an English Wikipedia article into wikilinks:
	 * [https://en.wikipedia.org/wiki/Article] and [https://en.wikipedia.org/wiki/Article Article name]
	 */
	fixExternalLinksToWikipediaArticles( wikicode ) {
		// NOTE(review): only "/(?" of this regex survived. The lookbehind and URL pattern
		// are a reconstruction - confirm against the upstream source.
		return wikicode.replace( /(?<!\[)\[https?:\/\/en\.wikipedia\.org\/wiki\/([^ \]]*)[^\]]*\]/gs, ( match, p1 ) => {
			let articleName;
			try {
				articleName = decodeURIComponent( p1 );
			} catch ( e ) {
				// malformed percent-escape (e.g. a literal "%"): leave the link untouched instead of crashing the whole clean-up
				return match;
			}
			articleName = articleName.replace( /_/g, ' ' );
			return `[[${ articleName }]]`;
		} );
	}

	/** Deletes a blank line when both the line above and the line below are bullets. */
	deleteBlankLinesBetweenBullets( wikicode ) {
		const lines = wikicode.split( '\n' );
		return lines
			.filter( ( current, i ) => {
				const previous = lines[ i - 1 ];
				const next = lines[ i + 1 ];
				const isBlankBetweenBullets =
					typeof previous !== 'undefined' &&
					typeof next !== 'undefined' &&
					previous.startsWith( '*' ) &&
					current === '' &&
					next.startsWith( '*' );
				return !isBlankBetweenBullets;
			} )
			.join( '\n' );
	}

	/** Deletes stray control characters left behind by copy/paste. */
	deleteWeirdUnicodeCharacters( wikicode ) {
		// NOTE(review): the character class showed a plain en dash (U+2013), which would strip
		// every legitimate en dash (including the ones changeYearRangeDashToNDash() adds).
		// U+0096 is the C1 control character that text decoders commonly render as an en dash,
		// so it is presumably the intended target - confirm against the upstream source.
		return wikicode.replace( /[\u0096]/g, '' );
	}

	/** Deletes <p>, <strong>, <em>, <nowiki>, <u> and <big> tags (the tags only, not their content). */
	deleteSomeHTMLTags( wikicode ) {
		wikicode = wikicode.replace( /<\/?p( [^>]*)?\/?>/g, '' );
		wikicode = wikicode.replace( /<\/?strong( [^>]*)?\/?>/g, '' );
		wikicode = wikicode.replace( /<\/?em( [^>]*)?\/?>/g, '' );
		wikicode = wikicode.replace( /<\/?nowiki( [^>]*)?\/?>/g, '' );
		wikicode = wikicode.replace( /<\/?u( [^>]*)?\/?>/g, '' );
		wikicode = wikicode.replace( /(?:<big>|<\/big>)/g, '' );
		return wikicode;
	}

	/** Deletes draft boilerplate that is not part of the AFC process. */
	deleteNonAFCDraftTags( wikicode ) {
		wikicode = wikicode.replace( /{{Preloaddraft submit}}\n{0,2}/gi, '' );
		// NOTE(review): the HTML comment in this regex was lost (leaving /\n{0,2}/gi, which
		// deletes every line break). The wording below is the Preloaddraft boilerplate comment
		// as best recalled - confirm against the upstream source.
		wikicode = wikicode.replace( /<!-- Important, do not remove this line before article has been created. -->\n{0,2}/gi, '' );
		wikicode = wikicode.replace( /{{Draft}}\n{0,2}/gi, '' );
		return wikicode;
	}

	/** In mainspace, deletes {{AfC …}} and {{Draft topics}} templates. */
	deleteAFCDraftTagsIfMainspace( wikicode, namespaceNumber ) {
		const isMainspace = Number( namespaceNumber ) === 0;
		if ( isMainspace ) {
			// {{AfC submission}}, {{AfC topic}}, {{AfC comment}}, etc.
			wikicode = wikicode.replace( /{{AfC [^}]*}}\n?/g, '' );
			wikicode = wikicode.replace( /{{Draft topics[^}]*}}\n?/g, '' );
		}
		return wikicode;
	}

	/** If every heading is written in all caps, converts all headings to sentence case. */
	fixHeadingsInAllCaps( wikicode ) {
		const headingRegEx = /== {0,}(.+) {0,}==/g;
		// Materialise the matches first: wikicode is reassigned inside the loop below.
		const headings = [ ...wikicode.matchAll( headingRegEx ) ].map( ( match ) => match[ 1 ] );

		// create a concatenated string with the text from every heading, and stop unless it only contains caps
		if ( !this._isUpperCase( headings.join( '' ) ) ) {
			return wikicode;
		}

		// convert all headings to sentence case
		for ( const heading of headings ) {
			const trimmed = heading.trim();
			const matchRegex = new RegExp( '== {0,}' + this._escapeRegEx( trimmed ) + ' {0,}==', 'g' );
			wikicode = wikicode.replace( matchRegex, '== ' + this._toSentenceCase( trimmed ) + ' ==' );
		}
		return wikicode;
	}

	/** Deletes a "See also" or "External links" heading that is the very last thing on the page. */
	deleteEmptySections( wikicode ) {
		return wikicode.replace( /\n*== ?(?:See also|External links) ?==\n*$/, '' );
	}

	/** If there are several References headings, tries to delete the auto-generated one. */
	deleteDuplicateReferencesSection( wikicode ) {
		const referencesHeading = /==\s*References\s*==/gi;
		const matches = wikicode.match( referencesHeading );
		if ( matches === null || matches.length <= 1 ) {
			return wikicode;
		}

		// run a replacement that is likely to delete the extra section
		const attempt = wikicode.replace( '== References ==\n{{reflist}}', '' );
		const matches2 = attempt.match( referencesHeading );
		// only keep the attempt if it left exactly one References heading
		if ( matches2 !== null && matches2.length === 1 ) {
			wikicode = attempt.trim();
			wikicode = wikicode.replace( referencesHeading, '== References ==' );
		}
		return wikicode;
	}

	/** Replaces underscores with spaces inside [[wikilinks]]. */
	removeUnderscoresFromWikilinks( wikicode ) {
		const sf = new StringFilter();
		return sf.surgicalReplaceInsideTags( /_/g, ' ', wikicode, [ '[[' ], [ ']]' ] );
	}

	/** [[Spotify|Spotify]] => [[Spotify]] */
	fixPipedWikilinksWithIdenticalParameters( wikicode ) {
		const matches = wikicode.matchAll( /\[\[([^|\]]+)\|([^\]]+)\]\]/g );
		for ( const match of matches ) {
			if ( match[ 1 ] === match[ 2 ] ) {
				wikicode = this._replaceAll( wikicode, `[[${ match[ 1 ] }|${ match[ 1 ] }]]`, `[[${ match[ 1 ] }]]` );
			}
		}
		return wikicode;
	}

	/** In infobox logo= and cover= fields, reduces [[File:Name.png|…]] to just Name.png. */
	removeBorderFromImagesInInfoboxes( wikicode ) {
		wikicode = wikicode.replace( /(\|\s*logo\s*=\s*)\[\[File:([^\]|]*)[^\]\]]*\]\]/g, '$1$2' );
		wikicode = wikicode.replace( /(\|\s*cover\s*=\s*)\[\[File:([^\]|]*)[^\]\]]*\]\]/g, '$1$2' );
		return wikicode;
	}

	/** These often hide towards the bottom of a draft. When the draft is submitted, unsubmitted templates (t) detect this and show up as blank, creating a weird extra line break. So this basically fixes the line break. */
	removeExtraAFCSubmissionTemplates( wikicode ) {
		const hasSubmittedTemplate = wikicode.match( /{{AfC submission\|\|/ );
		const hasUnsubmittedTemplate = wikicode.match( /{{AfC submission\|t\|/ );
		if ( hasSubmittedTemplate && hasUnsubmittedTemplate ) {
			wikicode = wikicode.replace( /{{AfC submission\|t\|[^}}]*\}\}\n?/gm, '' );
		}
		return wikicode;
	}

	/** If the page ends with an {{AfC submission}} template, moves it to the top and deletes the other ones. */
	moveAFCSubmissionTemplatesToTop( wikicode ) {
		const hasTemplateAtBottom = wikicode.match( /\n[^\n]+\n*({{AfC submission[^}]*}})\s*$/i );
		if ( hasTemplateAtBottom ) {
			// delete all submission templates
			wikicode = wikicode.replace( /{{AfC submission[^}}]*\}\}\n?/gm, '' );
			// insert template at top
			wikicode = hasTemplateAtBottom[ 1 ] + '\n----\n\n' + wikicode;
		}
		return wikicode;
	}

	/** If the page has both {{Reflist}} and <references />, deletes the <references /> tags. */
	deleteMultipleReferenceTags( wikicode ) {
		const hasReflist = wikicode.match( /{{Reflist}}/i );
		const hasReferencesTag = wikicode.match( /<references ?\/>/i );
		if ( hasReflist && hasReferencesTag ) {
			// delete all references tags
			wikicode = wikicode.replace( /<references ?\/>\n?/gi, '' );
		}
		return wikicode;
	}

	/** @return {boolean} true when str contains no lowercase letters (also true for '') */
	_isUpperCase( str ) {
		return str === str.toUpperCase();
	}

	/** @return {string} first character upper case, everything else lower case */
	_toSentenceCase( string ) {
		return string.charAt( 0 ).toUpperCase() + string.slice( 1 ).toLowerCase();
	}

	/**
	 * Replaces every literal occurrence of needle with replacement.
	 *
	 * @param {string} haystack
	 * @param {string} needle literal text, not a regex
	 * @param {string} replacement literal text
	 * @return {string}
	 */
	_replaceAll( haystack, needle, replacement ) {
		const regex = new RegExp( this._escapeRegEx( needle ), 'g' );
		// A replacer function is used so that "$&", "$1" etc. in the replacement are
		// inserted literally instead of being treated as special replacement patterns.
		return haystack.replace( regex, () => replacement );
	}

	/** Escapes a string so it can be embedded in a RegExp as literal text. */
	_escapeRegEx( string ) {
		return string.replace( /[.*+?^${}()|[\]\\]/g, '\\$&' ); // $& means the whole matched string
	}
}

// === modules/StringFilter.js ======================================================

/**

* Lets you use regex to specify what parts of a very long string you want to specify as "off limits", then you can do additional regex's and search/replace to the remaining parts of the string.

*/

class StringFilter {
	/**
	 * Does a replace, but specifies areas of the file that should NOT be replaced. Those areas are specified by providing an openingTag and a closingTag, and those areas are marked as off limits.
	 *
	 * The haystack is cut in front of every opening and closing tag; a piece that
	 * starts with an opening tag is copied unchanged, every other piece gets the replacement.
	 *
	 * @param {RegExp|string} regex what to search for
	 * @param {string|Function} replacement passed straight to String.prototype.replace
	 * @param {string} haystack
	 * @param {string[]} openingTags literal strings that start an off-limits area
	 * @param {string[]} closingTags literal strings that end an off-limits area
	 * @return {string}
	 */
	surgicalReplaceOutsideTags( regex, replacement, haystack, openingTags, closingTags ) {
		const parts = this._splitStringUsingMultiplePatterns( haystack, [ ...openingTags, ...closingTags ] );
		return parts
			.map( ( part ) => {
				const isOffLimits = openingTags.some( ( tag ) => part.startsWith( tag ) );
				return isOffLimits ? part : part.replace( regex, replacement );
			} )
			.join( '' );
	}

	/**
	 * Does a replace, but specifies areas of the file that SHOULD be replaced, then skips the rest of the file. The area that should be replaced is specified by providing an openingTag and a closingTag.
	 *
	 * @param {RegExp|string} regex what to search for
	 * @param {string|Function} replacement passed straight to String.prototype.replace
	 * @param {string} haystack
	 * @param {string[]} openingTags literal strings that start an area to edit
	 * @param {string[]} closingTags literal strings that end an area to edit
	 * @return {string}
	 */
	surgicalReplaceInsideTags( regex, replacement, haystack, openingTags, closingTags ) {
		const parts = this._splitStringUsingMultiplePatterns( haystack, [ ...openingTags, ...closingTags ] );
		return parts
			.map( ( part ) => {
				// Apply the replacement at most once per part, even when several opening
				// tags match it (e.g. '[[' and '[[File:'); otherwise a non-idempotent
				// replacement would be applied repeatedly.
				const isInsideTag = openingTags.some( ( tag ) => part.startsWith( tag ) );
				return isInsideTag ? part.replace( regex, replacement ) : part;
			} )
			.join( '' );
	}

	/**
	 * Also keeps the pattern in the result, unlike string.prototype.split. Algorithm isn't perfect, will fail with this pattern: <ref>Test/>Test</ref>. But should be good enough for DraftCleaner stuff.
	 *
	 * The string is cut immediately in front of every position where one of the
	 * patterns starts, so joining the returned pieces gives back the original string.
	 *
	 * @param {string} string
	 * @param {string[]} patterns
	 * @return {string[]}
	 */
	_splitStringUsingMultiplePatterns( string, patterns ) {
		const result = [];
		let positionOfLastMatch = 0;
		for ( let i = 0; i < string.length; i++ ) {
			// startsWith( pattern, i ) tests in place, without copying the rest of the string at every index
			const patternMatch = patterns.some( ( pattern ) => string.startsWith( pattern, i ) );
			if ( !patternMatch ) {
				continue;
			}
			const chunk = string.slice( positionOfLastMatch, i );
			// if blank (happens if i=0 matches), continue instead of putting an empty "" into the array
			if ( !chunk ) {
				continue;
			}
			result.push( chunk );
			positionOfLastMatch = i;
		}
		// Don't forget the last chunk.
		result.push( string.slice( positionOfLastMatch ) );
		return result;
	}
}

$(async function() {

// === main.js ======================================================

/* THIS SCRIPT IS BUGGY ABOUT 10% OF THE TIME. Be sure to check the diff that pops up before submitting.

- Adds "Run DraftCleaner" link to the left sidebar

- Top uses:
	- remove extra line breaks (for example, 3 enters in a row)
	- in the first sentence, bold the title
	- convert curly quotes to regular quotes
	- put <ref>s after periods
	- clean external links out of the main article area (turn them into references)
	- add ==References== section
	- remove bold from headings

- Other uses:
	- converts [inline external links] to <ref>s
	- removes spaces in front of <ref>s
	- get rid of any level 2 heading that contains the article's title
	- converts =TitleHeading= to ==H2Heading==
	- replaces Covid-19 with COVID-19
	- removes enter characters between <ref>s
	- trims whitespace at beginning and end
	- remove self wikilinks to the article title
	- convert ==Reference== to ==References==
	- turn bare URLs into references
	- fix errant spaces at beginning of lines, which makes a blockquote looking thing
	- delete whitespace at the end of lines
	- convert double spaces to single spaces
	- remove blank heading
	- in refs, turn short links into long links, so you can see the domain
	- change year range dash to ndash
	- if in draftspace, and draft in categories, disable the categories
	- delete <br />. in drafts, these are usually poorly placed
	- fix empty references section
	- right align images
	- remove whitespace if that is the only character on a line
	- correct capitalization of see also, references, further reading, external links
	- if article has headings but no lead, remove first heading
	- replace unicode bullets with asterisks

Add one of the following to your User:yourName/common.js (at the top) to change the position where DraftCleaner puts its link:
	window.draftCleanerPutInToolsMenu = true;
	window.draftCleanerPutInMoreMenu = true;

This page was assembled from 3 files using my publish.php script. I have an offline test suite with around 100 unit tests for the DraftCleaner and StringFilter classes.
*/

( function () {

/**
 * Fetches the current wikitext of a page through the parse API.
 *
 * @param {string} title page name including namespace
 * @return {Promise<string>} the wikitext, or '' when the page has no current revision
 */
async function getWikicode( title ) {
	// wgCurRevisionId is falsy for a page that does not exist (any more), so there is nothing to fetch
	const hasCurrentRevision = Boolean( mw.config.get( 'wgCurRevisionId' ) );
	if ( !hasCurrentRevision ) {
		return '';
	}

	const encodedTitle = encodeURIComponent( title );
	const requestURL = 'https://en.wikipedia.org/w/api.php?action=parse&page=' + encodedTitle + '&prop=wikitext&formatversion=2&format=json';

	let fetchedWikitext = '';
	await $.ajax( {
		url: requestURL,
		dataType: 'json',
		success: ( apiResponse ) => {
			fetchedWikitext = apiResponse.parse.wikitext;
		}
	} );
	return fetchedWikitext;
}

/**
 * Opens the "Show changes" (diff) screen of the edit form with the new wikitext
 * and edit summary already filled in, by POSTing a hidden form to index.php.
 * Submitting the form navigates the browser away from the current page.
 *
 * NOTE(review): the HTML inside the $( … ) calls had been stripped, leaving empty
 * strings. It was reconstructed from the standard MediaWiki edit form field names
 * (wpTextbox1, wpSummary, wpDiff, wpUltimateParam) - confirm against the upstream source.
 *
 * @param {string} titleWithNamespaceAndUnderscores page to edit
 * @param {string} wikicode new wikitext for the page
 * @param {string} editSummary edit summary to pre-fill
 */
function goToShowChangesScreen( titleWithNamespaceAndUnderscores, wikicode, editSummary ) {
	const titleEncoded = encodeURIComponent( titleWithNamespaceAndUnderscores );
	const wgServer = mw.config.get( 'wgServer' );
	const wgScriptPath = mw.config.get( 'wgScriptPath' );
	const baseURL = wgServer + wgScriptPath + '/';

	// https://stackoverflow.com/a/12464290/3480193
	// Values are set with .val() rather than interpolated into the HTML, so wikitext
	// containing quotes or angle brackets cannot break out of the attribute.
	$( `<form action="${ baseURL }index.php?title=${ titleEncoded }&action=submit" method="POST"/>` )
		.append( $( '<input type="hidden" name="wpTextbox1">' ).val( wikicode ) )
		.append( $( '<input type="hidden" name="wpSummary">' ).val( editSummary ) )
		.append( $( '<input type="hidden" name="mode">' ).val( 'preview' ) )
		.append( $( '<input type="hidden" name="wpDiff">' ).val( 'Show changes' ) )
		.append( $( '<input type="hidden" name="wpUltimateParam">' ).val( '1' ) )
		.appendTo( $( document.body ) ) // it has to be added somewhere into the <body>
		.trigger( 'submit' );
}

/**
 * Looks up the name of the page being viewed.
 *
 * @return {string} the pagename, including the namespace name, but with spaces replaced by underscores
 */
function getArticleName() {
	const pageNameWithUnderscores = mw.config.get( 'wgPageName' );
	return pageNameWithUnderscores;
}

// don't run when not viewing articles

const action = mw.config.get( 'wgAction' );

const isNotViewing = action != 'view';

if ( isNotViewing ) {

return;

}

// don't run when viewing diffs

const isDiff = mw.config.get( 'wgDiffNewId' );

if ( isDiff ) {

return;

}

// Don't run in virtual namespaces

const isVirtualNamespace = mw.config.get( 'wgNamespaceNumber' ) < 0;

if ( isVirtualNamespace ) {

return;

}

let menuID = 'p-navigation';

// @ts-ignore

if ( window.draftCleanerPutInToolsMenu ) {

menuID = 'p-tb';

// @ts-ignore

} else if ( window.draftCleanerPutInMoreMenu ) {

menuID = 'p-cactions';

}

const titleWithNamespaceAndUnderscores = getArticleName();

const namespaceNumber = mw.config.get( 'wgNamespaceNumber' );

let running = false;

// Add DraftCleaner to the toolbar

mw.loader.using( [ 'mediawiki.util' ], () => {

mw.util.addPortletLink( menuID, '#', 'Run DraftCleaner', 'DraftCleanerLink' );

$( '#DraftCleanerLink' ).on( 'click', async () => {

// prevent running the script while script is already in progress

if ( running ) {

return;

}

running = true;

mw.notify( 'Parsing page content...' );

// get page wikicode

const titleWithNamespaceAndSpaces = titleWithNamespaceAndUnderscores.replace( /_/g, ' ' );

const originalWikicode = await getWikicode( titleWithNamespaceAndUnderscores );

let wikicode = originalWikicode;

const dc = new DraftCleaner();

wikicode = dc.cleanDraft( wikicode, namespaceNumber, titleWithNamespaceAndSpaces );

const needsChanges = wikicode != originalWikicode;

if ( needsChanges ) {

const summary = 'clean up (DraftCleaner)';

await goToShowChangesScreen( titleWithNamespaceAndUnderscores, wikicode, summary );

} else {

mw.notify( 'No changes needed!' );

}

} );

} );

}() );

});

// </nowiki>