User:Misza13/spoilerkill.py

This is an interactive pywikipedia script that aids in the removal of {{tl|spoiler}} (and related) tags from articles. It works on a list of articles from a given file, showing the context within which the tags are placed and asks whether to remove them. In this (default) mode, it is a manually-assisted tool, which means that it doesn't require any approval whatsoever. The --auto option turns it into a fully automated bot that would remove all tags from all given articles.

Help

usage: spoilerkill.py [options] FILE

options:

-h, --help show this help message and exit

-s ARTICLE, --start=ARTICLE  skip articles before ARTICLE

-a, --auto  don't ask for removal confirmation - remove automatically

FILE - name of a file that contains the list of articles to browse through (one article title per line, optionally wrapped in wiki link brackets, e.g. [[Article name]])

A suitable file (updated daily) is located here:

: http://tools.wikimedia.de/~misza13/spoilers.txt

Code

import re, difflib

from optparse import OptionParser

# Shortcuts accepted at the "Edit summary" prompt: typing one of these keys
# is expanded to the full edit summary it maps to.
summaries = {
    u'c': u'cleanup',
    u'rm': u'Removing redundant template(s) per Wikipedia:Spoiler',
    u'ue': u'Removing unencyclopedic template(s)',
}

def bufline(ch=u'*', clr=10, L=40):
    """Print a horizontal separator line through ``wikipedia.output``.

    ch  -- string that is repeated to build the line
    clr -- colour code passed for every position of the line
           (meaning of the numeric codes is defined by pywikipedia)
    L   -- number of repetitions of ``ch`` (and of colour entries)
    """
    separator = ch * L
    palette = [clr] * L
    wikipedia.output(separator, colors=palette)

def _load_titles(paths):
    """Return the article titles listed in the given UTF-8 text files.

    Each file holds one title per line, optionally wrapped in wiki link
    brackets (``[[Title]]``).  Surrounding whitespace and brackets are
    removed; blank lines are skipped so that no empty title is produced.
    """
    titles = []
    for path in paths:
        # try/finally (rather than ``with``) closes the handle without
        # requiring a newer Python than the rest of the script does.
        handle = open(path, 'rb')
        try:
            raw = handle.read().decode('utf-8')
        finally:
            handle.close()
        for line in raw.split('\n'):
            title = line.strip().strip('[]').strip()
            if title:
                titles.append(title)
    return titles


def _show_context(text, match, header_rx, ctx):
    """Print the matched template with up to ``ctx`` characters on each side.

    The template itself is given colour code 12; any section header matched
    by ``header_rx`` inside the printed snippet is given colour code 11.
    """
    start = match.start('tpl')
    end = match.end('tpl')
    before = text[max(0, start - ctx):start]
    template = match.group('tpl')
    after = text[end:end + ctx]
    snippet = before + template + after

    # One colour entry per character of the snippet.
    colors = [None] * len(before) + [12] * len(template) + [None] * len(after)
    for hdr in header_rx.finditer(snippet):
        colors[hdr.start('hdr'):hdr.end('hdr')] = [11] * len(hdr.group('hdr'))
    wikipedia.output(snippet, colors=colors)


def main(options, args):
    """Walk through the listed articles and remove spoiler templates.

    options -- parsed command-line options; ``options.start`` (title to
               start from, may be None) and ``options.auto`` (remove without
               asking) are read
    args    -- names of the files that list the articles to process

    For every article the context of each spoiler template is shown, then
    the user (or ``--auto``) decides whether to remove the templates, edit
    the page by hand, or leave it alone.
    """
    ctx = 500  # characters of context shown on each side of a template

    articles = _load_titles(args)
    if options.start:
        # Plain lexical comparison: this presumably relies on the input list
        # being sorted -- TODO confirm against the list generator.
        articles = [a for a in articles if a >= options.start]

    # Any template whose name contains "spoil", together with the newlines
    # around it.  The named group "tpl" is the template itself.
    RX = re.compile(r'\n*(?P<tpl>\{\{[^}|]*?spoil[^}]*?\}\})\n*', re.IGNORECASE)
    # Section headers that typically introduce plot-like content; only used
    # for highlighting them in the displayed context.
    RXh = re.compile(
        r'(?P<hdr>=+.*(?:plot|summary|synopsis|recap|overview|history'
        r'|character|story|background|biography).*=+)',
        re.IGNORECASE)

    site = wikipedia.getSite()

    for title in articles:
        article = wikipedia.Page(site, title)
        bufline(ch=u'=', clr=13, L=60)
        wikipedia.output(u'Checking for spoilers in %s...' % article.title())

        try:
            oldtxt = article.get()
        except wikipedia.IsRedirectPage:
            wikipedia.output(u'Skipping redirect page...')
            continue
        except wikipedia.NoPage:
            # The list may be stale; a deleted page must not abort the run.
            wikipedia.output(u'Skipping nonexistent page...')
            continue

        if site.messages:
            msg = u'You have new messages!'
            wikipedia.output(msg, colors=[10] * len(msg))

        matches = list(RX.finditer(oldtxt))
        if not matches:
            wikipedia.output(u'Not found.')
            continue

        for match in matches:
            bufline(clr=14)
            _show_context(oldtxt, match, RXh, ctx)
        bufline(clr=14)

        newtxt = oldtxt
        summary = u''

        if options.auto:
            ch = 'y'
        else:
            ch = wikipedia.inputChoice('Remove spoiler templates?',
                                       ['y', 'e', 'n'], ['y', 'e', 'n'])

        if ch == 'y':
            # No third positional argument here: for a compiled pattern it is
            # ``count``, so passing re.IGNORECASE (== 2) would silently limit
            # the removal to the first two templates.  The pattern already
            # carries the IGNORECASE flag.
            newtxt = RX.sub('\n\n', oldtxt)
            summary = summaries[u'rm']
        elif ch == 'e':
            edt = editarticle.TextEditor()
            newtxt = edt.edit(oldtxt)
            if newtxt:
                summary = wikipedia.input(u'Edit summary [empty to abort]:')
                # Expand a shortcut such as "rm" to its full summary text.
                if summary in summaries:
                    summary = summaries[summary]

        # Save only when there is both a summary and an actual change.
        if summary and newtxt != oldtxt:
            try:
                article.put(newtxt, comment=summary, minorEdit=True)
            except wikipedia.SpamfilterError:
                wikipedia.output(u'Spamfilter error has occured!')
            except wikipedia.EditConflict:
                wikipedia.output(u'An edit conflict has occured!')

if __name__ == '__main__':
    # Command-line entry point: parse the options, then run main() with the
    # pywikipedia framework loaded.
    parser = OptionParser(usage='usage: %prog [options] FILE')
    parser.add_option('-s', '--start', dest='start',
                      help='skip articles before ARTICLE', metavar='ARTICLE')
    parser.add_option('-a', '--auto', action='store_true', dest='auto',
                      help='don\'t ask for removal confirmation - remove automatically')
    options, args = parser.parse_args()

    # FILE is mandatory according to the usage text; report its absence
    # before the framework is loaded.
    if not args:
        parser.error('no FILE given')

    # Import outside the try block: if the import itself fails, the
    # ``finally`` clause must not hide the ImportError behind a NameError on
    # ``wikipedia``.  The names are bound at module level, where bufline()
    # and main() look them up.
    import wikipedia
    try:
        import editarticle
        main(options, args)
    finally:
        wikipedia.stopme()

spoilerkill.py