# User:Ritchie333/arcsinebot.py
#
# Open-source emulation of User:SineBot.
# Currently dumps out posts made to the common talk namespaces in the last five minutes that might need to be signed.
# Work in progress - many false positives.

import pywikibot

import lxml.etree

import datetime

import re

def RecentChanges(site):
    """Query the recent changes made to the common talk namespaces.

    The window starts one minute before the current server time and
    extends a further five minutes into the past.

    :param site: object providing ``server_time()`` and ``recentchanges()``
        (this script passes the result of ``pywikibot.Site()``).
    :return: the result of ``site.recentchanges`` for namespaces
        ``1|3|5|7`` with ``bot=False``.
    """
    newest = site.server_time() - datetime.timedelta(minutes=1)
    oldest = newest - datetime.timedelta(minutes=5)
    query = {
        'namespaces': u'1|3|5|7',
        'bot': False,
        'start': newest,
        'end': oldest,
    }
    return site.recentchanges(**query)

def OptedInOrOut(site, user, name):
    """Check whether a namespace-3 page named after *user* transcludes *name*.

    :param site: site object handed to ``pywikibot.Page``.
    :param user: user name, compared against each transcluding page's
        title with the namespace prefix removed.
    :param name: full title of the template page to look up.
    :return: True if some transcluding page in namespace 3 has a title
        (without namespace) equal to *user*, otherwise False.
    """
    template = pywikibot.Page(site, name)
    referrers = template.getReferences(namespaces='3', only_template_inclusion=True)
    # any() stops at the first matching page, just like an early return would.
    return any(user == ref.title(with_ns=False) for ref in referrers)

def OptedIn(site, user):
    """Return whether *user* is found by ``OptedInOrOut`` for Template:YesAutosign."""
    template = 'Template:YesAutosign'
    return OptedInOrOut(site, user, template)

def OptedOut(site, user):
    """Return whether *user* is found by ``OptedInOrOut`` for Template:NoAutosign."""
    template = u'Template:NoAutosign'
    return OptedInOrOut(site, user, template)

def IsNonText(text):
    """Decide whether added text should be skipped instead of signature-checked.

    :param text: the text added by an edit.
    :return: True when *text* consists only of a template call (optionally
        surrounded by non-word characters) or already contains a signature
        timestamp such as ``12:34, 5 June 2020 (UTC)``; False otherwise.
    """
    # Ignore template banner creation
    if re.match(r'^[^\w\d]*{{.*}}[^\w\d]*$', text) is not None:
        return True

    # Already signed it.
    # NOTE(review): the previous pattern here was a bare '$', which matches
    # every string and made this function return True unconditionally. The
    # timestamp pattern below mirrors the tail of the expression built by
    # CreateSearch and the format emitted by CreateUnsigned - confirm that it
    # matches the intended rule.
    if re.search(r'\d+:\d+, \d+ \w+ \d+ \(UTC\)', text) is not None:
        return True

    return False

def CreateSearch(user):
    """Build a regular expression that finds *user*'s signature in wikitext.

    The expression looks for a piped link to the user's page or talk page
    (``[[User:Name|...]]`` / ``[[User talk:Name|...]]``, optionally with a
    ``#section``) followed later on the same line by a timestamp of the form
    ``HH:MM, D Month YYYY (UTC)``.

    :param user: user name; it is escaped so that characters such as
        ``(``, ``)``, ``.`` or ``+`` are matched literally.
    :return: the pattern as a string, ready for ``re.search``.
    """
    return (
        r'\[\[[uU]ser([ _][tT]alk)?:'
        + re.escape(user)
        + r'(#.*)?\|.*\]\].*\d+:\d+, \d+ \w+ \d+ \(UTC\)'
    )

def CreateUnsigned(user, anon, timestamp):
    """Build the substituted "unsigned" template call for an edit.

    :param user: user name (or IP address) to attribute the comment to.
    :param anon: truthy selects the ``unsignedIP`` template, otherwise
        ``unsigned`` is used.
    :param timestamp: object with ``strftime``; rendered as
        ``%H:%M, %d %B %Y (UTC)``.
    :return: ``{{subst:<template>|<user>|<timestamp>}}`` as a string.
    """
    template = 'unsignedIP' if anon else 'unsigned'
    stamp = timestamp.strftime('%H:%M, %d %B %Y (UTC)')
    return '|'.join(['{{subst:' + template, user, stamp]) + '}}'

def ProcessTag(line, title, user, anon, timestamp, text):
    """Print a report for added text that lacks the author's signature.

    Nothing is printed when *text* is None, when ``IsNonText`` accepts it, or
    when the expression from ``CreateSearch(user)`` is found in it
    (case-insensitively).

    :param line: line number shown in the report.
    :param title: page title shown in the report.
    :param user: author of the edit.
    :param anon: passed through to ``CreateUnsigned``.
    :param timestamp: passed through to ``CreateUnsigned``.
    :param text: the added text, or None.
    """
    if text is None or IsNonText(text):
        return

    pattern = CreateSearch(user)
    if re.search(pattern, text, re.IGNORECASE) is not None:
        return

    print(title + ' : line ' + str(line))
    print(CreateUnsigned(user, anon, timestamp))
    for label, value in ((' Searching for : ', pattern), (' in : ', text)):
        print(label + value)
    print('--------------------')

def GetFirstTextLine(site, title):
    """Locate the first section heading in a page's current text.

    :param site: site object handed to ``pywikibot.Page``.
    :param title: title of the page to read.
    :return: 1-based number of the first line that looks like a
        ``== heading ==``; when no line matches, the number of lines plus one.
    """
    heading = re.compile(r'^[^\w\d]*==.*==[^\w\d]*$')
    wikitext = pywikibot.Page(site, title).text

    number = 0
    for number, content in enumerate(wikitext.split('\n'), start=1):
        if heading.match(content):
            return number
    # No heading found: report the position just past the last line.
    return number + 1

def ProcessDiff(site, title, user, anon, timestamp, diff):
    """Extract the text added by a diff and hand it to ``ProcessTag``.

    The diff is parsed as HTML. The line number is read from the last
    ``diff-lineno`` cell (text of the form ``Line N:``); the added text is the
    concatenation of all text inside ``diff-addedline`` cells. Nothing happens
    when the diff is empty, has no usable line number, lies at or above the
    first section heading of the page, or adds no text.

    :param site: site object, used to look up the page's first heading.
    :param title: title of the edited page.
    :param user: author of the edit.
    :param anon: passed through to ``ProcessTag``.
    :param timestamp: passed through to ``ProcessTag``.
    :param diff: HTML of the diff (in this script, from ``site.compare``).
    """
    # An empty diff carries no added lines, and the HTML parser has nothing
    # to build a tree from.
    if not diff:
        return
    dom = lxml.etree.HTML(diff)
    if dom is None:
        return

    # Match the class as a whitespace-separated token so cells that carry
    # additional classes next to the expected one are still found.
    lineno_cells = dom.xpath(
        '//tr/td[contains(concat(" ", normalize-space(@class), " "), " diff-lineno ")]'
    )
    if not lineno_cells:
        return

    # The cell text is None when the cell starts with a child element.
    match = re.match(r'Line (\d+):', lineno_cells[-1].text or '')
    if match is None:
        return
    line = int(match.group(1))

    # If the comment is above the first section, ignore it
    if line <= GetFirstTextLine(site, title):
        return

    added = dom.xpath(
        '//tr/td[contains(concat(" ", normalize-space(@class), " "), " diff-addedline ")]/div'
    )
    text = ''.join(part for tag in added for part in tag.itertext())
    if text != '':
        ProcessTag(line, title, user, anon, timestamp, text)

def IsUserValid(site, username):
    """Decide whether posts by a registered user should be checked.

    Users with fewer than 800 edits qualify unless ``OptedOut`` finds them;
    users with 800 or more edits qualify only when ``OptedIn`` finds them.

    :param site: site object handed to ``pywikibot.page.User``.
    :param username: name of the user.
    :return: True when the user's posts should be checked, else False.
    """
    account = pywikibot.page.User(site, username)
    if account is None:
        return False

    if int(account.editCount()) < 800:
        return not OptedOut(site, username)
    return bool(OptedIn(site, username))

def ProcessChange(site, rc):
    """Examine one recent-changes entry and process its diff if it qualifies.

    An entry is skipped when it is tagged as a revert, when its user is not
    present in the entry, when it has no previous revision
    (``old_revid`` <= 0), or when the author is a registered user for whom
    ``IsUserValid`` returns False. Entries containing an ``anon`` key are
    always treated as eligible authors.

    :param site: site object providing ``compare(old_revid, revid)``.
    :param rc: mapping describing the change; reads the keys ``tags``,
        ``user``, ``anon``, ``title``, ``old_revid``, ``revid`` and
        ``timestamp``.
    """
    # Ignore reverts / anti-vandalism. The display names are kept for
    # backward compatibility; the 'mw-' names are the tag identifiers.
    revert_tags = {'Undo', 'Rollback', 'mw-undo', 'mw-rollback', 'mw-manual-revert'}
    if revert_tags.intersection(rc.get('tags', ())):
        return

    # Without a user name there is nobody to attribute the post to.
    user = rc.get('user')
    if user is None:
        return
    anon = 'anon' in rc

    # Check the cheap condition before the user lookup: without a previous
    # revision there is nothing to diff against.
    old_revid = rc.get('old_revid', 0)
    if old_revid <= 0:
        return

    if not (anon or IsUserValid(site, user)):
        return

    title = rc['title']
    revid = rc['revid']
    timestamp = pywikibot.Timestamp.fromISOformat(rc['timestamp'])
    diff = site.compare(old_revid, revid)
    ProcessDiff(site, title, user, anon, timestamp, diff)

def Main():
    """Run one pass: fetch the recent changes and process each entry."""
    site = pywikibot.Site()
    for rc in RecentChanges(site):
        ProcessChange(site, rc)


# Only run the bot when executed as a script, so that importing this module
# does not trigger a run.
if __name__ == '__main__':
    Main()