User:RonBot/7/Source1
foot.py
# Shared mutable state for the bot.  The main program imports this module as
# ``foot`` and reads and writes these names directly.
mycatlist1 = []   # queue of category titles walked by findpages()
mycatlist2 = []   # queue of category titles walked by findexclude()
allplayers = []   # every distinct non-category title seen by findpages()
excludelist = []  # titles gathered by findexclude() and ProcessDoneCat()
donelist = []     # titles RemoveCat() will strip the category from
nomatchlist = []  # titles AddCat() will add the category to
stublist = []     # titles whose wikitext contained "stub" (filled by AddCat())
redirlist = []    # redirects that AddCat() skipped
removed = 0       # number of successful RemoveCat() edits
added = 0         # number of successful AddCat() edits
Main Program
from wikitools import *
import time
import datetime
import urllib
import json
import userpassbot #Bot password
import warnings
import re
import mwparserfromhell
import datetime
import sys
import foot
# Module-level wiki session: every API request and Page object in this file is
# created against ``site``.  Credentials come from the local ``userpassbot``
# module.
site = wiki.Wiki() #Tell Python to use the English Wikipedia's API
site.login(userpassbot.username, userpassbot.password) #login
# Routine to autoswitch some of the output - as filenames have accented chars!
def pnt(s):
    """Print *s*; if the console cannot encode it, print its UTF-8 bytes instead."""
    try:
        print(s)
    except UnicodeEncodeError:
        encoded = s.encode('utf-8')
        print(encoded)
def startAllowed():
    """Report whether the on-wiki switch page enables this task.

    Returns:
        "run" when the wikitext of User:RonBot/7/Run is exactly "Run",
        otherwise "no".
    """
    switch_page = page.Page(site, "User:RonBot/7/Run")
    switch_text = switch_page.getWikiText()
    return "run" if switch_text == "Run" else "no"
def allow_bots(text, user):
    """Decide from a page's {{bots}} / {{nobots}} template whether *user* may edit.

    Only the first template named ``bots`` or ``nobots`` is examined.

    Args:
        text: wikitext of the page.
        user: bot account name; compared case-insensitively.

    Returns:
        True when no such template exists, when an ``allow`` list names the
        user or ``all``, when a ``deny`` list is ``none``, or when nothing in
        the template decides otherwise.  False when ``allow`` is ``none``,
        when a ``deny`` list names the user or ``all``, or for a ``nobots``
        template with no parameters.
    """
    user = user.lower().strip()
    text = mwparserfromhell.parse(text)
    template = next(
        (candidate for candidate in text.filter_templates()
         if candidate.name.matches(['bots', 'nobots'])),
        None)
    if template is None:
        return True
    print("template found")  # Have we found one
    for param in template.params:
        names = [entry.lower().strip() for entry in param.value.split(",")]
        names_joined = ''.join(names)
        if param.name == 'allow':
            print("We have an ALLOW")  # allow found
            if names_joined == 'none':
                return False
            if any(name in (user, 'all') for name in names):
                return True
        elif param.name == 'deny':
            print("We have a DENY")  # deny found
            if names_joined == 'none':
                print("none - true")
                return True
            for name in names:
                if name in (user, 'all'):
                    pnt(name)
                    pnt(user)
                    print("all - false")
                    return False
    if template.name.matches('nobots') and len(template.params) == 0:
        print("match - false")
        return False
    return True
def findpages(nextcat):
    """Walk one category and file its members into the shared ``foot`` lists.

    Members whose title contains "Category" are queued on ``foot.mycatlist1``
    (once each) so the caller can walk them later.  Every other member is
    recorded once in ``foot.allplayers`` and, if it is not in
    ``foot.excludelist``, once in ``foot.nomatchlist``.

    Args:
        nextcat: title passed to the API as ``cmtitle``.
    """
    continue_token = ''
    while True:
        query = {
            'action': 'query',
            'list': 'categorymembers',
            'cmtitle': nextcat,
            'cmlimit': 'max',
            'cmnamespace': '0|14',
            'cmcontinue': continue_token,
        }
        # query(False): fetch a single batch; continuation is handled below.
        result = api.APIRequest(site, query).query(False)
        members = pagelist.listFromQuery(site, result['query']['categorymembers'])
        for member in members:
            title = member.unprefixedtitle
            if "Category" in title:
                if title in foot.mycatlist1:
                    pnt("NOT APPENDING " + title)
                else:
                    foot.mycatlist1.append(title)
                    pnt("APPENDING " + title)
                    print(len(foot.mycatlist1))
                continue
            if title in foot.allplayers:
                continue  # already handled on an earlier category
            foot.allplayers.append(title)
            if title not in foot.excludelist and title not in foot.nomatchlist:
                foot.nomatchlist.append(title)
        if 'continue' not in result:
            break
        continue_token = result['continue']['cmcontinue']
        print("continue")
    return
def splittextpoint(pagetext):
    """Find where to cut *pagetext* so text can go in before a trailing template.

    Called by AddCat() only for text that contains "stub".  Scans backwards
    over at most the last 100 characters:

    * a "]" seen first means a link is the last thing on the page, so the
      end of the text is returned;
    * once a "}" has been seen, the next newline found (still scanning
      backwards) is the cut point.

    Args:
        pagetext: page wikitext.

    Returns:
        Index of the newline preceding the trailing "}"-terminated text, or
        ``len(pagetext) - 1`` when no such cut point is found.
    """
    size = len(pagetext) - 1
    print(size)
    seen_brace = False
    # Stop at index 0.  Without the bound, text shorter than 100 characters
    # drives the index negative, which wraps round to the end of the string
    # (giving a bogus negative cut point) or raises IndexError.
    # range() rather than xrange(): at most 100 items, and valid on any Python.
    for pos in range(size, max(size - 100, -1), -1):
        char = pagetext[pos]
        print("%s %s" % (pos, repr(char)))
        if char == "]":
            return size
        if char == "}":
            seen_brace = True
        elif seen_brace and char == "\n":
            return pos
    return size
def ProcessDoneCat(nextcat):
    """Sort the members of *nextcat* into ``foot.donelist`` or ``foot.excludelist``.

    A member already present in ``foot.excludelist`` is appended to
    ``foot.donelist``; any other member is appended to ``foot.excludelist``.

    Args:
        nextcat: title passed to the API as ``cmtitle``.
    """
    continue_token = ''
    print("PDC")
    while True:
        query = {
            'action': 'query',
            'list': 'categorymembers',
            'cmtitle': nextcat,
            'cmlimit': 'max',
            'cmnamespace': '0|14',
            'cmcontinue': continue_token,
        }
        # query(False): fetch a single batch; continuation is handled below.
        result = api.APIRequest(site, query).query(False)
        members = pagelist.listFromQuery(site, result['query']['categorymembers'])
        for member in members:
            pnt(member.unprefixedtitle)
            title = member.unprefixedtitle
            if title in foot.excludelist:
                pnt("REMOVE THE CAT IN " + title)
                target = foot.donelist
            else:
                pnt("EXCUDE " + title)
                target = foot.excludelist
            target.append(title)
        if 'continue' not in result:
            break
        continue_token = result['continue']['cmcontinue']
        print("continue")
    return
def RemoveCat():
    """Remove the "not categorized by position" category from ``foot.donelist`` pages.

    For each title: load the wikitext, skip the page if allow_bots() refuses
    'RonBot', otherwise delete every
    ``[[Category:Association footballers not categorized by position]]``
    link (with any newlines that follow it) and save.  Each successful save
    increments ``foot.removed``; a failed save is reported and the run
    continues with the next page.
    """
    print(len(foot.donelist))
    for pagetitle in foot.donelist:
        pnt(pagetitle)
        pagepage = page.Page(site, pagetitle)
        pagetext = pagepage.getWikiText()
        if not allow_bots(pagetext, 'RonBot'):  # does the page allow bots
            continue
        print("++++++++++++++++++++++++++++++++++++++++")
        print("REMOVAL bot allowed on article")
        pnt(pagetext)
        pagetext = re.sub(r'\[\[Category:Association footballers not categorized by position\]\]\n*', '', pagetext)
        pnt(pagetext)
        try:
            pagepage.edit(text=pagetext, bot=True, summary="(Task 7) - Removal of :Category:Association footballers not categorized by position")
        except Exception:
            # Best effort: report and carry on.  Not a bare "except:", so that
            # Ctrl-C / SystemExit can still stop a long run.
            print("Failed to write")
        else:
            foot.removed += 1
            print("writing changed page")
        print("++++++++++++++++++++++++++++++++++++++++")
    return
# Category this task maintains, as a title and as the wikitext link that files
# a page in it.  The "[[ ]]" were missing from the recovered listing;
# RemoveCat() strips the bracketed form, so that is the form added here.
_TARGET_CATEGORY = "Category:Association footballers not categorized by position"
_TARGET_CATEGORY_LINK = "[[" + _TARGET_CATEGORY + "]]"

# Termination for trials: AddCat() stops once this many edits have been made.
_TRIAL_EDIT_LIMIT = 13000


def AddCat():
    """Add the target category to every page named in ``foot.nomatchlist``.

    For each title the page is loaded without following redirects.  Pages
    whose text contains "stub" are recorded in ``foot.stublist`` and get the
    category inserted at the cut point chosen by splittextpoint(); all other
    pages get it appended.  Redirects are recorded in ``foot.redirlist`` and
    left untouched, and pages where allow_bots() refuses 'RonBot' are skipped.
    Each successful save increments ``foot.added``.  The loop stops early once
    ``foot.added + foot.removed`` reaches the trial limit.
    """
    print(time.ctime())
    print(len(foot.nomatchlist))
    for pagetitle in foot.nomatchlist:
        pagepage = page.Page(site, pagetitle, True, False)  # don't follow redirects!
        pageredir = pagepage.isRedir()
        pagetext = pagepage.getWikiText()
        lastindex = len(pagetext) - 1
        cutplace = lastindex
        if "stub" in pagetext:
            foot.stublist.append(pagetitle)
            cutplace = splittextpoint(pagetext)
        if allow_bots(pagetext, 'RonBot'):  # does the page allow bots
            pnt(pagetitle + " ADDITION bot allowed on article")
            if pageredir:
                pnt("REDIRECT " + pagetitle)  # show that page, but don't add the cat.
                foot.redirlist.append(pagetitle)
            else:
                if cutplace < lastindex:
                    # The newline at cutplace is replaced by "\n<link>\n", so
                    # the link lands on its own line ahead of the trailing text.
                    pagetext = (pagetext[0:cutplace] + "\n" + _TARGET_CATEGORY_LINK
                                + "\n" + pagetext[cutplace + 1:])
                else:
                    pagetext = pagetext + "\n" + _TARGET_CATEGORY_LINK
                try:
                    pagepage.edit(text=pagetext, bot=True, summary="(Task 7) - Addition of :Category:Association footballers not categorized by position")
                except Exception:
                    # Best effort: report and carry on.  Not a bare "except:",
                    # so Ctrl-C / SystemExit can still stop a long run.
                    print("Failed to write")
                else:
                    foot.added += 1
                    print("writing changed page")
                print("++++++++++++++++++++++++++++++++++++++++")
        # Termination for trials. Comment out this check for a full run.
        if foot.added + foot.removed >= _TRIAL_EDIT_LIMIT:
            return
    return


def findexclude(nextcat):
    """Walk one category and add its members to ``foot.excludelist``.

    Members whose title contains "Category" are queued on ``foot.mycatlist2``
    (once each) so the caller can walk them later; every other member is
    added once to ``foot.excludelist``.

    Args:
        nextcat: title passed to the API as ``cmtitle``.
    """
    lastContinue = ''
    while True:
        params = {
            'action': 'query',
            'list': 'categorymembers',
            'cmtitle': nextcat,
            'cmlimit': 'max',
            'cmnamespace': '0|14',
            'cmcontinue': lastContinue,
        }
        req = api.APIRequest(site, params)  # Set the API request
        res = req.query(False)  # Send the API request and store the result in res
        touse = pagelist.listFromQuery(site, res['query']['categorymembers'])
        for filep in touse:
            pagename = filep.unprefixedtitle
            if "Category" in pagename:
                if pagename not in foot.mycatlist2:
                    foot.mycatlist2.append(pagename)
                    pnt("APPENDING " + pagename)
                    print(len(foot.mycatlist2))
                else:
                    pnt("NOT APPENDING " + pagename)
            elif pagename not in foot.excludelist:
                foot.excludelist.append(pagename)
        if 'continue' not in res:
            break
        lastContinue = res['continue']['cmcontinue']
        print("continue")
    return


def TestMainIO():
    """Manual test: run AddCat() against two sandbox pages, then exit."""
    #foot.nomatchlist=list()
    #foot.nomatchlist.append("User:Ronhjones/Sandbox3")
    #foot.nomatchlist.append("User:Ronhjones/Sandbox4")
    #foot.nomatchlist.append("User:Ronhjones/Sandbox5")
    #pagepage = page.Page(site, 'Sammy Frost', True, False) # dont follow redirects!
    #pagetext = pagepage.getWikiText()
    ##cutplace=splittextpoint(pagetext)
    #pagetext=pagetext[0:cutplace]+"\n"+"Category:Association footballers not categorized by position"+"\n"+pagetext[cutplace+1:]
    #pnt(repr(pagetext))
    foot.donelist = list()
    foot.nomatchlist.append("User:Ronhjones/Sandbox4")
    foot.nomatchlist.append("User:Ronhjones/Sandbox5")
    AddCat()
    sys.exit()


def _write_title_list(path, label, titles):
    """Write *titles* to *path*, one per line, announcing it as file *label*."""
    # "with" guarantees the file is closed; the original wrote "myfile.close"
    # without parentheses, which never closed anything.
    with open(path, 'w') as myfile:
        print("OPEN FILE %s" % label)
        for item in titles:
            try:
                myfile.write("%s\n" % item)
            except UnicodeEncodeError:
                myfile.write("%s\n" % item.encode('utf-8'))


def main():
    """Run the whole task: build the lists, fix the category, write the logs.

    1. Stop unless startAllowed() reports the task is enabled.
    2. Walk the position categories (and the subcategories they queue) to
       build ``foot.excludelist``.
    3. Split the target category's members with ProcessDoneCat() and remove
       the category from those in ``foot.donelist``.
    4. Walk the general player categories to build ``foot.nomatchlist`` and
       add the category to those pages.
    5. Write the working lists to the log files and print the edit counts.
    """
    go = startAllowed()  # Check if task is enabled
    if go != "run":
        # The result used to be ignored, so the on-wiki switch had no effect.
        print("Task not enabled - stopping")
        return
    #TestMainIO() # - test run was OK.
    #sys.exit()

    # Get the exclude list
    foot.excludelist = list()
    foot.stublist = list()
    foot.redirlist = list()
    foot.mycatlist2 = [
        "Category:Association football defenders",
        "Category:Association football central defenders",
        "Category:Association football fullbacks",
        "Category:Association football sweepers",
        "Category:Association football forwards",
        "Category:Association football inside forwards",
        "Category:Association football outside forwards",
        "Category:Association football goalkeepers",
        "Category:Association football midfielders",
        "Category:Association football wing halves",
        "Category:Association football wingers",
        "Category:Women's association football defenders",
        "Category:Women's association football forwards",
        "Category:Women's association football goalkeepers",
        "Category:Women's association football midfielders",
        "Category:Association football player non-biographical articles",
        "Category:Association football utility players",
        "Category:Women's association football central defenders",
        "Category:Women's association football fullbacks",
        "Category:Women's association football wingers",
        "Category:Women's association football utility players",
    ]
    # Index loop on purpose: findexclude() appends newly found subcategories
    # to the list being walked, so its length grows during the loop.
    listnum = 0
    while listnum < len(foot.mycatlist2):
        pnt("CAT " + foot.mycatlist2[listnum])
        findexclude(foot.mycatlist2[listnum])
        listnum += 1
        print("LIST No.  %s" % listnum)
    print(len(foot.excludelist))

    # Get the target cat, if not in exclude then add to that list
    # Otherwise add to donelist - these will need to have the cat removed.
    foot.removed = 0
    foot.added = 0
    print("check the done cat")
    ProcessDoneCat(_TARGET_CATEGORY)
    print(len(foot.donelist))
    print(len(foot.excludelist))
    if foot.donelist:
        RemoveCat()
    _write_title_list('C:\\Python27\\bot\\log7\\articlelist1.txt', 1, foot.excludelist)

    # Now ready to process Mainlist
    # Make a list of players that are NOT in the exclude list
    foot.allplayers = list()
    foot.nomatchlist = list()
    foot.mycatlist1 = [
        "Category:Association football defenders",
        "Category:Footballers by city or town",
        "Category:Association football players by club",
        "Category:Association football players by competition",
        "Category:Association football players by country",
        "Category:Association football players by national team",
        "Category:Association football players by nationality",
        "Category:Women's association football players",
        "Category:Expatriate association football players",
    ]
    # Same growing-queue pattern as above; findpages() appends to mycatlist1.
    listnum = 0
    while listnum < len(foot.mycatlist1):
        pnt("CAT" + foot.mycatlist1[listnum])
        findpages(foot.mycatlist1[listnum])
        listnum += 1
        print("LIST No.  %s" % listnum)
    print(len(foot.allplayers))
    print(len(foot.nomatchlist))
    foot.nomatchlist.sort()
    if foot.nomatchlist:
        AddCat()

    _write_title_list('C:\\Python27\\bot\\log7\\articlelist2.txt', 2, foot.nomatchlist)
    _write_title_list('C:\\Python27\\bot\\log7\\articlelist3.txt', 3, foot.allplayers)
    _write_title_list('C:\\Python27\\bot\\log7\\articlelist4.txt', 4, foot.stublist)
    _write_title_list('C:\\Python27\\bot\\log7\\articlelist5.txt', 5, foot.redirlist)

    print(foot.added)
    print(foot.removed)
    print(time.ctime())


if __name__ == "__main__":
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        main()