# User:PointBot/source -- published source of the PointBot Wikipedia bot

# coding: utf-8
# (the "1." bullet lines in the pasted source were '#' comments mangled
# by the wiki renderer; restored here)

import urllib2, time, urllib

import random

# cookielib: keep the MediaWiki session cookies across API requests
import cookielib

urlopen = urllib2.urlopen

Request = urllib2.Request

# Install a global opener backed by a cookie jar so that every
# urllib2.urlopen() call in this module automatically carries the
# login-session cookies set by the API.
cj = cookielib.LWPCookieJar()

opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

urllib2.install_opener(opener)

# Wikipedia functions #

def parse(page, tag):
    """Scan *page* (a list of response lines) for an XML attribute of the
    form  tag="value"  and return the value of the first occurrence.

    Returns None when no line contains the attribute.

    FIX(review): the wiki rendering of this source dropped the quote
    characters out of three string literals; they are restored to the only
    form consistent with the offsets used (search for  tag="  and stop at
    the closing  "  -- the +len(tag)+2 skips exactly  tag=" ).
    """
    for line in page:
        if tag + '="' in line:
            value = ''
            # Walk the characters just past  tag="  until the closing quote.
            for letter in line[line.find(tag + '="') + len(tag) + 2:]:
                if letter == '"':
                    return value
                value += letter

def load(name):
    """Fetch the current wikitext of article *name* via the MediaWiki API.

    Returns the revision content as a string ('' for an empty/missing page).

    FIX(review): the wiki rendering dropped the quotes around the three
    string literals in the slicing below; 'xml:space', '>' and '</rev>' are
    restored -- the API returns the content inside
    <rev xml:space="preserve">...</rev>, which is what this code slices out.
    The '</rev>' terminator is the evident stripped-markup original; confirm
    against a live API response.
    """
    data = urllib.urlencode({'format': 'xml', 'action': 'query',
                             'prop': 'revisions', 'rvprop': 'content',
                             'titles': name})
    loadString = 'http://en.wikipedia.org/w/api.php?'
    page = urllib2.urlopen(loadString, data)
    pagestring = ''
    for i in page.readlines():
        pagestring += i
    # Jump to the revision payload, then take the text between the tag's
    # closing '>' and the closing </rev> element.
    pagestring = pagestring[pagestring.find('xml:space'):]
    return pagestring[pagestring.find('>') + 1:pagestring.find('</rev>')]

def login():
    """Log PointBot in to en.wikipedia through the API.

    Two-step MediaWiki login: the first POST obtains a login token (and
    session id), the second confirms the login with that token.  Session
    cookies are retained by the module-level cookie jar installed in the
    global opener.  Relies on the global `password` assigned in the main
    script below.

    Returns (lguserid, lgtoken, sessionid) parsed from the second reply.
    """
    # Get token
    data=urllib.urlencode({'format':'xml', 'action':'login', 'lgname':'PointBot', 'lgpassword':password})
    loginString='http://en.wikipedia.org/w/api.php?'
    loginpage=urllib2.urlopen(loginString, data)
    loginpage=loginpage.readlines()
    token=parse(loginpage, 'token')
    cookieprefix=parse(loginpage, 'cookieprefix')
    sessionid=parse(loginpage, 'sessionid')
    # login: confirm with the token from step one.
    # NOTE(review): 'enwiki_session' is sent as a POST parameter here --
    # presumably intended as a cookie; the cookie jar should already cover it.
    data=urllib.urlencode({'enwiki_session':sessionid,'format':'xml', 'action':'login', 'lgname':'PointBot', 'lgpassword':password, 'lgtoken':token})
    loginString='http://en.wikipedia.org/w/api.php?'
    loginpage=urllib2.urlopen(loginString, data)
    loginpage=loginpage.readlines()
    lguserid=parse(loginpage, 'lguserid')
    lgtoken=parse(loginpage, 'lgtoken')
    sessionid=parse(loginpage, 'sessionid')
    print 'Login was: ', parse(loginpage, 'result')
    print lguserid, lgtoken, sessionid
    return lguserid, lgtoken, sessionid

def get_edit_token(name, lguserid, lgtoken, sessionid):
    """Request an edit token and base timestamp from the API.

    Returns (timestamp, edittoken) parsed from the reply.

    FIX(review): the original built the `headers` dict and then never used
    it -- urllib2.Request was constructed without headers, so the request
    carried none of the credential values.  The dict is now actually passed
    to Request, which is the evident intent; verify the header names against
    what the wiki expects (they look cookie-shaped rather than HTTP-header-
    shaped, but the session cookie jar should carry the real auth anyway).
    """
    # NOTE(review): `name` is accepted but the token is requested against
    # 'Main Page' -- edit tokens are per-session, not per-page, so this works.
    data=urllib.urlencode({'format':'xml', 'action':'query', 'prop':'info|revisions', 'intoken':'edit', 'titles':'Main Page'})
    headers={'enwikiUserName':'PointBot','enwikiUserID':lguserid,'enwikiToken':lgtoken, 'enwiki_session':sessionid}
    loadString='http://en.wikipedia.org/w/api.php?'
    req=urllib2.Request(loadString, data, headers)
    page=urllib2.urlopen(req)
    page=page.readlines()
    timestamp=parse(page, 'timestamp')
    edittoken=parse(page, 'edittoken')
    return timestamp, edittoken

def edit_full(name, newcontent, timestamp, edittoken, summary):
    """Replace the entire text of page *name* with *newcontent*.

    `timestamp` is sent as basetimestamp so the server can detect edit
    conflicts, `edittoken` authorises the edit, and `summary` becomes the
    edit summary.  The API response is not checked.
    """
    data=urllib.urlencode({'format':'xml', 'action':'edit', 'title':name, 'summary':summary, 'text':newcontent, 'basetimestamp':timestamp, 'token':edittoken})
    loadString='http://en.wikipedia.org/w/api.php?'
    page=urllib2.urlopen(loadString, data)

def edit_add(name, newcontent, timestamp, edittoken, summary):
    """Append *newcontent* to page *name* as a new section ('section':'new').

    Same parameters as edit_full; `summary` doubles as the new section's
    heading under the MediaWiki edit API.  The API response is not checked.
    """
    data=urllib.urlencode({'format':'xml', 'action':'edit', 'title':name,'section':'new', 'summary':summary, 'text':newcontent, 'basetimestamp':timestamp, 'token':edittoken})
    loadString='http://en.wikipedia.org/w/api.php?'
    page=urllib2.urlopen(loadString, data)

def setup():
    """Log in and obtain an initial edit token.

    Returns (lguserid, lgtoken, sessionid, edittoken).  The timestamp from
    get_edit_token is discarded here; run() fetches a fresh pair per edit.
    """
    lguserid, lgtoken, sessionid=login()
    timestamp, edittoken=get_edit_token('User:PointBot/log', lguserid, lgtoken, sessionid)
    return lguserid, lgtoken, sessionid, edittoken

# Analysis functions #

def findNextLink(page):
    """Collect every [[wikilink]] target found in *page* (a string) and
    return one chosen at random -- used for randomly surfing Wikipedia.

    Piped links ([[target|label]]) yield only the target; links whose text
    contains ':' (other namespaces / interwiki links) are skipped.  May
    raise IndexError on an unterminated link or when no links are found;
    callers catch this.
    """
    candidates = []
    for start in range(len(page) - 1):
        # A wikilink opens with two consecutive '[' characters.
        if page[start] == '[' and page[start + 1] == '[':
            captured = ''
            pos = start + 1
            # Accumulate characters until the capture contains the closing
            # ']' or a pipe; the terminator is then trimmed off.
            while ']' not in captured and '|' not in captured:
                pos += 1
                captured += page[pos]
            if ':' not in captured:
                candidates.append(captured[:-1])
    return random.choice(candidates)

def getFirstSentence(page):
    """Return the first sentence of wikitext *page*: everything up to the
    first '.' that falls outside markup.  Returns None if no qualifying
    '.' is found.

    Heuristic: track a bracket-nesting score for {} [] () <> (plus the
    HTML entities &lt; / &gt; that the API uses for literal angle
    brackets) and a toggle for wiki ''italics''; a '.' only terminates the
    sentence when nesting is balanced, italics are closed, it doesn't
    follow a one-letter token or another '.' (abbreviations/ellipses), and
    isn't immediately after closing italics.

    FIX(review): the wiki rendering collapsed the 4-char entity literals
    to bare '<'/'>' (comparing a 4-char slice to 1 char is always false);
    '&lt;'/'&gt;' restores the evident intent.  The italics check now uses
    a slice so the last character can't raise IndexError, and the unused
    `found` flag is dropped.
    """
    score = 0      # bracket nesting depth (negative while inside markup)
    italics = 0    # 1 while inside wiki '' italics ''
    for i in range(len(page)):
        if page[i] == '{' or page[i] == '[' or page[i] == '(' or page[i] == '<' or page[i:i+4] == '&lt;':
            score -= 1
        if page[i] == '}' or page[i] == ']' or page[i] == ')' or page[i] == '>' or page[i:i+4] == '&gt;':
            score += 1
        if page[i:i+2] == "''":
            if italics == 0:
                italics = 1
            elif italics == 1:
                italics = 0
        if score == 0 and italics == 0:
            if page[i] == '.' and page[i-2] != ' ' and page[i-2] != '.':
                if page[i-3:i-1] != "''":
                    return page[:i+1]

def verb_in_first_sentence(page):
    """Return 1 when the page's first sentence contains one of the common
    linking verbs a descriptive article intro should have, else 0.

    Note the check is plain substring containment, exactly as the original:
    'is' also matches inside words like 'This'.
    """
    linking_verbs = ('is', 'are', 'were', 'was', 'will', 'refers')
    opening = getFirstSentence(page)
    return 1 if any(verb in opening for verb in linking_verbs) else 0

def run(name, names, lguserid, lgtoken, sessionid, edittoken):

page=load(name)

print 'Checking: ', name

if page!='':

if {{disambiguation}} not in page and {{disambig}} not in page and page[0] != '#':#if it is not a disambugation page

if verb_in_first_sentence(page) == 0:#if no correct verb is in first sentence

timestamp, edittoken=get_edit_token(name, lguserid, lgtoken, sessionid)

oldpage=load('User:PointBot/log')

first=first=getFirstSentence(page)

if name not in oldpage:

edit_full('User:PointBot/log', load('User:PointBot/log') + '\n\nArticle ' + name + ' lacks a proper descriptive introduction and could use some editing.\n' + str(time.time()), timestamp, edittoken, 'Verb report')

print 'Article ' + name + ' lacks proper descriptive introduction', first

try:

nextname=findNextLink(page)

except:nextname=random.choice(names)

else:nextname=random.choice(names)#if page was disamb

else:nextname=random.choice(names)#if page was blank

return nextname

if True:
    # Password redacted in the published source.
    password='*********'
    #good example: urban design
    lguserid, lgtoken, sessionid, edittoken=setup()
    # Seed the random-surf history and walk the link graph forever, logging
    # articles whose intros lack a linking verb.
    names=['wiki']
    name=run('wiki', names, lguserid, lgtoken, sessionid, edittoken)
    while True:
        try:
            names.append(name)
            name=run(name, names, lguserid, lgtoken, sessionid, edittoken)
        except Exception:
            # FIX(review): was a bare 'except:', which also swallowed
            # KeyboardInterrupt/SystemExit and made the bot unkillable.
            # On any failure fall back to a previously seen article.
            name=random.choice(names)