User:John Bot III/Code

Here! have some code :).

#!/usr/bin/python
# -*- coding: utf-8 -*-

"""

Script to check recently uploaded files. This script checks if a file

description is present and if there is only a {{PD}} tag in the description.

It will tag a file "no source" in the former case, and request the uploader

to choose a more specific license in the latter case.

This script will have to be configured for each language. Please submit

translations as addition to the pywikipediabot framework.

Everything that needs customisation is indicated by comments.

This script understands the following command-line arguments:

-limit - The number of images to check (default: 80)

-commons - The bot will check whether an image with the same name exists on

Commons and, if so, it reports the image.

-break - To break the bot after the first check (default: recursive)

-time[:#] - Time in seconds between repeat runs (default: 30)

-wait[:#] - Wait x seconds before checking the images (default: 0)

-skip[:#] - The bot skips the first [:#] images (default: 0)

-start[:#] - Use allpages() as generator (it starts from Image:[:#])

-cat[:#] - Use a category as generator

-regex[:#] - Use regex, must be used with -url or -page

-page[:#] - Define the name of the wiki page that lists the images

-url[:#] - Define the URL of the page that lists the images

-untagged[:#] - Use daniel's tool as generator ( http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php )

---- Instructions for the real-time settings ----

For every new block you have to add:

<------- ------->

In this way the bot can tell where each block starts and read the right parameters.

Name= Sets the name of the block.
Find= Defines what to search for anywhere in the text of the image's description,

while Findonly= matches only when the image's description is exactly the text that you give.

Summary= The edit summary that the bot will use when it notifies the problem.
Head= The heading (incipit) that the bot will use for the message.
Text= The template that the bot will use when it reports the image's problem.

---- Known issues/FIXMEs: ----

Fix the "real-time" regex and function
Add the "catch the language" function for commons.
Add new documentation
Add a report for the image tagged.
Fix the settings part when the bot save the data (make it better)

"""

#
# (C) Kyle/Orgullomoore, 2006-2007 (newimage.py)
# (C) Siebrand Mazeland, 2007
# (C) Filnik, 2007
# (C) Compwhizii, 2008
#
# Distributed under the terms of the MIT license.
#

# Revision identifier of this script (looks like an expanded version-control
# "$Id$" keyword -- TODO confirm).
__version__ = '$Id: checkimages.py 5060 2008-02-20 14:29:29Z filnik $'

import re, time, urllib2

import wikipedia, config, os, locale, sys

import cPickle, pagegenerators, catlib

# An empty string selects the user's default locale settings;
# printWithTimeZone() later reads the resulting encoding via locale.getlocale().
locale.setlocale(locale.LC_ALL, '')

# <------------------------------------------- Change only below! ----------------------------------------------------->#

# The text that will be added to the image description (i.e. the {{no source}} tag with the right day/month/year).

# Per-project wikitext that checkbot() passes to report() as the text to
# append to an image description page when no license is found.
# Selected with wikipedia.translate() unless the -untagged generator is used.
n_txt = {
    'commons': '\n{{subst:nld}}',
    'en': '\n{{Untagged|day={{subst:CURRENTDAY}}|month={{subst:CURRENTMONTHNAME}}}}',
    'hu': u'\n{{nincslicenc|~~~~~}}',
    'it': '\n{{subst:unverdata}}',
    'ja': '{{subst:Nsd}}',
    'ta': '\n{{subst:nld}}',
    'zh': '{{subst:No license/auto}}',
}

# Same role as n_txt, but checkbot() selects this table instead when the
# -untagged generator is active. Only 'en' defines a non-empty text here.
n2_txt = {
    'commons': '',
    'en': '\n{{Untagged Old|day={{subst:CURRENTDAY}}|month={{subst:CURRENTMONTHNAME}}}}',
    'hu': u'',
    'it': '',
    'ja': '',
    'ta': '',
    'zh': '',
}

# Texts that the bot looks for to decide whether the image is already tagged.
# If an entry contains "{{", a regex is built from it to make a better check.
# It works like this:
# '{{no license' --> '\{\{(?:template:|)no[ _]license ?(?:\||\n|\}) ?' (case insensitive).
# If there is no "{{", a plain substring test is used instead (if x in Text).

txt_find = {

'commons':[u'{{no license', u'{{nld', u'{{no permission since'],

'en':[u'{{nld', u'{{no license', u'{{di-', u'{{untagged'],

'hu':[u'{{nincsforrás',u'{{nincslicenc'],

'it':[u'{{unverdata', u'{{unverified'],

'ja':[u'{{no source', u'{{unknown', u'{{non free', u' #

class LogIsFull(wikipedia.Error):
    """Raised when the report (log) page is too long to take more entries.

    main.report_image() raises it once the report page text reaches the
    configured length limit, so that the bot stops adding data to it.
    """

class NothingFound(wikipedia.Error):
    """Raised when a regex search returned [] instead of any result.

    main.untaggedGenerator() raises it when the fetched page yields no match.
    """

def printWithTimeZone(message):
    """Output *message* followed by the current UTC date and time.

    A single space is appended to *message* when it does not already end
    with one, then the timestamp ("%d %b %Y %H:%M:%S (UTC)", taken from
    time.gmtime()) is added and the result is sent to wikipedia.output().

    The timestamp produced by time.strftime() is decoded with the encoding
    reported by locale.getlocale() when one is set, because month names
    depend on the locale.
    """
    # endswith() also copes with an empty message, where message[-1] would
    # raise IndexError.
    if not message.endswith(' '):
        message = '%s ' % unicode(message)
    stamp = time.strftime(u"%d %b %Y %H:%M:%S (UTC)", time.gmtime())
    encoding = locale.getlocale()[1]
    if encoding:
        time_zone = unicode(stamp, encoding)
    else:
        time_zone = unicode(stamp)
    wikipedia.output(u"%s%s" % (message, time_zone))

def pageText(url):
    """Return the raw body fetched from *url* (used for non-wiki pages).

    The request is sent with a browser-like User-Agent header. If the first
    attempt raises urllib2.HTTPError, the function waits 10 seconds and tries
    exactly once more; an error on the second attempt propagates to the
    caller.
    """
    user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7'

    def fetch():
        # One complete request/read cycle; the response is always closed,
        # even when read() fails.
        request = urllib2.Request(url)
        request.add_header("User-Agent", user_agent)
        response = urllib2.urlopen(request)
        try:
            return response.read()
        finally:
            response.close()

    try:
        return fetch()
    # When you load too many users, urllib2 can give this error.
    except urllib2.HTTPError:
        printWithTimeZone(u"Server error. Pausing for 10 seconds... ")
        time.sleep(10)
        return fetch()

# Here is the main class.

class main:
    """Tag an image, notify its uploader and provide the image generators.

    Typical use by report(): create the object, call general() to store the
    per-image data, then put_mex() (tags the image and finds the uploader's
    talk page) and finally put_talk() (writes the notification).

    The per-site texts are read from module-level translation tables through
    wikipedia.translate().
    """

    def __init__(self, site, logFulNumber = 25000):
        """Store *site* and look up the site-specific settings.

        logFulNumber is the length of the report page text at or above which
        report_image() raises LogIsFull.
        """
        self.site = site
        self.logFulNumber = logFulNumber
        self.settings = wikipedia.translate(site, page_with_settings)
        self.rep_page = wikipedia.translate(site, report_page)
        self.rep_text = wikipedia.translate(site, report_text)
        self.com = wikipedia.translate(site, comm10)
        # Commento = Summary in italian
        self.commento = wikipedia.translate(self.site, comm)

    def general(self, newtext, image, notification, head, botolist):
        """Store the per-image data used by put_mex() and put_talk().

        Kept separate from __init__ so that the object can also be built
        with only the common data (as checkbot() does).
        """
        self.newtext = newtext
        self.image = image
        self.head = head
        self.notification = notification
        self.botolist = botolist

    def put_mex(self, put = True):
        """Tag the image (when *put* is true) and locate the uploader's talk page.

        Returns True and sets self.talk_page on success. Returns False when
        the description page does not exist or when the file version history
        holds no usable entry (the latter case is also written to the report
        page).
        """
        # Defining the image's Page Object
        p = wikipedia.ImagePage(self.site, 'Image:%s' % self.image)
        # Get the image's description
        try:
            testoa = p.get()
        except wikipedia.NoPage:
            wikipedia.output(u'%s has been deleted...' % p.title())
            # We have a problem! Report and exit!
            return False
        # With put=False this method only finds the user that has uploaded
        # the image (FixME: Rewrite a bit this part)
        if put:
            p.put(testoa + self.newtext, comment = self.commento, minorEdit = True)
        image_namespace = "%s:" % self.site.image_namespace()
        # paginetta is the image page object.
        paginetta = wikipedia.ImagePage(self.site, image_namespace + self.image)
        # The history is fetched once and reused below (the original asked
        # for it a second time just to read the last entry).
        imagedata = paginetta.getFileVersionHistory()
        # When an Image is deleted from Commons and someone has added something
        # in the wikipedia page, there is a description but no file data.
        try:
            if not imagedata:
                raise IndexError('empty file version history')
            # Field 1 of the last history entry is used as the user name.
            nick = imagedata[-1][1]
        except IndexError:
            wikipedia.output(u"Seems that %s hasn't the image at all, but there is something in the description..." % self.image)
            repme = "\n*:Image:%s seems to have problems (no data found in the image)"
            # We have a problem! Report and exit!
            self.report_image(self.image, self.rep_page, self.com, repme)
            return False
        luser = wikipedia.url2link(nick, self.site, self.site)
        pagina_discussione = "%s:%s" % (self.site.namespace(3), luser)
        # Defining the talk page (pagina_discussione = talk_page ^__^ )
        self.talk_page = wikipedia.Page(self.site, pagina_discussione)
        return True

    def put_talk(self, notification, head, notification2 = None, commx = None):
        """Write the notification on the talk page found by put_mex().

        NOTE: the *notification* and *head* arguments are ignored; the values
        stored by general() are used instead (behaviour kept from the
        original). *notification2*, when given, is a template with one
        placeholder for the image name and is used in place of the full
        message when the latest talk page edit was made by a known bot and
        is not the only edit in the loaded history. *commx* overrides the
        default edit summary.
        """
        commento2 = wikipedia.translate(self.site, comm2)
        talk_page = self.talk_page
        notification = self.notification
        if notification2 == None:
            notification2 = notification
        else:
            notification2 = notification2 % self.image
        head = self.head
        second_text = False
        # Number of history entries requested to check for a previous advice.
        edit_to_load = 10
        # exists() is asked only once, so that the history data and the page
        # text are always read (or skipped) together.
        if talk_page.exists():
            # The try block is used to prevent error if you use an old
            # wikipedia.py's version.
            try:
                history = talk_page.getVersionHistory(False, False, False, edit_to_load)
            except TypeError:
                history = talk_page.getVersionHistory(False, False, False)
            latest_edit = history[0]
            latest_user = latest_edit[2]
            wikipedia.output(u'The latest user that has written something is: %s' % latest_user)
            testoattuale = talk_page.get() # Actual text
            project = self.site.family.name
            botnick = config.usernames[project][self.site.lang]
            botolist = self.botolist + [botnick]
            # Short message only if a bot wrote last, unless that edit is
            # also the last loaded history entry (prevents the second
            # message if the bot also welcomed the user).
            second_text = latest_user in botolist and not latest_edit == history[-1]
        else:
            wikipedia.output(u'The user page is blank')
            testoattuale = wikipedia.translate(self.site, empty)
        if commx == None:
            commentox = commento2
        else:
            commentox = commx
        if second_text:
            talk_page.put("%s\n\n%s" % (testoattuale, notification2), comment = commentox, minorEdit = False)
        else:
            talk_page.put(testoattuale + head + notification, comment = commentox, minorEdit = False)

    def untaggedGenerator(self, untaggedProject, limt):
        """Yield wikipedia.Page objects for the results of the UntaggedImages tool.

        *untaggedProject* is "<lang>.<family>" (for example "it.wikipedia");
        a value without a dot is accepted and treated as the language alone.
        *limt* is the maximum number of results requested from the tool.
        Raises NothingFound when the fetched page gives no regex result.
        """
        self.limt = limt
        # partition() does not fail when there is no dot in the value.
        lang, _dot, family = untaggedProject.partition('.')
        project = '.%s' % family
        print("limt: " + str(self.limt))
        if lang == 'commons':
            link = 'http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php?wikifam=commons.wikimedia.org&since=-100d&until=&img_user_text=&order=img_timestamp&max=100&order=img_timestamp&format=html'
        else:
            link = 'http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php?wikilang=%s&wikifam=%s&since=-30d&order=img_timestamp&max=%s&ofs=0&max=%s' % (lang, project, limt, limt)
        #DEBUG:
        print(link)
        text = pageText(link)
        # NOTE(review): this pattern looks truncated (it has no capture group
        # and matches the empty string everywhere); the HTML it was meant to
        # match seems to have been lost -- restore it from the upstream file.
        regexp = r""".*?"""
        results = re.findall(regexp, text)
        if results == []:
            print(link)
            raise NothingFound('Nothing found! Try to use the tool by yourself to be sure that it works!')
        for result in results:
            yield wikipedia.Page(self.site, result)

    def regexGenerator(self, regexp, textrun):
        """Yield each distinct group(1) of *regexp* found in *textrun*, in order.

        The pattern is compiled with re.UNICODE and re.M and must contain at
        least one capture group. A closing message is printed when the text
        has been scanned completely.
        """
        done = list()
        r = re.compile(regexp, re.UNICODE|re.M)
        # finditer() always advances, so a pattern that can match the empty
        # string no longer loops forever on the same position.
        for m in r.finditer(textrun):
            image = m.group(1)
            if image not in done:
                done.append(image)
                yield image
        wikipedia.output(u"\t\t>> All images checked. <<")

    def checkImage(self, image):
        """Check whether *image* also exists on Commons.

        Returns False when the caller should skip the image (it is on
        Commons and the local page only shows the shared-upload notice);
        returns True in every other case. An image found on both projects
        is written to the report page, except on 'it' when its name contains
        "stemma".
        """
        self.image = image
        wikipedia.output(u'Checking if %s is on commons...' % image)
        commons = wikipedia.getSite('commons', 'commons')
        if not wikipedia.Page(commons, u'Image:%s' % image).exists():
            # Problems? No, return True
            return True
        wikipedia.output(u'%s is on commons!' % image)
        imagePage = wikipedia.ImagePage(self.site, 'Image:%s' % image)
        on_commons_text = imagePage.getImagePageHtml()
        # NOTE(review): the marker literal was destroyed in the pasted source
        # (only a broken, empty string was left). The value below is a
        # reconstruction of the shared-upload notice marker -- confirm it
        # against the upstream script / the HTML MediaWiki actually emits.
        if "<div class='sharedUploadNotice'>" in on_commons_text:
            wikipedia.output(u"But, the image doesn't exist on your project! Skip...")
            # Problems? Yes! We have to skip the check part for that image!
            # Because it's on commons but someone has added something on your project.
            return False
        elif 'stemma' in image.lower() and self.site.lang == 'it':
            wikipedia.output(u'%s has "stemma" inside, means that it\'s ok.' % image)
            return True # Problems? No, it's only not on commons but the image needs a check
        else:
            repme = "\n*:Image:%s is also on Commons: commons:Image:%s"
            self.report_image(self.image, self.rep_page, self.com, repme)
            # Problems? No, return True
            return True

    def report_image(self, image, rep_page = None, com = None, rep_text = None):
        """Append an entry for *image* to the report page, once.

        *rep_page*, *com* (edit summary) and *rep_text* (template with one or
        more "%s" placeholders, all filled with the image name) default to
        the values stored by __init__. Returns True when the entry has been
        added, False when the image name is already present on the page.
        Raises LogIsFull when the page text length has reached
        self.logFulNumber.
        """
        if rep_page == None:
            rep_page = self.rep_page
        if com == None:
            com = self.com
        if rep_text == None:
            rep_text = self.rep_text
        another_page = wikipedia.Page(self.site, rep_page)
        if another_page.exists():
            text_get = another_page.get()
        else:
            text_get = str()
        if len(text_get) >= self.logFulNumber:
            raise LogIsFull("The log page (%s) is full! Please delete the old images reported." % another_page.title())
        # Plain substring test: the image name is literal text, not a regex
        # (names may contain characters such as "(", "+" or "[").
        if image in text_get:
            wikipedia.output(u"%s is already in the report page." % image)
            return False
        # Adding the log :)
        # Fill every "%s" placeholder with the image name, however many the
        # template has, instead of guessing from its wording.
        rep_text = rep_text % ((image,) * rep_text.count('%s'))
        another_page.put(text_get + rep_text, comment = com, minorEdit = False)
        wikipedia.output(u"...Reported...")
        return True

    def takesettings(self):
        """Read the real-time settings page and return its blocks.

        Returns a list with one entry per settings block, each of the form
        [number, name, find_type, find, imagechanges, summary, head, text, mex]
        where number counts the blocks from 1. Returns None when no settings
        page is configured, the page does not exist or no block matches.
        """
        if self.settings == None:
            return None
        x = wikipedia.Page(self.site, self.settings)
        try:
            testo = x.get()
        except wikipedia.NoPage:
            wikipedia.output(u"The settings' page doesn't exist!")
            return None
        rxp = r"<------- ------->\n\*[Nn]ame ?= ?['\"](.*?)['\"]\n\*([Ff]ind|[Ff]indonly)=(.*?)\n\*[Ii]magechanges=(.*?)\n\*[Ss]ummary=['\"](.*?)['\"]\n\*[Hh]ead=['\"](.*?)['\"]\n\*[Tt]ext ?= ?['\"](.*?)['\"]\n\*[Mm]ex ?= ?['\"]?(.*?)['\"]?$"
        r = re.compile(rxp, re.UNICODE|re.M)
        lista = list()
        number = 1
        for m in r.finditer(testo):
            # NOTE(review): the groups are kept as returned by the regex; the
            # original wrapped each one in str(), which in Python 2 raises
            # UnicodeEncodeError on non-ASCII unicode text -- confirm that
            # Page.get() returns unicode.
            lista.append([number] + list(m.groups()))
            number += 1
        if not lista:
            wikipedia.output(u"You've set wrongly your settings, please take a look to the relative page. (run without them)")
            return None
        return lista

    def load(self, raw):
        """Return the distinct quoted items of the list written in *raw*.

        *raw* is text such as "['a', 'b']"; every item enclosed in single or
        double quotes and followed by ", " or "]" is returned once, in order
        of first appearance. An empty list is returned when nothing matches.
        """
        pattern = re.compile(r"""("|')(.*?)("|')(, |\])""", re.UNICODE)
        list_loaded = list()
        for found in pattern.finditer(raw):
            word = found.group(2)
            if word not in list_loaded:
                list_loaded.append(word)
        return list_loaded

# Driving the main class step by step was too long to repeat at every call
# site, so here is a function that does all the steps and can be called once ^__^

def report(newtext, image, notification, head, notification2 = None, unver = True, commx = None, bot_list = bot_list):
    """Tag *image* and notify its uploader, in one call.

    newtext       -- text appended to the image description (only when
                     *unver* is True)
    image         -- image name without namespace
    notification  -- message for the uploader's talk page
    head          -- heading placed before the message
    notification2 -- optional shorter message template (one placeholder for
                     the image name), see main.put_talk()
    unver         -- True to edit the image page; otherwise the page is only
                     read to find the uploader
    commx         -- optional edit summary for the talk page
    bot_list      -- translation table of bot user names

    Page-level problems (missing page, edit conflict, locked page) are
    reported on the console and end the call; nothing is raised for them.
    The talk page edit is retried once after an edit conflict.
    """
    site = wikipedia.getSite()
    botolist = wikipedia.translate(site, bot_list)
    run = main(site = site)
    run.general(newtext, image, notification, head, botolist)
    tagging = (unver == True)
    try:
        resPutMex = run.put_mex(tagging)
    except wikipedia.NoPage:
        if tagging:
            wikipedia.output(u"The page has been deleted! Skip!")
        else:
            wikipedia.output(u"The page has been deleted!")
        return
    except wikipedia.EditConflict:
        wikipedia.output(u"Edit conflict! Skip!")
        return
    except wikipedia.LockedPage:
        wikipedia.output(u"Page Locked. Skip!")
        return
    if resPutMex == False:
        return
    try:
        run.put_talk(notification, head, notification2, commx)
    except wikipedia.EditConflict:
        wikipedia.output(u"Edit Conflict! Retrying...")
        # One retry only. The original looped back to the top after a
        # successful retry, which tagged the image a second time.
        try:
            run.put_talk(notification, head, notification2, commx)
        except Exception:
            wikipedia.output(u"Another error... skipping the user..")

def _option_value(arg, option, question):
    """Return the text after "<option>:" in *arg*; ask the user when it is empty."""
    value = arg[len(option) + 1:]
    if not value:
        value = wikipedia.input(question)
    return value


def checkbot():
    """Parse the command line and run the image-checking loop.

    For every image supplied by the selected generator the description page
    is inspected; images without a recognisable license are tagged and their
    uploader is notified through report(). Unless a one-shot generator or
    -break is used, the whole check is repeated every -time seconds.

    Returns True when the loop ends normally and False when the run is
    refused (unsupported project, or -regex given without -page/-url).
    """
    # Command line configurable parameters
    repeat = True # Restart after having check all the images?
    limit = 80 # How many images check?
    time_sleep = 30 # How many time sleep after the check?
    skip_number = 0 # How many images to skip before checking?
    wait_number = 0 # How many time sleep before the check?
    commonsActive = False # Check if on commons there's an image with the same name?
    urlUsed = False # Use the url-related function instead of the new-pages generator
    regexGen = False # Use the regex generator
    untagged = False # Use the untagged generator
    generator = None # Set by -start, -regex and -cat; None means "default"
    skip_list = list() # Inizialize the skip list used below

    # Here below there are the parameters.
    for arg in wikipedia.handleArgs():
        if arg.startswith('-limit'):
            limit = int(_option_value(arg, '-limit', u'How many images do you want to check?'))
        elif arg.startswith('-time'):
            time_sleep = int(_option_value(arg, '-time', u'How many seconds do you want runs to be apart?'))
        elif arg == '-break':
            repeat = False
        elif arg == '-commons':
            commonsActive = True
        elif arg.startswith('-skip'):
            skip_number = int(_option_value(arg, '-skip', u'How many images do you want to skip?'))
        elif arg.startswith('-wait'):
            wait_number = int(_option_value(arg, '-wait', u'How many time do you want to wait before checking the images?'))
        elif arg.startswith('-start'):
            firstPageTitle = str(_option_value(arg, '-start', u'From witch page do you want to start?'))
            generator = wikipedia.getSite().allpages(start='Image:%s' % firstPageTitle)
            repeat = False
        elif arg.startswith('-page'):
            regexPageName = str(_option_value(arg, '-page', u'Which page do you want to use for the regex?'))
            repeat = False
            regexGen = True
        elif arg.startswith('-url'):
            regexPageUrl = str(_option_value(arg, '-url', u'Which url do you want to use for the regex?'))
            urlUsed = True
            repeat = False
            regexGen = True
        elif arg.startswith('-regex'):
            regexpToUse = str(_option_value(arg, '-regex', u'Which regex do you want to use?'))
            generator = 'regex'
            repeat = False
        elif arg.startswith('-cat'):
            catName = str(_option_value(arg, '-cat', u'In which category do I work?'))
            catSelected = catlib.Category(wikipedia.getSite(), 'Category:%s' % catName)
            generator = pagegenerators.CategorizedPageGenerator(catSelected)
            repeat = False
        elif arg.startswith('-untagged'):
            untagged = True
            projectUntagged = str(_option_value(arg, '-untagged', u'In which project should I work?'))

    # -regex needs a text source; without one the loop below would iterate
    # over the characters of the placeholder string.
    if generator == 'regex' and not regexGen:
        wikipedia.output(u"-regex must be used together with -page or -url.")
        return False

    # No generator chosen on the command line: check the new images.
    normal = generator is None
    # Skipping / waiting are active only when a non-zero amount was given.
    skip = skip_number != 0
    wait = wait_number != 0

    # Define the site.
    site = wikipedia.getSite()
    # Block of text to translate the parameters set above.
    image_namespace = "%s:" % site.image_namespace() # Example: "User_talk:"
    if untagged == True:
        unvertext = wikipedia.translate(site, n2_txt)
    else:
        unvertext = wikipedia.translate(site, n_txt)
    di = wikipedia.translate(site, delete_immediately)
    dih = wikipedia.translate(site, delete_immediately_head)
    din = wikipedia.translate(site, delete_immediately_notification)
    nh = wikipedia.translate(site, nothing_head)
    nn = wikipedia.translate(site, nothing_notification)
    dels = wikipedia.translate(site, del_comm)
    smwl = wikipedia.translate(site, second_message_without_license)
    TextFind = wikipedia.translate(site, txt_find)
    # Work on a copy, so that the language codes added below do not
    # accumulate in the shared translation table.
    hiddentemplate = list(wikipedia.translate(site, HiddenTemplate))
    # If there's an hidden template, change the used
    HiddenTN = wikipedia.translate(site, HiddenTemplateNotification)
    # A template as {{en is not a license! Adding also them in the whitelist template...
    for langK in wikipedia.Family('wikipedia').langs.keys():
        hiddentemplate.append('%s' % langK)
    # nothing = Defining an empty image description
    nothing = ['', ' ', ' ', ' ', '\n', '\n ', '\n ', '\n\n', '\n \n', ' \n', ' \n ', ' \n \n']
    # something = Minimal requirements for an image description.
    # If this fits, no tagging will take place (if there aren't other issues)
    # MIT license is ok on italian wikipedia, let also this here
    something = ['{{', "MIT license"] # Don't put "}}" here, please. Useless and can give problems.
    # Unused file extensions. Does not contain PDF.
    notallowed = ("xcf", "xls", "sxw", "sxi", "sxc", "sxd")

    # A little block-statement to ensure that the bot will not start with en-parameters
    if site.lang not in project_inserted:
        wikipedia.output(u"Your project is not supported by this script. You have to edit the script and add it!")
        wikipedia.stopme()
        # Really stop here: going on would run with another project's texts.
        return False

    # Some formatting for delete immediately template
    di = '\n%s' % di
    dels = dels % di

    # Reading the log of the new images if another generator is not given.
    if normal == True:
        if limit == 1:
            wikipedia.output(u"Retrieving the latest file for checking...")
        else:
            wikipedia.output(u"Retrieving the latest %d files for checking..." % limit)

    # Main Loop
    while 1:
        # Defining the Main Class.
        mainClass = main(site)
        # Untagged is True? Let's take that generator
        if untagged == True:
            generator = mainClass.untaggedGenerator(projectUntagged, limit)
            normal = False # Ensure that normal is False
        # Normal True? Take the default generator
        if normal == True:
            generator = pagegenerators.NewimagesPageGenerator(number = limit, site = site)
        # if urlUsed and regexGen, get the source for the generator
        if urlUsed == True and regexGen == True:
            textRegex = pageText(regexPageUrl)
        # Not an url but a wiki page as "source" for the regex
        elif regexGen == True:
            pageRegex = wikipedia.Page(site, regexPageName)
            try:
                textRegex = pageRegex.get()
            except wikipedia.NoPage:
                wikipedia.output(u"%s doesn't exist!" % pageRegex.title())
                textRegex = '' # No source, so the bot will quit later.
        # If generator is the regex' one, use your own Generator using an url or page and a regex.
        if generator == 'regex' and regexGen == True:
            generator = mainClass.regexGenerator(regexpToUse, textRegex)
        # Ok, We (should) have a generator, so let's go on.
        try:
            # Take the additional settings for the Project
            tupla_written = mainClass.takesettings()
        except wikipedia.Error:
            # Error? Settings = None
            wikipedia.output(u'Problems with loading the settigs, run without them.')
            tupla_written = None
        # Ensure that if the list given is empty it will be converted to "None"
        # (but it should be already done in the takesettings() function)
        if tupla_written == []:
            tupla_written = None
        if tupla_written != None:
            # Real-Time page loaded
            wikipedia.output(u'\t >> Loaded the real-time page... <<')
        else:
            # No settings found, No problem, continue.
            wikipedia.output(u'\t >> No additional settings found! <<')

        # Not the main, but the most important loop.
        for image in generator:
            # If I don't inizialize the generator, wait part and skip part are useless
            if wait:
                printWithTimeZone(u'Waiting %s seconds before checking the images,' % wait_number)
                # Let's sleep...
                time.sleep(wait_number)
                # Never sleep again (we are in a loop)
                wait = False
            # If the generator returns something that is not an image, simply skip it.
            if normal == False and regexGen == False:
                if image_namespace.lower() not in image.title().lower() and \
                   'image:' not in image.title().lower():
                    wikipedia.output(u'%s seems not an image, skip it...' % image.title())
                    continue
            # Deleting the namespace (useless here). Titles accepted above
            # through the generic "image:" prefix do not contain the local
            # namespace, so fall back to cutting at the first colon.
            # NOTE(review): with the regex generator `image` is a plain
            # string, so .title() is the string method -- confirm intent.
            title = image.title()
            if image_namespace in title:
                imageName = title.split(image_namespace)[1]
            else:
                imageName = title.split(':', 1)[-1]

            # Skip block
            if skip == True:
                # If the images to skip are more the images to check, make them the same number
                if skip_number > limit:
                    skip_number = limit
                # Print a starting message only if no images has been skipped
                if skip_list == []:
                    if skip_number == 1:
                        wikipedia.output(u'Skipping the first image:\n')
                    else:
                        wikipedia.output(u'Skipping the first %s images:\n' % skip_number)
                # If we still have pages to skip:
                if len(skip_list) < skip_number:
                    wikipedia.output(u'Skipping %s...' % imageName)
                    skip_list.append(imageName)
                    if skip_number == 1:
                        wikipedia.output('')
                        skip = False
                    continue
                else:
                    wikipedia.output('') # Print a blank line.
                    skip = False
            elif skip_list == []: # Skip must be false if we are here but
                                  # the user has set 0 as images to skip
                wikipedia.output(u'\t\t>> No images to skip...<<')
                skip_list.append('skip = Off') # Only to print it once

            # Check on commons if there's already an image with the same name
            if commonsActive == True:
                response = mainClass.checkImage(imageName)
                if response == False:
                    continue

            parentesi = False # parentesi are these in italian: { ( ) } []
            delete = False
            tagged = False
            extension = imageName.split('.')[-1] # get the extension from the image's name
            # Page => ImagePage
            p = wikipedia.ImagePage(site, image.title())
            # Get the text in the image (called g)
            try:
                g = p.get()
            except wikipedia.NoPage:
                wikipedia.output(u"Skipping %s because it has been deleted." % imageName)
                continue
            except wikipedia.IsRedirectPage:
                wikipedia.output(u"The file description for %s is a redirect?!" % imageName )
                continue

            # Is the image already tagged? If yes, no need to double-check, skip
            for i in TextFind:
                # If there are {{ use regex, otherwise no (if there's not the {{ may not be a template
                # and the regex will be wrong)
                if '{{' in i:
                    # "template:" (with the colon) is the optional namespace
                    # prefix, as in the hidden-template regex below.
                    regexP = re.compile(r'\{\{(?:template:|)%s ?(?:\||\n|\}) ?' % i.split('{{')[1].replace(' ', '[ _]'), re.I)
                    if regexP.search(g) is not None:
                        tagged = True
                elif i.lower() in g:
                    tagged = True

            # Deleting the useless template from the description (before adding something
            # in the image the original text will be reloaded, don't worry).
            hiddenTemplateFound = False
            if tagged == False:
                for l in hiddentemplate:
                    res = re.findall(r'\{\{(?:[Tt]emplate:|)%s(?: \n|\||\n|\})' % l.lower(), g.lower())
                    if res != []:
                        wikipedia.output(u'\03{yellow}A white template found, skipping the template...\03{default}')
                        if l != '' and l != ' ': # Check that l is not nothing or a space
                            # Deleting! (replace the template with nothing)
                            g = g.lower().replace('{{%s' % l, '')
                            hiddenTemplateFound = True

            for a_word in something: # something is the array with {{, MIT License and so on.
                if a_word in g:
                    # There's a template, probably a license (or I hope so)
                    parentesi = True
            # Is the extension allowed? (is it an image or f.e. a .xls file?)
            for parl in notallowed:
                if parl.lower() in extension.lower():
                    delete = True

            some_problem = False # If it has "some_problem" it must check
                                 # the additional settings.
            # if tupla_written, use additional settings
            if tupla_written != None:
                # In every tupla there's a setting configuration
                for tupla in tupla_written:
                    name = tupla[1]
                    find_tipe = tupla[2]
                    find = tupla[3]
                    find_list = mainClass.load(find)
                    imagechanges = tupla[4]
                    if imagechanges.lower() == 'false':
                        imagestatus = False
                    elif imagechanges.lower() == 'true':
                        imagestatus = True
                    else:
                        wikipedia.output(u"Error! Imagechanges set wrongly!")
                        tupla_written = None
                        break
                    summary = tupla[5]
                    head_2 = tupla[6]
                    text = tupla[7]
                    text = text % imageName
                    mexCatched = tupla[8]
                    wikipedia.setAction(summary)
                    for k in find_list:
                        if find_tipe.lower() == 'findonly':
                            # The whole description must be exactly the text.
                            matched = k.lower() == g.lower()
                        elif find_tipe.lower() == 'find':
                            # The text only has to appear in the description.
                            matched = k.lower() in g.lower()
                        else:
                            matched = False
                        if matched:
                            some_problem = True
                            text_used = text
                            head_used = head_2
                            imagestatus_used = imagestatus
                            name_used = name
                            summary_used = summary
                            mex_used = mexCatched
                            # Further items of this block would store the
                            # same values, so one hit is enough.
                            break

            # If the image exists (maybe it has been deleting during the oder
            # checking parts or something, who knows? ;-))
            if not p.exists():
                continue

            # Here begins the check block.
            if tagged == True:
                # Tagged? Yes, skip.
                printWithTimeZone(u'\03{yellow}%s is already tagged...\03{default}' % imageName)
                continue
            if some_problem == True:
                if mex_used in g:
                    wikipedia.output(u'Image already fixed. Skip.')
                    continue
                wikipedia.output(u"The image description for %s contains %s..." % (imageName, name_used))
                if mex_used.lower() == 'default':
                    mex_used = unvertext
                if imagestatus_used == False:
                    reported = mainClass.report_image(imageName)
                else:
                    reported = True
                if reported == True:
                    report(mex_used, imageName, text_used, "\n%s\n" % head_used, None, imagestatus_used, summary_used)
                else:
                    wikipedia.output(u"Skipping the image...")
                continue
            if parentesi == True:
                printWithTimeZone(u"\03{green}%s seems ok,\03{default}" % imageName)
                continue
            if delete == True:
                wikipedia.output(u"%s is not a file!" % imageName)
                # Modify summary text
                wikipedia.setAction(dels)
                canctext = di % extension
                notification = din % imageName
                head = dih
                report(canctext, imageName, notification, head)
                continue
            # No license at all: the two cases differ only in the message.
            if g in nothing:
                wikipedia.output(u"\03{red}The image description for %s does not contain a license template!\03{default}" % imageName)
            else:
                wikipedia.output(u"\03{red}%s has only text and not the specific license...\03{default}" % imageName)
            if hiddenTemplateFound and HiddenTN not in (None, '', ' '):
                notification = HiddenTN % imageName
            else:
                notification = nn % imageName
            head = nh
            report(unvertext, imageName, notification, head, smwl)

        # A little block to perform the repeat or to break.
        if repeat:
            printWithTimeZone(u"\03{lightblue}Waiting for %s seconds,\03{default}" % time_sleep)
            time.sleep(time_sleep)
        else:
            wikipedia.output(u"\t\t\t>> STOP! <<")
            return True # Exit

# Here is the main loop: I take all the images (by name) and then I check them.

if __name__ == "__main__":
    # Run the bot; wikipedia.stopme() is called exactly once on every exit
    # path (normal end, BadTitle or any other exception) by the finally
    # clause, so the BadTitle handler only needs to print the message.
    try:
        checkbot()
    except wikipedia.BadTitle:
        wikipedia.output(u"Wikidown or server's problem, quit")
    finally:
        wikipedia.stopme()