User:Stefan2bot/shadowsCommons.py

import wikipedia

import catlib

import sys

# Site object for Wikimedia Commons, and the site object returned by
# getSite() with no arguments (presumably the wiki configured for the bot;
# confirm against the framework's user configuration).
commons = wikipedia.getSite("commons", "commons")
local = wikipedia.getSite()

# Edit summary used for every tagging edit, and the wikitext that is
# prepended to the file description page.
editSummary = "Robot: Marking a file which shadows a file on Wikimedia Commons"
shadowTag = "{{ShadowsCommons}}\n"

# The bot will edit at most maxEdits pages, even if more shadows are found.
# Set to negative for an infinite number of edits (the counter is only
# compared against zero and only ever decremented).
# The edit count isn't 100% accurate.
maxEdits = 47

def imageHash(image):
    """Return the hash value that the given image page reports via getHash()."""
    digest = image.getHash()
    return digest

def imageExists(image):
    """Return True if an image exists locally.

    The page must be an image page and must exist, and its file must not be
    reported as being on Commons; in every other case False is returned.
    """
    if not image.isImage():
        return False
    if not image.exists():
        return False
    return not image.fileIsOnCommons()

def isDifferent(filename):
    """Tell whether a local file shadows a different file on Commons.

    Returns True if both projects have images with the same file name but
    with different SHA1 hash values. Returns False otherwise, including when
    either page is a redirect (a notice is printed in that case).
    """
    onCommons = wikipedia.ImagePage(commons, filename)
    onLocal = wikipedia.ImagePage(local, filename)

    # It is still necessary to check that the image exists locally: it might
    # have been deleted after the script started.
    # NOTE(review): imageExists() returns False whenever fileIsOnCommons() is
    # true. Confirm that call is false for a page object on the Commons site
    # itself; otherwise this function can never return True.
    if not (imageExists(onCommons) and imageExists(onLocal)):
        return False

    # All redirects are skipped. Not sure what people wish to do with those.
    # Both notices report the local page title.
    notice = None
    if onLocal.isRedirectPage():
        notice = "Redirect which shadows Commons: "
    elif onCommons.isRedirectPage():
        notice = "Commons redirect shadowed by Wikipedia: "
    if notice is not None:
        print(notice + onLocal.title().encode("utf-8"))
        return False

    localDigest = imageHash(onLocal)
    commonsDigest = imageHash(onCommons)
    return localDigest != commonsDigest

def tagFile(file):
    """Prepend {{ShadowsCommons}} to a local file page that shadows Commons.

    file -- full page title including the namespace prefix, as a unicode
            string (the script calls this with u"File:" + name).

    Returns True only when the page was saved with the tag. Returns False
    when the file does not shadow a different Commons file, when the edit
    budget (maxEdits) is exactly zero, when the page is already in the
    shadow-tracking category, when the bot is not logged in as Stefan2bot,
    or when saving the page fails.
    """
    if not isDifferent(file) or maxEdits == 0:
        return False

    localfile = wikipedia.ImagePage(local, file)

    # Pages already in the tracking category are left alone (presumably they
    # have been tagged before).
    shadowCategory = "Category:Wikipedia files that shadow a file on Wikimedia Commons"
    for cat in localfile.categories(False, True):
        if cat.title() == shadowCategory:
            return False

    fileInfo = shadowTag + localfile.get()

    # Refuse to edit under any other account.
    if local.loggedInAs() != "Stefan2bot":
        return False

    print(fileInfo)

    # Use the page object's title for messages. `file` is a plain string, so
    # file.title() would be the string title-casing method and would log a
    # case-mangled name.
    pageTitle = localfile.title().encode("utf-8")
    try:
        localfile.put(fileInfo, editSummary)
    except Exception:
        # A failed save must not be logged or counted as a tagged page.
        # KeyboardInterrupt/SystemExit are deliberately not caught.
        print("Error: Can't edit " + pageTitle)
        return False

    sys.stderr.write(pageTitle + " tagged with {{ShadowsCommons}}\n")
    return True

  1. All file names are listed in the file conflicts.txt. One file name per line, without namespace. Designed to support both commercial and non-commercial line breaks.

fileList = open("conflicts.txt", "r")

for line in fileList.readlines():

print line

replaced = line.decode("utf-8").replace(u"\n", u"").replace(u"\r", u"")

if tagFile(u"File:" + replaced):

maxEdits -= 1