User:OrphanBot/orphanbot.pl
The source code for OrphanBot's image-removal task. Requires libBot.pm and Pearle.pm.
#!/usr/bin/perl
# OrphanBot
# A bot to remove images from pages in preparation for deletion
use strict;
use warnings;
use utf8;
use Date::Calc qw(Delta_Days Decode_Month Month_to_Text Today);
use Getopt::Long;
use libBot;
# ---- Configuration ----

# Directory holding the bot's notification list, whitelist, log and cookie files.
my $homedir = '/path/to/bot/working/directory';

# When true, canned image lists are used instead of category listings and the
# image description page is not edited.
my $test = 0;

# Allow talkpage messages to stop the bot?
my $permit_interruptions = 1;

# ---- Run state ----

# Title of the image most recently fetched by the main loop.
my $last_image;

# Not referenced in the visible part of the script.
my @last_images;

# Task name, set from the --task command-line option.
# One of "source", "copyright", "unsure", "special", "fairuse", "disputed".
my $task = "";

# Per-uploader counter for this run: false = not yet notified, 1 = notified once,
# larger values = further notices were suppressed.
my %users_notified;

# "user,image" pairs, used to ensure that no user is ever notified about an image twice.
my %notifications;

# Users to never notify.
my %dont_notify;

# ---- Parameters filled in by the selected task ----
my $remove_type;
my $removal_comment;            # Edit comment handed to RemoveImageFromPage
my $removal_prefix;             # Prefix handed to RemoveImageFromPage
my @template_match;             # Templates an image must use to be processed
my $uploader_warning;           # Opening part of the talk-page warning template
my $uploader_warning_summary;   # Edit summary for the talk-page warning
my $write_remove_log;           # True: list removals on the image description page
my $limit_by_date;              # True: only remove images that are at least four days old

GetOptions('task=s' => \$task);
# Generate the signature text appended to an uploader warning.
# Returns " ~~~~~" for the "source" and "copyright" tasks and " -- ~~~~~"
# for every other task.  Reads the file-level $task; takes no arguments.
sub sig
{
    my $plain_signature = ($task eq 'source' || $task eq 'copyright');
    return " ~~~~~" if $plain_signature;
    return " -- ~~~~~";
}
# Load the saved "user,image" notification records and the never-notify list
# from the bot's working directory.  loadNotificationList is defined outside
# this section.
%notifications = loadNotificationList("$homedir/orphanbot.note");
%dont_notify = loadNotificationList("$homedir/orphanbot.whitelist");
# Initialise Pearle with the log file and cookie file paths, then configure it.
# NOTE(review): the first two init() arguments and the username value below
# look like credentials that were blanked out before publication -- confirm
# and supply real values before running.
Pearle::init("
", " ", "$homedir/orphanbot.log","$homedir/cookies.pearle.txt"); Pearle::config(nullOK => 1, printlevel => 4);
# Set the username (config is unqualified; presumably exported by libBot --
# TODO confirm), then log in.
config(username => "
"); if(!Pearle::login())
{
# Login failed: end the program without processing anything.
# NOTE(review): exits silently with status 0.
exit;
}
#while(1)
{
# Working state for this run:
#   @images          images listed for the selected task
#   $image           loop variable for the image being processed
#   $edited          set when the current image led to a page edit
#   $images_removed  removal counter (never incremented in this section)
my (@images, $image);
my ($edited, $images_removed) = (0, 0);

# Mark the start of the run in the bot's warning log.
botwarnlog("=== Beginning set at " . time() . " for task '$task' ===\n");
{
    # Per-task settings, keyed by the value of --task.  Each entry names the
    # category to scan, the image list used in test mode, and the values
    # copied into the task parameter variables below.
    my %settings_for = (
        source => {
            category       => "Category:All images with unknown source",
            test_images    => ["Image:Nosuchimage.jpg"],
            remove_type    => 'normal',
            comment        => "Removing image with no source information. Such images that are older than seven days may be deleted at any time.",
            prefix         => "Unsourced image removed:",
            templates      => ["Template:Di-no source", "Template:No copyright holder", "Template:Di-no source no license"],
            warning        => "{{subst:User:OrphanBot/nosource|",
            summary        => "You've uploaded an unsourced image",
            write_log      => 1,
            limit_by_date  => 1,
        },
        copyright => {
            category       => "Category:All images with unknown copyright status",
            test_images    => [""],
            remove_type    => 'normal',
            comment        => "Removing image with no copyright information. Such images that are older than seven days may be deleted at any time.",
            prefix         => "Image with unknown copyright status removed:",
            templates      => ["Template:Di-no license", "Template:No copyright information", "Template:Di-no source no license", "Template:Don't know", "Template:No license needing editor assistance", "Template:Di-no permission"],
            warning        => "{{subst:User:OrphanBot/nocopyright|",
            summary        => "You've uploaded an image with unknown copyright",
            write_log      => 1,
            limit_by_date  => 1,
        },
    );

    my $settings = $settings_for{$task};

    # Any task name without an entry above is logged and ends the program.
    if (!defined $settings)
    {
        Pearle::myLog(0, "Unknown task: $task\n");
        exit;
    }

    # Test mode uses the canned list; otherwise read the category.
    @images = $test
        ? @{ $settings->{test_images} }
        : Pearle::getCategoryImages($settings->{category});

    $remove_type              = $settings->{remove_type};
    $removal_comment          = $settings->{comment};
    $removal_prefix           = $settings->{prefix};
    @template_match           = @{ $settings->{templates} };
    $uploader_warning         = $settings->{warning};
    $uploader_warning_summary = $settings->{summary};
    $write_remove_log         = $settings->{write_log};
    $limit_by_date            = $settings->{limit_by_date};
}
# An empty image list means there is nothing to process: log it and stop.
unless (@images)
{
    Pearle::myLog(2, "Category is empty.\n");
    exit;
}
# Process the task's images one at a time.  For each image:
#   1. fetch its image info, categories, templates and usage list in one API query;
#   2. stop the whole run if the bot has talk-page messages (when
#      $permit_interruptions is set);
#   3. skip images that are hosted on Commons, deleted, lacking a file, or not
#      carrying one of the templates in @template_match;
#   4. warn the uploader unless IsNotified reports true or the uploader
#      was already warned during this run;
#   5. remove the image from the pages that use it, provided the date returned
#      by getDate is at least four days old or $limit_by_date is off;
#   6. when $write_remove_log is set, append the list of pages it was removed
#      from to the image description page.
# A parse error on the image title also stops the whole run.
IMAGE: foreach $image (@images)
{
    my @pages = ();
    my $page_remove_log = '';
    my $full_comment = "";
    my ($day, $month, $year);

    Pearle::myLog(2, "Processing image $image\n");

    # Fetch an image page
    my $image_data = Pearle::APIQuery(titles => [$image], prop => ['imageinfo', 'categories', 'templates'],
        iiprop => ['user', 'sha1', 'comment'],
        cllimit => 500,
        tllimit => 500,
        list => 'imageusage',
        iutitle => $image,
        iunamespace => [0, 10, 12, 14, 100],
        meta => 'userinfo',    # Do I have talkpage messages?
    );
    next IMAGE if(!defined($image_data));

    $last_image = $image;

    if($permit_interruptions and DoIHaveMessages($image_data))
    {
        Pearle::myLog(1, "Talkpage message found; exiting on image $image.\n");
        last IMAGE;
    }

    # Images from Commons
    if($image_data =~ /imagerepository="shared"/)
    {
        Pearle::myLog(2, "*Commons image :$image found\n");
        botwarnlog("*Commons image :$image found\n");
        next IMAGE;
    }

    # Check for image existence
    if($image_data =~ /missing=""/)
    {
        Pearle::myLog(2, "Image :$image has been deleted.\n");
        next IMAGE;
    }

    # The odd case of an image description page without an image
    if($image_data =~ /imagerepository=""/)
    {
        Pearle::myLog(2, "*Image :$image does not appear to exist.\n");
        botwarnlog("*Image :$image does not appear to exist.\n");
        next IMAGE;
    }

    # Check for image copyright tag
    if((scalar(@template_match) > 0) and (not usesTemplate($image_data, @template_match)))
    {
        Pearle::myLog(2, "*Image :$image in category does not have an appropriate template\n");
        botwarnlog("*Image :$image in category does not have an appropriate template\n");
        next IMAGE;
    }

    # Pull the bare file name out of the title and turn it into a regex
    # fragment.  The conversion is skipped when the title did not match, so an
    # undefined value is never handed to MakeWikiRegex.
    my ($raw_image) = $image =~ /Image:(.*)/;
    if(defined($raw_image))
    {
        $raw_image = MakeWikiRegex($raw_image);
    }

    # Sanity check -- done before $raw_image is used to build $image_regex
    if(!defined($raw_image) or $image !~ /$raw_image/)
    {
        my $shown_name = defined($raw_image) ? $raw_image : '';
        Pearle::myLog(1, "Parse error on image :$image ($shown_name)\n");
        botwarnlog("*Parse error on image :$image ($shown_name)\n");
        last IMAGE;
    }

    # Titles without a common picture extension may also be referenced through
    # the Media: prefix.
    # NOTE(review): "(:?" permits an optional leading colon and forms a capture
    # group; if a non-capturing "(?:" was intended, confirm against
    # RemoveImageFromPage before changing it.
    my $image_regex;
    if($image !~ /(\.jpg|\.jpeg|\.png|\.gif|\.svg)$/i)
    {
        $image_regex = "[ _]*(:?[Ii]mage|[Mm]edia)[ _]*:[ _]*${raw_image}[ _]*";
    }
    else
    {
        $image_regex = "[ _]*[Ii]mage[ _]*:[ _]*${raw_image}[ _]*";
    }
    Pearle::myLog(2, "Image regex: $image_regex\n");

    ($day, $month, $year) = getDate($image_data);

    # Notify the user
    my $uploader = GetImageUploader($image_data);
    my $is_notified = 0;
    if(defined($uploader_warning) and defined($uploader))
    {
        $is_notified = IsNotified($uploader, $image_regex, $image, \%notifications, \%dont_notify);
    }
    if(defined($uploader_warning) and !$is_notified)
    {
        if(defined($uploader))
        {
            if(!($users_notified{$uploader}))
            {
                # First notice for this uploader in this run: post it and record it.
                Pearle::myLog(3, "Warning user $uploader\n");
                wikilog("User talk:$uploader", "${uploader_warning}${image}}}" . sig() . "\n", $uploader_warning_summary);
                Pearle::limit();
                $notifications{"$uploader,$image"} = 1;
                $users_notified{$uploader} = 1;
            }
            else
            {
                # Already warned during this run: only count the repeat.
                Pearle::myLog(3, "User $uploader has already been warned repeatedly\n");
                $users_notified{$uploader} += 1;
            }
        }
        else
        {
            Pearle::myLog(1, "Could not determine uploader for :$image\n");
        }
    }

    if(!Date::Calc::check_date($year, Decode_Month($month), $day))
    {
        Pearle::myLog(1, "Date error for image :$image\n");
        botwarnlog("*Date error for image :$image\n");
        next IMAGE;
    }

    if((Delta_Days($year, Decode_Month($month), $day, Today() ) >= 4) or !($limit_by_date))
    {
        @pages = GetPageList($image_data);
        if(scalar(@pages) == 0)
        {
            Pearle::myLog(2, "Image $image may already be orphaned\n");
        }
        if(scalar(@pages) > 5)
        {
            botwarnlog("*Found image :$image on " . scalar(@pages) . " content pages\n");
        }
        foreach my $page (@pages)
        {
            print "Page for removal: $page\n";
            my $parsed_removal_comment = $removal_comment;
            # NOTE(review): as written this substitution replaces "image" with
            # itself and changes nothing -- confirm what replacement was intended.
            $parsed_removal_comment =~ s/image/image/;
            # Pearle::limit() is only called when the page was actually changed.
            if(my $hits = RemoveImageFromPage($image, $page, $image_regex, $removal_prefix, $parsed_removal_comment))
            {
                $page_remove_log .= "#$page\n";
                Pearle::myLog(2, "Removed image $image from article $page $hits times\n");
                Pearle::limit();
                $edited = 1;
            }
        }
    }
    else
    {
        Pearle::myLog(2, "Recent image: notification only\n");
    }

    # Update image description page: log all removals there
    if($write_remove_log and $page_remove_log ne "")
    {
        my $text = "\n\nRemoved from the following pages:\n";
        $text .= FixupLinks($page_remove_log);
        $text .= "--~~~~\n";
        $full_comment .= "Listing pages that the image has been removed from";
        print "Remove log\n";

        if($test)
        {
            notelog("Edited image description page\n");
        }
        else
        {
            my $wikipage = Pearle::getPage( $image);
            my $pagetext = $wikipage->getEditableText();
            $pagetext .= $text;
            $wikipage->setEditableText($pagetext);
            Pearle::postPage( $wikipage, $full_comment, 0);
        }
    }

#exit if($images_removed >= 100);

    # Pause longer after an image that caused page edits.
    if($edited)
    {
        print "Sleeping for 30 seconds\n";
        sleep(30);
    }
    else
    {
        print "Sleeping for two seconds\n";
        sleep(2);
    }
    $edited = 0;
}
# Write the notification records back to the file they were loaded from at
# start-up ("$homedir/orphanbot.note"), so the next run sees them.  The path
# was previously hard-coded to a different directory than the one read from.
notelog("Saving notification list\n");
saveNotificationList("$homedir/orphanbot.note", %notifications);
Pearle::myLog(2, "Finished with category.\n");
}