User:OrphanBot/orphanbot.pl

The source code for OrphanBot's image-removal task. Requires libBot.pm and Pearle.pm.

#!/usr/bin/perl
# OrphanBot
# A bot to remove images from pages in preparation for deletion

use strict;

use warnings;

use utf8;

use Date::Calc qw(Delta_Days Decode_Month Month_to_Text Today);

use Getopt::Long;

use libBot;

# ---------------------------------------------------------------------------
# File-level configuration and state shared by the rest of the script.
# ---------------------------------------------------------------------------

# Directory that holds the bot's state files (notification list, whitelist,
# log file and cookie file).
my $homedir = '/path/to/bot/working/directory';

# Test mode: when true, the main block uses a fixed image list instead of
# fetching the category, and logs instead of editing image description pages.
my $test = 0;

my $permit_interruptions = 1; # Allow talkpage messages to stop the bot?

# Most recent image the main loop started processing.
my $last_image = undef;

# NOTE(review): not referenced anywhere in the visible code.
my @last_images;

# Task selected with --task. The historical list is "source", "copyright",
# "unsure", "special", "fairuse", "disputed"; the main block visible in this
# file only handles "source" and "copyright".
my $task = "";

# Users notified during this run. 0 or undef = not notified; 1 = notified once;
# higher values count later images from the same uploader that were skipped.
my %users_notified;

# "user,image" pairs, used to ensure that no user is ever notified about an
# image twice.
my %notifications;

# Users to never notify.
my %dont_notify = ();

# Parameters that change per task; filled in once the task is known.
my ($remove_type, $removal_comment, $removal_prefix, @template_match, $uploader_warning, $uploader_warning_summary, $write_remove_log, $limit_by_date);

# GetOptions returns false when the command line could not be parsed (for
# example an unknown option, or --task without a value). Stop right away
# instead of carrying on with a half-parsed command line.
GetOptions('task=s' => \$task)
    or die "Usage: $0 --task=<task>\n";

# Generate a signature

# Build the signature text appended to uploader talk-page warnings.
#
# Takes no arguments; reads the file-level $task.
# Returns " ~~~~~" when the task is 'source' or 'copyright', and
# " -- ~~~~~" for any other task value.
sub sig
{
    my %plain_signature_for = map { $_ => 1 } qw(source copyright);

    return $plain_signature_for{$task} ? " ~~~~~" : " -- ~~~~~";
}

# Load the persistent lists: who has already been told about which image,
# and which users must never be notified.
%notifications = loadNotificationList("$homedir/orphanbot.note");
%dont_notify   = loadNotificationList("$homedir/orphanbot.whitelist");

# Set up the Pearle library with the log file and cookie file kept in
# $homedir. NOTE(review): the two empty leading arguments and the empty
# username look like credentials blanked out for publication -- confirm.
Pearle::init("", "", "$homedir/orphanbot.log", "$homedir/cookies.pearle.txt");
Pearle::config(nullOK => 1, printlevel => 4);
config(username => "");

# Without a successful login there is nothing the bot can do.
Pearle::login() or exit;

# while(1)

{

# Per-set state. @images and $image feed the image loop that follows;
# $edited records whether the current image caused any page edit.
my @images;
my $image;
my $edited = 0;
my $images_removed = 0;

botwarnlog("=== Beginning set at " . time() . " for task '$task' ===\n");

{
    # One entry per supported task: the category to work through, the image
    # list used in test mode, and the task-specific texts and templates.
    my %settings_for = (
        source => {
            category    => "Category:All images with unknown source",
            test_images => ["Image:Nosuchimage.jpg"],
            comment     => "Removing image with no source information. Such images that are older than seven days may be deleted at any time.",
            prefix      => "Unsourced image removed:",
            templates   => ["Template:Di-no source", "Template:No copyright holder", "Template:Di-no source no license"],
            warning     => "{{subst:User:OrphanBot/nosource|",
            summary     => "You've uploaded an unsourced image",
        },
        copyright => {
            category    => "Category:All images with unknown copyright status",
            test_images => [""],
            comment     => "Removing image with no copyright information. Such images that are older than seven days may be deleted at any time.",
            prefix      => "Image with unknown copyright status removed:",
            templates   => ["Template:Di-no license", "Template:No copyright information", "Template:Di-no source no license", "Template:Don't know", "Template:No license needing editor assistance", "Template:Di-no permission"],
            warning     => "{{subst:User:OrphanBot/nocopyright|",
            summary     => "You've uploaded an image with unknown copyright",
        },
    );

    my $settings = $settings_for{$task};

    # Any task without an entry above is rejected.
    if(!defined($settings))
    {
        Pearle::myLog(0, "Unknown task: $task\n");
        exit;
    }

    # Test mode works on the fixed list; otherwise fetch the category.
    @images = $test
        ? @{ $settings->{test_images} }
        : Pearle::getCategoryImages($settings->{category});

    $remove_type              = 'normal';
    $removal_comment          = $settings->{comment};
    $removal_prefix           = $settings->{prefix};
    @template_match           = @{ $settings->{templates} };
    $uploader_warning         = $settings->{warning};
    $uploader_warning_summary = $settings->{summary};
    $write_remove_log         = 1;
    $limit_by_date            = 1;
}

# Nothing to do for an empty category.
unless(@images)
{
    Pearle::myLog(2, "Category is empty.\n");
    exit;
}

# Main per-image pass. For every image in @images this loop:
#   1. fetches image info, categories, templates, image usage and the bot's
#      own userinfo in a single API query;
#   2. stops the run when the bot has talk-page messages (if
#      $permit_interruptions is set);
#   3. skips Commons images, deleted images, description pages without a
#      file, and images that carry none of the templates in @template_match;
#   4. warns the uploader unless that user/image pair was already handled;
#   5. removes the image from the pages using it when the date from getDate()
#      is at least four days old (or at any age when $limit_by_date is off);
#   6. lists the removals on the image description page when
#      $write_remove_log is set.
# It then sleeps for 30 seconds after an image that caused edits, and for
# two seconds otherwise.
IMAGE: foreach $image (@images)
{
    my $image_regex;
    my @pages = ();
    my $page_remove_log = '';
    my $full_comment = "";
    my ($day, $month, $year);

    Pearle::myLog(2, "Processing image $image\n");

    # Fetch an image page
    my $image_data = Pearle::APIQuery(titles => [$image], prop => ['imageinfo', 'categories', 'templates'],
        iiprop => ['user', 'sha1', 'comment'],
        cllimit => 500,
        tllimit => 500,
        list => 'imageusage',
        iutitle => $image,
        iunamespace => [0, 10, 12, 14, 100],
        meta => 'userinfo', # Do I have talkpage messages?
    );
    next if(!defined($image_data));

    $last_image = $image;

    # A talk-page message ends the run; the code after the loop still
    # saves the notification list.
    if($permit_interruptions and DoIHaveMessages($image_data))
    {
        Pearle::myLog(1, "Talkpage message found; exiting on image $image.\n");
        last;
    }

    # Images from Commons
    if($image_data =~ /imagerepository="shared"/)
    {
        Pearle::myLog(2, "*Commons image :$image found\n");
        botwarnlog("*Commons image :$image found\n");
        next;
    }

    # Check for image existence
    if($image_data =~ /missing=""/)
    {
        Pearle::myLog(2, "Image :$image has been deleted.\n");
        next;
    }

    # The odd case of an image description page without an image
    if($image_data =~ /imagerepository=""/)
    {
        Pearle::myLog(2, "*Image :$image does not appear to exist.\n");
        botwarnlog("*Image :$image does not appear to exist.\n");
        next;
    }

    # Check for image copyright tag
    if((scalar(@template_match) > 0) and (not usesTemplate($image_data, @template_match)))
    {
        Pearle::myLog(2, "*Image :$image in category does not have an appropriate template\n");
        botwarnlog("*Image :$image in category does not have an appropriate template\n");
        next;
    }

    # Pull the bare file name out of the title. Only hand it to
    # MakeWikiRegex when the title actually had an "Image:" prefix, so an
    # undefined value is never passed on or interpolated.
    my ($raw_image) = $image =~ /Image:(.*)/;
    $raw_image = MakeWikiRegex($raw_image) if(defined($raw_image));

    # Sanity check, done before the value is used to build the removal
    # regex. The empty-string test matters because Perl treats an empty
    # pattern as "reuse the last successful pattern", which would make the
    # match below meaningless. A parse error stops the whole run.
    if(!defined($raw_image) or $raw_image eq '' or $image !~ /$raw_image/)
    {
        my $shown = defined($raw_image) ? $raw_image : '';
        Pearle::myLog(1, "Parse error on image :$image ($shown)\n");
        botwarnlog("*Parse error on image :$image ($shown)\n");
        last;
    }

    # Titles without a common picture extension may also be linked through
    # the Media: prefix.
    # NOTE(review): "(:?" in the first pattern looks like a typo for "(?:".
    # As written it is a capturing group that allows an optional leading
    # colon before "Image". Left untouched because RemoveImageFromPage is
    # not visible here and may rely on the current grouping.
    if($image !~ /(\.jpg|\.jpeg|\.png|\.gif|\.svg)$/i)
    {
        $image_regex = "[ _]*(:?[Ii]mage|[Mm]edia)[ _]*:[ _]*${raw_image}[ _]*";
    }
    else
    {
        $image_regex = "[ _]*[Ii]mage[ _]*:[ _]*${raw_image}[ _]*";
    }

    Pearle::myLog(2, "Image regex: $image_regex\n");

    ($day, $month, $year) = getDate($image_data);

    # Notify the user
    my $uploader = GetImageUploader($image_data);
    my $is_notified = 0;
    if(defined($uploader_warning) and defined($uploader))
    {
        $is_notified = IsNotified($uploader, $image_regex, $image, \%notifications, \%dont_notify);
    }

    if(defined($uploader_warning) and !$is_notified)
    {
        if(defined($uploader))
        {
            if(!($users_notified{$uploader}))
            {
                # First warning for this uploader in this run.
                Pearle::myLog(3, "Warning user $uploader\n");
                wikilog("User talk:$uploader", "${uploader_warning}${image}}}" . sig() . "\n", $uploader_warning_summary);
                Pearle::limit();
                $notifications{"$uploader,$image"} = 1;
                $users_notified{$uploader} = 1;
            }
            else
            {
                # Already warned during this run: only count it.
                Pearle::myLog(3, "User $uploader has already been warned repeatedly\n");
                $users_notified{$uploader} += 1;
            }
        }
        else
        {
            Pearle::myLog(1, "Could not determine uploader for :$image\n");
        }
    }

    if(!Date::Calc::check_date($year, Decode_Month($month), $day))
    {
        Pearle::myLog(1, "Date error for image :$image\n");
        botwarnlog("*Date error for image :$image\n");
        next;
    }

    # Images younger than four days only get the notification above,
    # unless date limiting is switched off.
    if((Delta_Days($year, Decode_Month($month), $day, Today() ) >= 4) or !($limit_by_date))
    {
        @pages = GetPageList($image_data);
        if(scalar(@pages) == 0)
        {
            Pearle::myLog(2, "Image $image may already be orphaned\n");
        }
        if(scalar(@pages) > 5)
        {
            botwarnlog("*Found image :$image on " . scalar(@pages) . " content pages\n");
        }

        foreach my $page (@pages)
        {
            print "Page for removal: $page\n";

            # Pass a copy so the task-wide comment stays untouched.
            my $parsed_removal_comment = $removal_comment;

            if(my $hits = RemoveImageFromPage($image, $page, $image_regex, $removal_prefix, $parsed_removal_comment)) # Don't limit if we just touched the article
            {
                $page_remove_log .= "#$page\n";
                Pearle::myLog(2, "Removed image $image from article $page $hits times\n");
                Pearle::limit();
                $edited = 1;
            }
        }
    }
    else
    {
        Pearle::myLog(2, "Recent image: notification only\n");
    }

    # Update image description page: log all removals there, but only when
    # something was actually removed.
    if($write_remove_log and $page_remove_log ne "")
    {
        my $text = "\n\nRemoved from the following pages:\n";
        $text .= FixupLinks($page_remove_log);
        $text .= "--~~~~\n";
        $full_comment .= "Listing pages that the image has been removed from";
        print "Remove log\n";

        if($test)
        {
            notelog("Edited image description page\n");
        }
        else
        {
            my $wikipage = Pearle::getPage( $image);

            # NOTE(review): whether getPage can return undef is not visible
            # here. The guard keeps a failed fetch from killing the run
            # before the notification list is saved after the loop.
            if(defined($wikipage))
            {
                my $pagetext = $wikipage->getEditableText();
                $pagetext .= $text;
                $wikipage->setEditableText($pagetext);
                Pearle::postPage( $wikipage, $full_comment, 0);
            }
            else
            {
                Pearle::myLog(1, "Could not fetch description page for :$image\n");
            }
        }
    }

    # exit if($images_removed >= 100);

    if($edited)
    {
        print "Sleeping for 30 seconds\n";
        sleep(30);
    }
    else
    {
        print "Sleeping for two seconds\n";
        sleep(2);
    }
    $edited = 0;
}

# Persist the user/image notification pairs so that later runs do not warn
# the same uploader about the same image again. The list is written to the
# same $homedir file it was loaded from at startup; the previous hard-coded
# absolute path saved it somewhere the next run would never read.
notelog("Saving notification list\n");
saveNotificationList("$homedir/orphanbot.note", %notifications);

Pearle::myLog(2, "Finished with category.\n");

}