User:OrphanBot/libBot.pl




!/usr/bin/perl


libBot: A library of useful routines for running a bot

use strict;
use warnings;
require "libPearle2.pl";
my $test_only = 0;
my $username = "";
sub config
{
my %params = @_;
$test_only = $params{test_only} if(defined($params{test_only}));
$username = $params{username} if(defined($params{username}));
}

Log a warning on the talk page of the bot

sub userwarnlog
{
my ($text, $editTime, $startTime, $token, $user, $summary, $session);
$user = $_[1];
$user = $username if(!defined($user));
$summary = $_[2];
$summary = "Logging warning message" if(!defined($summary));
$session = $_[3];
if(defined($session))
{
# We've been handed an editing session
($text, $editTime, $startTime, $token) = @{$session};
Pearle::myLog("Warning with existing edit session\n");
}
else
{
($text, $editTime, $startTime, $token) = Pearle::getPage("User talk:$user");
}
if($test_only)
{
print STDERR $_[0];
return;
}
if($text =~ /^#redirect/i)
{
userwarnlog("*User talk page User talk:$user is a redirect\n");
return;
}
$text .= $_[0];
Pearle::postPage("User talk:$user", $editTime, $startTime, $token, $text, $summary, "no");
print STDERR $_[0];
}

Log a notification message to the console

sub notelog
{
print STDERR @_;
}

Fix all wikilinks in a string so that they shows as a link, not inline, if it's for a category or image

sub FixupLinks
{
my $link = shift;
$link =~ s/\[\[(Category|Image)/[[:$1/g;
return $link;
}

Make a string into a Wikipedia-compatible regex

sub MakeWikiRegex
{
my $string = shift;
# Escape metacharacters
$string =~ s/\\/\\\\/g;
$string =~ s/\./\\\./g;
$string =~ s/\(/\\\(/g;
$string =~ s/\)/\\\)/g;
$string =~ s/\[/\\\[/g;
$string =~ s/\]/\\\]/g;
$string =~ s/\+/\\\+/g;
$string =~ s/\*/\\\*/g;
$string =~ s/\?/\\\?/g;
$string =~ s/\^/\\\^/g;
$string =~ s/\$/\\\$/g;
# Process the string to match both with spaces and with underscores
$string =~ s/[ _]/[ _]+/g;
# Process the string to match both upcase and lowercase first characters
if($string =~ /^[A-Za-z]/)
{
$string =~ s/^(.)/"[$1".lc($1)."]"/e;
}
return $string;
}

Check for new talk page messages

sub DoIHaveMessages
{
my $text = shift;
if($text =~ /
You have/)
{
return 1;
}
else
{
return 0;
}
}
sub GetPageList
{
my $image = shift;
my $image_text = shift;
my @pages = ();
# Extract the page links
# 
Lee Hyori

# 
Daesung Entertainment

# 

while($image_text =~ /
/g)
{
my $title;
$title = $2;
# Unescape any HTML entities in the title
$title =~ s/</
$title =~ s/>/>/g;
$title =~ s/"/"/g;
$title =~ s/&/&/g;
notelog("Matched article $title\n");
# Filter out bad namespaces
if($title =~ /^(User:|Talk:|User talk:|Template talk:|Image:|Image talk:|Category talk:|Wikipedia:|Wikipedia talk:|Portal talk:)/)	# Leave these alone
{
notelog("Ignoring $title due to namespace\n");
}
elsif($title =~ /^Special:/)
{
# Ignore Special: pages completely
}
elsif($title =~ /^(MediaWiki:|MediaWiki talk:|Template:|Help:|Help talk:)/)		# Log a warning about these, but otherwise leave them alone
{
userwarnlog("*Found image :$image in $title\n");
}
else	# Good namespaces: article, Category:, Portal:
{
push @pages, $title;
}
}
return @pages;
}

Get all pages.  Don't filter for bad namespaces.

sub GetFullPageList
{
my $image = shift;
my $image_text = shift;
my @pages = ();
# Extract the page links
# 
Lee Hyori

# 
Daesung Entertainment

# 

while($image_text =~ /
/g)
{
my $title;
$title = $2;
# Unescape any HTML entities in the title
$title =~ s/</
$title =~ s/>/>/g;
$title =~ s/"/"/g;
$title =~ s/&/&/g;
notelog("Matched article $title\n");
push @pages, $title;
}
return @pages;
}
sub SaveImage
{
my $image = shift;
my $image_text = shift;
my $image_path = shift;
my $image_url;
($image_url) = $image_text =~ /
if(defined($image_url))

{
my $filename;
my $image_data;
notelog("Fetching image $image_url\n");
($filename) = $image_url =~ /(\/[^\/]+)$/;
$filename = $image_path . $filename;
if(! -e $filename)
{
if($test_only)
{
notelog("Would save to $filename...");
}
else
{
$image_url = Pearle::urlDecode($image_url);
$image_data = Pearle::getURL($image_url);
notelog("Saving to $filename...");
if(defined($filename) and $filename)
{
open OUTFILE, ">", $filename;
print OUTFILE $image_data;
close OUTFILE;
notelog("Image saved\n");
Pearle::myLog("Image $image saved as $filename\n");
}
else
{
notelog("Failed\n");
}
}
}
else
{
notelog("File already exists\n");
}
}
}
sub RemoveImageFromPage
{
my $image = shift;
my $page = shift;
my $image_regex = shift;
my $removal_prefix = shift;
my $removal_comment = shift;
my ($text, $editTime, $startTime, $token);
my ($match1, $match2);
my $old_length;
my $new_length;
my $change_len;
my $match_len;
# Fetch an article page
($text, $editTime, $startTime, $token) = Pearle::getPage($page);
if(!defined($text))
{
Pearle::myLog("Error: Bad edit page $page\n");
userwarnlog(FixupLinks("*Error: Bad edit page $page\n"));
sleep(300);
return 0;
}
if($text =~ /^\s*$/)
{
# Might be protected instead of empty
Pearle::myLog("Error: Empty page $page\n");
userwarnlog(FixupLinks("*Error: Empty page $page\n"));
sleep(300);
return 0;
}
if($text =~ /^#redirect/i)
{
Pearle::myLog("Redirect found for page $page (image :$image)\n");
userwarnlog(FixupLinks("*Redirect found for page $page (image :$image)\n"));
return 0;
}
# Remove the image
my $regex3 = "(\\[\\[${image_regex}.*?(\\[\\[.*?\\]\\].*?|)+\\]\\][ \\t]*)";	# Regex to match images
my $regex3ex = "\\w[ \\t]*${regex3}[ \\t]*\\w";									# Regex to try to spot inline images
my $regex3c = "";											# Regex to spot images in comments
my $regex3g = "(${image_regex}.*)";												# Regex to match gallery images
my $regex3gc = "";											# Regex to spot gallery images in comments
my ($raw_image) = $image =~ /Image:(.*)/;
my $regex4a = "([Cc]over\\s*=\\s*)" . MakeWikiRegex($raw_image);
my $regex4b = "(image_skyline\\s*=\\s*)" . MakeWikiRegex($raw_image);
my $regex4i = "(image\\s*=\\s*)" . MakeWikiRegex($raw_image);						# Regex to match "image = " sections in infoboxes
my $regex4p = "(picture\\s*=\\s*)" . MakeWikiRegex($raw_image);					# Regex to match "picture = " sections in infoboxes
my $regex4m = "\\[\\([^*)\\]\\]";	# Regex to match inline Media: links
my $regex4g =  "(img\\s*=\\s*)" . MakeWikiRegex($raw_image);	# Regex to match "img = " sections in infoboxes
Pearle::myLog("Regex 3: $regex3\n");
notelog("Regex 3: $regex3\n");
notelog("Regex 3 extended: $regex3ex\n");
notelog("Regex 3 gallery: $regex3g\n");
Pearle::myLog("Raw regex: $raw_image\n");
notelog("Regex 4 Album: $regex4a\n");
notelog("Regex 4 City: $regex4b\n");
notelog("Regex 4 Image: $regex4i\n");
notelog("Regex 4 Media: $regex4m\n");
notelog("Regex 4 Picture: $regex4p\n");
notelog("Regex 4 Img: $regex4g\n");
if($text =~ /$regex3ex/)
{
Pearle::myLog("Possible inline image in $page\n");
userwarnlog(FixupLinks("*Possible inline image :$image in $page\n"));
return 0;	# Can't do gallery matching because that also matches regular images, and odds are, we don't have an infobox
}
if($text =~ /$regex3c/ or $text =~ /$regex3gc/)
{
Pearle::myLog("Image in comment in $page\n");

userwarnlog(FixupLinks("*Image in comment in $page\n"));

return 0;	# Can't do gallery matching because that also matches regular images
}
$text =~ /$regex3/;
$match_len = length($1);
$match2 = $text =~ s/$regex3//g;
$new_length = length($text);
print "Num: $match2 Len: $match_len\n";
if($match2)
{
# If a whole lot of text was removed, log a warning
if($match_len > (500 + length($image)))
{
userwarnlog(FixupLinks("*Long caption of $match_len bytes replaced in $page\n"));
if($match_len > (1000 + length($image)))
{
notelog("Unusually long caption found.  Exiting.\n");
Pearle::myLog("Unusually long caption of $match_len found in $page ($match2 matches).\n");
exit;
}
}
if($match_len < (4 + length($image)))
{
notelog("*Short replacement of $match_len bytes in $page\n");
Pearle::myLog("Short replacement of $match_len bytes (min " . (length($image) + 4) . ") in $page ($match2 matches).  Exiting.\n");
Pearle::myLog("Text:\n$text\n");
exit;
}
# If many matches, log a warning
if($match2 > 2)
{
Pearle::myLog("More than one match ($match2) in page $page\n");

userwarnlog(FixupLinks("*More than one match ($match2) in page $page\n"));

}
if($match2 > 100)
{
Pearle::myLog("Too many matches ($match2) in page $page.  Skipping.\n");
userwarnlog("Too many matches ($match2) in page $page.  Skipping.\n");
return 0;
}
# If there might be a reference, log a warning

if($text =~ /(?:see (?:image|picture|graph|diagram|right|left)|\(left\)|\(right\)|\(below\)|\(above\))/)
{
Pearle::myLog("Possible image reference in page $page\n");
userwarnlog("*Possible image reference in page $page\n");
}

if($text =~ /-->\]/)
{
Pearle::myLog("Possible bracket mixup in page $page\n");
userwarnlog(FixupLinks("*Possible bracket mixup in page $page\n"));
}

if($text =~ /\[\[(?: |)/)
{
$match2 += 1;
}
}
if($match2 > 0)
{
if($text =~ /\[\[(?: |)