User:OrphanBot/libBot.pl

  1. !/usr/bin/perl
  1. libBot: A library of useful routines for running a bot

use strict;

use warnings;

require "libPearle2.pl";

my $test_only = 0;

my $username = "";

sub config

{

my %params = @_;

$test_only = $params{test_only} if(defined($params{test_only}));

$username = $params{username} if(defined($params{username}));

}

  1. Log a warning on the talk page of the bot

sub userwarnlog

{

my ($text, $editTime, $startTime, $token, $user, $summary, $session);

$user = $_[1];

$user = $username if(!defined($user));

$summary = $_[2];

$summary = "Logging warning message" if(!defined($summary));

$session = $_[3];

if(defined($session))

{

# We've been handed an editing session

($text, $editTime, $startTime, $token) = @{$session};

Pearle::myLog("Warning with existing edit session\n");

}

else

{

($text, $editTime, $startTime, $token) = Pearle::getPage("User talk:$user");

}

if($test_only)

{

print STDERR $_[0];

return;

}

if($text =~ /^#redirect/i)

{

userwarnlog("*User talk page User talk:$user is a redirect\n");

return;

}

$text .= $_[0];

Pearle::postPage("User talk:$user", $editTime, $startTime, $token, $text, $summary, "no");

print STDERR $_[0];

}

  1. Log a notification message to the console

sub notelog

{

print STDERR @_;

}

  1. Fix all wikilinks in a string so that they shows as a link, not inline, if it's for a category or image

sub FixupLinks

{

my $link = shift;

$link =~ s/\[\[(Category|Image)/[[:$1/g;

return $link;

}

  1. Make a string into a Wikipedia-compatible regex

sub MakeWikiRegex

{

my $string = shift;

# Escape metacharacters

$string =~ s/\\/\\\\/g;

$string =~ s/\./\\\./g;

$string =~ s/\(/\\\(/g;

$string =~ s/\)/\\\)/g;

$string =~ s/\[/\\\[/g;

$string =~ s/\]/\\\]/g;

$string =~ s/\+/\\\+/g;

$string =~ s/\*/\\\*/g;

$string =~ s/\?/\\\?/g;

$string =~ s/\^/\\\^/g;

$string =~ s/\$/\\\$/g;

# Process the string to match both with spaces and with underscores

$string =~ s/[ _]/[ _]+/g;

# Process the string to match both upcase and lowercase first characters

if($string =~ /^[A-Za-z]/)

{

$string =~ s/^(.)/"[$1".lc($1)."]"/e;

}

return $string;

}

  1. Check for new talk page messages

sub DoIHaveMessages

{

my $text = shift;

if($text =~ /

You have/)

{

return 1;

}

else

{

return 0;

}

}

sub GetPageList

{

my $image = shift;

my $image_text = shift;

my @pages = ();

# Extract the page links

#

while($image_text =~ /

  • /g)

    {

    my $title;

    $title = $2;

    # Unescape any HTML entities in the title

    $title =~ s/</

    $title =~ s/>/>/g;

    $title =~ s/"/"/g;

    $title =~ s/&/&/g;

    notelog("Matched article $title\n");

    # Filter out bad namespaces

    if($title =~ /^(User:|Talk:|User talk:|Template talk:|Image:|Image talk:|Category talk:|Wikipedia:|Wikipedia talk:|Portal talk:)/) # Leave these alone

    {

    notelog("Ignoring $title due to namespace\n");

    }

    elsif($title =~ /^Special:/)

    {

    # Ignore Special: pages completely

    }

    elsif($title =~ /^(MediaWiki:|MediaWiki talk:|Template:|Help:|Help talk:)/) # Log a warning about these, but otherwise leave them alone

    {

    userwarnlog("*Found image :$image in $title\n");

    }

    else # Good namespaces: article, Category:, Portal:

    {

    push @pages, $title;

    }

    }

    return @pages;

    }

    1. Get all pages. Don't filter for bad namespaces.

    sub GetFullPageList

    {

    my $image = shift;

    my $image_text = shift;

    my @pages = ();

    # Extract the page links

    #

    while($image_text =~ /

  • /g)

    {

    my $title;

    $title = $2;

    # Unescape any HTML entities in the title

    $title =~ s/</

    $title =~ s/>/>/g;

    $title =~ s/"/"/g;

    $title =~ s/&/&/g;

    notelog("Matched article $title\n");

    push @pages, $title;

    }

    return @pages;

    }

    sub SaveImage

    {

    my $image = shift;

    my $image_text = shift;

    my $image_path = shift;

    my $image_url;

    ($image_url) = $image_text =~ /

    if(defined($image_url))

    {

    my $filename;

    my $image_data;

    notelog("Fetching image $image_url\n");

    ($filename) = $image_url =~ /(\/[^\/]+)$/;

    $filename = $image_path . $filename;

    if(! -e $filename)

    {

    if($test_only)

    {

    notelog("Would save to $filename...");

    }

    else

    {

    $image_url = Pearle::urlDecode($image_url);

    $image_data = Pearle::getURL($image_url);

    notelog("Saving to $filename...");

    if(defined($filename) and $filename)

    {

    open OUTFILE, ">", $filename;

    print OUTFILE $image_data;

    close OUTFILE;

    notelog("Image saved\n");

    Pearle::myLog("Image $image saved as $filename\n");

    }

    else

    {

    notelog("Failed\n");

    }

    }

    }

    else

    {

    notelog("File already exists\n");

    }

    }

    }

    sub RemoveImageFromPage

    {

    my $image = shift;

    my $page = shift;

    my $image_regex = shift;

    my $removal_prefix = shift;

    my $removal_comment = shift;

    my ($text, $editTime, $startTime, $token);

    my ($match1, $match2);

    my $old_length;

    my $new_length;

    my $change_len;

    my $match_len;

    # Fetch an article page

    ($text, $editTime, $startTime, $token) = Pearle::getPage($page);

    if(!defined($text))

    {

    Pearle::myLog("Error: Bad edit page $page\n");

    userwarnlog(FixupLinks("*Error: Bad edit page $page\n"));

    sleep(300);

    return 0;

    }

    if($text =~ /^\s*$/)

    {

    # Might be protected instead of empty

    Pearle::myLog("Error: Empty page $page\n");

    userwarnlog(FixupLinks("*Error: Empty page $page\n"));

    sleep(300);

    return 0;

    }

    if($text =~ /^#redirect/i)

    {

    Pearle::myLog("Redirect found for page $page (image :$image)\n");

    userwarnlog(FixupLinks("*Redirect found for page $page (image :$image)\n"));

    return 0;

    }

    # Remove the image

    my $regex3 = "(\\[\\[${image_regex}.*?(\\[\\[.*?\\]\\].*?|)+\\]\\][ \\t]*)"; # Regex to match images

    my $regex3ex = "\\w[ \\t]*${regex3}[ \\t]*\\w"; # Regex to try to spot inline images

    my $regex3c = ""; # Regex to spot images in comments

    my $regex3g = "(${image_regex}.*)"; # Regex to match gallery images

    my $regex3gc = ""; # Regex to spot gallery images in comments

    my ($raw_image) = $image =~ /Image:(.*)/;

    my $regex4a = "([Cc]over\\s*=\\s*)" . MakeWikiRegex($raw_image);

    my $regex4b = "(image_skyline\\s*=\\s*)" . MakeWikiRegex($raw_image);

    my $regex4i = "(image\\s*=\\s*)" . MakeWikiRegex($raw_image); # Regex to match "image = " sections in infoboxes

    my $regex4p = "(picture\\s*=\\s*)" . MakeWikiRegex($raw_image); # Regex to match "picture = " sections in infoboxes

    my $regex4m = "\\[\\([^*)\\]\\]"; # Regex to match inline Media: links

    my $regex4g = "(img\\s*=\\s*)" . MakeWikiRegex($raw_image); # Regex to match "img = " sections in infoboxes

    Pearle::myLog("Regex 3: $regex3\n");

    notelog("Regex 3: $regex3\n");

    notelog("Regex 3 extended: $regex3ex\n");

    notelog("Regex 3 gallery: $regex3g\n");

    Pearle::myLog("Raw regex: $raw_image\n");

    notelog("Regex 4 Album: $regex4a\n");

    notelog("Regex 4 City: $regex4b\n");

    notelog("Regex 4 Image: $regex4i\n");

    notelog("Regex 4 Media: $regex4m\n");

    notelog("Regex 4 Picture: $regex4p\n");

    notelog("Regex 4 Img: $regex4g\n");

    if($text =~ /$regex3ex/)

    {

    Pearle::myLog("Possible inline image in $page\n");

    userwarnlog(FixupLinks("*Possible inline image :$image in $page\n"));

    return 0; # Can't do gallery matching because that also matches regular images, and odds are, we don't have an infobox

    }

    if($text =~ /$regex3c/ or $text =~ /$regex3gc/)

    {

    Pearle::myLog("Image in comment in $page\n");

    1. userwarnlog(FixupLinks("*Image in comment in $page\n"));

    return 0; # Can't do gallery matching because that also matches regular images

    }

    $text =~ /$regex3/;

    $match_len = length($1);

    $match2 = $text =~ s/$regex3//g;

    $new_length = length($text);

    print "Num: $match2 Len: $match_len\n";

    if($match2)

    {

    # If a whole lot of text was removed, log a warning

    if($match_len > (500 + length($image)))

    {

    userwarnlog(FixupLinks("*Long caption of $match_len bytes replaced in $page\n"));

    if($match_len > (1000 + length($image)))

    {

    notelog("Unusually long caption found. Exiting.\n");

    Pearle::myLog("Unusually long caption of $match_len found in $page ($match2 matches).\n");

    exit;

    }

    }

    if($match_len < (4 + length($image)))

    {

    notelog("*Short replacement of $match_len bytes in $page\n");

    Pearle::myLog("Short replacement of $match_len bytes (min " . (length($image) + 4) . ") in $page ($match2 matches). Exiting.\n");

    Pearle::myLog("Text:\n$text\n");

    exit;

    }

    # If many matches, log a warning

    if($match2 > 2)

    {

    Pearle::myLog("More than one match ($match2) in page $page\n");

    1. userwarnlog(FixupLinks("*More than one match ($match2) in page $page\n"));

    }

    if($match2 > 100)

    {

    Pearle::myLog("Too many matches ($match2) in page $page. Skipping.\n");

    userwarnlog("Too many matches ($match2) in page $page. Skipping.\n");

    return 0;

    }

    # If there might be a reference, log a warning

    1. if($text =~ /(?:see (?:image|picture|graph|diagram|right|left)|\(left\)|\(right\)|\(below\)|\(above\))/)
    2. {
    3. Pearle::myLog("Possible image reference in page $page\n");
    4. userwarnlog("*Possible image reference in page $page\n");
    5. }

    if($text =~ /-->\]/)

    {

    Pearle::myLog("Possible bracket mixup in page $page\n");

    userwarnlog(FixupLinks("*Possible bracket mixup in page $page\n"));

    }

    1. if($text =~ /\[\[(?: |)/)

      {

      $match2 += 1;

      }

      }

      if($match2 > 0)

      {

      if($text =~ /\[\[(?: |)