User:Whobot/code

  1. IMPORTANT ###
  1. This code is released into the public domain. CONTRIBUTIONS are
  2. welcome, but will also hereby be RELEASED TO THE PUBLIC DOMAIN.
  1. See the documentation distributed with this code for important
  2. warnings and caveats.
  1. Cloned from Pearle Wisebot, modifications by User:Who

use strict;

use Time::HiRes;

  1. The following may be helpful in debugging character encoding
  2. problems.
  1. use utf8;
  2. use encoding 'utf8';
  1. Initialization

use LWP::UserAgent;

use HTTP::Cookies;

use HTTP::Request::Common qw(POST);

use HTML::Entities;

print "\n";

  1. LWP:UserAgent is a library which allows us to create a "user agent"
  2. object that handles the low-level details of making HTTP requests.

$::ua = LWP::UserAgent->new(timeout => 300);

$::ua->agent("Whobot Wisebot/0.1");

$::ua->cookie_jar(HTTP::Cookies->new(file => "cookies.whobot.txt", autosave => 1));

$::ua->cookie_jar->load();

  1. Hot pipes

$| = 1;

  1. ---
  2. test();
  3. sub test
  4. {
  5. my ($target, $text, $editTime, $startTime, $token);
  6. $target = "Wikipedia:Sandbox";
  7. ($text, $editTime, $startTime, $token) = getPage($target);
  8. print $text;
  9. $text .= "\Eat my electrons! -- Whobot\n";
  10. print "---\n";
  11. postPage ($target, $editTime, $startTime, $token, $text, "Test 008");
  12. die ("Test complete.");
  13. }
  14. ---

interpretCommand(@ARGV);

sub interpretCommand

{

my ($command, @arguments, $i, $line, $argument, @newArguments,

$from, $to, $page, $pageCopy);

($command, @arguments) = @_;

$command =~ s/\*\s*//;

myLog(`date /t`);

myLog ($command.": ".join(" ", @arguments)."\n");

print `date /t`;

print $command.": ".join(" ", @arguments)."\n";

if ($command eq "POST_STDIN")

{

if ($arguments[2] ne "")

{

myLog ("Too many arguments to POST_STDIN.\n");

die ("Too many arguments to POST_STDIN.\n");

}

postSTDIN($arguments[0],$arguments[1]);

}

elsif ($command eq "POST_STDIN_NULLOK")

{

if ($arguments[2] ne "")

{

myLog ("Too many arguments to POST_STDIN.\n");

die ("Too many arguments to POST_STDIN.\n");

}

$::nullOK = "yes";

postSTDIN($arguments[0],$arguments[1]);

$::nullOK = "no";

}

elsif ($command eq "MOVE_CONTENTS")

{

if ($arguments[3] ne "")

{

if (($arguments[4] eq "")

and ($arguments[1] eq "->"))

{

moveCategoryContents($arguments[0],$arguments[2],$arguments[3],"");

return();

}

else

{

myLog ("Too many arguments to MOVE_CONTENTS.\n");

die ("Too many arguments to MOVE_CONTENTS.\n");

}

}

moveCategoryContents($arguments[0],$arguments[1],"no","yes",$arguments[2]);

}

elsif ($command eq "MOVE_CONTENTS_INCL_CATS")

{

if ($arguments[3] ne "")

{

if (($arguments[4] eq "")

and ($arguments[1] eq "->"))

{

moveCategoryContents($arguments[0],$arguments[2],"yes","yes",$arguments[3]);

return();

}

else

{

myLog ("Too many arguments to MOVE_CONTENTS_INCL_CATS.\n");

die ("Too many arguments to MOVE_CONTENTS_INCL_CATS.\n");

}

}

moveCategoryContents($arguments[0],$arguments[1],"yes","yes",$arguments[2],"");

}

elsif ($command eq "REMOVE_X_FROM_CAT")

{

if ($arguments[3] ne "")

{

myLog ("Too many arguments to REMOVE_X_FROM_CAT.\n");

die ("Too many arguments to REMOVE_X_FROM_CAT.\n");

}

removeXFromCat($arguments[0],$arguments[1],$arguments[2],"");

}

elsif ($command eq "DEPOPULATE_CAT")

{

if ($arguments[1] ne "")

{

if (($arguments[2] eq "")

and ($arguments[1] eq "special"))

{

depopulateCat($arguments[0],"special");

}

else

{

myLog ("Too many arguments to DEPOPULATE_CAT.\n");

die ("Too many arguments to DEPOPULATE_CAT.\n");

}

}

depopulateCat($arguments[0]);

}

elsif ($command eq "PRINT_WIKITEXT")

{

if ($arguments[1] ne "")

{

myLog ("Too many arguments to PRINT_WIKITEXT.\n");

die ("Too many arguments to PRINT_WIKITEXT.\n");

}

printWikitext($arguments[0]);

}

elsif ($command eq "ADD_CFD_TAG")

{

if ($arguments[1] ne "")

{

myLog ("Too many arguments to ADD_CFD_TAG.\n");

die ("Too many arguments to ADD_CFD_TAG.\n");

}

addCFDTag($arguments[0]);

}

elsif ($command eq "ADD_CFDU_TAG")

{

if ($arguments[2] ne "")

{

myLog ("Too many arguments to ADD_CFDU_TAG.\n");

die ("Too many arguments to ADD_CFDU_TAG.\n");

}

addCFDUTag($arguments[0],$arguments[1],"");

}

elsif ($command eq "REMOVE_CFD_TAG")

{

if ($arguments[1] ne "")

{

myLog ("Too many arguments to REMOVE_CFD_TAG.\n");

die ("Too many arguments to REMOVE_CFD_TAG.\n");

}

removeCFDTag($arguments[0]);

}

elsif ($command eq "REMOVE_CFDU_TAG")

{

if ($arguments[1] ne "")

{

myLog ("Too many arguments to REMOVE_CFDU_TAG.\n");

die ("Too many arguments to REMOVE_CFDU_TAG.\n");

}

removeCFDUTag($arguments[0]);

}

elsif ($command eq "ADD_TO_CAT")

{

if ($arguments[2] ne "")

{

myLog ("Too many arguments to ADD_TO_CAT.\n");

die ("Too many arguments to ADD_TO_CAT.\n");

}

addToCat($arguments[0],$arguments[1],"");

}

elsif ($command eq "ADD_TO_CAT_NULL_OK")

{

if ($arguments[2] ne "")

{

myLog ("Too many arguments to ADD_TO_CAT_NULL_OK.\n");

die ("Too many arguments to ADD_TO_CAT_NULL_OK.\n");

}

$::nullOK = "yes";

addToCat($arguments[0],$arguments[1],"");

$::nullOK = "no";

}

elsif ($command eq "TRANSFER_TEXT")

{

if ($arguments[2] ne "")

{

myLog ("Too many arguments to TRANSFER_TEXT.\n");

die ("Too many arguments to TRANSFER_TEXT.\n");

}

transferText($arguments[0], $arguments[1]);

}

# DON'T USE THE BELOW COMMAND; IT'S NOT IMPLEMENTED PROPERLY YET.

  1. elsif ($command eq "LIST_TO_CAT_CHECK")
  2. {
  3. if ($arguments[2] ne "")
  4. {
  5. myLog ("Too many arguments to LIST_TO_CAT_CHECK.\n");
  6. die ("Too many arguments to LIST_TO_CAT_CHECK.\n");
  7. }
  8. listToCat($arguments[0], $arguments[1], "no");
  9. }

elsif ($command eq "CHANGE_CATEGORY")

{

if ($arguments[4] ne "")

{

myLog ("Too many arguments to CHANGE_CATEGORY.\n");

die ("Too many arguments to CHANGE_CATEGORY.\n");

}

changeCategory($arguments[0], $arguments[1], $arguments[2], $arguments[3]);

}

elsif ($command eq "CLEANUP_DATE")

{

if ($arguments[0] ne "")

{

myLog ("Too many arguments to CLEANUP_DATE.\n");

die ("Too many arguments to CLEANUP_DATE.\n");

}

cleanupDate();

}

elsif ($command eq "OPENTASK_UPDATE")

{

if ($arguments[0] ne "")

{

myLog ("Too many arguments to OPENTASK_UPDATE.\n");

die ("Too many arguments to OPENTASK_UPDATE.\n");

}

opentaskUpdate();

}

# DON'T USE THE BELOW COMMAND; IT'S NOT IMPLEMENTED PROPERLY YET.

#elsif ($command eq "ENFORCE_CATEGORY_REDIRECTS_CHECK")

#{

# enforceCategoryRedirects("no");

#}

# This command is for remedial cleanup only.

#elsif ($command eq "INTERWIKI_LOOP")

#{

# interwikiLoop();

#}

elsif ($command eq "ENFORCE_CATEGORY_INTERWIKI")

{

if ($arguments[1] ne "")

{

myLog ("Too many arguments to ENFORCE_CATEGORY_INTERWIKI.\n");

die ("Too many arguments to ENFORCE_CATEGORY_INTERWIKI.\n");

}

enforceCategoryInterwiki($arguments[0]);

}

  1. Broken due to recent changes on WP:CFD
  2. elsif ($command eq "ENFORCE_CFD")
  3. {
  4. enforceCFD();
  5. }

elsif ($command eq "STOP")

{

myLog ("Stopped.");

die ("Stopped.");

}

elsif (($command eq "READ_COMMANDS")

or ($command eq ""))

{

while ()

{

$line = $_;

if ($line =~ m/READ_COMMANDS/)

{

myLog ("interpretCommands(): Infinite loop!");

die ("interpretCommands(): Infinite loop!");

}

if ($line =~ m/^\s*$/)

{

next;

}

$line =~ s/\s+$//s;

$line =~ s/\*\s*//;

if ($line =~ m/\[\[:?(.*?)\]\] -> \[\[:?(.*?)\]\]/)

{

$line =~ s/\[\[:?(.*?)\]\] -> \[\[:?(.*?)\]\]//;

$from = $1;

$to = $2;

$line =~ s/\s*$//;

$from =~ s/ /_/g;

$to =~ s/ /_/g;

interpretCommand($line, $from, $to);

}

else

{

while ($line =~ m/\[\[:?(.*?)\]\]/)

{

$line =~ m/\[\[:?(.*?)\]\]/;

$page = $1;

$pageCopy = $page;

$page =~ s/ /_/g;

$line =~ s/\[\[:?$pageCopy\]\]/$page/;

}

interpretCommand(split (" ", $line));

}

  1. unless (($line =~ m/TRANSFER_TEXT_CHECK/) or
  2. ($line =~ m/ENFORCE_CATEGORY_INTERWIKI/))

unless ($line =~ m/TRANSFER_TEXT_CHECK/)

{

limit();

}

}

myLog ("Execution complete.\n");

print ("Execution complete.\n");

}

else

{

myLog ("Unrecognized command '".$command."': ".join(" ", @arguments)."\n");

die ("Unrecognized command '".$command."': ".join(" ", @arguments));

}

}

sub limit

{

my ($i);

# Rate-limiting to avoid hosing the wiki server

# Min 30 sec unmarked

# Min 10 sec marked

# May be raised by retry() if load is heavy

### ATTENTION ###

# Increasing the speed of the bot to faster than 1 edit every 10

# seconds violates English Wikipedia rules as of April, 2005, and

# will cause your bot to be banned. So don't change $normalDelay

# unless you know what you are doing. Other sites may have

# similar policies, and you are advised to check before using your

# bot at the default speed.

#################

if ($::speedLimit < 10)

{

$::speedLimit = 10;

}

$i = $::speedLimit;

while ($i >= 0)

{

sleep (1);

print STDERR "Sleeping $i seconds...\r";

$i--;

}

print STDERR " \r";

}

  1. perl whobot.pl POST_STDIN User:Whobot/categories-alpha "Update from 13 Oct 2004 database dump"

sub postSTDIN

{

my ($text, $articleName, $comment, $editTime, $startTime, $junk, $token);

$articleName = $_[0];

$comment = $_[1];

#urlSafe($articleName);

while ()

{

$text .= $_;

}

if ($text =~ m/^\s*$/)

{

myLog ("postSTDIN(): Null input.\n");

die ("postSTDIN(): Null input.\n");

}

($junk, $editTime, $startTime, $token) = getPage($articleName);

if ($comment eq "")

{

$comment = "Automated post";

}

postPage ($articleName, $editTime, $startTime, $token, $text, $comment);

}

  1. perl whobot.pl ADD_TO_CAT Page_name Category:Category_name sortkey

sub addToCat

{

my ($text, $articleName, $category, $editTime, $startTime, $comment, $status,

@junk, $sortkey, $token);

$articleName = $_[0];

$category = $_[1];

$sortkey = $_[2];

#urlSafe($articleName);

#urlSafe($category);

($text, $editTime, $startTime, $token) = getPage($articleName);

$comment = "Add ${category} per WP:CFD";

($status, $text, @junk) = addCatToText($category, $text, $sortkey, $articleName);

if ($status ne "success")

{

return();

}

postPage ($articleName, $editTime, $startTime, $token, $text, $comment);

}

sub myLog

{

open (LOG, ">>whobot-log.txt")

|| die "Could not append to log!";

print LOG $_[0];

close (LOG);

}

sub getPage

{

my ($target, $request, $response, $reply, $text, $text2,

$editTime, $startTime, $attemptStartTime, $attemptFinishTime,

$token);

$target = $_[0];

if ($target =~ m/^\s*$/)

{

myLog("getPage: Null target.");

die("getPage: Null target.");

}

# urlSafe ($target);

# Monitor wiki server responsiveness

$attemptStartTime = Time::HiRes::time();

# Create a request-object

print "GET http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit\n";

myLog("GET http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit\n");

$request = HTTP::Request->new(GET => "http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit");

$response = $::ua->request($request);

if ($response->is_success)

{

$reply = $response->content;

# Monitor wiki server responsiveness

$attemptFinishTime = Time::HiRes::time();

retry ("success", "getPage", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));

# This detects whether or not we're logged in.

unless ($reply =~ m%My talk%)

{

# We've lost our identity.

myLog ("Wiki server is not recognizing me (1).\n---\n${reply}\n---\n");

die ("Wiki server is not recognizing me (1).\n");

}

#$reply =~ m%(.*?)%s;

$reply =~ m%(.*?)%s;

$text = $1;

$reply =~ m/value="(\d+)" name="wpEdittime"/;

$editTime = $1;

# Added 22 Aug 2005 to correctly handle articles that have

# been undeleted

$reply =~ m/value="(\d+)" name="wpStarttime"/;

$startTime = $1;

# Added 9 Mar 2005 after recent software change.

$reply =~ m/value="(\w+)" name="wpEditToken"/;

$token = $1;

###

if (($text =~ m/^\s*$/)

and ($::nullOK ne "yes"))

{

myLog ("getPage($target): Null text!\n");

myLog "\n---\n$reply\n---\n";

die ("getPage($target): Null text!\n");

}

if (($editTime =~ m/^\s*$/)

and ($::nullOK ne "yes"))

{

myLog ("getPage($target): Null time!\n");

myLog "\n---\n$reply\n---\n";

die ("getPage($target): Null time!\n");

}

if (($text =~ m/>/) or

($text =~ m/

{

print $text;

myLog "\n---\n$text\n---\n";

myLog ("getPage($target): Bad text suck!\n");

die ("getPage($target): Bad text suck!\n");

}

# Change ( " -> " ) etc

# This function is from HTML::Entities.

decode_entities($text);

# This may or may not actually work

$::ua->cookie_jar->save();

return ($text, $editTime, $startTime, $token);

}

else

{

myLog ("getPage($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit\n".$response->content."\n");

print ("getPage($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit\n".$response->content."\n");

# 50X HTTP errors mean there is a problem connecting to the wiki server

if (($response->status_line =~ m/^500/)

or ($response->status_line =~ m/^502/)

or ($response->status_line =~ m/^503/))

{

return(retry("getPage", @_));

}

else

{

# Unhandled HTTP response

die ("getPage($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit\n");

}

}

}

sub postPage

{

my ($request, $response, $pageName, $textToPost, $summaryEntry,

$editTime, $startTime, $actual, $expected, $attemptStartTime,

$attemptFinishTime, $date, $editToken, $minor);

$pageName = $_[0];

$editTime = $_[1];

$startTime = $_[2];

$editToken = $_[3];

$textToPost = $_[4];

$summaryEntry = $_[5]; # Max 200 chars!

$minor = $_[6];

$summaryEntry = substr($summaryEntry, 0, 200);

if ($pageName eq "")

{

myLog ("postPage(): Empty pageName.\n");

die ("postPage(): Empty pageName.\n");

}

if ($summaryEntry eq "")

{

$summaryEntry = "Automated editing.";

}

# Monitor server responsiveness

$attemptStartTime = Time::HiRes::time();

if ($minor eq "yes")

{

$request = POST "http://en.wikipedia.org/w/wiki.phtml?title=${pageName}&action=submit",

[wpTextbox1 => $textToPost,

wpSummary => $summaryEntry,

wpSave => "Save page",

wpMinoredit => "on",

wpEditToken => $editToken,

wpStarttime => $startTime,

wpEdittime => $editTime];

# Optional: wpWatchthis

}

else

{

$request = POST "http://en.wikipedia.org/w/wiki.phtml?title=${pageName}&action=submit",

[wpTextbox1 => $textToPost,

wpSummary => $summaryEntry,

wpSave => "Save page",

wpEditToken => $editToken,

wpStarttime => $startTime,

wpEdittime => $editTime];

# Optional: wpWatchthis, wpMinoredit

}

# ---

## If posts are failing, you can uncomment the below to see what

## HTTP request is being made.

# myLog($request->as_string());

# print $request->as_string(); $::speedLimit = 60 * 10;

# print $::ua->request($request)->as_string;

# ---

myLog("POSTing...");

print "POSTing...";

# Pass request to the user agent and get a response back

$response = $::ua->request($request);

myLog("POSTed.\n");

print "POSTed.\n";

if ($response->content =~ m/Please confirm that really want to recreate this article./)

{

myLog ($response->content."\n");

die ("Deleted article conflict! See log!");

}

# Check the outcome of the response

if (($response->is_success) or ($response->is_redirect))

{

# Monitor server responsiveness

$attemptFinishTime = Time::HiRes::time();

retry ("success", "postPage", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));

$expected = "302 Moved Temporarily";

$actual = $response->status_line;

if (($expected ne $actual)

and ($actual ne "200 OK"))

{

myLog ("postPage(${pageName}, $editTime)#1 - expected =! actual\n");

myLog ($request->as_string());

myLog ("EXPECTED: '${expected}'\n");

myLog (" ACTUAL: '${actual}'\n");

die ("postPage(${pageName}, $editTime)#1 - expected =! actual - see log\n");

}

$expected = "http://en.wikipedia.org/wiki/${pageName}";

$expected =~ s/\'/%27/g;

$expected =~ s/\*/%2A/g;

$expected = urlEncode($expected);

$actual = $response->headers->header("Location");

if (($expected ne $actual)

and !(($actual eq "") and ($response->status_line eq "200 OK")))

{

myLog ("postPage(${pageName}, $editTime)#2 - expected =! actual\n");

myLog ("EXPECTED: '${expected}'\n");

myLog (" ACTUAL: '${actual}'\n");

die ("postPage(${pageName}, $editTime)#2 - expected =! actual - see log\n");

}

if ($response->content =~ m/

Edit conflict/)

{

myLog ("Edit conflict on '$pageName' at '$editTime'!\n");

die ("Edit conflict on '$pageName' at '$editTime'!\n");

}

$::ua->cookie_jar->save();

return ($response->content);

}

else

{

$date = `date /t`;

$date =~ s/\n//g;

myLog ("Bad response to POST to $pageName at $date.\n".$response->status_line."\n".$response->content."\n");

# 50X HTTP errors mean there is a problem connecting to the wiki server

if (($response->status_line =~ m/^500/)

or ($response->status_line =~ m/^502/)

or ($response->status_line =~ m/^503/))

{

print "Bad response to POST to $pageName at $date.\n".$response->status_line."\n".$response->content."\n";

return(retry("postPage", @_));

}

else

{

# Unhandled HTTP response

die ("Bad response to POST to $pageName at $date.\n".$response->status_line."\n");

}

}

}

sub urlSafe

{

# This function is no longer called because the LWP::UserAgent and

# HTTP::Request libraries handle character escaping.

my ($text, $textCopy);

$text = $_[0];

$textCopy = $text;

# & may not be included in this list!

$textCopy =~ s%[\p{IsWord}\w\-,\(\):\/\'\.\;\!]*%%g;

unless ($textCopy eq "")

{

myLog ("urlSafe(): Bad character in ${text}: '${textCopy}'\n");

die ("urlSafe(): Bad character in ${text}: '${textCopy}'\n");

}

}

  1. perl whobot.pl MOVE_CONTENTS_INCL_CATS Category:From_here Category:To_here CFDListingDay

sub moveCategoryContents

{

my (@articles, $categoryFrom, $categoryTo, $article, $status,

@subcats, $includeCategories, $subcat, @junk, $sortkey,

$includeSortkey, $cfdlisting);

# -- INITIALIZATION --

$categoryFrom = $_[0];

$categoryTo = $_[1];

$includeCategories = $_[2];

$includeSortkey = $_[3];

$cfdlisting = $_[4];

if ($categoryFrom =~ m/^\[\[:(Category:.*?)\]\]/)

{

$categoryFrom =~ s/^\[\[:(Category:.*?)\]\]/$1/;

$categoryFrom =~ s/\s+/_/g;

}

if ($categoryTo =~ m/^\[\[:(Category:.*?)\]\]/)

{

$categoryTo =~ s/^\[\[:(Category:.*?)\]\]/$1/;

$categoryTo =~ s/\s+/_/g;

}

$categoryFrom =~ s/^\[\[://;

$categoryTo =~ s/^\[\[://;

$categoryFrom =~ s/\]\]$//;

$categoryTo =~ s/\]\]$//;

unless (($categoryFrom =~ m/^Category:/) and

($categoryTo =~ m/^Category:/))

{

myLog ("moveCategoryContents(): Are you sure these are categories? ".$categoryFrom."/".$categoryTo."\n");

die ("moveCategoryContents(): Are you sure these are categories? ".$categoryFrom."/".$categoryTo."\n");

}

transferText ($categoryFrom, $categoryTo, $cfdlisting);

# Subcategory transfer

if ($includeCategories eq "yes")

{

@subcats = getSubcategories($categoryFrom);

foreach $subcat (@subcats)

{

if ($subcat =~ m/^\s*$/)

{

next;

}

$subcat = urlDecode($subcat);

print "changeCategory($subcat, $categoryFrom, $categoryTo) c\n";

myLog "changeCategory($subcat, $categoryFrom, $categoryTo) c\n";

changeCategory($subcat, $categoryFrom, $categoryTo, $cfdlisting);

limit();

}

}

# Article transfer

@articles = getCategoryArticles($categoryFrom);

foreach $article (reverse(@articles))

  1. foreach $article (@articles)

{

#die "article name is $article";

if ($article =~ m/^\s*$/)

{

next;

}

$article = urlDecode($article);

print "changeCategory($article, $categoryFrom, $categoryTo) a\n";

myLog "changeCategory($article, $categoryFrom, $categoryTo) a\n";

changeCategory($article, $categoryFrom, $categoryTo, $cfdlisting);

limit();

}

}

  1. perl whobot.pl DEPOPULATE_CAT Category:To_be_depopulated

sub depopulateCat #($category);

{

my (@articles, $category, $article, $status, @subcats, $subcat, @junk, $doSpecial);

$category = $_[0];

$doSpecial = $_[1];

if ($category =~ m/^\[\[:(Category:.*?)\]\]/)

{

$category =~ s/^\[\[:(Category:.*?)\]\]/$1/;

$category =~ s/\s+/_/g;

}

if (!$doSpecial)

{

unless ($category =~ m/^Category:/)

{

myLog ("depopulateCat(): Are you sure '$category' is a category?\n");

die ("depopulateCat(): Are you sure '$category' is a category?\n");

}

# Remove all subcategories

@subcats = getSubcategories($category);

foreach $subcat (@subcats)

{

$subcat = urlDecode($subcat);

print "removeXFromCat($subcat, $category) c\n";

myLog "removeXFromCat($subcat, $category) c\n";

($status, @junk) = removeXFromCat($subcat, $category);

unless ($status == 0)

{

myLog ("Status: $status\n");

print "Status: $status\n";

}

}

}

# Remove all articles

@articles = getCategoryArticles($category, $doSpecial);

#foreach $article (reverse(@articles))

foreach $article (@articles)

{

$article = urlDecode($article);

print "removeXFromCat($article, $category, $doSpecial) a\n";

myLog "removeXFromCat($article, $category, $doSpecial) a\n";

($status, @junk) = removeXFromCat($article, $category, $doSpecial);

unless ($status == 0)

{

myLog ("Status: $status\n");

print "Status: $status\n";

}

}

}

  1. perl whobot.pl REMOVE_X_FROM_CAT Article_name Category:Where_the_article_is CFDListingDay

sub removeXFromCat

{

my ($text, $articleName, $category, $editTime, $startTime, $comment, $catTmp,

$sortkey, @junk, $token, $categoryUnd, $categoryHuman, $cfdlisting, $doSpecial);

$articleName = $_[0];

$category = $_[1];

$cfdlisting = $_[2];

$doSpecial = $_[3];

if (!$doSpecial)

{

$doSpecial = $cfdlisting;

}

#urlSafe($articleName);

#urlSafe($category);

if (!$doSpecial)

{

unless ($category =~ m/^Category:\w+/)

{

myLog ("addToCat(): Bad format on category.\n");

die ("addToCat(): Bad format on category.\n");

}

}

($text, $editTime, $startTime, $token) = getPage($articleName);

$comment = "Removed ${category} per WP:CFD";

#$comment = "test edits ${cfdlisting}";

# Convert underscore to spaces; this is human-readable.

$category =~ s/_/ /g;

$categoryHuman = $category;

# Insert possible whitespace

$category =~ s/^Category://;

  1. $category = "Category:\\s*\\Q".$category."\\E"; # THIS DOES NOT WORK

$category = "Category:\\s*".$category;

$category =~ s%\(%\\(%g;

$category =~ s%\)%\\)%g;

$category =~ s%\'%\\\'%g;

$categoryUnd = $category;

$categoryUnd =~ s/ /_/g;

unless (($text =~ m/\[\[\s*${category}\s*\]\]/is)

or ($text =~ m/\[\[\s*${category}\s*\|.*?\]\]/is)

or ($text =~ m/\[\[\s*${categoryUnd}\s*\]\]/is)

or ($text =~ m/\[\[\s*${categoryUnd}\s*\|.*?\]\]/is))

{

print "removeXFromCat(): $articleName is not in '$category'.\n";

myLog ("removeXFromCat(): $articleName is not in '$category'.\n");

### TEMPORARY ###

### Uncomment these lines if you want category remove attempts

### to trigger null edits. This is useful if you have have

### changed the category on a template, but due to a bug this

### does not actually move member articles until they are

### edited.

($text, @junk) = fixCategoryInterwiki($text);

postPage ($articleName, $editTime, $startTime, $token, $text, "Mostly null edit to actually remove from ${categoryHuman}", "yes");

limit();

### TEMPORARY ###

return(1);

}

if ($text =~ m/^\s*\#REDIRECT/is)

{

print "addToCat(): $articleName is a redirect!\n";

myLog ("addToCat(): $articleName is a redirect!\n");

return(2);

}

$text =~ m/\[\[\s*${category}\s*\|\s*(.*?)\]\]/is;

$sortkey = $1;

if ($sortkey eq "")

{

$text =~ m/\[\[\s*${categoryUnd}\s*\|\s*(.*?)\]\]/is;

}

# Remove the page from the category and any trailing newline.

$text =~ s/\[\[\s*${category}\s*\|?(.*?)\]\]\n?//isg;

$text =~ s/\[\[\s*${categoryUnd}\s*\|?(.*?)\]\]\n?//isg;

($text, @junk) = fixCategoryInterwiki($text);

postPage ($articleName, $editTime, $startTime, $token, $text, $comment);

return(0, $sortkey);

}

  1. perl whobot.pl PRINT_WIKITEXT Article_you_want_to_get
  2. Warning: Saves to a file in the current directory with the same name
  3. as the article, plus another file with the .html extention.

sub printWikitext

{

my ($editTime, $startTime, $text, $target, $token);

$target = $_[0];

$target =~ s/^\[\[://;

$target =~ s/\]\]$//;

($text, $editTime, $startTime, $token) = getPage($target);

# Save the wikicode version to a file.

open (WIKITEXT, ">./${target}");

print WIKITEXT $text;

close (WIKITEXT);

# Save the HTML version to a file.

print `wget http://en.wikipedia.org/wiki/${target} -O ./${target}.html`;

}

  1. Get a list of the names of articles in a given category.

sub getCategoryArticles

{

my ($target, $request, $response, $reply, $articles, $article,

@articles, @articles1, @articles2, $attemptStartTime, $attemptFinishTime, $doSpecial);

$target = $_[0];

$doSpecial = $_[1];

#urlSafe ($target);

if (!$doSpecial)

{

unless ($target =~ m/^Category:/)

{

myLog ("getCategoryArticles(): Are you sure '$target' is a category?\n");

die ("getCategoryArticles(): Are you sure '$target' is a category?\n");

}

}

# Monitor wiki server responsiveness

$attemptStartTime = Time::HiRes::time();

# Create a request-object

if (!$doSpecial)

{

print "GET http://en.wikipedia.org/wiki/${target}\n";

myLog("GET http://en.wikipedia.org/wiki/${target}\n");

$request = HTTP::Request->new(GET => "http://en.wikipedia.org/wiki/${target}");

}

else

{

print "GET http://en.wikipedia.org/w/wiki.phtml?title=Special:Whatlinkshere&target=${target}&limit=200&offset=0\n";

myLog("GET http://en.wikipedia.org/w/wiki.phtml?title=Special:Whatlinkshere&target=${target}&limit=200&offset=0\n");

$request = HTTP::Request->new(GET => "http://en.wikipedia.org/w/wiki.phtml?title=Special:Whatlinkshere&target=${target}&limit=200&offset=0\n");

}

$response = $::ua->request($request);

if ($response->is_success)

{

# Monitor wiki server responsiveness

$attemptFinishTime = Time::HiRes::time();

retry ("success", "getCategoryArticles", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));

$reply = $response->content;

# This detects whether or not we're logged in.

unless ($reply =~ m%My talk%)

{

# We've lost our identity.

myLog ("Wiki server is not recognizing me (2).\n---\n${reply}\n---\n");

die ("Wiki server is not recognizing me (2).\n");

}

$articles = $reply;

$articles =~ s%^.*?

Articles in category.*?

%%s;

$articles =~ s%

.*?$%%s;

@articles1 = $articles =~ m%

  • @articles2 = $articles =~ m%px 0;">

    my @articles = (@articles1, @articles2);

    $::ua->cookie_jar->save();

    return @articles;

    }

    else

    {

    myLog ("getCategoryArticles($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n".$response->content."\n");

    # 50X HTTP errors mean there is a problem connecting to the wiki server

    if (($response->status_line =~ m/^500/)

    or ($response->status_line =~ m/^502/)

    or ($response->status_line =~ m/^503/))

    {

    print "getCategoryArticles($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n".$response->content."\n";

    return(retry("getCategoryArticles", @_));

    }

    else

    {

    # Unhandled HTTP response

    die ("getCategoryArticles($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n");

    }

    }

    }

    1. Get a list of the names of subcategories of a given category.

    sub getSubcategories

    {

    my ($target, $request, $response, $reply, $subcats, $subcat,

    @subcats, $attemptStartTime, $attemptFinishTime);

    $target = $_[0];

    #urlSafe ($target);

    unless ($target =~ m/^Category:/)

    {

    myLog ("getSubcategories(): Are you sure '$target' is a category?\n");

    die ("getSubcategories(): Are you sure '$target' is a category?\n");

    }

    # Monitor wiki server responsiveness

    $attemptStartTime = Time::HiRes::time();

    # Create a request-object

    print "GET http://en.wikipedia.org/wiki/${target}\n";

    myLog("GET http://en.wikipedia.org/wiki/${target}\n");

    $request = HTTP::Request->new(GET => "http://en.wikipedia.org/wiki/${target}");

    $response = $::ua->request($request);

    if ($response->is_success)

    {

    # Monitor wiki server responsiveness

    $attemptFinishTime = Time::HiRes::time();

    retry ("success", "getSubcategories", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));

    $reply = $response->content;

    # This detects whether or not we're logged in.

    unless ($reply =~ m%My talk%)

    {

    # We've lost our identity.

    myLog ("Wikipedia is not recognizing me (3).\n---\n${reply}\n---\n");

    die ("Wikipedia is not recognizing me (3).\n");

    }

    $subcats = $reply;

    if ($subcats =~ m%^.*?

    Subcategories

    (.*?)

    Articles in category.*?

    .*?$%s)

    {

    $subcats =~ s%^.*?

    Subcategories

    (.*?)

    Articles in category.*?

    .*?$%$1%s;

    }

    else

    {

    return ();

    }

    @subcats = $subcats =~ m%

  • $::ua->cookie_jar->save();

    return @subcats;

    }

    else

    {

    myLog ("getSubcategories($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n".$response->content."\n");

    # 50X HTTP errors mean there is a problem connecting to the wiki server

    if (($response->status_line =~ m/^500/)

    or ($response->status_line =~ m/^502/)

    or ($response->status_line =~ m/^503/))

    {

    print "getSubcategories($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n".$response->content."\n";

    return(retry("getCategoryArticles", @_));

    }

    else

    {

    # Unhandled HTTP response

    die ("getSubcategories($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n");

    }

    }

    }

    1. perl whobot.pl ADD_CFD_TAG Category:Category_name

    sub addCFDTag

    {

    my ($text, $category, $editTime, $startTime, $comment, $catTmp, @junk, $token);

    $category = $_[0];

    #urlSafe($category);

    unless ($category =~ m/^Category:\w+/)

    {

    myLog ("addCFDTag(): Bad format on category.\n");

    die ("addCFDTag(): Bad format on category.\n");

    }

    $::nullOK = "yes";

    ($text, $editTime, $startTime, $token) = getPage($category);

    $::nullOK = "no";

    $comment = "Nominated for deletion or renaming";

    if (($text =~ m/\{\{cfd\}\}/is) or

    ($text =~ m/\{\{cfm/is) or

    ($text =~ m/\{\{cfr/is) or

    ($text =~ m/\{\{cfr-speedy/is))

    {

    print "addCFDTag(): $category is already tagged.\n";

    myLog ("addCFDTag(): $category is already tagged.\n");

    return();

    }

    if ($text =~ m/^\s*\#REDIRECT/is)

    {

    print "addCFDTag(): $category is a redirect!\n";

    myLog ("addCFDTag(): $category is a redirect!\n");

    return();

    }

    # Add the CFD tag to the beginning of the page.

    $text = "{{cfd}}\n".$text;

    ($text, @junk) = fixCategoryInterwiki($text);

    postPage ($category, $editTime, $startTime, $token, $text, $comment);

    }

    1. perl whobot.pl ADD_CFDU_TAG Category:Category_name

    sub addCFDUTag

    {

    my ($text, $category, $editTime, $startTime, $comment, $catTmp, @junk, $token, $stuff);

    $category = $_[0];

    $stuff = $_[1];

    urlSafe($category);

    #urlSafe($stuff);

    unless ($category =~ m/^Category:\w+/)

    {

    myLog ("addCFDUTag(): Bad format on category.\n");

    die ("addCFDUTag(): Bad format on category.\n");

    }

    $::nullOK = "yes";

    ($text, $editTime, $startTime, $token) = getPage($category);

    $::nullOK = "no";

    $comment = "Nominated for deletion or renaming";

    #$comment = "Test edit";

    if (($text =~ m/\{\{cfd\}\}/is) or

    ($text =~ m/\{\{cfm/is) or

    ($text =~ m/\{\{cfr/is) or

    ($text =~ m/\{\{cfr-speedy/is))

    {

    print "addCFDUTag(): $category is already tagged.\n";

    myLog ("addCFDUTag(): $category is already tagged.\n");

    return();

    }

    if ($text =~ m/^\s*\#REDIRECT/is)

    {

    print "addCFDUTag(): $category is a redirect!\n";

    myLog ("addCFDUTag(): $category is a redirect!\n");

    return();

    }

    # Add the CFDU tag to the beginning of the page.

    $text = "{{". $stuff. "}}\n".$text;

    # $text = $stuff;

    ($text, @junk) = fixCategoryInterwiki($text);

    postPage ($category, $editTime, $startTime, $token, $text, $comment);

    }

    1. perl whobot.pl REMOVE_CFD_TAG Category:Category_name

    sub removeCFDTag

    {

    my ($text, $category, $editTime, $startTime, $comment, $catTmp, @junk, $token);

    $category = $_[0];

    #urlSafe($category);

    unless ($category =~ m/^Category:\w+/)

    {

    myLog ("removeCFDTag(): Bad format on category.\n");

    die ("removeCFDTag(): Bad format on category.\n");

    }

    $::nullOK = "yes";

    ($text, $editTime, $startTime, $token) = getPage($category);

    $::nullOK = "no";

    $comment = "De-listed from Wikipedia:Categories for deletion";

    unless (($text =~ m/\{\{cfd\}\}/is) or

    ($text =~ m/\{\{cfm/is) or

    ($text =~ m/\{\{cfr/is) or

    ($text =~ m/\{\{cfr-speedy/is))

    {

    print "removeCFDTag(): $category is not tagged.\n";

    myLog ("removeCFDTag(): $category is not tagged.\n");

    return();

    }

    if ($text =~ m/^\s*\#REDIRECT/is)

    {

    print "removeCFDTag(): $category is a redirect!\n";

    myLog ("removeCFDTag(): $category is a redirect!\n");

    return();

    }

    # Remove the CFD tag.

    $text =~ s/{{cfd}}\s*//gis;

    $text =~ s/\{\{cfr.*?\}\}\s*//is;

    $text =~ s/\{\{cfm.*?\}\}\s*//is;

    $text =~ s/\{\{cfdu.*?\}\}\s*//is;

    $text =~ s/\{\{cfru.*?\}\}\s*//is;

    $text =~ s/\{\{cfr-speedy.*?\}\}\s*//is;

    ($text, @junk) = fixCategoryInterwiki($text);

    postPage ($category, $editTime, $startTime, $token, $text, $comment);

    }

    1. perl whobot.pl REMOVE_CFDU_TAG Category:Containing subs to remove CFDU

    sub removeCFDUTag #($category);

    {

    my (@articles, $category, $article, $status, @subcats, $subcat, @junk, $text, $editTime, $startTime, $comment, $catTmp, $token);

    $category = $_[0];

    if ($category =~ m/^\[\[:(Category:.*?)\]\]/)

    {

    $category =~ s/^\[\[:(Category:.*?)\]\]/$1/;

    $category =~ s/\s+/_/g;

    }

    unless ($category =~ m/^Category:/)

    {

    myLog ("removeCFDUtag(): Are you sure '$category' is a category?\n");

    die ("removeCFDUtag(): Are you sure '$category' is a category?\n");

    }

    # Remove all subcategories

    @subcats = getSubcategories($category);

    foreach $subcat (@subcats)

    {

    $subcat = urlDecode($subcat);

    print "removeCFDTag($subcat, $category) c\n";

    myLog "removeCFDTag($subcat, $category) c\n";

    ($status, @junk) = removeCFDTag($subcat, $category, $editTime, $startTime, $token, $text, $comment);

    unless ($status == 0)

    {

    myLog ("Status: $status\n");

    print "Status: $status\n";

    }

    }

    }

    1. perl whobot.pl TRANSFER_TEXT Category:From_here Category:To_there
    1. Note that this code is called automatically whenever moving a
    2. category, so you probably don't need to call it yourself from the
    3. command line.

    sub transferText

    {

    my ($source, $destination, $sourceText, $destinationText,

    $sourceTime, $destinationTime, @sourceCategories,

    @destinationCategories, $category, $lastCategory,

    $sourceTextOrig, $destinationTextOrig, $comment, $sourceHuman,

    $destinationHuman, $noMergeFlag, $sourceToken,

    $destinationToken, $junk, $sourceStartTime,

    $destinationStartTime, $cfdlisting, $summaryText);

    $source = $_[0];

    $destination = $_[1];

    $cfdlisting = $_[2];

    if ($cfdlisting eq "speedy")

    {

    $comment = "Cleanup per CFD Speedy rename (moving $source to $destination)";

    }

    else

    {

    $comment = "Cleanup per WP:CFD (moving $source to $destination)";

    }

    # Make human-readable versions of these variables for use in edit summaries

    $sourceHuman = $source;

    $sourceHuman =~ s/_/ /g;

    $destinationHuman = $destination;

    $destinationHuman =~ s/_/ /g;

    unless (($source =~ m/^Category:/) and

    ($destination =~ m/^Category:/))

    {

    myLog ("transferText(): Are you sure these are categories? ".$source."/".$destination."\n");

    die ("transferText(): Are you sure these are categories? ".$source."/".$destination."\n");

    }

    ($sourceText, $sourceTime, $sourceStartTime, $sourceToken) = getPage($source);

    # Avoid double runs!

    # This text must be the same as that which is implanted below, and

    # it should be an HTML comment, so that it's invisible.

    if ($sourceText =~ m/<\!--WHOBOT-MOVE-CATEGORY-CONTENTS-SRC-FLAG-->/)

    {

    return;

    }

    $sourceTextOrig = $sourceText;

    $sourceText =~ s/{{cfd}}//;

    $sourceText =~ s/\{\{cfr.*?\}\}\s*//is;

    $sourceText =~ s/\{\{cfm.*?\}\}\s*//is;

    $sourceText =~ s/\{\{cfdu.*?\}\}\s*//is;

    $sourceText =~ s/\{\{cfru.*?\}\}\s*//is;

    $sourceText =~ s/\{\{cfr-speedy.*?\}\}\s*//is;

    $sourceText =~ s/^\s+//s;

    $sourceText =~ s/\s+$//s;

    $::nullOK = "yes";

    ($destinationText, $destinationTime, $destinationStartTime, $destinationToken)

    = getPage($destination);

    $::nullOK = "no";

    $destinationTextOrig = $destinationText;

    $destinationText =~ s/{{cfd}}//;

    $destinationText =~ s/\{\{cfm.*?\}\}\s*//is;

    $destinationText =~ s/\{\{cfr.*?\}\}\s*//is;

    $destinationText =~ s/\{\{cfdu.*?\}\}\s*//is;

    $destinationText =~ s/\{\{cfru.*?\}\}\s*//is;

    $destinationText =~ s/\{\{cfr-speedy.*?\}\}\s*//is;

    $destinationText =~ s/^\s+//s;

    $destinationText =~ s/\s+$//s;

    # To help keep things straight when we're in a loop.

    print STDOUT "\n----\n";

    if ($cfdlisting eq "speedy")

    {

    $summaryText = "CFD Speedy rename";

    }

    else

    {

    $summaryText = "WP:CFD";

    }

    if (($sourceText eq "") and

    ($destinationText ne ""))

    {

    # The HTML comment must be the same as that above.

    $sourceText = "{{cfd}}\nThis category has been moved to :$destinationHuman. Any remaining articles and subcategories will soon be moved by a bot unless otherwise noted on $summaryText.\n\n";

    }

    elsif (($sourceText ne "") and

    ($destinationText eq ""))

    {

    $destinationText = $sourceText;

    # The HTML comment must be the same as that above.

    $sourceText = "{{cfd}}\nThis category has been moved to :$destinationHuman. Any remaining articles and subcategories will soon be moved by a bot unless otherwise noted on $summaryText.\n\n";

    }

    elsif (($sourceText ne "") and

    ($destinationText ne ""))

    {

    @sourceCategories = $sourceText =~ m/\[\[\s*(Category:.*?)\s*\]\]/gs;

    @destinationCategories = $destinationText =~ m/\[\[\s*(Category:.*?)\s*\]\]/gs;

    $sourceText =~ s/\[\[\s*(Category:.*?)\s*\]\]\s*//gs;

    $sourceText =~ s/^\s+//s;

    $sourceText =~ s/\s+$//s;

    $destinationText =~ s/\[\[\s*(Category:.*?)\s*\]\]\s*//gs;

    $destinationText =~ s/^\s+//s;

    $destinationText =~ s/\s+$//s;

    $destinationText = $sourceText."\n".$destinationText;

    $destinationText =~ s/^\s+//s;

    $destinationText =~ s/\s+$//s;

    foreach $category (sort (@sourceCategories, @destinationCategories))

    {

    if ($category eq $lastCategory)

    {

    next;

    }

    $destinationText .= "\n${category}";

    $lastCategory = $category;

    }

    # The HTML comment must be the same as that above.

    $sourceText = "{{cfd}}\nThis category has been moved to :$destinationHuman. Any remaining articles and subcategories will soon be moved by a bot unless otherwise noted on WP:CFD.\n\n";

    }

    $sourceText =~ s/\n\s+\n/\n\n/sg;

    $destinationText =~ s/\n\s+\n/\n\n/sg;

    # You may need to futz with this, depending on the templates

    # currently in use.

    unless (($sourceTextOrig =~ m/\{\{cfd/)

    or ($sourceTextOrig =~ m/\{\{cfr/)

    or ($sourceTextOrig =~ m/\{\{cfru|/)

    or ($sourceTextOrig =~ m/\{\{cfdu|/)

    or ($sourceTextOrig =~ m/\{\{cfr-speedy/)

    or ($sourceTextOrig =~ m/\{\{cfm/))

    {

    print STDOUT "FATAL ERROR: $source was not tagged {{cfd}}, {{cfm}}, {{cfr}}, {{cfdu}}, {{cfru}}, or {{cfr-speedy}}!\n";

    myLog("FATAL ERROR: $source was not tagged {{cfd}}, {{cfr}}, {{cfm}}, {{cfdu}}, {{cfru}}, or {{cfr-speedy}}!\n");

    die("FATAL ERROR: $source was not tagged {{cfd}}, {{cfr}}, {{cfm}}, {{cfdu}}, {{cfru}}, or {{cfr-speedy}}!\n");

    }

    if (($sourceText eq $sourceTextOrig) and

    ($destinationText eq $destinationTextOrig))

    {

    print STDOUT "No changes for $source and $destination.\n";

    return();

    }

    if ($destinationTextOrig =~ m/^\s*$/)

    {

    print "No merging was required from $source into $destination.\n";

    $noMergeFlag = "yes";

    }

    unless ($noMergeFlag eq "yes")

    {

    $destinationText .= "{{pearle-manual-cleanup}}\n";

    }

    # Make sure category and interwiki links conform to style

    # guidelines.

    ($destinationText, $junk) = fixCategoryInterwiki($destinationText);

    # If we did have to change things around, print the changes and post them to the wiki.

    if ($sourceText ne $sourceTextOrig)

    {

    unless ($noMergeFlag eq "yes")

    {

    print STDOUT "SOURCE FROM:\n%%%${sourceTextOrig}%%%\nSOURCE TO:\n%%%${sourceText}%%%\n";

    }

    postPage ($source, $sourceTime, $sourceStartTime, $sourceToken, $sourceText, $comment);

    }

    if ($destinationText ne $destinationTextOrig)

    {

    unless ($noMergeFlag eq "yes")

    {

    print STDOUT "DESTINATION FROM:\n%%%${destinationTextOrig}%%%\nDESTINATION TO:\n%%%${destinationText}%%%\n";

    }

    postPage ($destination, $destinationTime, $destinationStartTime, $destinationToken, $destinationText, $comment);

    }

    }

    1. Translate from HTTP URL encoding to the native character set.

    sub urlDecode

    {

    my ($input);

    $input = $_[0];

    $input =~ s/\%([a-f|A-F|0-9][a-f|A-F|0-9])/chr(hex($1))/eg;

    return ($input);

    }

    1. Translate from the native character set to HTTP URL encoding.

    sub urlEncode

    {

    my ($char, $input, $output);

    $input = $_[0];

    foreach $char (split("",$input))

    {

    1. if ($char =~ m/[a-z|A-Z|0-9|\-_\.\!\~\*\'\(\)]/)

    # The below exclusions should conform to Wikipedia practice

    # (possibly non-standard)

    if ($char =~ m/[a-z|A-Z|0-9|\-_\.\*\/:]/)

    {

    $output .= $char;

    }

    elsif ($char eq " ")

    {

    $output .= "+";

    }

    else

    {

    $output .= uc(sprintf("%%%x", ord($char)));

    # %HH where HH is the (Unicode?) hex code of $char

    }

    }

    return ($output);

    }

    1. perl whobot.pl CHANGE_CATEGORY Article_name Category:From Category:To CFDlistingDay

    sub changeCategory

    {

    my ($articleName, $categoryFrom, $categoryTo, $editTime, $startTime, $text,

    $comment, $catTmp, $sortkey, $token, $junk, $categoryFromUnd, $cfdlisting);

    $articleName = $_[0];

    $categoryFrom = $_[1];

    $categoryTo = $_[2];

    $cfdlisting = $_[3];

    #urlSafe($articleName);

    #urlSafe($categoryFrom);

    #urlSafe($categoryTo);

    unless (($categoryFrom =~ m/^Category:/) and

    ($categoryTo =~ m/^Category:/))

    {

    myLog ("moveCategoryContents(): Are you sure these are categories? ".$categoryFrom."/".$categoryTo."\n");

    die ("moveCategoryContents(): Are you sure these are categories? ".$categoryFrom."/".$categoryTo."\n");

    }

    1. die ($articleName ."does not exist");

    if ($articleName =~ m/^\s*$/)

    {

    myLog("changeCategory(): Null target.");

    die("changeCategory(): Null target.");

    }

    ($text, $editTime, $startTime, $token) = getPage($articleName);

    if ($cfdlisting eq "speedy")

    {

    $comment = "Recat per CFD Speedy rename ${categoryFrom} to ${categoryTo}";

    }

    else

    {

    $comment = "Recat per WP:CFD ${categoryFrom} to ${categoryTo}";

    }

    # --- Start the removing part ---

    # Convert underscore to spaces; this is human-readable.

    $categoryFrom =~ s/_/ /g;

    # Insert possible whitespace

    $categoryFrom =~ s/^Category://;

    $categoryFrom = "Category:\\s*".$categoryFrom;

    # Escape special characters

    $categoryFrom =~ s%\(%\\(%g;

    $categoryFrom =~ s%\)%\\)%g;

    $categoryFrom =~ s%\'%\\\'%g;

    $categoryFromUnd = $categoryFrom;

    $categoryFromUnd =~ s/ /_/g;

    unless (($text =~ m/\[\[\s*${categoryFrom}\s*\]\]/is)

    or ($text =~ m/\[\[\s*${categoryFrom}\s*\|.*?\]\]/is)

    or ($text =~ m/\[\[\s*${categoryFromUnd}\s*\]\]/is)

    or ($text =~ m/\[\[\s*${categoryFromUnd}\s*\|.*?\]\]/is))

    {

    myLog ("changeCategory.r(): $articleName is not in '$categoryFrom'.\n");

    my ($nullEditFlag);

    # Set this to "yes" if you want mass category change attempts

    # to trigger null edits automatically. You should check the

    # category later to see if everything worked or not, to see if

    # any templates should be changed. The below will add a small

    # amount of unnecessary server load to try the null edits if

    # template changes haven't already been made.

    $nullEditFlag = "yes";

    if ($nullEditFlag eq "yes")

    {

    myLog ("changeCategory(): Attempting null edit on $articleName.\n");

    print "changeCategory(): Attempting null edit on $articleName.\n";

    nullEdit($articleName);

    return();

    }

    else

    {

    print "###${text}###\n";

    die ("changeCategory.r(): $articleName is not in '$categoryFrom'.\n");

    }

    }

    if ($text =~ m/^\s*\#REDIRECT/is)

    {

    myLog ("changeCategory.r(): $articleName is a redirect!\n");

    die ("changeCategory.r(): $articleName is a redirect!\n");

    }

    # We're lazy and don't fully parse the document to properly check

    # for escaped category tags, so there may be some unnecssary

    # aborts from the following, but they are rare and easily

    # overridden by manually editing the page in question.

    if ($text =~ m/.*?category.*?<\/nowiki>/is)

    {

    myLog ("changeCategory.r(): $articleName has a dangerous nowiki tag!\n");

    die ("changeCategory.r(): $articleName has a dangerous nowiki tag!\n");

    }

    $text =~ m/\[\[\s*${categoryFrom}\s*\|\s*(.*?)\]\]/is;

    $sortkey = $1;

    if ($sortkey eq "")

    {

    $text =~ m/\[\[\s*${categoryFromUnd}\s*\|\s*(.*?)\]\]/is;

    }

    # Remove the page from the category and any trailing newline.

    $text =~ s/\[\[\s*${categoryFrom}\s*\|?(.*?)\]\]\n?//isg;

    $text =~ s/\[\[\s*${categoryFromUnd}\s*\|?(.*?)\]\]\n?//isg;

    # --- Start the adding part ---

    # Remove any newlines at the end of the document.

    $text =~ s/\n*$//s;

    $catTmp = $categoryTo;

    # _ and spaces are equivalent and may be intermingled in wikicode.

    $catTmp =~ s/Category:\s*/Category:\\s*/g;

    $catTmp =~ s/_/[_ ]/g;

    $catTmp =~ s%\(%\\\(%g;

    $catTmp =~ s%\)%\\\)%g;

    $catTmp =~ s%\.%\\\.%g;

    if (($text =~ m/(\[\[\s*${catTmp}\s*\|.*?\]\])/is)

    or ($text =~ m/(\[\[\s*${catTmp}\s*\]\])/is))

    {

    myLog ("changeCategory.a(): $articleName is already in '$categoryTo'.\n");

    print "\n1: '${1}'\n";

    print "\ncattmp: '${catTmp}'\n";

    print "changeCategory.a(): $articleName is already in '$categoryTo'.\n";

    ## It's generally OK to merge it in, so don't do this:

    # die "changeCategory.a(): $articleName is already in '$categoryTo'.\n";

    # return();

    }

    elsif ($text =~ m/^\s*\#REDIRECT/is)

    {

    print "changeCategory.a(): $articleName is a redirect!\n";

    myLog ("changeCategory.a(): $articleName is a redirect!\n");

    return();

    }

    else

    {

    # Convert underscore to spaces; this is human-readable.

    $categoryTo =~ s/_/ /g;

    # Add the category on a new line.

    if ($sortkey eq "")

    {

    $text .= "\n${categoryTo}";

    }

    else

    {

    $text .= "\n${sortkey}";

    }

    }

    # --- Post-processing ---

    ($text, $junk) = fixCategoryInterwiki($text);

    postPage ($articleName, $editTime, $startTime, $token, $text, $comment, "yes");

    }

    1. This function is not yet finished. Right now it simply compares the
    2. membership of a given list and a given category. Eventually, it is
    3. intended to be used to convert lists into categories. This is not
    4. yet authorized behavior.

    sub listToCat

    {

    my ($lists, $cats, $list, $cat, $listText, @junk, @articlesInList,

    @articlesInCat, %articlesInCat, $article, $implement);

    $lists = $_[0];

    $cats = $_[1];

    $implement = $_[2];

    if ($implement ne "yes")

    {

    print "Diffing membership of '$lists' and '$cats'\n";

    }

    foreach $list (split(";", $lists))

    {

    $list =~ s/^\[\[:?//;

    $list =~ s/\]\]$//;

    ($listText, @junk) = getPage($list);

    $listText =~ s%.*?%%gis;

    $listText =~ s%

    .*?
    %%gis;

    #

    @articlesInList = (@articlesInList, $listText =~ m%\[\[(.*?)\]\]%sg);

    sleep 1;

    }

    foreach $cat (split(";", $cats))

    {

    $cat =~ s/^\[\[:?//;

    $cat =~ s/\]\]$//;

    $cat =~ s/^:Category/Category/;

    @articlesInCat = (@articlesInCat, getCategoryArticles($cat));

    sleep 1;

    }

    foreach $article (@articlesInCat)

    {

    $article = urlDecode ($article);

    $articlesInCat{$article} = 1;

    # print "In cat: $article\n";

    }

    foreach $article (@articlesInList)

    {

    $article =~ s/\s+/_/gs;

    $article =~ s/\|.*$//;

    if (exists $articlesInCat{$article})

    {

    # print "OK: $article\n";

    delete $articlesInCat{$article};

    }

    else

    {

    print "Only in list(s): $article\n";

    }

    }

    foreach $article (sort(keys(%articlesInCat)))

    {

    print "Only in cat(s): $article\n";

    }

    }

    1. A little paranoia never hurt anyone.

    sub shellfix

    {

    my ($string, $stringTmp);

    $string = $_[0];

    $string =~ s/([\*\?\!\(\)\&\>\<])\"\'/\\$1/g;

    $stringTmp = $string;

    $stringTmp =~ s/[Å\p{IsWord}[:alpha:][:digit:]\*,:_.\'\"\)\(\?\-\/\&\>\<\!]//g;

    if ($stringTmp ne "")

    {

    die ("\nUnsafe character(s) in '${string}': '$stringTmp'\n");

    }

    return $string;

    }

    1. You will not be able to use this function; it requires a dataset
    2. processed by scripts which have not been included. (It's not
    3. finished, anyway.)

    sub enforceCategoryRedirects

    {

    my ($implementActually, $line, $lineTmp, $articlesToMove,

    $article, $flatResults, $entry, $contents, $catTo, $lineTmp2);

    $implementActually = $_[0];

    $flatResults = `cat data/reverse-category-links-sorted.txt | grep ^Category:Wikipedia_category_redirects`;

    foreach $line (split("\n", $flatResults))

    {

    $line =~ s/^Category:Wikipedia_category_redirects <\- //;

    $lineTmp = shellfix($line);

    $lineTmp2 = $lineTmp;

    $lineTmp2 =~ s/^Category://;

    if ($line =~ m/^Category/)

    {

    $articlesToMove = `type data/reverse-category-links-sorted.txt | grep ^${lineTmp}`;

    if ($articlesToMove eq "")

    {

    next;

    }

    print "ATM: $articlesToMove\n";

    $entry = `egrep \"^\\([0-9]+,14,'$lineTmp2'\" data/entries-categoryredirect.txt `;

    $entry =~ m/^\([0-9]+,14,'$lineTmp2','(.*?)',/;

    $contents = $1;

    $contents =~ m/\{\{categoryredirect\|(.*?)\}\}/;

    $catTo = $1;

    $catTo = ":Category:".$catTo;

    $catTo =~ s/_/ /g;

    $lineTmp = $line;

    $lineTmp =~ s/^Category/:Category/i;

    $lineTmp =~ s/_/ /g;

    foreach $article (split("\n", $articlesToMove))

    {

    print "ARTICLE: $article\n";

    print "LINE: $line\n";

    $article =~ s/^$line <\- //;

    print "* Move $article from $lineTmp to $catTo\n";

    }

    }

    }

    }

    1. A call to this recursive function handles any retries necessary to
    2. wait out network or server problems. It's a bit of a hack.

    sub retry

    {

    my ($callType, @args, $i, $normalDelay, $firstRetry,

    $secondRetry, $thirdRetry);

    ($callType, @args) = @_;

    ### ATTENTION ###

    # Increasing the speed of the bot to faster than 1 edit every 10

    # seconds violates English Wikipedia rules as of April, 2005, and

    # will cause your bot to be banned. So don't change $normalDelay

    # unless you know what you are doing. Other sites may have

    # similar policies, and you are advised to check before using your

    # bot at the default speed.

    #################

    # HTTP failures are usually an indication of high server load.

    # The retry settings here are designed to give human editors

    # priority use of the server, by allowing it ample recovering time

    # when load is high.

    # Time to wait before retry on failure, in seconds

    $normalDelay = 10; # Normal interval between edits is 10 seconds

    $firstRetry = 60; # First delay on fail is 1 minute

    $secondRetry = 60 * 10; # Second delay on fail is 10 minutes

    $thirdRetry = 60 * 60; # Third delay on fail is 1 hour

    # SUCCESS CASE

    # e.g. retry ("success", "getPage", "0.23");

    if ($callType eq "success")

    {

    myLog("Response time for ".$args[0]." (sec): ".$args[1]."\n");

    $::retryDelay = $normalDelay;

    if ($args[0] eq "postPage")

    {

    # If the response time is greater than 20 seconds...

    if ($args[1] > 20)

    {

    print "Wikipedia is very slow. Increasing minimum wait to 10 min...\n";

    myLog("Wikipedia is very slow. Increasing minimum wait to 10 min...\n");

    $::speedLimit = 60 * 10;

    }

    # If the response time is between 10 and 20 seconds...

    elsif ($args[1] > 10)

    {

    print "Wikipedia is somewhat slow. Setting minimum wait to 60 sec...\n";

    myLog("Wikipedia is somewhat slow. Setting minimum wait to 60 sec...\n");

    $::speedLimit = 60;

    }

    # If the response time is less than 10 seconds...

    else

    {

    if ($::speedLimit > 10)

    {

    print "Returning to normal minimum wait time.\n";

    myLog("Returning to normal minimum wait time.\n");

    $::speedLimit = 10;

    }

    }

    }

    return();

    }

    # e.g. retry ("getPage", "George_Washington")

    # FAILURE CASES

    elsif (($::retryDelay == $normalDelay)

    or ($::retryDelay == 0))

    {

    print "First retry for ".$args[0]."\n";

    myLog("First retry for ".$args[0]."\n");

    $::retryDelay = $firstRetry;

    $::speedLimit = 60 * 10;

    }

    elsif ($::retryDelay == $firstRetry)

    {

    print "Second retry for ".$args[0]."\n";

    myLog("Second retry for ".$args[0]."\n");

    $::retryDelay = $secondRetry;

    $::speedLimit = 60 * 10;

    }

    elsif ($::retryDelay == $secondRetry)

    {

    print "Third retry for ".$args[0]."\n";

    myLog("Third retry for ".$args[0]."\n");

    $::retryDelay = $thirdRetry;

    $::speedLimit = 60 * 10;

    }

    elsif ($::retryDelay == $thirdRetry)

    {

    print "Nth retry for ".$args[0]."\n";

    myLog("Nth retry for ".$args[0]."\n");

    $::retryDelay = $thirdRetry;

    $::speedLimit = 60 * 10;

    }

    else

    {

    die ("retry(): Internal error - unknown delay factor '".$::retryDelay."'\n");

    }

    # DEFAULT TO FAILURE CASE HANDLING

    $i = $::retryDelay;

    while ($i >= 0)

    {

    sleep (1);

    print STDERR "Waiting $i seconds for retry...\r";

    $i--;

    }

    print " \r";

    # DO THE ACTUAL RETRY

    if ($callType eq "getPage")

    {

    return(getPage(@args));

    }

    elsif ($callType eq "postPage")

    {

    return(postPage(@args));

    }

    elsif ($callType eq "getCategoryArticles")

    {

    return(getCategoryArticles(@args));

    }

    elsif ($callType eq "getSubcategories")

    {

    return(getSubcategories(@args));

    }

    elsif ($callType eq "getURL")

    {

    return(getURL(@args));

    }

    else

    {

    myLog ("retry(): Unknown callType: $callType\n");

    die ("retry(): Unknown callType: $callType\n");

    }

    }

    1. perl pearle ENFORCE_CFD
    2. This just compares the contents of Category:Categories_for_deletion
    3. with WP:CFD and /resolved and /unresolved. It is broken now due to
    4. recent changes which list all nominations on subpages. It also
    5. does not check above the first 200 members of the category, due to
    6. recent changes which paginates in 200-page blocks.

    sub enforceCFD

    {

    my (@subcats, $subcat, $cfd, $editTime, $startTime, $token, $cfdU, $cfdR);

    @subcats = getSubcategories("Category:Categories_for_deletion");

    ($cfd, $editTime, $startTime, $token) = getPage("Wikipedia:Categories_for_deletion");

    ($cfdU, $editTime, $startTime, $token) = getPage("Wikipedia:Categories_for_deletion/unresolved");

    ($cfdR, $editTime, $startTime, $token) = getPage("Wikipedia:Categories_for_deletion/resolved");

    $cfd =~ s/[\r\n_]/ /g;

    $cfd =~ s/\s+/ /g;

    $cfdU =~ s/[\r\n_]/ /g;

    $cfdU =~ s/\s+/ /g;

    $cfdR =~ s/[\r\n_]/ /g;

    $cfdR =~ s/\s+/ /g;

    foreach $subcat (@subcats)

    {

    $subcat =~ s/[\r\n_]/ /g;

    $subcat =~ s/\s+/ /g;

    $subcat = urlDecode ($subcat);

    unless ($cfd =~ m/$subcat/)

    {

    print "$subcat is not in WP:CFD";

    if ($cfdR =~ m/$subcat/)

    {

    print " (listed on /resolved)";

    }

    if ($cfdU =~ m/$subcat/)

    {

    print " (listed on /unresolved)";

    }

    print "\n";

    }

    }

    }

    1. An internal function that handles the complexity of adding a
    2. category tag to the wikicode of a page.

    sub addCatToText

    {

    my ($category, $text, $catTmp, $sortkey, $articleName, $junk);

    $category = $_[0];

    $text = $_[1];

    $sortkey = $_[2];

    $articleName = $_[3];

    unless ($category =~ m/^Category:\w+/)

    {

    myLog ("addCatToText(): Bad format on category.\n");

    die ("addCatToText(): Bad format on category.\n");

    }

    $catTmp = $category;

    # _ and spaces are equivalent and may be intermingled.

    $catTmp =~ s/Category:\s*/Category:\\s*/g;

    $catTmp =~ s/_/[_ ]/g;

    $catTmp =~ s%\(%\\\(%g;

    $catTmp =~ s%\)%\\\)%g;

    $catTmp =~ s%\.%\\\.%g;

    if (($text =~ m/(\[\[\s*${catTmp}\s*\|.*?\]\])/is)

    or ($text =~ m/(\[\[\s*${catTmp}\s*\]\])/is))

    {

    print "addCatToText(): $articleName is already in '$category'.\n";

    myLog ("addCatToText(): $articleName is already in '$category'.\n");

    print "\n1: '${1}'\n";

    print "\ncattmp: '${catTmp}'\n";

    return("fail", $text);

    }

    if ($text =~ m/^\s*\#REDIRECT/is)

    {

    print "addCatToText(): $articleName is a redirect!\n";

    myLog ("addCatToText(): $articleName is a redirect!\n");

    return("fail", $text);

    }

    # Convert underscore to spaces; this is human-readable.

    $category =~ s/_/ /g;

    # Add the category

    $text .= "\n$category";

    # Move the category to the right place

    ($text, $junk) = fixCategoryInterwiki($text);

    return ("success", $text);

    }

    1. THIS ROUTINE IS CURRENTLY UNUSED ###
    1. It will probably not be useful to you, anyway, since it requires
    2. pre-processed database dumps which are not included in Whobot.

    sub getPageOffline

    {

    my ($target, $result, $targetTmp);

    $target = $_[0];

    # Must run the following before using this function, from 200YMMDD/data:

    # cat entries.txt | perl ../../scripts/rewrite-entries.pl > entries-simple.txt

    # Even after this pre-processing, this routine is incredibly slow.

    # Set up and use MySQL instead if you care about speed.

    $target =~ s/\s/_/g;

    # Double escape the tab, once for Perl, once for the shell

    # -P means "treat as Perl regexp" (yay!)

    1. $result = `grep -P '^${target}\\t' /home/beland/wikipedia/20050107/data/entries-simple.txt`;

    $targetTmp = shellfix($target);

    $result = `grep -P '^${targetTmp}\\t' /home/beland/wikipedia/20050107/data/matches2.txt`;

    $result =~ s/^${target}\t//;

    $result =~ s/\\n/\n/g;

    return ($result, "junk");

    }

    1. --- CATEGORY AND INTERWIKI STYLE CLEANUP ROUTINES ---
    1. perl whobot.pl INTERWIKI_LOOP
    2. This command is for remedial cleanup only, and so is probably not
    3. useful anymore. This loop takes input of the form:
    4. "ArticleName\tBodyText\n{repeat...}" on STDIN.

    sub interwikiLoop

    {

    my ($article, $text, @junk, $enforceCategoryInterwikiCalls);

    while ()

    {

    if ($_ =~ m/^\s*$/)

    {

    next;

    }

    ($article, $text, @junk) = split ("\t", $_);

    $text =~ s/\\n/\n/g;

    enforceCategoryInterwiki($article, $text);

    $enforceCategoryInterwikiCalls++;

    print STDOUT "\r interwikiLoop iteration ".$enforceCategoryInterwikiCalls;

    }

    }

    1. perl whobot.pl ENFORCE_CATEGORY_INTERWIKI Article_name
    2. This function is for both external use. From the command line, use
    3. it to tidy up a live page's category and interwiki tags, specifying
    4. only the name of the page. It can also be used by interwikiLoop(),
    5. which supplies the full text on its own. It will post any changes
    6. to the live wiki that involve anything more than whitespace
    7. changes.
    8. This function also does {{msg:foo}} -> {{foo}} conversion, so that
    9. the article parsing algorithm can be recycled.

    sub enforceCategoryInterwiki

    {

    my ($articleName, $text, $editTime, $startTime, $textOrig, @newLines, $line,

    $textCopy, $textOrigCopy, $message, @junk, $diff, $token,

    $online);

    $articleName = $_[0];

    myLog("enforceCategoryInterwiki($articleName)\n");

    $text = $_[1];

    $online = 0;

    if ($text eq "")

    {

    $online = 1;

    ($text, $editTime, $startTime, $token) = getPage($articleName);

    }

    $textOrig = $text;

    ($text, $message) = fixCategoryInterwiki($text);

    if (substantiallyDifferent($text, $textOrig))

    {

    @newLines = split ("\n", $text);

    $textCopy = $text;

    $textOrigCopy = $textOrig;

    open (ONE, ">/tmp/article1.$$");

    print ONE $textOrig;

    close (ONE);

    open (TWO, ">/tmp/article2.$$");

    print TWO $text;

    close (TWO);

    $diff = `diff /tmp/article1.$$ /tmp/article2.$$`;

    unlink("/tmp/article1.$$");

    unlink("/tmp/article2.$$");

    myLog("*** $articleName - $message\n");

    myLog("*** DIFF FOR $articleName\n");

    myLog($diff);

    if ($online == 0)

    {

    # Isolate changed files for later runs

    open (FIXME, ">>./fixme.interwiki.txt.$$");

    $text =~ s/\t/\\t/g;

    $text =~ s/\n/\\n/g;

    print FIXME $articleName."\t".$text."\n";

    close (FIXME);

    }

    myLog($articleName." changed by fixCategoryInterwiki(): $message\n");

    print STDOUT $articleName." changed by fixCategoryInterwiki(): $message\n";

    if ($online == 1)

    {

    postPage ($articleName, $editTime, $startTime, $token, $text, $message, "yes");

    }

    }

    else

    {

    print STDOUT "--- No change for ${articleName}.\n";

    myLog ("--- No change for ${articleName}.\n");

    ### TEMPORARY ###

    ### Uncomment this line if you want category changes to

    ### trigger null edits. This is useful if you have have

    ### changed the category on a template, but due to a bug this

    ### does not actually move member articles until they are

    ### edited.

    postPage ($articleName, $editTime, $startTime, $token, $textOrig, "null edit", "yes");

    ### TEMPORARY ###

    }

    }

    sub substantiallyDifferent

    {

    my($a, $b);

    $a = $_[0];

    $b = $_[1];

    $a =~ s/\s//g;

    $b =~ s/\s//g;

    return ($a ne $b);

    }

    1. Given some wikicode as input, this function will tidy up the
    2. category and interwiki links and return the result and a comment
    3. suitable for edit summaries.

    sub fixCategoryInterwiki

    {

    my ($input, @segmentNames, @segmentContents, $langlist, $i,

    $message, $output, $flagForReview, $interwikiBlock,

    $categoryBlock, $flagError, $bodyBlock, $contents, $name,

    @interwikiNames, @interwikiContents, @categoryNames,

    @categoryContents, @bodyNames, @bodyContents, $bodyFlag,

    @bottomNames, @bottomContents, @segmentNamesNew,

    @segmentContentsNew, $lastContents, @stubContents,

    @stubNames, $stubBlock, $msgFlag);

    $input = $_[0];

    # The algorithm here is complex. The general idea is to split the

    # page in to segments, each of which is assigned a type, and then

    # to rearrange, consolidate, and frob the segments as needed.

    # Start with one segment that includes the whole page.

    @::segmentNames = ("bodyText");

    @::segmentContents = ($input);

    # Recognize and tag certain types of segments. The order of

    # processing is very important.

    metaTagInterwiki("nowiki", "^(.*?)(\s*.*?\s*)");

    metaTagInterwiki("comment", "^(.*?)(\\n?)");

    metaTagInterwiki("html", "^(.*?)(<.*?>\\n?)");

    metaTagInterwiki("category", "^(.*?)(\\[\\[\\s*Category\\s*:\\s*.*?\\]\\]\\n?)");

    $langlist = `type langlist`;

    $langlist =~ s/^\s*//s;

    $langlist =~ s/\s*$//s;

    $langlist =~ s/\n/\|/gs;

    $langlist .= "|minnan|zh\-cn|zh\-tw|nb";

    metaTagInterwiki("interwiki", "^(.*?)(\\[\\[\\s*($langlist)\\s*:\\s*.*?\\]\\]\\n?)");

    metaTagInterwiki("tag", "^(.*?)(\{\{.*?\}\})");

    # Allow category and interwiki segments to be followed by HTML

    # comments only (plus any intervening whitespace).

    $i = 0;

    while ($i < @::segmentNames)

    {

    $name = $::segmentNames[$i];

    $contents = $::segmentContents[$i];

    # {{msg:foo}} -> {{foo}} conversion

    if (($name eq "tag") and

    ($contents =~ m/^{{msg:(.*?)}}/))

    {

    $msgFlag = 1;

    $contents =~ s/^{{msg:(.*?)}}/{{$1}}/;

    }

    if (($name eq "category") or ($name eq "interwiki"))

    {

    if (!($contents =~ m/\n/) and ($::segmentNames[$i+1] eq "comment"))

    {

    push (@segmentNamesNew, $name);

    push (@segmentContentsNew, $contents.$::segmentContents[$i+1]);

    $i += 2;

    1. DEBUG print "AAA - ".$contents.$::segmentContents[$i+1]);

    next;

    }

    if (!($contents =~ m/\n/)

    and ($::segmentNames[$i+1] eq "bodyText")

    and ($::segmentContents[$i+1] =~ m/^\s*$/)

    and !($::segmentContents[$i+1] =~ m/^\n$/)

    and ($::segmentNames[$i+2] eq "comment")

    )

    {

    push (@segmentNamesNew, $name);

    push (@segmentContentsNew,

    $contents.$::segmentContents[$i+1].$::segmentContents[$i+2]);

    $i += 3;

    1. DEBUG print "BBB".$contents.$::segmentContents[$i+1].$::segmentContents[$i+2]);

    next;

    }

    # Consolidate with any following whitespace

    if (($::segmentNames[$i+1] eq "bodyText")

    and ($::segmentContents[$i+1] =~ m/^\s*$/)

    )

    {

    push (@segmentNamesNew, $name);

    push (@segmentContentsNew,

    $contents.$::segmentContents[$i+1]);

    $i += 2;

    next;

    }

    }

    push (@segmentNamesNew, $name);

    push (@segmentContentsNew, $contents);

    $i++;

    }

    # Clean up results

    @::segmentNames = @segmentNamesNew;

    @::segmentContents = @segmentContentsNew;

    @segmentContentsNew = ();

    @segmentNamesNew = ();

    # Move category and interwiki tags that precede the body text (at

    # the top of the page) to the bottom of the page.

    $bodyFlag = 0;

    foreach $i (0 ... @::segmentNames-1)

    {

    $name = $::segmentNames[$i];

    $contents = $::segmentContents[$i];

    if ($bodyFlag == 1)

    {

    push (@segmentNamesNew, $name);

    push (@segmentContentsNew, $contents);

    }

    elsif (($name eq "category") or ($name eq "interwiki"))

    {

    push (@bottomNames, $name);

    push (@bottomContents, $contents);

    }

    else

    {

    push (@segmentNamesNew, $name);

    push (@segmentContentsNew, $contents);

    $bodyFlag = 1;

    }

    }

    # Clean up results

    @::segmentNames = (@segmentNamesNew, @bottomNames);

    @::segmentContents = (@segmentContentsNew, @bottomContents);

    @segmentContentsNew = ();

    @segmentNamesNew = ();

    @bottomNames = ();

    @bottomContents = ();

    # Starting at the bottom of the page, isolate category, interwiki,

    # and body text. If categories or interwiki links are mixed with

    # body text, flag for human review.

    ### DEBUG ###

    # foreach $i (0 ... @::segmentNames-1)

    # {

    # print "---$i ".$::segmentNames[$i]."---\n";

    # print "%%%".$::segmentContents[$i]."%%%\n";

    # }

    ### DEBUG ###

    ### DEBUG ###

    #my ($first);

    #$first = 1;

    ### DEBUG ###

    $bodyFlag = 0;

    $flagForReview = 0;

    foreach $i (reverse(0 ... @::segmentNames-1))

    {

    $name = $::segmentNames[$i];

    $contents = $::segmentContents[$i];

    if (($name eq "category") and ($bodyFlag == 0))

    {

    # Push in reverse

    @categoryNames = ($name, @categoryNames);

    @categoryContents = ($contents, @categoryContents);

    next;

    }

    elsif (($name eq "interwiki") and ($bodyFlag == 0))

    {

    # Push in reverse

    @interwikiNames = ($name, @interwikiNames);

    @interwikiContents = ($contents, @interwikiContents);

    next;

    }

    elsif (($bodyFlag == 0)

    and ($name eq "tag")

    and (($contents =~ m/\{\{[ \w\-]+[\- ]?stub\}\}/) or

    ($contents =~ m/\{\{[ \w\-]+[\- ]?stub\|.*?\}\}/)))

    {

    ### IF THIS IS A STUB TAG AND WE ARE STILL $bodyFlag == 0,

    ### THEN ADD THIS TO $stubBlock!

    # Canonicalize by making {{msg:Foo}} into {{Foo}}

    s/^\{\{\s*msg:(.*?)\}\}/\{\{$1\}\}/i;

    # Push in reverse

    @stubNames = ($name, @stubNames);

    @stubContents = ($contents, @stubContents);

    next;

    }

    elsif (($name eq "category") or ($name eq "interwiki"))

    # bodyFlag implicitly == 1

    {

    if ($flagForReview == 0)

    {

    $flagForReview = 1;

    $lastContents =~ s/^\s*//s;

    $lastContents =~ s/\s*$//s;

    $flagError = substr ($lastContents, 0, 30);

    }

    # Drop down to push onto main body stack.

    }

    # Handle this below instead.

    ## Skip whitespace

    #if (($contents =~ m/^\s*$/s) and ($bodyFlag == 0))

    #{

    # next;

    #}

    # Delete these comments

    if (($bodyFlag == 0) and ($name == "comment"))

    {

    if (

    ($contents =~ m//i) or

    ($contents =~ m//i) or

    ($contents =~ m//i) or

    ($contents =~ m//i) or

    ($contents =~ m//i) or

    ($contents =~ m//i)

    )

    {

    ### DEBUG ###

    #print STDOUT ("YELP!\n");

    #

    #foreach $i (0 ... @bodyNames-1)

    #{

    # print "---$i ".$bodyNames[$i]."---\n";

    # print "%%%".$bodyContents[$i]."%%%\n";

    #}

    #

    #print STDOUT ("END-YELP!");

    ### DEBUG ###

    next;

    }

    }

    # Push onto main body stack (in reverse).

    @bodyNames = ($name, @bodyNames);

    @bodyContents = ($contents, @bodyContents);

    ### DEBUG ###

    #if (($flagForReview == 1) and ($first == 1))

    #{

    # $first = 0;

    # print "\@\@\@${lastContents}\#\#\#\n";

    #}

    ### DEBUG ###

    # This should let tags mixed in with the category and

    # interwiki links (not comingled with body text) bubble up.

    unless (($contents =~ m/^\s*$/s) or ($name eq "tag"))

    {

    $bodyFlag = 1;

    }

    $lastContents = $contents;

    }

    ### DEBUG ###

    1. foreach $i (0 ... @bodyNames-1)
    2. {
    3. print "---$i ".$bodyNames[$i]."---\n";
    4. print "%%%".$bodyContents[$i]."%%%\n";
    5. }
    6. foreach $i (0 ... @categoryNames-1)
    7. {
    8. print "---$i ".$categoryNames[$i]."---\n";
    9. print "^^^".$categoryContents[$i]."^^^\n";
    10. }
    11. foreach $i (0 ... @interwikiNames-1)
    12. {
    13. print "---$i ".$interwikiNames[$i]."---\n";
    14. print "&&&".$interwikiContents[$i]."&&&\n";
    15. }

    ### DEBUG ###

    # Assemble body text, category, interwiki, and stub arrays into strings

    foreach $i (0 ... @bodyNames-1)

    {

    $name = $bodyNames[$i];

    $contents = $bodyContents[$i];

    $bodyBlock .= $contents;

    }

    foreach $i (0 ... @categoryNames-1)

    {

    $name = $categoryNames[$i];

    $contents = $categoryContents[$i];

    # Enforce style conventions

    $contents =~ s/\[\[category\s*:\s*/\[\[Category:/i;

    # Enforce a single newline at the end of each category line.

    $contents =~ s/\s*$//;

    $categoryBlock .= $contents."\n";

    }

    foreach $i (0 ... @interwikiNames-1)

    {

    $name = $interwikiNames[$i];

    $contents = $interwikiContents[$i];

    # Canonicalize minnan to zh-min-nan, since that's what's in

    # the officially distributed langlist.

    $contents =~ s/^\[\[minnan:/\[\[zh-min-nan:/;

    # Canonicalize zh-ch, Chinese (simplified) and zn-tw, Chinese

    # (traditional) to "zh"; the distinction is being managed

    # implicitly by software now, not explicitly in wikicode.

    $contents =~ s/^\[\[zh-cn:/\[\[zh:/g;

    $contents =~ s/^\[\[zh-tw:/\[\[zh:/g;

    # Canonicalize nb to no

    $contents =~ s/^\[\[nb:/\[\[no:/g;

    # Canonicalize dk to da

    $contents =~ s/^\[\[dk:/\[\[da:/g;

    # Enforce a single newline at the end of each interwiki line.

    $contents =~ s/\s*$//;

    $interwikiBlock .= $contents."\n";

    }

    foreach $i (0 ... @stubNames-1)

    {

    $name = $stubNames[$i];

    $contents = $stubContents[$i];

    # Enforce a single newline at the end of each stub line.

    $contents =~ s/\s*$//;

    $contents =~ s/^\s*//;

    $stubBlock .= $contents."\n";

    }

    # Minimize interblock whitespace

    $bodyBlock =~ s/^\s*//s;

    $bodyBlock =~ s/\s*$//s;

    $categoryBlock =~ s/^\s*//s;

    $categoryBlock =~ s/\s*$//s;

    $interwikiBlock =~ s/^\s*//s;

    $interwikiBlock =~ s/\s*$//s;

    $stubBlock =~ s/^\s*//s;

    $stubBlock =~ s/\s*$//s;

    # Assemble the three blocks into a single string, flagging for

    # human review if necessary.

    $output = "";

    if ($bodyBlock ne "")

    {

    $output .= $bodyBlock."\n\n";

    }

    if (($flagForReview == 1)

    and !($input =~ m/\{\{interwiki-category-check/)

    and !($input =~ m/\{\{split/)

    and !($input =~ m/\[\[Category:Pages for deletion\]\]/))

    {

    $output .= "{{interwiki-category-check|${flagError}}}\n\n";

    }

    if ($categoryBlock ne "")

    {

    $output .= $categoryBlock."\n";

    }

    if ($interwikiBlock ne "")

    {

    1. $output .= "\n".$interwikiBlock."\n";

    $output .= $interwikiBlock."\n";

    }

    if ($stubBlock ne "")

    {

    $output .= $stubBlock."\n";

    }

    if ($input ne $output)

    {

    $message = "Minor category, interwiki, or template style cleanup";

    if ($flagForReview == 1)

    {

    $message = "Flagged for manual review of category/interwiki style";

    }

    if ($msgFlag == 1)

    {

    $message .= "; {{msg:foo}} -> {{foo}} conversion for MediaWiki 1.5+ compatibility";

    }

    }

    else

    {

    $message = "No change";

    }

    return($output, $message);

    }

    1. sub displayInterwiki
    2. {
    3. my ($i);
    4. ## THIS FUNCTION CANNOT BE CALLED DUE TO SCOPING; YOU MUST MANUALLY
    5. ## COPY THIS TEXT INTO fixCategoryInterwiki(). IT IS ONLY USEFUL
    6. ## FOR DIAGNOSTIC PURPOSES.
    7. foreach $i (0 ... @::segmentNames-1)
    8. {
    9. print "---$i ".$::segmentNames[$i]."---\n";
    10. print "%%%".$::segmentContents[$i]."%%%\n";
    11. }
    12. }
    1. A subroutine of fixCategoryInterwiki(), this function isolates
    2. certain parts of existing segments based on a regular expression
    3. pattern, and tags them with the supplied name (which indicates their
    4. type). Sorry for the global variables.

    sub metaTagInterwiki

    {

    my ($tag, $pattern, $i, $meta, $body, @segmentNamesNew,

    @segmentContentsNew, $name, $contents, $bodyText, );

    $tag = $_[0];

    $pattern = $_[1];

    foreach $i (0 ... @::segmentNames-1)

    {

    $name = $::segmentNames[$i];

    $contents = $::segmentContents[$i];

    unless ($name eq "bodyText")

    {

    push (@segmentNamesNew, $name);

    push (@segmentContentsNew, $contents);

    next;

    }

    while (1)

    {

    if ($contents =~ m%$pattern%is)

    {

    $bodyText = $1;

    $meta = $2;

    if ($bodyText ne "")

    {

    push (@segmentNamesNew, "bodyText");

    push (@segmentContentsNew, $bodyText);

    }

    push (@segmentNamesNew, $tag);

    push (@segmentContentsNew, $meta);

    $contents =~ s/\Q${bodyText}${meta}\E//s;

    }

    else

    {

    if ($contents ne "")

    {

    push (@segmentNamesNew, $name);

    push (@segmentContentsNew, $contents);

    }

    last;

    }

    }

    }

    @::segmentNames = @segmentNamesNew;

    @::segmentContents = @segmentContentsNew;

    @segmentContentsNew = ();

    @segmentNamesNew = ();

    }

    sub nullEdit

    {

    my ($text, $articleName, $comment, $editTime, $startTime, $token);

    $articleName = $_[0];

    print "nullEdit($articleName)\n";

    myLog ("nullEdit($articleName)\n");

    ($text, $editTime, $startTime, $token) = getPage($articleName);

    postPage ($articleName, $editTime, $startTime, $token, $text, "null edit");

    }

    sub cleanupDate

    {

    my ($article, @articles);

    # Get all articles from Category:Wikipedia cleanup

    @articles = getCategoryArticles ("Category:Wikipedia cleanup");

    1. @articles = reverse (sort(@articles));

    @articles = (sort(@articles));

    foreach $article (@articles)

    {

    if (($article =~ m/^Wikipedia:/)

    or ($article =~ m/^Template:/)

    or ($article =~ m/^User:/)

    or ($article =~ m/talk:/i)

    )

    {

    next;

    }

    cleanupDateArticle($article);

    limit();

    }

    }

    sub cleanupDateArticle #($target)

    {

    my (@result, $link, $currentMonth, $currentYear, $junk, $line,

    $month, $year, $found, $lineCounter, $target);

    $target = $_[0];

    print "cleanupDateArticle($target)\n";

    @result = parseHistory($target);

    ($currentMonth, $currentYear, $junk) = split(" ", $result[0]);

    $found = "";

    foreach $line (@result)

    {

    $lineCounter++;

    ($month, $year, $link) = split(" ", $line);

    if (($month eq $currentMonth)

    and ($year eq $currentYear))

    {

    1. print "$month $year - SKIP\n";

    next;

    }

    1. Skip this, because it produces false positives on articles that were
    2. protected at the end of last month, but no longer are. The correct
    3. thing to do is to check if an article is CURRENTLY protected by
    4. fetching the current version, but this seems like a waste of network
    5. resources.
    1. if (checkForTag("protected", $link) eq "yes")
    2. {
    3. print "$target is {{protected}}; skipping\n";
    4. myLog("$target is {{protected}}; skipping\n");
    5. return();
    6. }

    if (checkForTag("sectionclean", $link) eq "yes")

    {

    print "$target has {{sectionclean}}\n";

    myLog("$target has {{sectionclean}}\n");

    nullEdit($target);

    return();

    }

    if (checkForTag("Sect-Cleanup", $link) eq "yes")

    {

    print "$target has {{Sect-Cleanup}}\n";

    myLog("$target has {{Sect-Cleanup}}\n");

    nullEdit($target);

    return();

    }

    if (checkForTag("section cleanup", $link) eq "yes")

    {

    print "$target has {{section cleanup}}\n";

    myLog("$target has {{section cleanup}}\n");

    nullEdit($target);

    return();

    }

    if (checkForTag("sectcleanup", $link) eq "yes")

    {

    print "$target has {{sectcleanup}}\n";

    myLog("$target has {{sectcleanup}}\n");

    nullEdit($target);

    return();

    }

    if (checkForTag("cleanup-section", $link) eq "yes")

    {

    print "$target has {{cleanup-section}}\n";

    myLog("$target has {{cleanup-section}}\n");

    nullEdit($target);

    return();

    }

    if (checkForTag("cleanup-list", $link) eq "yes")

    {

    print "$target has {{cleanup-list}}\n";

    myLog("$target has {{cleanup-list}}\n");

    nullEdit($target);

    return();

    }

    if (checkForTag("cleanup-nonsense", $link) eq "yes")

    {

    print "$target has {{cleanup-nonsense}}\n";

    myLog("$target has {{cleanup-nonsense}}\n");

    nullEdit($target);

    return();

    }

    if ((checkForTag("cleanup", $link) eq "yes") or

    (checkForTag("clean", $link) eq "yes") or

    (checkForTag("CU", $link) eq "yes") or

    (checkForTag("cu", $link) eq "yes") or

    (checkForTag("cleanup-quality", $link) eq "yes") or

    (checkForTag("tidy", $link) eq "yes"))

    {

    $currentMonth = $month;

    $currentYear = $year;

    1. print "$month $year - YES\n";

    next;

    }

    else

    {

    1. print "$month $year - NO\n";
    2. print "Tag added $currentMonth $currentYear\n";

    $found = "Tag added $currentMonth $currentYear\n";

    last;

    }

    }

    if ($found eq "")

    {

    1. print "HISTORY EXHAUSTED\n";

    if ($lineCounter < 498)

    {

    $found = "Tag added $currentMonth $currentYear\n";

    }

    else

    {

    1. print "Unable to determine when tag was added to $target.\n";

    myLog("Unable to determine when tag was added to $target.\n");

    die("Unable to determine when tag was added to $target.\n");

    }

    }

    if ($found ne "")

    {

    changeTag("cleanup", "cleanup-date\|${currentMonth} ${currentYear}", $target)

    || changeTag("tidy", "cleanup-date\|${currentMonth} ${currentYear}", $target)

    || changeTag("CU", "cleanup-date\|${currentMonth} ${currentYear}", $target)

    || changeTag("cu", "cleanup-date\|${currentMonth} ${currentYear}", $target)

    || changeTag("cleanup-quality", "cleanup-date\|${currentMonth} ${currentYear}", $target)

    || changeTag("clean", "cleanup-date\|${currentMonth} ${currentYear}", $target)

    || nullEdit($target);

    }

    }

    sub changeTag

    {

    my ($tagFrom, $tagFromUpper, $tagTo, $tagToUpper, $articleName,

    $editTime, $startTime, $text, $token, $comment, $junk);

    $tagFrom = $_[0]; # "cleanup"

    $tagTo = $_[1]; # "cleanup-date|August 2005"

    $articleName = $_[2]; # Article name

    print "changeTag (${tagFrom}, ${tagTo}, ${articleName})\n";

    myLog("changeTag (${tagFrom}, ${tagTo}, ${articleName})\n");

    $tagFromUpper = ucfirst($tagFrom);

    $tagToUpper = ucfirst($tagTo);

    if ($articleName =~ m/^\s*$/)

    {

    myLog("changeTag(): Null target.");

    die("changeTag(): Null target.");

    }

    ($text, $editTime, $startTime, $token) = getPage($articleName);

    unless (($text =~ m/\{\{\s*\Q$tagFrom\E\s*\}\}/)

    or ($text =~ m/\{\{\s*\Q$tagFromUpper\E\s*\}\}/)

    or ($text =~ m/\{\{\s*\Qmsg:$tagFrom\E\s*\}\}/)

    or ($text =~ m/\{\{\s*\Qmsg:$tagFromUpper\E\s*\}\}/)

    or ($text =~ m/\{\{\s*\QTemplate:$tagFrom\E\s*\}\}/)

    or ($text =~ m/\{\{\s*\QTemplate:$tagFromUpper\E\s*\}\}/)

    or ($text =~ m/\{\{\s*\Q$tagFrom\E\|.*?\s*\}\}/)

    or ($text =~ m/\{\{\s*\Q$tagFromUpper\E\|.*?\s*\}\}/)

    )

    {

    myLog("changeTag(): {{$tagFrom}} is not in $articleName.\n");

    print "changeTag(): {{$tagFrom}} is not in $articleName.\n";

    # die("changeTag(): {{$tagFrom}} is not in $articleName.\n");

    ### TEMPORARY ###

    # Just skip articles with {{tidy}}, {{clean}} {{sectionclean}}, {{advert}}, etc.

    sleep(1); # READ THROTTLE!

    return(0);

    }

    if (($text =~ m/\{\{\s*\Q$tagTo\E\s*\}\}/)

    or ($text =~ m/\{\{\s*\Q$tagToUpper\E\s*\}\}/))

    {

    myLog("changeTag(): $articleName already contains {{$tagTo}}.");

    die("changeTag(): $articleName already contains {{$tagTo}}.");

    }

    if ($text =~ m/^\s*\#REDIRECT/is)

    {

    myLog ("changeTag.a(): $articleName is a redirect!\n");

    die ("changeTag.a(): $articleName is a redirect!\n");

    sleep(1); # READ THROTTLE!

    return(0);

    }

    # Escape special characters

    $tagFrom =~ s%\(%\\(%g;

    $tagFrom =~ s%\)%\\)%g;

    $tagFrom =~ s%\'%\\\'%g;

    # We're lazy and don't fully parse the document to properly check

    # for escaped tags, so there may be some unnecssary aborts from

    # the following, but they are rare and easily overridden by

    # manually editing the page in question.

    if (($text =~ m/.*?\Q$tagFrom\E.*?<\/nowiki>/is) or

    ($text =~ m/

    .*?\Q$tagFrom\E.*?<\/pre>/is))

    #

    {

    myLog ("changeTag.r(): $articleName has a dangerous nowiki or pre tag!\n");

    die ("changeTag.r(): $articleName has a dangerous nowiki or pre tag!\n");

    }

    # Make the swap!

    $text =~ s/\{\{\s*\Q$tagFrom\E\s*\}\}/{{${tagTo}}}/g;

    $text =~ s/\{\{\s*\Q$tagFromUpper\E\s*\}\}/{{${tagTo}}}/g;

    $text =~ s/\{\{\s*\Qmsg:$tagFrom\E\s*\}\}/{{${tagTo}}}/g;

    $text =~ s/\{\{\s*\Qmsg:$tagFromUpper\E\s*\}\}/{{${tagTo}}}/g;

    $text =~ s/\{\{\s*\QTemplate:$tagFrom\E\s*\}\}/{{${tagTo}}}/g;

    $text =~ s/\{\{\s*\QTemplate:$tagFromUpper\E\s*\}\}/{{${tagTo}}}/g;

    $text =~ s/\{\{\s*\Q$tagFrom\E\|(.*?)\s*\}\}/{{${tagTo}}}/g;

    $text =~ s/\{\{\s*\Q$tagFromUpper\E\|(.*?)\s*\}\}/{{${tagTo}}}/g;

    # Tidy up the article in general

    ($text, $junk) = fixCategoryInterwiki($text);

    # Post the changes

    $comment = "Changing \{\{${tagFrom}\}\} to \{\{${tagTo}\}\}";

    postPage ($articleName, $editTime, $startTime, $token, $text, $comment, "yes");

    return (1);

    }

    sub parseHistory

    {

    my ($pageName, $html, @lines, $line, $date, $month, $year,

    $htmlCopy, $link, @result);

    $pageName = $_[0];

    $html = getURL("http://en.wikipedia.org/w/wiki.phtml?title=${pageName}&action=history&limit=500");

    $htmlCopy = $html;

    $html =~ s%^.*?

      %%s;

      $html =~ s%(.*?)

    .*$%$1%s;

    $html =~ s%

  • \s*%%s;

    @lines = split ("", $html);

    foreach $line (@lines)

    {

    $line =~ s/\n/ /g;

    if ($line =~ m/^\s*$/)

    {

    next;

    }

    $line =~ s%.*?$%%;

    $line =~ s%^.*?Select a newer version for comparison%%;

    $line =~ s%^.*?Select a older version for comparison%%;

    $line =~ s%^.*?name="diff" />%%;

    1. print "LINE: ".$line."\n";

    $line =~ m%(.*?)%;

    $link = $1;

    $date = $3;

    1. print $link." / $date\n";

    if ($date =~ m/Jan/)

    {

    $month = "January";

    }

    elsif ($date =~ m/Feb/)

    {

    $month = "February";

    }

    elsif ($date =~ m/Mar/)

    {

    $month = "March";

    }

    elsif ($date =~ m/Apr/)

    {

    $month = "April";

    }

    elsif ($date =~ m/May/)

    {

    $month = "May";

    }

    elsif ($date =~ m/Jun/)

    {

    $month = "June";

    }

    elsif ($date =~ m/Jul/)

    {

    $month = "July";

    }

    elsif ($date =~ m/Aug/)

    {

    $month = "August";

    }

    elsif ($date =~ m/Sep/)

    {

    $month = "September";

    }

    elsif ($date =~ m/Oct/)

    {

    $month = "October";

    }

    elsif ($date =~ m/Nov/)

    {

    $month = "November";

    }

    elsif ($date =~ m/Dec/)

    {

    $month = "December";

    }

    else

    {

    $month = "Unknown month";

    myLog ("Unknown month - parse failure! $line\nHTML:\n$html\n");

    die ("Unknown month - parse failure! (see log) LINE: $line\n");

    }

    $date =~ m/(\d\d\d\d)/;

    $year = $1;

    @result = (@result, "$month $year $link");

    }

    return (@result);

    }

    sub checkForTag #($targetURLWithOldIDAttached)

    {

    my ($tag, $target, $text);

    $tag = $_[0];

    $target = $_[1];

    # Must be absolute; assuming English Wikipedia here.

    $target =~ s%^/w/wiki.phtml%http://en.wikipedia.org/w/wiki.phtml%;

    # Decode HTML entities in links

    $target =~ s/\&/\&/g;

    if ($target eq $::cachedTarget)

    {

    $text = $::cachedText;

    }

    else

    {

    $text = getURL ($target."&action=edit");

    $::cachedTarget = $target;

    $::cachedText = $text;

    }

    if ($text =~ m/\{\{\s*\Q$tag\E\s*\}\}/)

    {

    1. print $text; die "Cough!";

    return "yes";

    }

    $tag = ucfirst($tag);

    if ($text =~ m/\{\{\s*\Q$tag\E\s*\}\}/)

    {

    1. print "\n\nSneeze!\n\n"; print $text."\n\n";

    return "yes";

    }

    return "no";

    }

    sub getURL #($target)

    {

    # Read throttle!

    sleep (1);

    my ($attemptStartTime, $attemptFinishTime, $request, $response, $reply, $url);

    $url = $_[0];

    # Monitor wiki server responsiveness

    $attemptStartTime = Time::HiRes::time();

    # Create a request-object

    print "GET ${url}\n";

    myLog("GET ${url}\n");

    $request = HTTP::Request->new(GET => "${url}");

    $response = $::ua->request($request);

    if ($response->is_success)

    {

    $reply = $response->content;

    # Monitor wiki server responsiveness

    $attemptFinishTime = Time::HiRes::time();

    retry ("success", "getURL", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));

    # This may or may not actually work

    $::ua->cookie_jar->save();

    return ($reply);

    }

    else

    {

    myLog ("getURL(): HTTP ERR (".$response->status_line.") ${url}\n".$response->content."\n");

    print ("getURL(): HTTP ERR (".$response->status_line.") ${url}\n".$response->content."\n");

    # 50X HTTP errors mean there is a problem connecting to the wiki server

    if (($response->status_line =~ m/^500/)

    or ($response->status_line =~ m/^502/)

    or ($response->status_line =~ m/^503/))

    {

    return(retry("getURL", @_));

    }

    else

    {

    # Unhandled HTTP response

    die ("getURL(): HTTP ERR (".$response->status_line.") ${url}\n");

    }

    }

    }

    sub opentaskUpdate

    {

    my ($target, $historyFile, $opentaskText, $editTime, $startTime,

    $token, $key, $historyDump);

    $target = "User:Beland/workspace";

    $historyFile = "/home/beland/wikipedia/pearle-wisebot/opentask-history.pl";

    ($opentaskText, $editTime, $startTime, $token) = getPage($target);

    eval(`type $historyFile`);

    $opentaskText = doOpentaskUpdate("NPOV",

    "Category:NPOV disputes",

    $opentaskText);

    $opentaskText = doOpentaskUpdate("COPYEDIT",

    "Category:Wikipedia articles needing copy edit",

    $opentaskText);

    $opentaskText = doOpentaskUpdate("WIKIFY",

    "Category:Articles that need to be wikified",

    $opentaskText);

    $opentaskText = doOpentaskUpdate("MERGE",

    "Category:Articles to be merged",

    $opentaskText);

    # Dump history

    $historyDump = "\%::history = (\n";

    foreach $key (sort(keys(%::history)))

    {

    $historyDump .= "\"${key}\" => \"".$::history{$key}."\",\n";

    }

    $historyDump =~ s/,\n$//s;

    $historyDump .= "\n)\n";

    open (HISTORY, ">".$historyFile);

    print HISTORY $historyDump;

    close (HISTORY);

    postPage ($target, $editTime, $startTime, $token, $opentaskText, "Automatic rotation of NPOV, copyedit, wikify, and merge", "yes");

    }

    sub doOpentaskUpdate

    {

    my ($categoryID, $sourceCategory, $opentaskText, @articles,

    $article, %rank, $featuredString, $characterLimit,

    $featuredStringTmp);

    $categoryID = $_[0];

    $sourceCategory = $_[1];

    $opentaskText = $_[2];

    $characterLimit = 100;

    @articles = getCategoryArticles ($sourceCategory);

    # Shuffle and clean up article names; and exclude unwanted entries

    foreach $article (@articles)

    {

    if (($article =~ m/^Wikipedia:/)

    or ($article =~ m/^Template:/)

    or ($article =~ m/^User:/)

    or ($article =~ m/talk:/i)

    )

    {

    next;

    }

    $article = urlDecode($article);

    $article =~ s/_/ /g;

    $rank{$article} = rand();

    }

    # Pick as many articles as will fit in the space allowed

    foreach $article (sort {$rank{$a} <=> $rank {$b}} (keys(%rank)))

    {

    if (length($article)+1 < $characterLimit - length($featuredString))

    {

    $featuredString .= "${article},\n";

    # Record how many times each article is featured.

    $::history{"${article}-${categoryID}"}++;

    }

    }

    $featuredStringTmp = $featuredString;

    $featuredStringTmp =~ s/\n/ /g;

    print "Featuring: $featuredStringTmp\n";

    myLog("Featuring: $featuredStringTmp\n");

    # Insert into actual page text and finish

    $opentaskText =~ s/().*?()/${1}\n$featuredString${2}/gs;

    return ($opentaskText);

    }