User:Whobot/code
- IMPORTANT ###
- This code is released into the public domain. CONTRIBUTIONS are
- welcome, but will also hereby be RELEASED TO THE PUBLIC DOMAIN.
- See the documentation distributed with this code for important
- warnings and caveats.
- Cloned from Pearle Wisebot, modifications by User:Who
use strict;
use Time::HiRes;
- The following may be helpful in debugging character encoding
- problems.
- use utf8;
- use encoding 'utf8';
- Initialization
use LWP::UserAgent;
use HTTP::Cookies;
use HTTP::Request::Common qw(POST);
use HTML::Entities;
print "\n";
- LWP:UserAgent is a library which allows us to create a "user agent"
- object that handles the low-level details of making HTTP requests.
$::ua = LWP::UserAgent->new(timeout => 300);
$::ua->agent("Whobot Wisebot/0.1");
$::ua->cookie_jar(HTTP::Cookies->new(file => "cookies.whobot.txt", autosave => 1));
$::ua->cookie_jar->load();
- Hot pipes
$| = 1;
- ---
- test();
- sub test
- {
- my ($target, $text, $editTime, $startTime, $token);
- $target = "Wikipedia:Sandbox";
- ($text, $editTime, $startTime, $token) = getPage($target);
- print $text;
- $text .= "\Eat my electrons! -- Whobot\n";
- print "---\n";
- postPage ($target, $editTime, $startTime, $token, $text, "Test 008");
- die ("Test complete.");
- }
- ---
interpretCommand(@ARGV);
sub interpretCommand
{
my ($command, @arguments, $i, $line, $argument, @newArguments,
$from, $to, $page, $pageCopy);
($command, @arguments) = @_;
$command =~ s/\*\s*//;
myLog(`date /t`);
myLog ($command.": ".join(" ", @arguments)."\n");
print `date /t`;
print $command.": ".join(" ", @arguments)."\n";
if ($command eq "POST_STDIN")
{
if ($arguments[2] ne "")
{
myLog ("Too many arguments to POST_STDIN.\n");
die ("Too many arguments to POST_STDIN.\n");
}
postSTDIN($arguments[0],$arguments[1]);
}
elsif ($command eq "POST_STDIN_NULLOK")
{
if ($arguments[2] ne "")
{
myLog ("Too many arguments to POST_STDIN.\n");
die ("Too many arguments to POST_STDIN.\n");
}
$::nullOK = "yes";
postSTDIN($arguments[0],$arguments[1]);
$::nullOK = "no";
}
elsif ($command eq "MOVE_CONTENTS")
{
if ($arguments[3] ne "")
{
if (($arguments[4] eq "")
and ($arguments[1] eq "->"))
{
moveCategoryContents($arguments[0],$arguments[2],$arguments[3],"");
return();
}
else
{
myLog ("Too many arguments to MOVE_CONTENTS.\n");
die ("Too many arguments to MOVE_CONTENTS.\n");
}
}
moveCategoryContents($arguments[0],$arguments[1],"no","yes",$arguments[2]);
}
elsif ($command eq "MOVE_CONTENTS_INCL_CATS")
{
if ($arguments[3] ne "")
{
if (($arguments[4] eq "")
and ($arguments[1] eq "->"))
{
moveCategoryContents($arguments[0],$arguments[2],"yes","yes",$arguments[3]);
return();
}
else
{
myLog ("Too many arguments to MOVE_CONTENTS_INCL_CATS.\n");
die ("Too many arguments to MOVE_CONTENTS_INCL_CATS.\n");
}
}
moveCategoryContents($arguments[0],$arguments[1],"yes","yes",$arguments[2],"");
}
elsif ($command eq "REMOVE_X_FROM_CAT")
{
if ($arguments[3] ne "")
{
myLog ("Too many arguments to REMOVE_X_FROM_CAT.\n");
die ("Too many arguments to REMOVE_X_FROM_CAT.\n");
}
removeXFromCat($arguments[0],$arguments[1],$arguments[2],"");
}
elsif ($command eq "DEPOPULATE_CAT")
{
if ($arguments[1] ne "")
{
if (($arguments[2] eq "")
and ($arguments[1] eq "special"))
{
depopulateCat($arguments[0],"special");
}
else
{
myLog ("Too many arguments to DEPOPULATE_CAT.\n");
die ("Too many arguments to DEPOPULATE_CAT.\n");
}
}
depopulateCat($arguments[0]);
}
elsif ($command eq "PRINT_WIKITEXT")
{
if ($arguments[1] ne "")
{
myLog ("Too many arguments to PRINT_WIKITEXT.\n");
die ("Too many arguments to PRINT_WIKITEXT.\n");
}
printWikitext($arguments[0]);
}
elsif ($command eq "ADD_CFD_TAG")
{
if ($arguments[1] ne "")
{
myLog ("Too many arguments to ADD_CFD_TAG.\n");
die ("Too many arguments to ADD_CFD_TAG.\n");
}
addCFDTag($arguments[0]);
}
elsif ($command eq "ADD_CFDU_TAG")
{
if ($arguments[2] ne "")
{
myLog ("Too many arguments to ADD_CFDU_TAG.\n");
die ("Too many arguments to ADD_CFDU_TAG.\n");
}
addCFDUTag($arguments[0],$arguments[1],"");
}
elsif ($command eq "REMOVE_CFD_TAG")
{
if ($arguments[1] ne "")
{
myLog ("Too many arguments to REMOVE_CFD_TAG.\n");
die ("Too many arguments to REMOVE_CFD_TAG.\n");
}
removeCFDTag($arguments[0]);
}
elsif ($command eq "REMOVE_CFDU_TAG")
{
if ($arguments[1] ne "")
{
myLog ("Too many arguments to REMOVE_CFDU_TAG.\n");
die ("Too many arguments to REMOVE_CFDU_TAG.\n");
}
removeCFDUTag($arguments[0]);
}
elsif ($command eq "ADD_TO_CAT")
{
if ($arguments[2] ne "")
{
myLog ("Too many arguments to ADD_TO_CAT.\n");
die ("Too many arguments to ADD_TO_CAT.\n");
}
addToCat($arguments[0],$arguments[1],"");
}
elsif ($command eq "ADD_TO_CAT_NULL_OK")
{
if ($arguments[2] ne "")
{
myLog ("Too many arguments to ADD_TO_CAT_NULL_OK.\n");
die ("Too many arguments to ADD_TO_CAT_NULL_OK.\n");
}
$::nullOK = "yes";
addToCat($arguments[0],$arguments[1],"");
$::nullOK = "no";
}
elsif ($command eq "TRANSFER_TEXT")
{
if ($arguments[2] ne "")
{
myLog ("Too many arguments to TRANSFER_TEXT.\n");
die ("Too many arguments to TRANSFER_TEXT.\n");
}
transferText($arguments[0], $arguments[1]);
}
# DON'T USE THE BELOW COMMAND; IT'S NOT IMPLEMENTED PROPERLY YET.
- elsif ($command eq "LIST_TO_CAT_CHECK")
- {
- if ($arguments[2] ne "")
- {
- myLog ("Too many arguments to LIST_TO_CAT_CHECK.\n");
- die ("Too many arguments to LIST_TO_CAT_CHECK.\n");
- }
- listToCat($arguments[0], $arguments[1], "no");
- }
elsif ($command eq "CHANGE_CATEGORY")
{
if ($arguments[4] ne "")
{
myLog ("Too many arguments to CHANGE_CATEGORY.\n");
die ("Too many arguments to CHANGE_CATEGORY.\n");
}
changeCategory($arguments[0], $arguments[1], $arguments[2], $arguments[3]);
}
elsif ($command eq "CLEANUP_DATE")
{
if ($arguments[0] ne "")
{
myLog ("Too many arguments to CLEANUP_DATE.\n");
die ("Too many arguments to CLEANUP_DATE.\n");
}
cleanupDate();
}
elsif ($command eq "OPENTASK_UPDATE")
{
if ($arguments[0] ne "")
{
myLog ("Too many arguments to OPENTASK_UPDATE.\n");
die ("Too many arguments to OPENTASK_UPDATE.\n");
}
opentaskUpdate();
}
# DON'T USE THE BELOW COMMAND; IT'S NOT IMPLEMENTED PROPERLY YET.
#elsif ($command eq "ENFORCE_CATEGORY_REDIRECTS_CHECK")
#{
# enforceCategoryRedirects("no");
#}
# This command is for remedial cleanup only.
#elsif ($command eq "INTERWIKI_LOOP")
#{
# interwikiLoop();
#}
elsif ($command eq "ENFORCE_CATEGORY_INTERWIKI")
{
if ($arguments[1] ne "")
{
myLog ("Too many arguments to ENFORCE_CATEGORY_INTERWIKI.\n");
die ("Too many arguments to ENFORCE_CATEGORY_INTERWIKI.\n");
}
enforceCategoryInterwiki($arguments[0]);
}
- Broken due to recent changes on WP:CFD
- elsif ($command eq "ENFORCE_CFD")
- {
- enforceCFD();
- }
elsif ($command eq "STOP")
{
myLog ("Stopped.");
die ("Stopped.");
}
elsif (($command eq "READ_COMMANDS")
or ($command eq ""))
{
while (
) {
$line = $_;
if ($line =~ m/READ_COMMANDS/)
{
myLog ("interpretCommands(): Infinite loop!");
die ("interpretCommands(): Infinite loop!");
}
if ($line =~ m/^\s*$/)
{
next;
}
$line =~ s/\s+$//s;
$line =~ s/\*\s*//;
if ($line =~ m/\[\[:?(.*?)\]\] -> \[\[:?(.*?)\]\]/)
{
$line =~ s/\[\[:?(.*?)\]\] -> \[\[:?(.*?)\]\]//;
$from = $1;
$to = $2;
$line =~ s/\s*$//;
$from =~ s/ /_/g;
$to =~ s/ /_/g;
interpretCommand($line, $from, $to);
}
else
{
while ($line =~ m/\[\[:?(.*?)\]\]/)
{
$line =~ m/\[\[:?(.*?)\]\]/;
$page = $1;
$pageCopy = $page;
$page =~ s/ /_/g;
$line =~ s/\[\[:?$pageCopy\]\]/$page/;
}
interpretCommand(split (" ", $line));
}
- unless (($line =~ m/TRANSFER_TEXT_CHECK/) or
- ($line =~ m/ENFORCE_CATEGORY_INTERWIKI/))
unless ($line =~ m/TRANSFER_TEXT_CHECK/)
{
limit();
}
}
myLog ("Execution complete.\n");
print ("Execution complete.\n");
}
else
{
myLog ("Unrecognized command '".$command."': ".join(" ", @arguments)."\n");
die ("Unrecognized command '".$command."': ".join(" ", @arguments));
}
}
sub limit
{
my ($i);
# Rate-limiting to avoid hosing the wiki server
# Min 30 sec unmarked
# Min 10 sec marked
# May be raised by retry() if load is heavy
### ATTENTION ###
# Increasing the speed of the bot to faster than 1 edit every 10
# seconds violates English Wikipedia rules as of April, 2005, and
# will cause your bot to be banned. So don't change $normalDelay
# unless you know what you are doing. Other sites may have
# similar policies, and you are advised to check before using your
# bot at the default speed.
#################
if ($::speedLimit < 10)
{
$::speedLimit = 10;
}
$i = $::speedLimit;
while ($i >= 0)
{
sleep (1);
print STDERR "Sleeping $i seconds...\r";
$i--;
}
print STDERR " \r";
}
- perl whobot.pl POST_STDIN User:Whobot/categories-alpha "Update from 13 Oct 2004 database dump"
sub postSTDIN
{
my ($text, $articleName, $comment, $editTime, $startTime, $junk, $token);
$articleName = $_[0];
$comment = $_[1];
#urlSafe($articleName);
while (
) {
$text .= $_;
}
if ($text =~ m/^\s*$/)
{
myLog ("postSTDIN(): Null input.\n");
die ("postSTDIN(): Null input.\n");
}
($junk, $editTime, $startTime, $token) = getPage($articleName);
if ($comment eq "")
{
$comment = "Automated post";
}
postPage ($articleName, $editTime, $startTime, $token, $text, $comment);
}
- perl whobot.pl ADD_TO_CAT Page_name Category:Category_name sortkey
sub addToCat
{
my ($text, $articleName, $category, $editTime, $startTime, $comment, $status,
@junk, $sortkey, $token);
$articleName = $_[0];
$category = $_[1];
$sortkey = $_[2];
#urlSafe($articleName);
#urlSafe($category);
($text, $editTime, $startTime, $token) = getPage($articleName);
$comment = "Add ${category} per WP:CFD";
($status, $text, @junk) = addCatToText($category, $text, $sortkey, $articleName);
if ($status ne "success")
{
return();
}
postPage ($articleName, $editTime, $startTime, $token, $text, $comment);
}
sub myLog
{
open (LOG, ">>whobot-log.txt")
|| die "Could not append to log!";
print LOG $_[0];
close (LOG);
}
sub getPage
{
my ($target, $request, $response, $reply, $text, $text2,
$editTime, $startTime, $attemptStartTime, $attemptFinishTime,
$token);
$target = $_[0];
if ($target =~ m/^\s*$/)
{
myLog("getPage: Null target.");
die("getPage: Null target.");
}
# urlSafe ($target);
# Monitor wiki server responsiveness
$attemptStartTime = Time::HiRes::time();
# Create a request-object
print "GET http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit\n";
myLog("GET http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit\n");
$request = HTTP::Request->new(GET => "http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit");
$response = $::ua->request($request);
if ($response->is_success)
{
$reply = $response->content;
# Monitor wiki server responsiveness
$attemptFinishTime = Time::HiRes::time();
retry ("success", "getPage", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));
# This detects whether or not we're logged in.
unless ($reply =~ m%My talk%)
{
# We've lost our identity.
myLog ("Wiki server is not recognizing me (1).\n---\n${reply}\n---\n");
die ("Wiki server is not recognizing me (1).\n");
}
#$reply =~ m%%s;
$reply =~ m%%s;
$text = $1;
$reply =~ m/value="(\d+)" name="wpEdittime"/;
$editTime = $1;
# Added 22 Aug 2005 to correctly handle articles that have
# been undeleted
$reply =~ m/value="(\d+)" name="wpStarttime"/;
$startTime = $1;
# Added 9 Mar 2005 after recent software change.
$reply =~ m/value="(\w+)" name="wpEditToken"/;
$token = $1;
###
if (($text =~ m/^\s*$/)
and ($::nullOK ne "yes"))
{
myLog ("getPage($target): Null text!\n");
myLog "\n---\n$reply\n---\n";
die ("getPage($target): Null text!\n");
}
if (($editTime =~ m/^\s*$/)
and ($::nullOK ne "yes"))
{
myLog ("getPage($target): Null time!\n");
myLog "\n---\n$reply\n---\n";
die ("getPage($target): Null time!\n");
}
if (($text =~ m/>/) or
($text =~ m/))
{
print $text;
myLog "\n---\n$text\n---\n";
myLog ("getPage($target): Bad text suck!\n");
die ("getPage($target): Bad text suck!\n");
}
# Change ( " -> " ) etc
# This function is from HTML::Entities.
decode_entities($text);
# This may or may not actually work
$::ua->cookie_jar->save();
return ($text, $editTime, $startTime, $token);
}
else
{
myLog ("getPage($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit\n".$response->content."\n");
print ("getPage($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit\n".$response->content."\n");
# 50X HTTP errors mean there is a problem connecting to the wiki server
if (($response->status_line =~ m/^500/)
or ($response->status_line =~ m/^502/)
or ($response->status_line =~ m/^503/))
{
return(retry("getPage", @_));
}
else
{
# Unhandled HTTP response
die ("getPage($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit\n");
}
}
}
sub postPage
{
my ($request, $response, $pageName, $textToPost, $summaryEntry,
$editTime, $startTime, $actual, $expected, $attemptStartTime,
$attemptFinishTime, $date, $editToken, $minor);
$pageName = $_[0];
$editTime = $_[1];
$startTime = $_[2];
$editToken = $_[3];
$textToPost = $_[4];
$summaryEntry = $_[5]; # Max 200 chars!
$minor = $_[6];
$summaryEntry = substr($summaryEntry, 0, 200);
if ($pageName eq "")
{
myLog ("postPage(): Empty pageName.\n");
die ("postPage(): Empty pageName.\n");
}
if ($summaryEntry eq "")
{
$summaryEntry = "Automated editing.";
}
# Monitor server responsiveness
$attemptStartTime = Time::HiRes::time();
if ($minor eq "yes")
{
$request = POST "http://en.wikipedia.org/w/wiki.phtml?title=${pageName}&action=submit",
[wpTextbox1 => $textToPost,
wpSummary => $summaryEntry,
wpSave => "Save page",
wpMinoredit => "on",
wpEditToken => $editToken,
wpStarttime => $startTime,
wpEdittime => $editTime];
# Optional: wpWatchthis
}
else
{
$request = POST "http://en.wikipedia.org/w/wiki.phtml?title=${pageName}&action=submit",
[wpTextbox1 => $textToPost,
wpSummary => $summaryEntry,
wpSave => "Save page",
wpEditToken => $editToken,
wpStarttime => $startTime,
wpEdittime => $editTime];
# Optional: wpWatchthis, wpMinoredit
}
# ---
## If posts are failing, you can uncomment the below to see what
## HTTP request is being made.
# myLog($request->as_string());
# print $request->as_string(); $::speedLimit = 60 * 10;
# print $::ua->request($request)->as_string;
# ---
myLog("POSTing...");
print "POSTing...";
# Pass request to the user agent and get a response back
$response = $::ua->request($request);
myLog("POSTed.\n");
print "POSTed.\n";
if ($response->content =~ m/Please confirm that really want to recreate this article./)
{
myLog ($response->content."\n");
die ("Deleted article conflict! See log!");
}
# Check the outcome of the response
if (($response->is_success) or ($response->is_redirect))
{
# Monitor server responsiveness
$attemptFinishTime = Time::HiRes::time();
retry ("success", "postPage", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));
$expected = "302 Moved Temporarily";
$actual = $response->status_line;
if (($expected ne $actual)
and ($actual ne "200 OK"))
{
myLog ("postPage(${pageName}, $editTime)#1 - expected =! actual\n");
myLog ($request->as_string());
myLog ("EXPECTED: '${expected}'\n");
myLog (" ACTUAL: '${actual}'\n");
die ("postPage(${pageName}, $editTime)#1 - expected =! actual - see log\n");
}
$expected = "http://en.wikipedia.org/wiki/${pageName}";
$expected =~ s/\'/%27/g;
$expected =~ s/\*/%2A/g;
$expected = urlEncode($expected);
$actual = $response->headers->header("Location");
if (($expected ne $actual)
and !(($actual eq "") and ($response->status_line eq "200 OK")))
{
myLog ("postPage(${pageName}, $editTime)#2 - expected =! actual\n");
myLog ("EXPECTED: '${expected}'\n");
myLog (" ACTUAL: '${actual}'\n");
die ("postPage(${pageName}, $editTime)#2 - expected =! actual - see log\n");
}
if ($response->content =~ m/
Edit conflict/)
{
myLog ("Edit conflict on '$pageName' at '$editTime'!\n");
die ("Edit conflict on '$pageName' at '$editTime'!\n");
}
$::ua->cookie_jar->save();
return ($response->content);
}
else
{
$date = `date /t`;
$date =~ s/\n//g;
myLog ("Bad response to POST to $pageName at $date.\n".$response->status_line."\n".$response->content."\n");
# 50X HTTP errors mean there is a problem connecting to the wiki server
if (($response->status_line =~ m/^500/)
or ($response->status_line =~ m/^502/)
or ($response->status_line =~ m/^503/))
{
print "Bad response to POST to $pageName at $date.\n".$response->status_line."\n".$response->content."\n";
return(retry("postPage", @_));
}
else
{
# Unhandled HTTP response
die ("Bad response to POST to $pageName at $date.\n".$response->status_line."\n");
}
}
}
sub urlSafe
{
# This function is no longer called because the LWP::UserAgent and
# HTTP::Request libraries handle character escaping.
my ($text, $textCopy);
$text = $_[0];
$textCopy = $text;
# & may not be included in this list!
$textCopy =~ s%[\p{IsWord}\w\-,\(\):\/\'\.\;\!]*%%g;
unless ($textCopy eq "")
{
myLog ("urlSafe(): Bad character in ${text}: '${textCopy}'\n");
die ("urlSafe(): Bad character in ${text}: '${textCopy}'\n");
}
}
- perl whobot.pl MOVE_CONTENTS_INCL_CATS Category:From_here Category:To_here CFDListingDay
sub moveCategoryContents
{
my (@articles, $categoryFrom, $categoryTo, $article, $status,
@subcats, $includeCategories, $subcat, @junk, $sortkey,
$includeSortkey, $cfdlisting);
# -- INITIALIZATION --
$categoryFrom = $_[0];
$categoryTo = $_[1];
$includeCategories = $_[2];
$includeSortkey = $_[3];
$cfdlisting = $_[4];
if ($categoryFrom =~ m/^\[\[:(Category:.*?)\]\]/)
{
$categoryFrom =~ s/^\[\[:(Category:.*?)\]\]/$1/;
$categoryFrom =~ s/\s+/_/g;
}
if ($categoryTo =~ m/^\[\[:(Category:.*?)\]\]/)
{
$categoryTo =~ s/^\[\[:(Category:.*?)\]\]/$1/;
$categoryTo =~ s/\s+/_/g;
}
$categoryFrom =~ s/^\[\[://;
$categoryTo =~ s/^\[\[://;
$categoryFrom =~ s/\]\]$//;
$categoryTo =~ s/\]\]$//;
unless (($categoryFrom =~ m/^Category:/) and
($categoryTo =~ m/^Category:/))
{
myLog ("moveCategoryContents(): Are you sure these are categories? ".$categoryFrom."/".$categoryTo."\n");
die ("moveCategoryContents(): Are you sure these are categories? ".$categoryFrom."/".$categoryTo."\n");
}
transferText ($categoryFrom, $categoryTo, $cfdlisting);
# Subcategory transfer
if ($includeCategories eq "yes")
{
@subcats = getSubcategories($categoryFrom);
foreach $subcat (@subcats)
{
if ($subcat =~ m/^\s*$/)
{
next;
}
$subcat = urlDecode($subcat);
print "changeCategory($subcat, $categoryFrom, $categoryTo) c\n";
myLog "changeCategory($subcat, $categoryFrom, $categoryTo) c\n";
changeCategory($subcat, $categoryFrom, $categoryTo, $cfdlisting);
limit();
}
}
# Article transfer
@articles = getCategoryArticles($categoryFrom);
foreach $article (reverse(@articles))
- foreach $article (@articles)
{
#die "article name is $article";
if ($article =~ m/^\s*$/)
{
next;
}
$article = urlDecode($article);
print "changeCategory($article, $categoryFrom, $categoryTo) a\n";
myLog "changeCategory($article, $categoryFrom, $categoryTo) a\n";
changeCategory($article, $categoryFrom, $categoryTo, $cfdlisting);
limit();
}
}
- perl whobot.pl DEPOPULATE_CAT Category:To_be_depopulated
sub depopulateCat #($category);
{
my (@articles, $category, $article, $status, @subcats, $subcat, @junk, $doSpecial);
$category = $_[0];
$doSpecial = $_[1];
if ($category =~ m/^\[\[:(Category:.*?)\]\]/)
{
$category =~ s/^\[\[:(Category:.*?)\]\]/$1/;
$category =~ s/\s+/_/g;
}
if (!$doSpecial)
{
unless ($category =~ m/^Category:/)
{
myLog ("depopulateCat(): Are you sure '$category' is a category?\n");
die ("depopulateCat(): Are you sure '$category' is a category?\n");
}
# Remove all subcategories
@subcats = getSubcategories($category);
foreach $subcat (@subcats)
{
$subcat = urlDecode($subcat);
print "removeXFromCat($subcat, $category) c\n";
myLog "removeXFromCat($subcat, $category) c\n";
($status, @junk) = removeXFromCat($subcat, $category);
unless ($status == 0)
{
myLog ("Status: $status\n");
print "Status: $status\n";
}
}
}
# Remove all articles
@articles = getCategoryArticles($category, $doSpecial);
#foreach $article (reverse(@articles))
foreach $article (@articles)
{
$article = urlDecode($article);
print "removeXFromCat($article, $category, $doSpecial) a\n";
myLog "removeXFromCat($article, $category, $doSpecial) a\n";
($status, @junk) = removeXFromCat($article, $category, $doSpecial);
unless ($status == 0)
{
myLog ("Status: $status\n");
print "Status: $status\n";
}
}
}
- perl whobot.pl REMOVE_X_FROM_CAT Article_name Category:Where_the_article_is CFDListingDay
sub removeXFromCat
{
my ($text, $articleName, $category, $editTime, $startTime, $comment, $catTmp,
$sortkey, @junk, $token, $categoryUnd, $categoryHuman, $cfdlisting, $doSpecial);
$articleName = $_[0];
$category = $_[1];
$cfdlisting = $_[2];
$doSpecial = $_[3];
if (!$doSpecial)
{
$doSpecial = $cfdlisting;
}
#urlSafe($articleName);
#urlSafe($category);
if (!$doSpecial)
{
unless ($category =~ m/^Category:\w+/)
{
myLog ("addToCat(): Bad format on category.\n");
die ("addToCat(): Bad format on category.\n");
}
}
($text, $editTime, $startTime, $token) = getPage($articleName);
$comment = "Removed ${category} per WP:CFD";
#$comment = "test edits ${cfdlisting}";
# Convert underscore to spaces; this is human-readable.
$category =~ s/_/ /g;
$categoryHuman = $category;
# Insert possible whitespace
$category =~ s/^Category://;
- $category = "Category:\\s*\\Q".$category."\\E"; # THIS DOES NOT WORK
$category = "Category:\\s*".$category;
$category =~ s%\(%\\(%g;
$category =~ s%\)%\\)%g;
$category =~ s%\'%\\\'%g;
$categoryUnd = $category;
$categoryUnd =~ s/ /_/g;
unless (($text =~ m/\[\[\s*${category}\s*\]\]/is)
or ($text =~ m/\[\[\s*${category}\s*\|.*?\]\]/is)
or ($text =~ m/\[\[\s*${categoryUnd}\s*\]\]/is)
or ($text =~ m/\[\[\s*${categoryUnd}\s*\|.*?\]\]/is))
{
print "removeXFromCat(): $articleName is not in '$category'.\n";
myLog ("removeXFromCat(): $articleName is not in '$category'.\n");
### TEMPORARY ###
### Uncomment these lines if you want category remove attempts
### to trigger null edits. This is useful if you have have
### changed the category on a template, but due to a bug this
### does not actually move member articles until they are
### edited.
($text, @junk) = fixCategoryInterwiki($text);
postPage ($articleName, $editTime, $startTime, $token, $text, "Mostly null edit to actually remove from ${categoryHuman}", "yes");
limit();
### TEMPORARY ###
return(1);
}
if ($text =~ m/^\s*\#REDIRECT/is)
{
print "addToCat(): $articleName is a redirect!\n";
myLog ("addToCat(): $articleName is a redirect!\n");
return(2);
}
$text =~ m/\[\[\s*${category}\s*\|\s*(.*?)\]\]/is;
$sortkey = $1;
if ($sortkey eq "")
{
$text =~ m/\[\[\s*${categoryUnd}\s*\|\s*(.*?)\]\]/is;
}
# Remove the page from the category and any trailing newline.
$text =~ s/\[\[\s*${category}\s*\|?(.*?)\]\]\n?//isg;
$text =~ s/\[\[\s*${categoryUnd}\s*\|?(.*?)\]\]\n?//isg;
($text, @junk) = fixCategoryInterwiki($text);
postPage ($articleName, $editTime, $startTime, $token, $text, $comment);
return(0, $sortkey);
}
- perl whobot.pl PRINT_WIKITEXT Article_you_want_to_get
- Warning: Saves to a file in the current directory with the same name
- as the article, plus another file with the .html extention.
sub printWikitext
{
my ($editTime, $startTime, $text, $target, $token);
$target = $_[0];
$target =~ s/^\[\[://;
$target =~ s/\]\]$//;
($text, $editTime, $startTime, $token) = getPage($target);
# Save the wikicode version to a file.
open (WIKITEXT, ">./${target}");
print WIKITEXT $text;
close (WIKITEXT);
# Save the HTML version to a file.
print `wget http://en.wikipedia.org/wiki/${target} -O ./${target}.html`;
}
- Get a list of the names of articles in a given category.
sub getCategoryArticles
{
my ($target, $request, $response, $reply, $articles, $article,
@articles, @articles1, @articles2, $attemptStartTime, $attemptFinishTime, $doSpecial);
$target = $_[0];
$doSpecial = $_[1];
#urlSafe ($target);
if (!$doSpecial)
{
unless ($target =~ m/^Category:/)
{
myLog ("getCategoryArticles(): Are you sure '$target' is a category?\n");
die ("getCategoryArticles(): Are you sure '$target' is a category?\n");
}
}
# Monitor wiki server responsiveness
$attemptStartTime = Time::HiRes::time();
# Create a request-object
if (!$doSpecial)
{
print "GET http://en.wikipedia.org/wiki/${target}\n";
myLog("GET http://en.wikipedia.org/wiki/${target}\n");
$request = HTTP::Request->new(GET => "http://en.wikipedia.org/wiki/${target}");
}
else
{
print "GET http://en.wikipedia.org/w/wiki.phtml?title=Special:Whatlinkshere&target=${target}&limit=200&offset=0\n";
myLog("GET http://en.wikipedia.org/w/wiki.phtml?title=Special:Whatlinkshere&target=${target}&limit=200&offset=0\n");
$request = HTTP::Request->new(GET => "http://en.wikipedia.org/w/wiki.phtml?title=Special:Whatlinkshere&target=${target}&limit=200&offset=0\n");
}
$response = $::ua->request($request);
if ($response->is_success)
{
# Monitor wiki server responsiveness
$attemptFinishTime = Time::HiRes::time();
retry ("success", "getCategoryArticles", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));
$reply = $response->content;
# This detects whether or not we're logged in.
unless ($reply =~ m%My talk%)
{
# We've lost our identity.
myLog ("Wiki server is not recognizing me (2).\n---\n${reply}\n---\n");
die ("Wiki server is not recognizing me (2).\n");
}
$articles = $reply;
$articles =~ s%^.*?
Articles in category.*?
%%s;$articles =~ s%