# User:Polbot/source/Make rds from altnames
use strict;
use Perlwikipedia;
use URI::Escape;
# --- Bot setup and login ----------------------------------------------------
# Earliest epoch second at which the next page-creating edit may be made.
# It starts at "now" and is pushed forward after every redirect the bot writes.
my $soonest_next_op = time;

my $pw = Perlwikipedia->new();

# Disabled debug switch. In the original this line began with a stray "- ",
# which Perl parses as an assignment to a negation (a compile-time error).
# Presumably it turns on the library's debug mode -- uncomment to try it.
# $pw->{debug} = 1;

# Identify the bot through the user-agent string of the underlying mech object.
$pw->{mech}->agent('Bot/WP/EN/Quadell/polbot');

print "\nStarting polbot, logging in.\n";

# The script treats a login() result of 0 as success and anything else as fatal.
my $login_status = $pw->login('bot name', 'bot password');
die "I can't log in." unless ($login_status eq 0);
# --- Load the current "needing disambiguation" list -------------------------
# Scratch array; it is reused later when the in-process list is read.
my @lines;

print "Reading needingdab file\n";

# Maps an article title to the complete list line that mentions it.
my %needingdab_names = ();

my $needingdab_list = $pw->get_text('User:Polbot/altnames/needingdab');
@lines = split(/\n/, $needingdab_list);

for my $entry (@lines) {
    # Only bullet lines of the form "* [[Title]]..." are kept; the title is the key.
    next unless $entry =~ m/^\* \[\[([^]]*)\]\].*$/;
    $needingdab_names{$1} = $entry;
}
# --- Process the in-process list --------------------------------------------
# Each usable line has the form
#     * [[Main name]]|[[alt 1]]|[[alt 2]]*job title
# For every alternate name the bot either
#   - creates a redirect to the main article (no page exists under that name), or
#   - drops the name (the existing page, or a page it leads to, already
#     mentions the main article), or
#   - keeps the name, annotated, so that it ends up in the needingdab list.
print "Reading in-process file\n";
my %inprocess_names = ();
my $inprocess_list = $pw->get_text('User:Polbot/altnames/inprocess');
@lines = split(/\n/, $inprocess_list);

LINE:
foreach my $line (@lines) {
    next LINE unless $line =~ m/^\* \[\[([^]]*)\]\]\|([^*]*)\*(.*)$/;
    my $main_name = $1;
    my $altnames  = $2;
    my $jobtitle  = $3;

    # Build a pattern fragment that matches the main article title literally.
    # quotemeta() neutralises every regex metacharacter (the original only
    # handled space, parentheses and dot, so titles containing e.g. "+", "?"
    # or "[" produced wrong or uncompilable patterns). An escaped space is
    # then widened to "[ _]" because wiki titles use both interchangeably.
    my $enc_main_name = quotemeta $main_name;
    $enc_main_name =~ s/\\ /[ _]/g;

    # A page "mentions" the main article when it links to it ([[Name]] or
    # [[Name|...]]) or passes it as the last template argument (|Name}}).
    my $mentions_main =
        qr/\[\[\s*$enc_main_name\s*(?:\||\]\])|\|\s*$enc_main_name\s*\}\}/;

    print "\nReading $main_name ($jobtitle)\n";

    my @altnames = split(/\|/, $altnames);

    ALTNAME:
    foreach my $altname (@altnames) {
        # $altname aliases the array element: assigning "DELETE" to it marks
        # the entry for removal by the grep after this loop.
        my $link_descr = $altname;          # original text, brackets included
        $altname =~ s/\[\[(.*)\]\]/$1/;     # bare title
        my $final_article_name = $altname;

        # Read the altname article.
        my $wiki = wikiread($altname, $pw);

        if ($wiki !~ /\S/) {
            # No article exists. Make a rd!
            # NOTE(review): wikiread() also returns '' after repeated fetch
            # failures, so a failed fetch is indistinguishable from a missing
            # page here -- confirm that creating a redirect is acceptable then.
            $| = 1;    # unbuffered STDOUT so the progress text shows before sleeping

            # Throttle page creation. sleep() replaces the original busy loop
            # (which spun a CPU core); loop on the clock because sleep() may
            # return early. Nothing is printed when no wait is needed (the
            # original printed a negative number of seconds).
            my $wait = $soonest_next_op - time;
            if ($wait > 0) {
                print "Waiting $wait secs... ";
                while ((my $remaining = $soonest_next_op - time) > 0) {
                    sleep $remaining;
                }
            }
            $soonest_next_op = time + 9;

            print "rd $altname to $main_name\n";
            # The target must be a wiki link: "#REDIRECT Foo" without brackets
            # is not a redirect.
            $pw->edit($altname, "#REDIRECT [[$main_name]]", "Redirecting to $main_name, auto-generated by User:polbot");
            $altname = "DELETE";
            next ALTNAME;
        }

        # The article exists.
        # Okay, here are The Rules:
        # 1. If it mentions $main_name, then DELETE. End.
        # 2. If it's a redirect, then follow it, and check #1 again.
        # 3. If there's a link to anything (disambiguation), then follow it, and check #1 again.
        # 4. We won't be deleting at this point. But if it's a dab, then note that.
        # These rules will fail in the following situations:
        # A. $altname is an article that dabs to x, a dab page that mentions $main_name, but that isn't x (disambiguation)
        # B. $altname is a dab that links to a rd to $main_name.

        # 1. Does it mention the main article (perhaps even rd-ing there)?
        my $covered = ($wiki =~ $mentions_main) ? 1 : 0;

        # 2. Is it a rd? If so, follow.
        # The target is captured up to the first "]]", "|" or "#", so trailing
        # links or templates on the same line no longer leak into the title
        # (the original greedy ".*" ran to the last "]]" on the line).
        if (!$covered
            and $wiki =~ /\#\s*redirect\s*\[\[\s*([^\]\|\#]+?)\s*(?:[\|\#][^\]]*)?\]\]/i)
        {
            $final_article_name = $1;
            $link_descr .= " r $final_article_name";
            $wiki = wikiread($final_article_name, $pw);
            # 1. again.
            $covered = 1 if $wiki =~ $mentions_main;
        }

        # 3. Does it link to a dab page? If so, follow.
        unless ($covered) {
            my $dab_target;
            if ($wiki =~ m/\[\[([^]]* \(disambiguation\))\]\]/) {
                # Explicit link to "X (disambiguation)".
                $dab_target = $1;
            }
            elsif ($wiki =~ m/\{\{\s*(?:otherpersons2|otherpeople2)\s*\|\s*(.*?)\s*\}\}/) {
                # Hatnote template naming the target page in full.
                $dab_target = $1;
            }
            elsif ($wiki =~ m/\{\{\s*(?:otherpeople|otherpersons|othernames)\s*\}\}/) {
                # Argument-less hatnote: target is "<this page> (disambiguation)".
                $dab_target = "$final_article_name (disambiguation)";
            }
            elsif ($wiki =~ m/\{\{\s*(?:otherpeople|otherpersons|othernames)\s*\|\s*(.*?)\s*\}\}/) {
                # Hatnote with a name: target is "<name> (disambiguation)".
                $dab_target = "$1 (disambiguation)";
            }

            if (defined $dab_target) {
                $final_article_name = $dab_target;
                $link_descr .= " f $final_article_name";
                $wiki = wikiread($final_article_name, $pw);
                # 1. again.
                $covered = 1 if $wiki =~ $mentions_main;
            }
        }

        if ($covered) {
            $altname = "DELETE";
            next ALTNAME;
        }

        # 4. Not deleting. Flag the entry when the last page read is a dab page.
        if ($wiki =~ m/\{\{(?:dab|disambig|disamb|disambiguation|hndis|namedab|bio-dab|hndisambig)(?:\}\}|\|)/i) {
            $link_descr .= " d";
        }
        $altname = $link_descr;
    }

    # Remove the elements that say "DELETE"
    @altnames = grep(!/^DELETE$/, @altnames);

    # Nothing to record when every alternate name was handled.
    next LINE unless @altnames;

    # Put it in inprocess. The main name is written as a wiki link because
    # both list readers in this script only accept lines of the form
    # "* [[Name]]..."; without the brackets the entry would be ignored on
    # the next run.
    $inprocess_names{$main_name} = "* [[$main_name]]|" . join('|', @altnames) . "*$jobtitle";
}
# --- Merge results and publish the needingdab list --------------------------
print "Merging old and new needingdab lists\n";

# Entries already on the needingdab list win; an in-process entry is only
# added when the title has no (true) value there yet.
$needingdab_names{$_} ||= $inprocess_names{$_} for keys %inprocess_names;

print "Writing needingdab list\n";

# One list line per title, ordered by title, each terminated by a newline.
my $wiki_code = join '', map { $needingdab_names{$_} . "\n" } sort keys %needingdab_names;

$pw->edit('User:Polbot/altnames/needingdab', $wiki_code, "Auto-updating based on input at inprocess list");
print "Finis!";
# wikiread($article, $connection)
#
# Fetch the text of $article through $connection->get_text(). A result equal
# to the string "0" is treated as a failed fetch and retried: up to five
# retries, sleeping 1, 2, 3, 4 and 5 seconds before them and printing a
# " retry. . ." notice each time. Returns whatever get_text() delivered on the
# first attempt that is not "0", or '' when every attempt produced "0".
sub wikiread {
    my ($article, $connection) = @_;

    my $text = $connection->get_text($article);

    for my $delay (1 .. 5) {
        return $text unless $text eq "0";
        sleep $delay;
        print " retry. . .\n";
        $text = $connection->get_text($article);
    }

    # Result of the final retry: give up with an empty string if it failed too.
    return $text eq "0" ? '' : $text;
}