# User:Polbot/source/Make rds from altnames

use strict;

use Perlwikipedia;

use URI::Escape;

# Earliest epoch time at which the next page edit may be made; the
# redirect-creation code further down uses it to space out writes.
my $soonest_next_op = time;

# Client object used for every wiki read and write in this script.
my $pw = Perlwikipedia->new();

# Debug switch, left disabled.  (This line had lost its leading '#' and
# would not compile as it stood.)
# $pw->{debug} = 1;

# Identify the bot through the underlying user agent.
$pw->{mech}->agent('Bot/WP/EN/Quadell/polbot');

print "\nStarting polbot, logging in.\n";

# 'bot name' / 'bot password' are placeholders for the real credentials.
my $login_status = $pw->login('bot name', 'bot password');

# The script treats a login() result of 0 as success; anything else is fatal.
die "I can't log in." unless ($login_status eq 0);

# Scratch array; declared here and also assigned by later sections.
my @lines;

print "Reading needingdab file\n";

# Maps an article name to the complete list line that mentions it.
my %needingdab_names = ();

my $needingdab_list = $pw->get_text('User:Polbot/altnames/needingdab');

@lines = split /\n/, $needingdab_list;

for my $entry (@lines) {
    # Only lines that start with "* [[Name]]" are kept; the link target
    # becomes the key and the whole line is stored as the value.
    next unless $entry =~ m/^\* \[\[([^]]*)\]\].*$/;
    my $article_name = $1;
    $needingdab_names{$article_name} = $entry;
}

print "Reading in-process file\n";

# Maps a main article name to the list line of alternate names that still
# need human attention after this run.
my %inprocess_names = ();

my $inprocess_list = $pw->get_text('User:Polbot/altnames/inprocess');

@lines = split(/\n/, $inprocess_list);

foreach my $line (@lines) {

    # Expected line shape: "* [[Main name]]|alt1|alt2*job title".
    next unless $line =~ m/^\* \[\[([^]]*)\]\]\|([^*]*)\*(.*)$/;
    my ($main_name, $altnames, $jobtitle) = ($1, $2, $3);

    # Build a regex fragment that matches the main article name literally.
    # quotemeta escapes every regex metacharacter (not just parens and dots);
    # an escaped space is then widened so that "_" is accepted as well.
    my $enc_main_name = quotemeta($main_name);
    $enc_main_name =~ s/\\ /[ _]/g;

    # Rule 1 test: the text links to the main article ([[Name]] or
    # [[Name|...]]) or passes it as the last template argument (|Name}}).
    my $mentions_main = qr/\[\[\s*$enc_main_name\s*(?:\||\]\])|\|\s*$enc_main_name\s*\}\}/;

    print "\nReading $main_name ($jobtitle)\n";

    my @altnames = split(/\|/, $altnames);

    # $altname aliases the array element, so assigning to it rewrites
    # @altnames in place; "DELETE" marks an entry for removal below.
    foreach my $altname (@altnames) {

        my $link_descr = $altname;
        $altname =~ s/\[\[(.*)\]\]/$1/;
        my $final_article_name = $altname;

        # Read the altname article.
        my $wiki = wikiread($altname, $pw);

        if ($wiki !~ /\S/) {
            # No article exists. Make a rd!
            # NOTE(review): wikiread also returns '' once its retries are
            # exhausted, so a failed fetch is handled like a missing page.

            $| = 1;

            # Throttle writes: wait until the earliest permitted time.
            my $wait = $soonest_next_op - time;
            $wait = 0 if $wait < 0;
            print "Waiting $wait secs... ";

            # sleep may return early, so re-check until the deadline passes.
            while ((my $remaining = $soonest_next_op - time) > 0) {
                sleep $remaining;
            }
            $soonest_next_op = time + 9;

            print "rd $altname to $main_name\n";

            # The redirect target has to be a wiki link to take effect.
            $pw->edit($altname, "#Redirect [[$main_name]]", "Redirecting to $main_name, auto-generated by User:polbot");

            $altname = "DELETE";
            next;
        }

        # The article exists.
        # Okay, here are The Rules:
        # 1. If it mentions $main_name, then DELETE. End.
        # 2. If it's a redirect, then follow it, and check #1 again.
        # 3. If there's a link to anything (disambiguation), then follow it,
        #    and check #1 again.
        # 4. We won't be deleting at this point. But if it's a dab, then
        #    note that.
        # These rules will fail in the following situations:
        # A. $altname is an article that dabs to x, a dab page that mentions
        #    $main_name, but that isn't x (disambiguation)
        # B. $altname is a dab that links to a rd to $main_name.

        # 1. Does it mention the main article (perhaps even rd-ing there)?
        my $mentioned = ($wiki =~ $mentions_main) ? 1 : 0;

        # 2. Is it a rd? If so, follow.  The capture is non-greedy so that a
        #    later "]]" on the same line is not swallowed into the target.
        if (!$mentioned and $wiki =~ /\#\s*redirect\s*\[\[(.*?)\]\]/i) {
            $final_article_name = $1;
            $link_descr .= " r $final_article_name";
            $wiki = wikiread($final_article_name, $pw);

            # 1. again.
            $mentioned = 1 if $wiki =~ $mentions_main;
        }

        # 3. Does it link to a dab page? If so, follow.
        if (!$mentioned) {
            my $dab_target = _polbot_dab_target($wiki, $final_article_name);
            if (defined $dab_target) {
                $final_article_name = $dab_target;
                $link_descr .= " f $final_article_name";
                $wiki = wikiread($final_article_name, $pw);

                # 1. again.
                $mentioned = 1 if $wiki =~ $mentions_main;
            }
        }

        # 4. Note when the page we ended up on carries a dab template.
        if ($wiki =~ m/\{\{(?:dab|disambig|disamb|disambiguation|hndis|namedab|bio-dab|hndisambig)(?:\}\}|\|)/i) {
            $link_descr .= " d";
        }

        $altname = $mentioned ? "DELETE" : $link_descr;
    }

    # Remove the elements that say "DELETE"
    @altnames = grep { !/^DELETE$/ } @altnames;

    # No altnames left means there is nothing to record for this article.
    next unless @altnames;

    # Put it in inprocess.  The main name is written as a wiki link because
    # the list readers in this script only accept lines of the form
    # "* [[Name]]...".
    $inprocess_names{$main_name} = "* [[$main_name]]|" . join('|', @altnames) . "*$jobtitle";
}

# Work out which disambiguation page, if any, the given wikitext points to.
#   $wiki         - wikitext of the page being examined
#   $current_name - name of that page, used when a hatnote template names
#                   no target of its own
# Returns the page name to follow, or nothing when no pointer is found.
# The checks run in a fixed order and the first one that matches wins.
sub _polbot_dab_target {
    my ($wiki, $current_name) = @_;

    # An explicit link to "... (disambiguation)".
    return $1 if $wiki =~ m/\[\[([^]]* \(disambiguation\))\]\]/;

    # otherpersons2/otherpeople2: the argument is used as the full page name.
    return $1 if $wiki =~ m/\{\{\s*(?:otherpersons2|otherpeople2)\s*\|\s*(.*?)\s*\}\}/;

    # Hatnote without argument: "<current page> (disambiguation)".
    return "$current_name (disambiguation)"
        if $wiki =~ m/\{\{\s*(?:otherpeople|otherpersons|othernames)\s*\}\}/;

    # Hatnote with argument: "<argument> (disambiguation)".
    return "$1 (disambiguation)"
        if $wiki =~ m/\{\{\s*(?:otherpeople|otherpersons|othernames)\s*\|\s*(.*?)\s*\}\}/;

    return;
}

print "Merging old and new needingdab lists\n";

# An entry that is already on the needingdab list takes precedence; a freshly
# built in-process line is only used where no (true) value is present yet.
for my $main_name (keys %inprocess_names) {
    $needingdab_names{$main_name} ||= $inprocess_names{$main_name};
}

print "Writing needingdab list\n";

# One stored line per key, in sorted key order, each followed by a newline.
my $wiki_code = join '', map { $needingdab_names{$_} . "\n" } sort keys %needingdab_names;

$pw->edit('User:Polbot/altnames/needingdab', $wiki_code, "Auto-updating based on input at inprocess list");

print "Finis!";

# Fetch the text of a page, retrying while the fetch yields "0".
#   1st argument - page title
#   2nd argument - object whose get_text($title) method performs the fetch
# A result equal to "0" triggers a retry (presumably get_text's failure
# signal - confirm against the client library).  Up to five retries are made,
# sleeping 1, 2, 3, 4 and 5 seconds before them.  Returns the fetched text,
# or '' if the result is still "0" after the last retry.
sub wikiread {
    my ($article, $connection) = @_;

    my $text = $connection->get_text($article);

    foreach my $delay (1 .. 5) {
        return $text if $text ne "0";

        sleep $delay;
        print " retry. . .\n";
        $text = $connection->get_text($article);
    }

    return $text eq "0" ? '' : $text;
}