User:Firefly/code

Main 'master control' program:

use strict;

use Data::Dumper;

use Time::HiRes qw(sleep);

my $data_root = 'data/';

our(%shared_data);

%shared_data =

(

job_list => [],

add_job => sub {my ($r_job , $timing) = @_;push (@{$shared_data{job_list}} , [$r_job , (time()+$timing)]);}

);

my(%plugins);

open(CFG,'HBC_MCP.cfg');

sysread(CFG, my $cfg, -s(CFG));

close(CFG);

eval($cfg);

warn "Initializing plugins...\n";

foreach my $name (keys(%plugins))

{

my $obj;

$plugins{$name}{shared} = \%shared_data;

$plugins{$name}{files} = $data_root.$name.'/';

mkdir ($data_root.$name.'/') unless (-d($data_root.$name.'/'));

my $plugin_command = 'use HBCPlugins::'.$name.';$obj = HBCPlugins::'.$name.'->new($plugins{\''.$name.'\'});';

eval $plugin_command;

$shared_data{$obj->{label}} = $obj;

}

warn "Initialization complete.\n\n";

until (6 == 9) # Infinite loop, a serpent biting it's own tail.

{

my $ra_job_list = $shared_data{job_list};

sleep(.1); # Important in all infinite loops to keep it calm

my (@kept_jobs); # A place to put jobs not ready to run yet

while (my $job = shift(@{$ra_job_list})) # Go through each job pending

{

my($r_job , $timing) = @{$job};

if ($timing < time()) # If it is time to run it then run it

{

if (ref($r_job) eq 'ARRAY') # Callback style, reference to an array with a sub followed by paramaters

{

my $cmd = shift(@{$r_job});

&{$cmd}(@{$r_job});

}

elsif (ref($r_job) eq 'CODE') # Otherwise just the reference to the sub

{

&{$r_job};

}

}

else # If it is not time yet, save it for later

{

push(@kept_jobs , $job)

}

}

push (@{$ra_job_list} , @kept_jobs); # Keep jobs that are still pending

}

RenameChecker:

package HBCPlugins::RenameChecker;

use Encode;

use MediaWiki;

use strict;

use Data::Dumper;

use URI::Escape;

our $self;

sub new

{

shift;

$self = shift;

bless($self);

warn "RenameChecker active.\n";

my(@pages) =

(

'Wikipedia:Changing username/Usurpations',

'Wikipedia:Changing username'

);

my $timing = 0;

&{$self->{shared}{add_job}}(\&login,0);

&{$self->{shared}{add_job}}(\&contact_LogWatcher_plugin, 0);

&{$self->{shared}{add_job}}([\&contact_irc_plugin,\@pages] , 0);

foreach my $page (@pages)

{

&{$self->{shared}{add_job}}([\&parse_page,undef,$page], $timing);

$timing += 30;

}

return $self;

}

sub login

{

warn "Connecting to Wikipedia...\n";

my $c = MediaWiki->new;

$c->setup

({

'bot' => {'user' => $self->{params}{username},'pass' => $self->{params}{password}},

'wiki' => {'host' => 'en.wikipedia.org','path' => 'w'}

}) || warn "Failed to log in\n";

my $whoami = $c->user();

warn "$whoami connected\n";

$self->{WP_obj} = $c;

&{$self->{shared}{add_job}}(\&login,3600);

}

sub contact_LogWatcher_plugin

{

$self->{LogMonitor} = $self->{shared}{$self->{params}{log_label}} || die;

$self->{LogMonitor}->add_job

(

type => 'renameuser', # The type of log to read

start_point => 'all', # Where to start reading the log from, timestamp or 'all'(for everything) or 'now'(to only log from now)

catch_up_frequency => 0, # Delay between reading while catching up to current state

regular_frequency => 36000, # Delay between reading after catching up to current state

step_size => 500 # How many entries to load per attempt. Limit of 500 for users, 5000 for bots and admins

);

}

sub contact_irc_plugin

{

my $ra_pages = shift;

$self->{IRCFeed} = $self->{shared}{$self->{params}{irc_label}} || die;

my $esc = chr(0x03);

# my $rename_pattern = ($esc.'07Special:Log/renameuser'.$esc.'14');

# $self->{IRCFeed}->add_hook

# ({

# 'check' => sub {return $_[0] =~ m|$rename_pattern|i;},

# 'callback' => sub {

# sleep 3;

# $self->{LogMonitor}->update_now('renameuser');

# warn "Rename detected, checking.\n";

# foreach my $page (@{$ra_pages})

# {

# &{$self->{shared}{add_job}}([\&parse_page,undef,$page],2);

# }

# warn "Page checks called.\n";

# },

# });

foreach my $page (@{$ra_pages})

{

my $pattern = ($esc.'07('.$page.')'.$esc.'14');

$self->{IRCFeed}->add_hook

({

'check' => sub {(($_[0] !~ m|HBC RenameClerkBot|) && ($_[0] =~ m|$pattern|) );return $1;},

'callback' => [\&parse_page,$page],

});

}

return;

}

sub parse_page

{

my $page = $_[1];

my $ra_name_history;

unless ($self->{LogMonitor}{params}{jobs}{renameuser}{current})

{

warn "Delaying 10 seconds till logs are loaded...\n";

&{$self->{shared}{add_job}}([\&parse_page,undef,$page], 10);

return;

}

warn "Loading $page\n";

my $page_obj = $self->{WP_obj}->get($page,'rw');

my $start_content = $page_obj->{'content'};

my(@lines) = split("\n", $page_obj->{'content'});

my @new_content;

my $current_name;

my $wanted_name;

my $report_count;

my $has_rename_count;

my $need_save = 0;

warn "Parsing page.\n";

my %status_table;

while (scalar(@lines))

{

my $line = shift(@lines);

if ($line =~ m/\*\s?Current (user)?name:.*\{\{User13\|(.*?)\}\}/i)

{

$current_name = $2;

$lines[0] =~ m/\*\s?(Target|Requested) (user)?name:.*\{\{(User13|Listuser)\|(.*?)\}\}/i;

$wanted_name = $4;

unless ($wanted_name)

{

$current_name = undef;

}

}

if ($lines[0] =~ m/Robot clerk's notes/) #'

{

$status_table{$current_name} = $lines[0];

$status_table{$current_name} =~ s/\s\[\[User:HBC RenameClerkBot\|HBC RenameClerkBot\]\] .*$// || die;

}

push(@new_content, $line) unless ($line =~ m/Robot clerk's notes/); #'

if ((($line =~ m/For bureaucrat use/) || ($line =~ m|\* Reason: |) || (scalar(@lines) < 1)) && $current_name)

{

my $ra_name_history = [];

$ra_name_history = find_rename_history($wanted_name, $ra_name_history) if ($wanted_name);

unless ($ra_name_history)

{

$ra_name_history = find_rename_history($current_name, $ra_name_history);

}

if ($ra_name_history)

{

foreach (@{$ra_name_history}) {$_ = "".$_."" if ($_ =~ m/\|$current_name\]/);}

my $rename_string = join(' ← ', @{$ra_name_history});

my $addition = "*Robot clerk's notes: Rename history of \"\[\[User:$current_name|$current_name\]\]\": \"".$rename_string."\"";

  1. warn "\n\n$addition\n\n";

push(@new_content, $addition.' ~~~~');

if ($addition ne $status_table{$current_name})

{

$need_save = 1;

$has_rename_count++;

$report_count++;

}

}

else

{

my $addition = "*Robot clerk's notes: \[\[User:$current_name|$current_name\]\] does not have any history of being renamed in the logs";

push(@new_content, $addition.' ~~~~');

if ($addition ne $status_table{$current_name})

{

$need_save = 1;

$report_count++;

}

}

$current_name = undef;

}

}

my $new_content = join("\n", @new_content);

unless ($need_save)

{

warn "Don't need change\n";

return;

}

$has_rename_count ||= 0;

$report_count ||= 0;

$page_obj->{'content'} = $new_content;

$page_obj->{'summary'} = "(Testing) Updating rename history on $report_count user".(($report_count != 1) ? ('s') : ()).", $has_rename_count user".(($has_rename_count != 1) ? ('s') : ())." renamed.";

warn "saving...\n";

warn $page_obj->save();

# warn $page_obj->{'summary'};

# warn $page_obj->{'content'};

warn "I have saved $page\n";

}

sub find_rename_history

{

my $name = shift;

my $ra_name_history = shift;

foreach my $check (@{$ra_name_history})

{

return $ra_name_history if ($check =~ m/\|$name\]\]/);

}

# warn "Adding: $name\n";

my $name_string = encode_utf8($name);

push(@{$ra_name_history}, "\[\[User:$name_string|$name_string\]\]");

my $ra_logs = $self->{LogMonitor}{params}{jobs}{renameuser}{log};

foreach my $rh_log (@{$ra_logs})

{

${$rh_log}{comment} =~ m/\[\[User:(.*?)\|.*?\]\].*\[\[User:(.*?)\|.*\]\]/;

my $old_name = $1;

my $new_name = $2;

if ($name eq $new_name)

{

find_rename_history($old_name, $ra_name_history);

}

}

if (@{$ra_name_history} > 1)

{

return $ra_name_history;

}

else

{

return undef;

}

}

1;

LogMonitor

package HBCPlugins::LogMonitor;

use strict;

use XML::Simple;

use Data::Dumper;

use URI::Escape;

use LWP::UserAgent;

our $self;

sub new

{

shift;

$self = shift;

bless($self);

$self->{params}{jobs} = {};

$self->{UA} = my $ua = LWP::UserAgent->new('agent' => 'LogMonitor .0001b');

warn "LogMonitor active.\n";

return $self;

}

sub add_job

{

my $self = shift;

my %params = @_;

my $type = $params{type};

$self->{params}{jobs}{$type} = \%params;

$self->{params}{jobs}{$type}{offset} = ((lc($params{start_point}) eq 'all') ? (0) : ($params{start_point}));

$self->{params}{jobs}{$type}{log} = [];

$self->{params}{jobs}{$type}{step_size} ||= 250;

$self->{params}{jobs}{$type}{current} = 0;

warn "Set initial offset for $type to ".$self->{params}{jobs}{$type}{offset}."\n\n";

&{$self->{shared}{add_job}}([\&handle_jobs,$type],0);

}

sub update_now

{

my $self = shift;

my $type = shift;

warn "Forcing manual update for '$type' log.\n";

&{$self->{shared}{add_job}}([\&handle_jobs,$type],0);

}

sub handle_jobs

{

my $type = shift;

my $url_template = 'http://en.wikipedia.org/w/api.php?action=query&format=xml&list=logevents&letype=&lelimit=& ledir=newer';

my $url = $url_template;

$self->{params}{jobs}{$type}{offset} ||= 0;

warn "Reading up to ".$self->{params}{jobs}{$type}{step_size}." log entries from $type starting at: ".$self->{params}{jobs}{$type} {offset}."\n";

$url =~ s||$type|;

$url =~ s||$self->{params}{jobs}{$type}{step_size}|;

my $offset_line = ('&lestart='.$self->{params}{jobs}{$type}{offset});

if ($self->{params}{jobs}{$type}{offset}){$url =~ s||$offset_line|} else {$url =~ s|||}

my $rh_xml = XMLin($self->{UA}->get($url)->content());

my $ra_renames = ${$rh_xml}{query}{logevents}{item};

($ra_renames = [$ra_renames]) if (ref($ra_renames) eq 'HASH');

shift(@{$ra_renames}) if ($self->{params}{jobs}{$type}{offset} > 0);

push(@{$self->{params}{jobs}{$type}{log}}, @{$ra_renames});

@{$self->{params}{jobs}{$type}{log}} = sort {return ${$a}{timestamp} <=> ${$b}{timestamp}} (@{$self->{params}{jobs}{$type}{log}});

$self->{params}{jobs}{$type}{offset} = ${${$self->{params}{jobs}{$type}{log}}[scalar(@{$self->{params}{jobs}{$type}{log}}) - 1]}{'timestamp'};

unless (scalar(@{$ra_renames}) < ($self->{params}{jobs}{$type}{step_size}-1)) # Unless we got less than what we asked for, ask again using the last timestamp as an offset

{

&{$self->{shared}{add_job}}([\&handle_jobs,$type],$self->{params}{jobs}{$type}{catch_up_frequency});

$self->{params}{jobs}{$type}{current} = 0;

}

else

{

&{$self->{shared}{add_job}}([\&handle_jobs,$type],$self->{params}{jobs}{$type}{regular_frequency});

$self->{params}{jobs}{$type}{current} = 1;

}

warn "Added ".scalar(@{$ra_renames})." log entries on this pass.\n";

warn "Current total of: ".scalar(@{$self->{params}{jobs}{$type}{log}})."\n";

warn ((($self->{params}{jobs}{$type}{current}) ? ('This is current') : ('This is not current'))."\n\n");

}

1;