User:AnomieBOT/source/tasks/TFDClerk.pm

{{ombox|type=notice|text= Approved 2011-11-08
Wikipedia:Bots/Requests for approval/AnomieBOT 59}}

package tasks::TFDClerk;

=pod

=begin metadata

Bot: AnomieBOT

Task: TFDClerk

BRFA: Wikipedia:Bots/Requests for approval/AnomieBOT 59

Status: Approved 2011-11-08

Created: 2011-10-30

Peform the following tasks at WP:TFD:

=end metadata

=cut

use utf8;

use strict;

use AnomieBOT::Task qw/:time/;

use Data::Dumper;

use vars qw/@ISA/;

@ISA=qw/AnomieBOT::Task/;

my @months=('','January','February','March','April','May','June','July','August','September','October','November','December');

my $is_closed_re=qr((?:\{\{\s*tfd[_ ]?top\s*[|}]|

my %db=(

nonsense => 'G1',

test => 'G2',

vandalism => 'G3',

pagemove => 'G3',

hoax => 'G3',

repost => 'G4',

banned => 'G5',

histmerge => 'G6',

move => 'G6',

copypaste => 'G6',

xfd => 'G6',

maintenance => 'G6',

house => 'G6',

disambig => 'G6',

movedab => 'G6',

unpatrolled => 'G6',

author => 'G7',

self => 'G7',

blanked => 'G7',

talk => 'G8',

subpage => 'G8',

imagepage => 'G8',

redirnone => 'G8',

templatecat => 'G8',

attack => 'G10',

blp => 'G10',

attackorg => 'G10',

spam => 'G11',

promo => 'G11',

copyvio => 'G12',

policy => 'T2',

misrepresentation => 'T2',

misrepresent => 'T2',

fake => 'T2',

't3-s' => 'T3',

duplicatetemplate => 'T3',

duplicate => 'T3',

);

sub new {

my $class=shift;

my $self=$class->SUPER::new();

$self->{'lasttime'}=0;

$self->{'broken'}=0;

bless $self, $class;

return $self;

}

=pod

=for info

Approved 2011-11-08
Wikipedia:Bots/Requests for approval/AnomieBOT 59

=cut

sub approved {

return 2;

}

sub run {

my ($self, $api)=@_;

my $res;

$api->task('TFDClerk', 0, 10, qw/d::Talk d::Templates d::Redirects d::Sections/);

my %p=$api->redirects_to_resolved('Template:Delrevxfd');

if(exists($p{''})){

if($p{''}{'code'} eq 'shutoff'){

$api->warn("Task disabled: ".$p{''}{'content'}."\n");

return 300;

}

$api->warn("Failed to get notice template redirects: ".$p{''}{'error'}."\n");

return 60;

}

my @p=map { s/^Template://; my ($a,$b)=split(//,$_,2); "(?i:\Q$a\E)\Q$b\E"; } keys %p;

my $p=join('|',@p);

my $noticere=qr/(?:(?i:\s*)?\{\{\s*(?i:Template\s*:\s*)?(?:$p)\s*(?:\|.*?)?\}\}\s*(?i:<\/noinclude>\s*)?)/;

my %tosubst=$api->redirects_to_resolved('Template:Tfd top', 'Template:Tfd bottom');

if(exists($tosubst{''})){

if($tosubst{''}{'code'} eq 'shutoff'){

$api->warn("Task disabled: ".$tosubst{''}{'content'}."\n");

return 300;

}

$api->warn("Failed to get top/bottom template redirects: ".$tosubst{''}{'error'}."\n");

return 60;

}

my %tfdlinks=$api->redirects_to_resolved('Template:Tfd links');

if(exists($tfdlinks{''})){

if($tfdlinks{''}{'code'} eq 'shutoff'){

$api->warn("Task disabled: ".$tfdlinks{''}{'content'}."\n");

return 300;

}

$api->warn("Failed to get {{tfd links}} redirects: ".$tfdlinks{''}{'error'}."\n");

return 60;

}

# Only check once per hour

if($self->{'lasttime'}==0){

if(exists($api->store->{'lasttime'})){

my $t=$api->store->{'lasttime'};

$self->{'lasttime'}=$t if($t=~/^\d+$/ && $t<=time());

}

$self->{'broken'}=$api->store->{'broken'} if(exists($api->store->{'broken'}));

}

my $starttime=time();

my $t=$self->{'lasttime'}+($self->{'broken'}?300:3600)-$starttime;

return $t if $t>0;

# If it's close enough to 23:00, just wait for 23:00.

$t=82800-($starttime%86400);

return $t if($t>0 && $t<($self->{'broken'}?300:3600));

# If it's close enough to 00:00, just wait for 00:00.

$t=86400-($starttime%86400);

return $t if($t>0 && $t<($self->{'broken'}?300:1800));

my $startdate=[1,3,2018];

$startdate=$api->store->{'startdate'} if exists($api->store->{'startdate'});

my $screwup=' Errors? User:'.$api->user.'/shutoff/TFDClerk';

# Get the content of all versions of "tfd top" since the startdate

my $re='\{\{\s*[tT]fd[ _]?top\s*(?s:\|.*?)?\}\}';

my %cont=();

my $first=1;

while($first || %cont) {

my $t=$api->query(

titles => 'Template:tfd top',

prop => 'revisions',

rvprop => 'timestamp|content',

rvslots => 'main',

rvlimit => 1,

%cont,

);

if($t->{'code'} ne 'success'){

$api->warn("Failed to load revisions for Template:tfd top: ".$t->{'error'}."\n");

return 60;

}

%cont=exists($t->{'query-continue'})?%{$t->{'query-continue'}{'revisions'}}:();

$t=(values(%{$t->{'query'}{'pages'}}))[0]{'revisions'}[0];

%cont=() if $t->{'timestamp'} lt sprintf("%04d-%02d-%02d", reverse @$startdate);

$t=$t->{'slots'}{'main'}{'*'};

$t=~s!.*!!gs;

$t=~s!!!g;

$t=~s!\{\{\{1\|delete\.\}\}\}\{\{(?:safe)?subst:#ifeq:\{\{\{2\|\}\}\}\|y\| ~~~~\}\}!\x07!g;

unless($t =~ m/^\s*$is_closed_re/o){

next unless $first;

$api->whine("Template:Tfd top is broken", "Help! The template {{tl|tfd top}} is missing the \"is_closed\" regex, or this regex is not at the beginning of the template's output. To avoid confusion, I'm not going to process any TFDs until it's fixed or I'm fixed.");

return 300;

}

if($t =~ m/\x07\s*$/){

next unless $first;

$api->whine("Template:Tfd top is broken", "Help! The template {{tl|tfd top}} does not end with some constant text, i.e. {{{1|}}} is at the very end of the template. To avoid confusion, I'm not going to process any TFDs until it's fixed or I'm fixed.");

return 300;

}

if($t =~ m/\{\{\{/){

next unless $first;

$api->whine("Template:Tfd top is broken", "Help! The template {{tl|tfd top}} contains unknown parameters. To avoid confusion, I'm not going to process any TFDs until it's fixed or I'm fixed.");

return 300;

}

$t=quotemeta($t);

$t=~s/\\\x07/(?s:.*?)/g;

$re.="|$t";

$first = 0;

}

# Iterate over all our pages

my $broken=0;

my $new_start=_make_date();

my $sevendays=_date_add(_make_date(),-7,0,0);

my @old=();

my @oldsumm=();

my @oldlinks=();

MAINLOOP: for(my $date=_make_date(time+3600); _cmp_date($startdate,$date)<=0; $date=_date_add($date,-1,0,0)){

return 0 if $api->halting;

my $title='Wikipedia:Templates for discussion/Log/'.$date->[2].' '.$months[$date->[1]].' '.$date->[0];

$api->log("Checking TFDs in $title");

my $tok=$api->edittoken($title);

if($tok->{'code'} eq 'shutoff'){

$api->warn("Task disabled: ".$tok->{'content'}."\n");

return 300;

}

if($tok->{'code'} ne 'success'){

$api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");

return 60;

}

my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'} // '';

my $outtxt=$intxt;

# Fix header if necessary

my $fixedhead=0;

my ($pageheader,$err)=_makepagehead($api,$title,$date);

if ( ! defined( $pageheader ) ) {

$api->warn("Failed to get page header for $title: $err\n");

return 60;

}

if($outtxt!~/^\Q$pageheader\E/){

my $dt=$months[$date->[1]].' '.$date->[0];

my $dt2=$date->[2].' '.$months[$date->[1]].' '.$date->[0];

my $oldtxt;

do {

$oldtxt=$outtxt;

$outtxt=~s!^(?:|.*?\n)===\s*(?:\Q$dt\E|\Q$dt\E)\s*===[^\n]*\n!!s;

$outtxt=~s/^\s*\[\[\s*(?i:Category)\s*:\s*Non-talk pages that are automatically signed(?:\|.*?)?\]\].*?\n//;

$outtxt=~s/^\s*\s*//s;

$outtxt=~s/^\s*//;

} while($oldtxt ne $outtxt);

$outtxt="$pageheader\n$outtxt";

}

$fixedhead=($outtxt ne $intxt);

# If the page has been edited in the last day, keep watching it in case

# the last closing gets reverted.

my $ts=ISO2timestamp($tok->{'revisions'}[0]{'timestamp'}) // time;

$new_start=[@$date] if(time()-$ts<86400);

# Fix any simple mispositioned headers: armor any good headers, then

# fix any mispositioned ones, then unarmor.

my ($marker, $i)=('', 0);

do {

$marker = "\x02--$i--\x03";

$i++;

} while($outtxt=~/$marker/);

$outtxt=~s/(?:^|(?<=\n))((====+)[^=](?:.*[^=])?\2\s*?\n\s*$noticere*$is_closed_re)/$1$marker/go;

my $fixed=($outtxt=~s/(?:^|(?<=\n))((?>$re).*\n)\s*((====+)[^=](?:.*[^=])?\3\s*?\n)/$2$1/go);

$outtxt=~s/$marker//go;

# Split into level-4+ sections, and check if each is closed

my @sections=$api->split_sections($outtxt, "456");

my $ct=0;

my %tmpl=();

my @secs=();

my @closed=();

my @pageoldlinks=();

for(my $i=0; $i<@sections; $i++){

my $s=$sections[$i];

if($s->{'body'}=~m/^\s*$noticere*$is_closed_re/o){

# Someone closed a section, so merge in all its subsections

my $j;

for($j=$i+1; $j<@sections && $sections[$j]->{'level'} > $s->{'level'}; $j++){}

if($j>$i+1){

$s->{'body'}=~s/\s*$/\n\n/;

$s->{'body'}.=$api->join_sections(splice(@sections, $i+1, $j-$i-1));

}

}

$_=$s->{'body'};

# Check manual closes

my $bad=/(?>^\s*$noticere*\S).*$is_closed_re/so;

next if !$bad && m/^\s*$noticere*$is_closed_re/o;

if($bad || /$is_closed_re/so){

$api->log("Crap, $title is b0rken");

$api->warn("Crap, $title is b0rken\n");

$api->whine("$title is broken", "Help! A section in $title contains the \"is_closed\" regex but not at the beginning of the section. Probably someone put the {{tl|tfd top}} before a section header instead of after. Anyway, I can't do anything to that page until someone fixes it.");

if(_cmp_date($date,$sevendays)<0){

push @old, "{{$title}}\n";

unshift @oldsumm, [@$date];

push @oldlinks, "* $title is broken\n";

}

$new_start=[@$date];

$broken=1;

next MAINLOOP;

}

# Check for bot-closability

my @templates=();

$api->process_templates($_, sub {

my $name=shift;

my $params=shift;

my $ns = 'Template';

my $tpl = undef;

return undef unless exists($tfdlinks{"Template:$name"});

for my $p ($api->process_paramlist(@$params)){

$tpl = $p->{'value'} if $p->{'name'} eq '1';

$ns = 'Module' if ( $p->{'name'} eq 'module' && $p->{'value'} ne '' );

$ns = 'Category' if ( $p->{'name'} eq 'catfd' && $p->{'value'} ne '' );

}

push @templates, "$ns:$tpl" if defined( $tpl );

return undef;

});

unless ( @templates ) {

if ( $s->{'level'} ) {

push @pageoldlinks, "* $title#{{anchorencode:" . $s->{'title'} . "}} – No {{tl|tfd links}}, a human must close it.\n";

$ct++;

}

next;

}

next unless $s->{'level'};

$ct++;

foreach my $t (@templates){

$tmpl{$t}=undef;

}

push @secs, [ $s, @templates ];

}

next if($ct==0 && @closed==0 && !$fixedhead && !$fixed);

# Check if the unclosed templates still exist.

my @titles=keys %tmpl;

while(@titles){

my @tmpl=splice(@titles, 0, 500);

$res=$api->query(

titles => join('|', @tmpl),

prop => 'info',

);

if($res->{'code'} ne 'success'){

$api->warn("Failed to retrieve template info for $title: ".$res->{'error'}."\n");

return 60;

}

my %map=();

%map=map { $_->{'to'}, $_->{'from'} } @{$res->{'query'}{'normalized'}} if exists($res->{'query'}{'normalized'});

PAGE: foreach (values %{$res->{'query'}{'pages'}}){

next PAGE unless exists($_->{'missing'});

my $tmpl=$api->apply_redirect_map( $_->{'title'}, \%map );

if(!exists($tmpl{$tmpl})){

my $d=Dumper($_); $d=~s/\s+/ /g;

$api->warn("How odd, this was apparently returned even though it wasn't requested: $d\n");

next PAGE;

}

my $msg=undef;

# First, check for a deletion.

my $r=$api->query(

letitle => $_->{'title'},

list => 'logevents',

letype => 'delete',

lelimit => 1,

leprop => 'user|timestamp|comment',

);

if($r->{'code'} ne 'success'){

$api->warn("Failed to retrieve logs for ".$_->{'title'}.": ".$r->{'error'}."\n");

return 60;

}

if(exists($r->{'query'}{'logevents'}[0])){

my $log=$r->{'query'}{'logevents'}[0];

# Skip for now if it was deleted within the past hour, to

# give the closing admin a chance to close it themself.

my $t=ISO2timestamp($log->{'timestamp'});

next PAGE if(time()-$t<3600);

# Check whether the deletion log entry could reasonably

# belong to this TFD: the deletion log entry should be

# dated on or after the TFD page date. Give them a day of

# leeway just in case.

$t=_date_add(_make_date($t),1,0,0);

if(_cmp_date($t,$date)>=0){

# Close it!

$msg="Delete; deleted";

if($log->{'comment'}=~/CSD(?:#|\]\] | )([GT]\d+)/i ||

$log->{'comment'}=~/db-([gt]\d+)/){

my $c=uc($1);

$msg.=" as $c";

} elsif($log->{'comment'}=~/db-([a-z])/ && exists($db{$1})){

my $c=$db{$1};

$msg.=" as $c";

}

$msg.=" by {{admin|".$log->{'user'}."}}";

}

}

# Check for a move-without-redirect

$r=$api->query(

letitle => $_->{'title'},

list => 'logevents',

letype => 'move',

lelimit => 1,

leprop => 'user|timestamp|comment|details',

);

if($r->{'code'} ne 'success'){

$api->warn("Failed to retrieve logs for ".$_->{'title'}.": ".$r->{'error'}."\n");

return 60;

}

if(exists($r->{'query'}{'logevents'}[0])){

my $log=$r->{'query'}{'logevents'}[0];

next PAGE unless exists( $log->{'params'}{'suppressredirect'} );

# Skip for now if it was deleted within the past hour, to

# give the closing admin a chance to close it themself.

my $t=ISO2timestamp($log->{'timestamp'});

next PAGE if(time()-$t<3600);

# Check whether the move log entry could reasonably

# belong to this TFD: the move log entry should be

# dated on or after the TFD page date. Give them a day of

# leeway just in case.

$t=_date_add(_make_date($t),1,0,0);

if(_cmp_date($t,$date)>=0){

# Close it!

$msg="Moved without redirect";

if(($log->{'params'}{'target_title'} // ) ne ) {

$msg .= ' to :' . $log->{'params'}{'target_title'} . '';

}

if($log->{'comment'} ne '') {

$msg .= ' with comment "' . $log->{'comment'} . '"';

}

$msg.=" by {{admin|".$log->{'user'}."}}";

}

}

if(!defined($msg)){

my $what = $_->{'title'} =~ /^Module:/ ? 'Module' : 'Template';

$msg="$what does not exist. If the title linked contains a typo, feel free to correct the typo and un-close this discussion.";

}

next if !defined($msg);

$tmpl{$tmpl}=$msg;

}

}

# Now check the discussions to determine if all are closed.

foreach my $sec (@secs){

my @tmpl=@$sec;

my $s = shift @tmpl;

my %msgs=();

my $ok=1;

foreach my $t (@tmpl){

$ok=0 unless defined($tmpl{$t});

last unless $ok;

push @{$msgs{$tmpl{$t}}}, ":$t";

}

unless ( $ok ) {

push @pageoldlinks, "* $title#{{anchorencode:" . $s->{'title'} . "}}\n";

next;

}

$sec->[0]{'body'}=~s/^\s+|\s+$//g;

my @msgs=keys %msgs;

my $msg="\x02!!!!\x03\n";

if(@msgs==1){

$msg="{{subst:tfd top|1=".$msgs[0]." ~~~~}}\n";

} else {

$msg="{{subst:tfd top|1=Delete for multiple reasons:\n";

foreach my $m (@msgs){

$msg.="* ".join(", ", @{$msgs{$m}}).": $m\n";

}

$msg.="~~~~}}\n";

}

$sec->[0]{'body'}=$msg.$sec->[0]{'body'}."\n{{subst:tfd bottom}}\n";

$ct--;

push @closed, (@tmpl==1 ? ":".$tmpl[0]."" : '"'.$sec->[0]{'title'}.'"');

}

# Mark for entry on the list of old TFDs, if applicable

if($ct>0){

if(_cmp_date($date,$sevendays)<0){

push @old, "{{$title}}\n";

unshift @oldsumm, [@$date];

}

$new_start=[@$date];

}

if(_cmp_date($date,$sevendays)<0){

push @oldlinks, @pageoldlinks;

}

# Need to edit?

next unless(@closed || $fixed || $fixedhead);

# Processed, now reconstruct the page

$outtxt=$api->join_sections(@sections);

# Subst templates, if necessary

my $subst=0;

$outtxt=$api->process_templates($outtxt, sub {

my $name=shift;

shift; #$params

my $wikitext=shift;

return undef unless exists($tosubst{"Template:$name"});

$subst++;

$wikitext=~s/^\{\{\s*/\{\{subst:/;

return $wikitext;

});

# Create summary

my @summary=();

if($fixedhead){

if(exists($tok->{'missing'})){

push @summary, "new discussion page: ".$date->[2].' '.$months[$date->[1]].' '.$date->[0];

} else {

push @summary, "fix page header";

}

}

push @summary, "subst {{tfd top}} and/or {{tfd bottom}}" if $subst>0;

push @summary, 'move closing box'.(($fixed>1)?'es':'').' per WP:DPR#TFD' if $fixed;

my $toomany=@summary;

push @summary, 'close discussions for deleted/nonexistent templates: '.join(', ', @closed) if @closed;

my $summary='(BOT) '.ucfirst(join('; ', @summary)).".$screwup";

$api->log("$summary in $title");

if(length($summary)>500){

$summary[$toomany]='close discussions for deleted/nonexistent templates: [too many to list]' if @closed;

$summary='(BOT) '.ucfirst(join('; ', @summary)).$screwup;

}

my $r=$api->edit($tok, $outtxt, $summary, 0, 1);

if($r->{'code'} ne 'success'){

$api->warn("Write failed on $title: ".$r->{'error'}."\n");

return 60;

}

}

# Ok, we've processed all the subpages. Now update the page of links to old

# unclosed discussions.

my $title='Wikipedia:Templates for discussion/Old unclosed discussions';

$api->log("Updating discussions lists on $title");

my $tok=$api->edittoken($title);

if($tok->{'code'} eq 'shutoff'){

$api->warn("Task disabled: ".$tok->{'content'}."\n");

return 300;

}

if($tok->{'code'} ne 'success'){

$api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");

return 60;

}

my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'} // '';

$intxt=~s/Last updated .*?/Last updated ~~~~~/;

$intxt=~s/\s*$/\n/;

my $outtxt="This is a list of unclosed TfDs over 7 days old. It is automatically maintained by a bot, but humans are free to remove lines when closing discussions if they'd like. Last updated ~~~~~.\n\n";

if ( @oldlinks ) {

$outtxt .= join( '', @oldlinks );

} else {

$outtxt .= "* None at this time\n";

}

if($intxt ne $outtxt){

my $summary;

if(@oldsumm){

my $m=0;

my @oldsumm2=map {

my $ret;

if($_->[1]!=$m){

$m=$_->[1];

$ret=substr($months[$_->[1]],0,3).' '.$_->[0];

} else {

$ret=$_->[0];

}

$ret

} @oldsumm;

$oldsumm2[-1].='.';

$summary='(BOT) Updating discussions: '.join(', ', @oldsumm2).$screwup;

$api->log("$summary in $title");

$summary='(BOT) Updating discussions: major backlog!'.$screwup if length($summary)>500;

} else {

$summary='(BOT) Updating discussions: no old discussions'.$screwup;

}

my $r=$api->edit($tok, $outtxt, $summary, 0, 1);

if($r->{'code'} ne 'success'){

$api->warn("Write failed on $title: ".$r->{'error'}."\n");

return 60;

}

}

# Save checked revision

$self->{'lasttime'}=$starttime;

$self->{'broken'}=$broken;

$api->store->{'startdate'}=$new_start;

$api->store->{'lasttime'}=$starttime;

$api->store->{'broken'}=$broken;

return $starttime+($self->{'broken'}?300:3600)-time;

}

sub _make_date {

my $t=shift || time;

if(ref($t) eq 'ARRAY'){

return _fix_date([@$t]);

} else {

my @t=gmtime($t);

@t=@t[3..5];

$t[1]+=1;

$t[2]+=1900;

return [@t];

}

}

sub _date_add {

my @t=@{$_[0]};

$t[0]+=$_[1];

$t[1]+=$_[2];

$t[2]+=$_[3];

return _fix_date([@t]);

}

sub _fix_date {

my $t=shift;

my @t=gmtime(timegm(0,0,0,$t->[0],$t->[1]-1,$t->[2]-1900));

@t=@t[3..5];

$t[1]+=1;

$t[2]+=1900;

return [@t];

}

sub _cmp_date {

my $a=shift;

my $b=shift;

my $x;

$x=$a->[2]-$b->[2];

$x=$a->[1]-$b->[1] if $x==0;

$x=$a->[0]-$b->[0] if $x==0;

return $x;

}

sub _makepagehead {

my $api = shift;

my $title = shift;

my $date = shift;

my $res = $api->query(

action => 'parse',

title => $title,

text => '{{subst:TfD log day|' . $date->[2] . '|' . $months[$date->[1]] . '|' . $date->[0] . '}}',

onlypst => 1,

formatversion => 2,

);

return ( undef, $res->{'error'} ) if $res->{'code'} ne 'success';

my $txt = $res->{'parse'}{'text'};

$txt =~ s/\s*$/\n/s;

return ($txt, undef);

}

1;