User:AnomieBOT/source/d/Templates.pm

{{ombox|text=See /doc for formatted documentation}}

package d::Templates;

use utf8;

use strict;

use Data::Dumper;

use AnomieBOT::API;

AnomieBOT::API::load('d::Nowiki');

use vars qw/@ISA/;

@ISA=qw/d::Nowiki/;

=pod

=head1 NAME

d::Templates - AnomieBOT template-handling decorator

=head1 SYNOPSIS

use AnomieBOT::API;

$api = new AnomieBOT::API('conf.ini', 1);

$api->decorators(qw/d::Templates/);

=head1 DESCRIPTION

C contains template manipulating functions for use by an

AnomieBOT task. When "d::Templates" is used as a decorator on the API object,

the following methods are available.

In addition, all A methods are also available, as this decorator

uses them internally.

=head1 METHODS PROVIDED

=over

=item $api->process_templates( $wikitext, $callback, $data )

Runs a parser over the wikitext, calling the callback function for each

template, magic word, or parser function found (basically anything encosed in

double-braces). The callback may return a replacement string, and the final

processed version is returned.

The callback function will be passed the following parameters:

=over

=item $name

The template name or the parser function/magic word invocation. For example,

"reflist" or "#tag:ref". Stripped of leading/trailing spaces and with the first

character uppercased.

=item $params

An array of the parameters. Spaces are not stripped, nor is there any attempt

to interpret named parameters.

=item $wikitext

The raw wikitext of the template.

=item $data

The data object passed in the original call.

=item $orig_name

C<$name> before the stripping and uppercasing.

=item $nl

Boolean, whether the template invocation immediately follows a newline. Possibly useful for working around T14974.

=back

Any non-C return value will be used to replace the original template.

=cut

sub process_templates {

my $api=shift;

my ($text,$nowiki)=$api->strip_nowiki(shift);

my $cb=shift;

my $data=shift;

my $notags=undef;

($text,$notags)=$api->strip_tags([$api->extension_tags], $text);

if(exists($notags->{$text})){

# The entire text was in one tag (probably because of a recursive call),

# so process the contents of that one tag.

$text=$notags->{$text};

$notags=undef;

} else {

while(my ($k,$v)=each %$notags){

next unless $text=~/\Q$k\E/;

$v=$api->replace_stripped($v,$nowiki);

$notags->{$k}=process_templates($api,$v,$cb,$data);

}

}

my @stack=();

while($text=~/(\{\{+|\}\}+|\[\[+|\]\]+|\|)/g){

my $ct=length($1);

my $i=pos($text)-$ct;

my $x=@stack?$stack[$#stack]:undef;

my $c=substr($1,0,1);

if($c eq "\x7b"){

# Found at least two open-braces

push @stack, {

char=>"\x7b",

start=>$i,

count=>$ct,

pstart=>$i+$ct,

params=>[]

};

$i+=$ct;

} elsif($c eq "\x5b"){

# Found at least two open-brackets

push @stack, {

char=>"\x5b",

start=>$i,

count=>$ct,

pstart=>$i+$ct,

params=>[]

};

$i+=$ct;

} elsif($c eq "\x7d" && defined($x) && $x->{'char'} eq "\x7b"){

# Found at least two close-braces, and we have at least one

# possible template/variable on the stack.

$ct=$x->{'count'} if $ct>$x->{'count'};

$i+=$ct;

push @{$x->{'params'}}, substr($text, $x->{'pstart'}, $i-$ct-$x->{'pstart'});

# First, pull out variables

if($ct>=3){

$x->{'count'}-=$ct-($ct%3);

$ct=$ct%3;

my $s=$x->{'start'}+$x->{'count'};

$x->{'params'}=[substr($text, $s, $i-$ct-$s)];

}

# Ok, any left is a template

if($ct>=2){

$x->{'count'}-=2;

$ct-=2;

my $s=$x->{'start'}+$x->{'count'};

my $orig=$api->replace_stripped(substr($text,$s,$i-$ct-$s), $notags, $nowiki);

map { $_=$api->replace_stripped($_, $notags, $nowiki); } @{$x->{'params'}};

my $name=shift @{$x->{'params'}};

my $oname=$name;

$name=~s///g;

$name=~s/[\x{200e}\x{200f}\x{202a}-\x{202e}]//g; # MediaWiki strips these from titles

$name=~s/[\s_\xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{2028}\x{2029}\x{202f}\x{205f}\x{3000}]+/ /g; # Mediawiki considers all these as whitespace

$name=~s/^\s+|\s+$//g;

$name=~s/^Template\s*:\s*//ig;

$name=ucfirst($name);

my $ret=&$cb($name, $x->{'params'}, $orig, $data, $oname, ($s>0 && substr($text,$s-1,1) eq "\n")?1:0);

if(defined($ret)){

$ret="$ret";

# If we're completely removing the template and the

# template is the only thing on its line, remove the line

# too instead of leaving an empty one.

my $d=($ret eq '' && ($s==0 || substr($text,$s-1,1) eq "\n") && substr($text,$i-$ct,1) eq "\n")?1:0;

substr($text, $s, $i-$ct-$s+$d)=$ret;

$i=$s+length($ret)+$ct;

$x->{'params'}=[$ret];

} else {

$x->{'params'}=[$orig];

}

}

if($x->{'count'}<2){

pop @stack;

} else {

# The one we just completed might not be the end of the param,

# so reset the param array and pstart.

$x->{'params'}=[];

$x->{'pstart'}=$x->{'start'}+$x->{'count'};

}

} elsif($c eq "\x5d" && defined($x) && $x->{'char'} eq "\x5b"){

# Found at least two close-brackets, and we have at least one

# possible wikilink on the stack

# Eat however many brackets are matched

$ct=$x->{'count'} if $ct>$x->{'count'};

$i+=$ct;

$x->{'count'}-=$ct;

if($x->{'count'}<2){

pop @stack;

} else {

# The one we just completed might not be the end of the param,

# so reset the param array and pstart.

$x->{'params'}=[];

$x->{'pstart'}=$x->{'start'}+$x->{'count'};

}

} elsif($c eq '|' && defined($x)){

push @{$x->{'params'}}, substr($text, $x->{'pstart'}, $i-$x->{'pstart'});

$x->{'pstart'}=++$i;

} else {

$i++;

}

pos($text)=$i if !defined( pos($text) ) || pos($text) != $i;

}

return $api->replace_stripped($text, $notags, $nowiki);

}

=pod

=item $api->process_paramlist( @params )

Parse named parameters. Returns an array of objects having C, C,

C, and C parameters. If the parameter was unnamed, C will

be undef and C will be the calculated parameter number.

=cut

sub process_paramlist {

my $api=shift;

my @params=@_;

my @ret=();

my $idx=0;

foreach (@params){

# Normal unnamed params are easy to detect.

if(!/=/){ push @ret, { oname=>undef, name=>++$idx, value=>$_, text=>$_ }; next; }

# As long as the naive "name" part doesn't contain the start of a

# template or a tag or an internal link, it's correct.

if(/^(\s*([^=<\x5b\x7b]*?)\s*)=\s*(.*?)\s*$/s){ push @ret, { oname=>$1, name=>$2, value=>$3, text=>$_ }; next; }

# Must be complicated now, the name can contain an equals if it's

# inside a template, a parameter, an internal link, or a comment. Also,

# replaced tags just completely screw things up (the parameter name

# ends up containing the unique token, which is probably impossible to

# actually _use_ as a parameter).

my ($text,$nowiki)=$api->strip_tags([$api->extension_tags], $_);

# Just comments/nowikis/refs fixed it?

if($text!~/=/){ push @ret, { oname=>undef, name=>++$idx, value=>$_, text=>$_ }; next; }

if($text=~/^\s*([^=\x5b\x7b]*?)\s*=\s*(.*?)\s*$/s){

my ($oname,$name,$v)=($1,$1,$2);

$oname=$api->replace_stripped($oname, $nowiki);

$name=$api->replace_stripped($name, $nowiki);

$v=$api->replace_stripped($v, $nowiki);

push @ret, { oname=>$oname, name=>$name, value=>$v, text=>$_ };

next;

}

# No, there must be a template or link in there somewhere...

my @stack=();

my $i=0;

my $len=length($text);

while($i<$len){

my $x=@stack?$stack[$#stack]:undef;

my $xb=undef;

map { $xb=$_ if $_->{'char'} eq "\x5b" } @stack;

if(substr($text,$i,2) eq "\x7b\x7b" || substr($text,$i,2) eq "\x5b\x5b"){

# Found at least two open-braces/brackets

my $ct;

my $c=substr($text,$i,1);

for($ct=2; substr($text,$i+$ct,1) eq $c; $ct++){}

push @stack, {

char=>$c,

start=>$i,

count=>$ct,

};

$i+=$ct;

} elsif(defined($x) && $x->{'char'} eq "\x7b" && substr($text,$i,2) eq "\x7d\x7d"){

# Found at least two close-braces, and we have at least one

# possible template/variable on the stack.

my $ct;

for($ct=2; substr($text,$i+$ct,1) eq "\x7d"; $ct++){}

$ct=$x->{'count'} if $ct>$x->{'count'};

$i+=$ct;

while($ct>=3){

$x->{'count'}-=3;

$ct-=3;

}

while($ct>=2){

$x->{'count'}-=2;

$ct-=2;

}

if($x->{'count'}<2){

pop @stack;

}

} elsif(defined($xb) && substr($text,$i,2) eq "\x5d\x5d"){

# Found at least two close-brackets, and we have at least one

# possible wikilink on the stack

# Drop any pending templates, they're not really templates

while($stack[$#stack] ne $xb){

pop @stack;

}

# Eat however many brackets are matched

my $ct;

for($ct=2; substr($text,$i+$ct,1) eq "\x5d"; $ct++){}

$ct=$xb->{'count'} if $ct>$xb->{'count'};

$i+=$ct;

$xb->{'count'}-=$ct;

if($xb->{'count'}<2){

pop @stack;

}

} elsif(!defined($x) && substr($text,$i,1) eq '='){

# Found the equals!

last;

} else {

$i++;

}

}

if($i>=$len){ push @ret, { oname=>undef, name=>++$idx, value=>$_, text=>$_ }; next; }

my $oname=substr($text,0,$i);

$oname=$api->replace_stripped($oname, $nowiki);

my $name=$oname; $name=~s/^\s+|\s+$//g;

my $v=substr($text,$i+1);

$v=~s/^\s+|\s+$//g;

$v=$api->replace_stripped($v, $nowiki);

push @ret, { oname=>$oname, name=>$name, value=>$v, text=>$_ };

next;

}

return @ret;

}

=pod

=item $api->strip_templates( $wikitext, \&callback, $data )

=item $api->strip_templates( $wikitext, \&callback, $data, \%matches )

Runs a parser over the wikitext, calling the callback function for each

template, magic word, or parser function found (basically anything encosed in

double-braces). If the callback returns a true value, the template is replaced

by an opaque token.

The callback function will be passed the same parameters as for

C. The return value is the same as for C from

the A decorator.

=item $api->strip_templates( $wikitext, \@templates )

=item $api->strip_templates( $wikitext, \@templates, \%matches )

As above, with a callback function that just tests whether the C<$name> is in

(or matches a regex in) the provided array.

=cut

sub strip_templates {

my $api=shift;

my $text=shift;

my $cb=shift;

my $data;

if(ref($cb) eq 'ARRAY'){

$data=$cb;

$cb=\&_strip_templates_in_list;

} else {

$data=shift;

}

my $mapping=shift // {};

$text=$api->process_templates($text, sub {

return undef unless &$cb(@_);

my $x=$api->replace_stripped($_[2],$mapping);

my $tag=$api->get_token_for($x);

$mapping->{$tag}=$x;

return $tag;

}, $data);

return wantarray ? ($text,$mapping) : $text;

}

sub _strip_templates_in_list {

return grep(ref($_) ? $_[0]=~/$_/ : $_[0] eq $_, @{$_[3]});

}

1;

=pod

=back

=head1 COPYRIGHT

Copyright 2008–2013 Anomie

This library is free software; you can redistribute it and/or

modify it under the same terms as Perl itself.