User:AllyUnion/did you know.pl

The following code is licensed under the GPL and the Creative Commons Attribution License. -- AllyUnion (talk) 03:21, 9 Apr 2005 (UTC)

  • This code is not working -- AllyUnion (talk) 03:21, 9 Apr 2005 (UTC)
  • FYI: The system calls to python2.3 invoke the pywikipediabot framework. Perl needs a large number of extra packages just to download files, and I don't have access to a version of a Perl module that would let me post to the English Wikipedia. -- AllyUnion (talk) 03:27, 9 Apr 2005 (UTC)

----

Basic idea:

  1. Leave three blocks of "...that" on Template talk:Did you know
  2. Move all other blocks of "...that" to Wikipedia:Recent additions
  3. Move oldest blocks on Wikipedia:Recent additions to an archive page if Wikipedia:Recent additions exceeds 50 "...that" lines.

didyouknow.pl

#!/usr/bin/perl -w
# Author: Jason Y. Lee
# Purpose: Wikipedia's Did you know archival process
# Special thanks to dysprosia, and the person who helped me in #wikipedia
# Assumptions:
# On Template talk:Did you know:
#   A did you know line is in the following format:
#     *...that <text>
#   Where <text> is any text of any length, whether or not it contains a newline.
# On Wikipedia:Recent additions and any archive pages after Wikipedia:Recent additions 25:
#   A did you know line starts with either:
#     [[Image: OR ...that
#   A did you know line ends with either:
#     A question mark or an HTML line break (<br>)

use Tie::File;

# Directory layout. Every path below is $HOME . $BOTDIR (. $LOGDIR) . name,
# so with an empty $HOME the paths are relative to the working directory.
my $HOME   = '';
my $BOTDIR = 'wikipedia/bots/kurando-san/';
my $LOGDIR = 'dyklogs/';

# Bot configuration file (holds the "lastarchive = N" setting).
my $configfile = "${HOME}${BOTDIR}didyouknow.cfg";

# Scratch files:
#   $logfile1 - downloaded copy of $talkpage
#   $logfile2 - downloaded copy of $pagename
#   $logfile3 - downloaded copy of the current archive page
#   $logfile4 - newly generated text for $pagename
#   $logfile5 - prefix for per-archive files; the archive number and ".log"
#               are appended where it is used
my $logfile1 = "${HOME}${BOTDIR}${LOGDIR}dyk1.log";
my $logfile2 = "${HOME}${BOTDIR}${LOGDIR}dyk2.log";
my $logfile3 = "${HOME}${BOTDIR}${LOGDIR}dyk3.log";
my $logfile4 = "${HOME}${BOTDIR}${LOGDIR}newra.log";
my $logfile5 = "${HOME}${BOTDIR}${LOGDIR}newra-";

# Wiki page titles.
my $talkpage = 'Template talk:Did you know';
my $pagename = 'Wikipedia:Recent additions';

# Archive page title and number; both are replaced once the configuration
# file has been read.
my $archive    = 'Wikipedia:Recent additions';
my $archivenum = '';

# Text written at the top of every newly created archive log (four lines).
my $archiveheader = join("\n",
    '{{DYK archive header}}',
    '{{DYK archive nav}}',
    '',
    '==Did you know...==',
    '');

  1. Python Page Existance program
  2. $pageexist1 = "\"import config, wikipedia\nimport sys\nmysite = wikipedia.getSite()\nif (wikipedia.PageLink(mysite, \'";
  3. $pageexist2 = "\').exists()):\n\tsys.exit(0)\nelse:\n\tsys.exit(1)\n\"";
  1. Posting a page

$postprog1 = "\"import config, wikipedia\nmysite = wikipedia.getSite()\n";

$postprog2 = "logfile = \'";

$postprog3 = "\'\npagename = \'";

$postprog4 = "\'\ncomment = \'";

$postprog5 = "\'\nlog = file(logfile, 'r')\npage = log.read()\nlog.close()\nwikipedia.PageLink(mysite, pagename).put(page, comment)\n";

# Read the bot configuration.
#
# The file is tied so that the "lastarchive" line can be rewritten in place
# at the end of the run through the reference kept in $lastarchive.
tie @config, 'Tie::File', $configfile
    or die "Cannot tie configuration file '$configfile': $!";

$lastarchive = "";

foreach $line (@config)
{
    # Accept any spacing around "=" and ignore lines where the setting is
    # not at the start (e.g. a line that has been commented out).
    if ($line =~ /^\s*lastarchive\s*=\s*(\d+)/)
    {
        $archivenum  = $1;
        $archive     = "Wikipedia:Recent additions " . $archivenum;
        $lastarchive = \$line;
    }
}

# Without an archive number every archive page title computed later would
# be wrong, so stop before anything is downloaded or posted.
die "No 'lastarchive = <number>' setting found in '$configfile'\n"
    unless ref $lastarchive;

  1. Get the pages

$pythonprog1 = "\"import config, wikipedia\nmysite = wikipedia.getSite()\n";

$pythonprog2 = "logfile = ";

$pythonprog3 = "\npagename = ";

$pythonprog4 = "\nlog = file(logfile, 'w')\nlog.write(wikipedia.getPage(mysite, pagename, True, True, False).encode('iso-8859-1'))\nlog.close()\n\"";

system("python2.3 -c " . $pythonprog1 . $pythonprog2 . '\ . $logfile1 . '\ . $pythonprog3 . '\ . $talkpage . '\ . $pythonprog4);

system("python2.3 -c " . $pythonprog1 . $pythonprog2 . '\ . $logfile2 . '\ . $pythonprog3 . '\ . $pagename . '\ . $pythonprog4);

system("python2.3 -c " . $pythonprog1 . $pythonprog2 . '\ . $logfile3 . '\ . $pythonprog3 . '\ . $archive . '\ . $pythonprog4);

  1. Analysis of 'Template talk:Did you know'

tie @dyklog, 'Tie::File', $logfile1 or die;

$dykmat = [];

$dykblockcount = 0;

$y = 0;

$line = "";

$x = 0;

while ($dyklog[$x] !~ m/=+ARCHIVE USED SUGGESTIONS HERE=+/ig)

{

  1. print $x, ". ", $dyklog[$x], "\n";

$x++;

}

while ($dyklog[$x] !~ m/\'\'include a link to the used picture behind the fact in which it has been \'\'\'used\'\'\' on the front page\.\'\'
/ig)

{

  1. print $x, ". ", $dyklog[$x], "\n";

$x++;

}

  1. print $x, ". ", $dyklog[$x], "\n";

$x++;

  1. print $x, ". ", $dyklog[$x], "\n";

# Tidy one record of the tied talk-page copy in place: normalise the
# "*...that" prefix, strip user signatures and empty parentheses, and turn
# "[[Image" into "[[:Image".
my $tidy_dyk_line = sub {
    my ($index) = @_;
    my $original = $dyklog[$index];
    my $text = $original;

    $text =~ s/^\*\s*\.\.\.\s*that/\*\.\.\.that/ig;
    # NOTE(review): "[10];" looks like the remains of a numeric character
    # reference whose "&#..." prefix was lost in this copy -- confirm
    # against the original script.
    $text =~ s/(?:--|&[mn]dash;|[10];|)\s*\[\[User(.*?):(.*?)\(UTC\)//ig;
    $text =~ s/\(\)//g;
    # FIXME: the next substitution is truncated in this copy of the source
    # (everything after "\s" is missing), so it is commented out to keep the
    # script parseable. It presumably tidied what follows an entry's closing
    # question mark -- restore it from the original script.
    # $dyklog[$x] =~ s/\?\s
    $text =~ s/\[\[Image/\[\[:Image/ig;

    # Write back only when something changed.
    $dyklog[$index] = $text if $text ne $original;
};

# Move past the current record. From the fourth block on (block index >= 3)
# the record is removed from the talk page copy instead of being stepped
# over, so only the first three blocks stay on the talk page.
my $consume_line = sub {
    if ($dykblockcount >= 3)
    {
        splice(@dyklog, $x, 1);
    }
    else
    {
        $x++;
    }
};

# Walk the suggestion area up to the "All older items have been archived"
# line, collecting entries into @dykmat. A blank (or whitespace-only) record
# ends the current block.
while (($x < scalar(@dyklog)) && ($dyklog[$x] !~ m/All older items have been archived at \[\[Wikipedia:Recent additions\]\]/i))
{
    $tidy_dyk_line->($x);
    $line = "";

    if ($dyklog[$x] =~ m/^\*\.\.\.that/i)
    {
        # Start of an entry; following records are joined onto it until the
        # next entry or a blank record.
        $line = $dyklog[$x];
        $consume_line->();

        while ($x < scalar(@dyklog))
        {
            $tidy_dyk_line->($x);
            last if ($dyklog[$x] =~ m/^\*\.\.\.that/i) || ($dyklog[$x] !~ m/\S/);

            $line .= " " . $dyklog[$x];
            $consume_line->();
        }

        # print $x, ". -> (", $dykblockcount, ", ", $y, "): ", $line, "\n\n";
        $y++;
        $dykmat[$dykblockcount][$y] = $line;
    }
    else
    {
        if ($dyklog[$x] !~ m/\S/)
        {
            # print $x, ". Block detected\n";
            # Blank record: close the block if it holds any entries.
            # Consecutive blank records do not create empty blocks.
            if ($y != 0)
            {
                $dykmat[$dykblockcount][0] = $y + 1;
                $dykblockcount++;
            }
            $y = 0;
        }

        $consume_line->();
    }

    # print "Exit";
}

  1. print $dykblockcount, "\n";
  1. die;
  1. for ($x = 0; $x < $dykblockcount; $x++)
  2. {
  3. for ($y = 1; $y < $dykmat[$x][0]; $y++)
  4. {
  5. print $y, ". ", $dykmat[$x][$y], "\n";
  6. }
  7. print "\n";
  8. }

if ($dykblockcount <= 3)

{

die;

}

  1. Find the image left and right.

tie @wralog, 'Tie::File', $logfile2 or die;

$side = "left";

$x = 0;

while ($wralog[$x] !~ m/\[\[Image:/i)

{

$x++;

}

if ($wralog[$x] =~ m/left/i)

{

$side = "left";

}

else

{

$side = "right";

}

  1. Process the talk page.

foreach $x (0 .. $dykblockcount-1)

{

foreach $y (1 .. $dykmat[$x][0]-1)

{

$line = $dykmat[$x][$y];

$line =~ s/^\*\.\.\.that/\.\.\.that/ig;

@parts = split /\(?\[\[:?Image:/i, $line;

if (scalar(@parts) == 2)

{

$image = $parts[1];

$image =~ s/\]\]\)?//ig;

$image =~ s/\]\]\)?//g;

$image =~ s/\n//g;

$imagelink = "Image:" . $image . "";

if ($side eq "left")

{

$side = "right";

}

else

{

$side = "left";

}

$line =~ s/\(*?\s*?\[\[:*?Image:.*?\]\]\s*?\)*?//ig;

$line =~ s/\?\s*?\)*?

$dykmat[$x][$y] = "$imagelink\n$line";

}

else

{

$line =~ s/\?\s*?\)*?

$dykmat[$x][$y] = "$line";

}

}

}

  1. Analysis of 'Wikipedia:Recent additions'

$wramat = [];

$wrablockcount = -1;

$y = 1;

$line = "";

$wracount = 0;

  1. $limitflag = False;

$x = 0;

  1. $wralast = 0;

while (($wralog[$x] !~ m//g) && ($x < scalar(@wralog)))

{

$x++;

}

$x++;

while (($wralog[$x] !~ m//g) && ($x < scalar(@wralog)))

{

$wralog[$x] =~ s/^\s*?\.\.\.\s*?that/\.\.\.that/ig;

  1. print "Processing: ", $wralog[$x], "\n";

if ($wralog[$x] =~ m/\[\[Image:/ig)

{

if ($wrablockcount == -1)

{

$wrablockcount++;

}

  1. print $x, ". (nimage). ", $wralog[$x], "\n";

$line = $wralog[$x] . "\n";

while (not (($wralog[$x] =~ m/\?/i) || ($wralog[$x] =~ m//i)))

{

  1. if ($limitflag)
  2. {
  3. splice(@wralog, $x, 1);
  4. }
  5. else
  6. {

$x++;

  1. }
  2. print $x, ". (image). ", $wralog[$x], "\n";

$line .= $wralog[$x] . "\n";

}

$wramat[$wrablockcount][$y] = $line;

$y++;

$wracount++;

}

elsif ($wralog[$x] =~ m/^\.\.\.that/ig)

{

if ($wrablockcount == -1)

{

$wrablockcount++;

}

$line = "";

while (not (($wralog[$x] =~ m/\?/i) || ($wralog[$x] =~ m//i)))

{

  1. print $x, ". (that). ", $wralog[$x], "\n";

$line .= $wralog[$x] . "\n";

  1. if ($limitflag)
  2. {
  3. splice(@wralog, $x, 1);
  4. }
  5. else
  6. {

$x++;

  1. }

}

  1. print $x, ". (that). ", $wralog[$x], "\n";

$line .= $wralog[$x] . "\n";

$wramat[$wrablockcount][$y] = $line;

$y++;

$wracount++;

}

elsif ($wralog[$x] eq "")

{

  1. print $x, ". Block detected!\n";
  2. if ($wracount > 50)
  3. {
  4. $limitflag = True;
  5. }

if ($wrablockcount != -1)

{

$wramat[$wrablockcount][0] = $y; # Save the size

}

$y = 1; # Reset the line count for the block

$wrablockcount++;

}

else

{

$test = $wralog[$x];

$test =~ s/\s+//g;

if ($test eq "")

{

  1. print $x, ". Block detected!\n";
  2. if ($wracount > 50)
  3. {
  4. $limitflag = True;
  5. }

if ($wrablockcount != -1)

{

$wramat[$wrablockcount][0] = $y;

}

$y = 1;

$wrablockcount++;

}

}

  1. if ($limitflag)
  2. {
  3. splice(@wralog, $x, 1);
  4. }
  5. else
  6. {

$x++;

  1. }

}

splice(@wralog, $x, 1, "\n");

  1. Copy everything from "Wikipedia:Recent archives" matrix into the "Did you know" matrix

foreach $x (0 .. $wrablockcount-1)

{

foreach $y (0 .. $wramat[$x][0]-1)

{

$dykmat[$dykblockcount][$y] = $wramat[$x][$y];

}

$dykblockcount++;

}

$x = 0;

open(RALOG, ">", $logfile4) or die;

while ((not ($wralog[$x] =~ m//g)) && ($x < scalar(@wralog)))

{

print RALOG $wralog[$x], "\n";

$x++;

}

print RALOG $wralog[$x], "\n";

$x++;

$total = 0;

for ($i = 3; (($i < $dykblockcount) && ($total < 60)); $i++)

{

foreach $j (1 .. $dykmat[$i][0]-1)

{

print RALOG $dykmat[$i][$j], "\n";

  1. print "(", $i, ", ", $j, "). ", $dykmat[$i][$j], "\n";

}

print RALOG "\n";

$total += $dykmat[$i][0];

}

while (($wralog[$x] !~ m//g) && ($x < scalar(@wralog)))

{

$x++;

}

while ($x < scalar(@wralog))

{

print RALOG $wralog[$x], "\n";

$x++;

}

close(RALOG);

  1. print "----\n";
  1. print $wracount, "\n";
  1. Post the new pages

system("python2.3 -c " . $postprog1 . $postprog2 . $logfile1 . $postprog3 . $talkpage . $postprog4 . "Testing archival bot" . $postprog5 . $postprog2 . $logfile4 . $postprog3 . $pagename . $postprog4 . "Testing archival bot" . $postprog5 . "\"");

  1. Analysis of the archive pages

$arccount = 0;

if ($i < $dykblockcount)

{

tie @arclog, 'Tie::File', $logfile3 or die;

$x = 0;

while (($arclog[$x] !~ m/==Did you know\.\.\.==/ig) && ($x < scalar(@arclog)))

{

$x++;

}

for ($arccount = 0; $x < scalar(@arclog); $x++)

{

if ($arclog[$x] =~ m/\[\[Image:/ig)

{

  1. print $x, ". (nimage). ", $arclog[$x], "\n";

while (not (($arclog[$x] =~ m/\?/i) || ($arclog[$x] =~ m//i)))

{

$x++;

  1. print $x, ". (image). ", $arclog[$x], "\n";

}

$arccount++;

}

elsif ($arclog[$x] =~ m/^\.\.\.that/ig)

{

while (not (($arclog[$x] =~ m/\?/i) || ($arclog[$x] =~ m//i)))

{

  1. print $x, ". (that). ", $arclog[$x], "\n";

$x++;

}

  1. print $x, ". (that). ", $arclog[$x], "\n";

$arccount++;

}

$x++;

}

}

# Build the archive log files.
#
# Blocks $i .. $dykblockcount-1 (the ones that did not fit on Recent
# additions) are written to "<$logfile5><archive number>.log". When the
# current archive page holds fewer than 50 entries it is extended: the new
# blocks go directly under the header and the old page body is appended
# after them. Otherwise a new archive number is started. Whenever the
# running count reaches 50, the next archive number is started.
#
# $startingarchive .. $archivenum is the range of archive numbers whose log
# files were written here and need to be posted.
$startingarchive = $archivenum;
$testcount = $arccount;

# Create a log file holding only the archive header and tie @newlog to it.
my $start_archive_log = sub {
    my ($path) = @_;

    open(my $header_fh, ">", $path) or die "Cannot create '$path': $!";
    print {$header_fh} $archiveheader;
    close($header_fh) or die "Cannot close '$path': $!";

    tie @newlog, 'Tie::File', $path or die "Cannot tie '$path': $!";
};

if ($i >= $dykblockcount)
{
    # Nothing was left over for the archive. The original still created a
    # header-only log for the current archive, which then replaced the
    # archive page when posted. Make the posting range empty instead.
    $startingarchive = $archivenum + 1;
}
else
{
    my $extend_current = ($arccount < 50);

    if (!$extend_current)
    {
        # The current archive is full: start the next one. It is also the
        # first archive to post, because no log is written for the full
        # one (the original left $startingarchive on the full archive).
        $archivenum++;
        $startingarchive = $archivenum;
        $testcount = 0;
    }

    $currentlogfile = $logfile5 . $archivenum . ".log";
    $start_archive_log->($currentlogfile);

    for (my $block = $dykblockcount - 1; $block >= $i; $block--)
    {
        # Insert the block right after the four header lines, followed by
        # an empty line; blocks handled later end up above earlier ones.
        $start = 4;

        foreach my $entry (1 .. $dykmat[$block][0]-1)
        {
            splice(@newlog, $start, 0, $dykmat[$block][$entry]);
            $start++;
        }

        splice(@newlog, $start, 0, "");

        $testcount += $dykmat[$block][0];

        if ($testcount >= 50)
        {
            # This archive is full; continue in the next one.
            $testcount = 0;
            $archivenum++;
            $currentlog = $logfile5 . $archivenum . ".log";

            untie @newlog;
            $start_archive_log->($currentlog);
        }
    }

    # Release the tie before the file is written through a plain handle;
    # Tie::File must not have its file changed behind its back.
    untie @newlog;

    if ($extend_current)
    {
        # Append the old body of the archive page (everything after its
        # four header lines) to the first log file.
        open(my $arc_fh, ">>", $currentlogfile) or die "Cannot append to '$currentlogfile': $!";

        # print $arc_fh "\n";
        foreach my $row (4 .. $#arclog)
        {
            print {$arc_fh} $arclog[$row], "\n";
        }

        close($arc_fh) or die "Cannot close '$currentlogfile': $!";
    }
}

  1. print "\n$testcount\n";
  2. print $archivenum, "\n";

foreach $x ($startingarchive .. $archivenum)

{

$currentlog = $logfile5 . $x . ".log";

system("python2.3 -c " . $postprog1 . $postprog2 . $currentlog . $postprog3 . "Wikipedia:Recent additions " . $x . $postprog4 . "Testing archival bot" . $postprog5 . "\"");

}

$$lastarchive = "lastarchive = " . $archivenum;

system("rm", "-f", $HOME . $BOTDIR . $LOGDIR . "*.log");

didyouknow.cfg

# Last archive that the program is on; do not attempt to update manually.

lastarchive = 25