<?php
// Source page: User:RFC bot/rfcbot.php

/** rfcbot.php - Automatic update of Wikipedia RFC lists

* STABLE Version 4.2.6

*

* © 2011 James Hare and contributors - http://en.wikipedia.org/wiki/User:Harej

*

* This program is free software; you can redistribute it and/or modify

* it under the terms of the GNU General Public License as published by

* the Free Software Foundation; either version 2 of the License, or

* (at your option) any later version.

*

* This program is distributed in the hope that it will be useful,

* but WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

* GNU General Public License for more details.

*

* You should have received a copy of the GNU General Public License

* along with this program; if not, write to the Free Software

* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*

* Developers (add yourself here if you worked on the code):

* James Hare - User:Harej - Wrote everything

* Terry E. - User:TerryE - Code recommendations for improved operation

**/

// Show errors on the console and report everything except notices.
ini_set('display_errors', 1);

error_reporting(E_ALL & ~E_NOTICE);

include("./public_html/botclasses.php"); // Botclasses.php was written by User:Chris_G and is available under the GNU General Public License

include("logininfo.php");

// Definitions
//
// $RFC_categories    - the category abbreviations accepted in an RFC template.
// $RFC_submissions   - per category: wikitext of the listing page, seeded with its intro line.
// $RFC_dashboard     - per category: dashboard row text (starts empty).
// $RFC_listofentries - per category: list of page titles currently listed (starts empty).
// $RFC_pagetitles    - per category: title of the wiki page that receives the listing.

$RFC_categories = array(
    "bio", "hist", "econ", "sci", "lang", "media", "pol",
    "reli", "soc", "style", "policy", "proj", "unsorted",
);

// Every category starts from the same seed values, keyed in $RFC_categories order.
$RFC_submissions = array_fill_keys($RFC_categories, "The following discussions are requested to have community-wide attention:\n\n");
$RFC_dashboard = array_fill_keys($RFC_categories, "");
$RFC_listofentries = array_fill_keys($RFC_categories, array());

// This specific exception was provided for upon request of WikiProject Philosophy.
$RFC_submissions["reli"] = "The following discussions are requested to have community-wide attention:\n{{Philosophy/Nav}}\n\n";

$RFC_pagetitles = array(
    "bio"      => "Wikipedia:Requests for comment/Biographies",
    "econ"     => "Wikipedia:Requests for comment/Economy, trade, and companies",
    "hist"     => "Wikipedia:Requests for comment/History and geography",
    "lang"     => "Wikipedia:Requests for comment/Language and linguistics",
    "sci"      => "Wikipedia:Requests for comment/Maths, science, and technology",
    "media"    => "Wikipedia:Requests for comment/Art, architecture, literature, and media",
    "pol"      => "Wikipedia:Requests for comment/Politics, government, and law",
    "reli"     => "Wikipedia:Requests for comment/Religion and philosophy",
    "soc"      => "Wikipedia:Requests for comment/Society, sports, and culture",
    "style"    => "Wikipedia:Requests for comment/Wikipedia style and naming",
    "policy"   => "Wikipedia:Requests for comment/Wikipedia policies and guidelines",
    "proj"     => "Wikipedia:Requests for comment/WikiProjects and collaborations",
    "unsorted" => "Wikipedia:Requests for comment/Unsorted",
);

// Log the bot into the wiki. $botuser and $botpass are expected to come from logininfo.php.
echo "Logging in...";
$objwiki = new wikipedia();
$objwiki->login($botuser, $botpass);
echo " done.\n";

/* Connect to the database */

echo "Retrieving database login credentials...";
$toolserver_mycnf = parse_ini_file("/home/messedrocker/.my.cnf");
if ($toolserver_mycnf === false) {
    // Without this check the script would go on to connect with empty credentials.
    die("Could not read database credentials from /home/messedrocker/.my.cnf\n");
}
// These two stay global: query() reads them when it has to reconnect.
$toolserver_username = $toolserver_mycnf['user'];
$toolserver_password = $toolserver_mycnf['password'];
unset($toolserver_mycnf);
echo " done.\n";

echo "Logging into database...";
if (!mysql_connect("sql", $toolserver_username, $toolserver_password)) {
    // Fail here with the real connection error instead of at the next call.
    die(mysql_error());
}
@mysql_select_db('u_messedrocker_reqs') or die(mysql_error());
echo " done.\n";

/**
 * Runs a SQL statement, reconnecting first if the MySQL connection has timed out.
 *
 * The statement is echoed to the console before it is executed. On a MySQL
 * error the script terminates with the error message.
 *
 * @param string $query SQL statement to execute (callers escape their own values).
 * @return resource|bool The value returned by mysql_query(): a result resource
 *                       for statements that produce rows, TRUE for other
 *                       successful statements.
 */
function query($query) {
    global $toolserver_username;
    global $toolserver_password;

    // We need to use this function in case our MySQL connection times out.
    if (!mysql_ping()) {
        mysql_connect("sql", $toolserver_username, $toolserver_password);
        @mysql_select_db('u_messedrocker_reqs') or die(mysql_error());
    }

    echo $query . "\n";

    // "return mysql_query(...) or die(...)" would return the boolean result of
    // the "or" expression, so the result is captured and checked separately.
    $result = mysql_query($query);
    if ($result === false) {
        die(mysql_error());
    }

    return $result;
}

// Step 1: Check for transclusions
// Fetch every page that transcludes Template:Rfc and build $listing, keyed by
// RFC ID, with the title, description, timestamp and categories of each open RFC.

echo "Checking for transclusions...";
$transcludes = $objwiki->getTransclusions("Template:Rfc");
echo " done.\n";
#print_r($transcludes); // debugging aid, disabled

$listing = array();

for ($pg = 0; $pg < count($transcludes); $pg++) {
    echo "Getting page " . $pg . ": " . $transcludes[$pg] . "\n";
    $contents = $objwiki->getpage($transcludes[$pg]);

    // Syntax Correction. RFC templates with common errors are corrected and then saved on the wiki.
    // Two or more adjacent RFC templates are merged into a single {{rfc|a|b|...}} tag.
    preg_match_all("/(\{{2}\s?Rfc(tag)?\s?[^}]*\}{2}(\n|,| )*){2,}/i", $contents, $fix);
    for ($j = 0; $j < count($fix[0]); $j++) {
        preg_match_all("/(?=\{{2}\s?Rfc(tag)?\s?\|\s?)[^}]*/i", $fix[0][$j], $parts);
        $newtag = "";
        for ($k = 0; $k < count($parts[0]); $k++) {
            $newtag .= $parts[0][$k] . "|";
        }
        // Strip each template's opening with the same case-insensitive prefix the
        // match above uses, so "{{Rfc|" and "{{RFCtag |" are removed as well.
        $newtag = preg_replace("/\{{2}\s?Rfc(tag)?\s?\|\s?/i", "", $newtag);
        $newtag = str_replace("}}", "", $newtag);
        $newtag = "{{rfc|" . $newtag . "}}\n\n";
        $newtag = str_replace("|}}", "}}", $newtag);
        $contents = str_replace($fix[0][$j], $newtag, $contents);
        $objwiki->edit($transcludes[$pg], $contents, "Fixing RFC template syntax", false, true);
    }

    // Step 2: Seeding RFC IDs.
    // Before we read the RFC IDs and match them up to a title, description, etc.,
    // we want to make sure each RFC template has a corresponding RFC ID.
    preg_match_all("/\{{2}\s?Rfc(tag)?\s?[^}]*\}{2}/i", $contents, $match);
    for ($result = 0; $result < count($match[0]); $result++) {
        if (strpos($match[0][$result], "|rfcid=") === false) { // if the rfcid is not found within an RFC template
            $id = substr(strtoupper(md5(rand())), 0, 7); # a seven-character random string with capital letters and digits
            // Append "|rfcid=ID}}" after the template, then drop the template's own
            // closing braces so the parameter ends up inside it.
            $contents = str_replace($match[0][$result], $match[0][$result] . "|rfcid=" . $id . "}}", $contents);
            $contents = str_replace("}}|rfcid", "|rfcid", $contents);
            $objwiki->edit($transcludes[$pg], $contents, "Adding RFC ID", false, true);
            mysql_query("insert into `frs` (`id`, `initround`) values (\"" . mysql_real_escape_string($id) . "\", false)");
        }
    }

    // Step 3: Check for RFC templates
    preg_match_all("/\{{2}\s?Rfc(tag)?\s?[^}]*\}{2}/i", $contents, $match);
    for ($result = 0; isset($match[0][$result]); $result++) { # For each result on a page
        // Category
        preg_match_all("/\{{2}\s?Rfc(tag)?[^2]\s?[^}]*\}{2}/i", $contents, $m);
        $categorymeta = preg_replace("/\{*\s?(Rfc(?!id)(tag)?)\s?\|?\s?(1=)?\s?/i", "", $m[0][$result]);

        // An RFC can be forced to have a certain timestamp with the time= parameter in RFC template.
        // The value ends at the next "|" or "}", so a following parameter such as
        // rfcid= is no longer swallowed into the timestamp.
        preg_match("/\|time=[^|}]*/", $categorymeta, $forcedtimecheck);
        if (isset($forcedtimecheck[0])) {
            $forcedtime = trim(str_replace("|time=", "", $forcedtimecheck[0]));
            if ($forcedtime != "") {
                $prettytimestamp = $forcedtime;
                $timestamp = strtotime($prettytimestamp);
            }
        }

        // Description and Timestamp
        if (!isset($timestamp)) {
            // Capture from the RFC template up to the first signature timestamp that follows it.
            preg_match_all("/\{{2}\s?Rfc(tag)?\s?[^}]*\}{2}(.|\n)*?([0-2]\d):([0-5]\d),\s(\d{1,2})\s(\w*)\s(\d{4})\s\(UTC\)/im", $contents, $m);
            print_r($m[0]);
            $description = preg_replace("/\{{2}\s?Rfc(tag)?\s?[^}]*\}{2}\n*/i", "", $m[0][$result]); // get rid of the RFC template
            $description = preg_replace("/={2,}\n+/", "'''\n\n", $description); // replace section headers with boldness
            $description = preg_replace("/\n+={2,}/", "\n\n'''", $description);
            $description = "{{rfcquote|text=\n" . $description . "}}"; // indents the description
            preg_match("/([0-2]\d):([0-5]\d),\s(\d{1,2})\s(\w*)\s(\d{4})\s\(UTC\)/i", $description, $t);
            $timestamp = strtotime($t[0]);
            echo "Timestamp: " . $timestamp . "\n";
        }
        else {
            $description = $prettytimestamp;
        }

        // RFC ID (value ends at the next "|" or "}")
        preg_match("/\|rfcid=[^|}]*/", $categorymeta, $rfcidcheck);
        if (isset($rfcidcheck[0])) {
            $id = trim(str_replace("|rfcid=", "", $rfcidcheck[0]));
        }
        echo "RFC ID: " . $id . "\n";

        // Reduce the template to its bare category list: drop whitespace, braces
        // and "=", then the time and rfcid parameters.
        $categorymeta = preg_replace("/\s*\}*/", "", $categorymeta);
        $categorymeta = preg_replace("/=*/", "", $categorymeta);
        $categorymeta = preg_replace("/\|time[^|}]*/", "", $categorymeta);
        $categorymeta = preg_replace("/\|rfcid[^|}]*/", "", $categorymeta);
        echo "Category: " . $categorymeta . "\n";
        $category = explode("|", $categorymeta);

        // Step 4: Inspecting for expiration. Something that's expired gets removed; something that's not expired gets moved up to the big leagues! Whee!
        // Expired means older than 30 days (2592000 seconds) with a known timestamp, or located on an archive page.
        if ((time() - $timestamp > 2592000 && $timestamp != "") || preg_match("/\/Archive \d+/", $transcludes[$pg])) {
            echo "RFC expired. Removing tag.\n";
            // NOTE(review): this pattern is not specific to the expired template; it
            // removes every RFC template it matches on the page. Confirm that is intended.
            $contents = preg_replace("/\{{2}rfc(tag)?.*\}{2}(\n|\s)?/i", "", $contents);
            $objwiki->edit($transcludes[$pg], $contents, "Removing expired RFC template", false, true);
            mysql_query("delete from `frs` where `id` = \"" . mysql_real_escape_string($id) . "\"");
        }
        else {
            $listing[$id]["title"] = $transcludes[$pg];
            $listing[$id]["description"] = $description;
            $listing[$id]["timestamp"] = $timestamp;
            for ($c = 0; $c < count($category); $c++) { # lol c++
                if (in_array($category[$c], $RFC_categories)) {
                    $listing[$id]["category"][] = $category[$c];
                }
            }
            // Anything without a recognised category is listed as unsorted.
            if (empty($listing[$id]["category"])) {
                $listing[$id]["category"][0] = "unsorted";
            }
        }

        // Reset per-RFC state. $timestamp in particular must be unset, because the
        // isset($timestamp) test above decides whether a forced time was supplied.
        unset($timestamp);
        unset($forcedtimecheck);
        unset($forcedtime);
        unset($prettytimestamp);
        unset($categorymeta);
        unset($description);
        unset($rfcidcheck);
        unset($category);
    }
}

// Step 5: Sorting by timestamp.
// Builds $timestamp as RFC ID => timestamp, ordered newest first.

// Start from an empty array so that a run with no open RFCs still leaves
// $timestamp as a valid (empty) array for the code that iterates over it.
$timestamp = array();

foreach ($listing as $id => $arr) {
    $timestamp[$id] = $arr["timestamp"]; # i.e., $timestamp[$id] = $listing[$id]["timestamp"]
}

if (count($timestamp) > 0) {
    // array_multisort() sorts the values descending and reorders the keys in
    // step with them; the two are then recombined into one array.
    $keys = array_keys($timestamp);
    $values = array_values($timestamp);
    array_multisort($values, SORT_DESC, $keys);
    $timestamp = array_combine($keys, $values);
}

// Step 6: Adding each listing into the submissions.
// Walk the RFCs in sorted order and append each one to the listing text, the
// dashboard row and the title list of every category it belongs to.
//
// NOTE(review): the empty "" prefixes below (and the -7 trim, which equals the
// length of "{{dot}}") suggest wiki-link markup around the title may have been
// lost when this source was copied from a wiki page. Confirm against the
// deployed bot before relying on the exact output.

foreach ($timestamp as $rfcId => $unusedTime) {
    $entry = $listing[$rfcId];

    foreach ($entry["category"] as $categoryKey) {
        $RFC_submissions[$categoryKey] .= "" . $entry["title"] . "\n" . $entry["description"] . "\n";
        $RFC_dashboard[$categoryKey] .= "" . $entry["title"] . "{{dot}}";
        $RFC_listofentries[$categoryKey][] = $entry["title"];
    }
}

// Drop the trailing "{{dot}}" separator from each dashboard row.
foreach (array_keys($RFC_dashboard) as $type) {
    $RFC_dashboard[$type] = substr($RFC_dashboard[$type], 0, -7);
}

// Collapse runs of three or more newlines into a single blank line, per category.
foreach (array_keys($RFC_submissions) as $type) {
    $RFC_submissions[$type] = preg_replace("/\n{3,}/", "\n\n", $RFC_submissions[$type]);
}

// Step 7: Creating edit summaries.
// For every category page: append the footer, add a row to the dashboard
// navbox, compare the new title list with the one stored in the `rfc` table to
// build an "Added: ... Removed: ..." edit summary, then save the page.

$rfclisting = "{{navbox\n| name = {{subst:FULLPAGENAME}}\n| title = Requests for comment\n| basestyle = background: #BDD8FF;\n| liststyle = line-height: 220%;\n| oddstyle = background: #EEEEEE;\n| evenstyle = background: #DEDEDE;\n";

$counter = 0;

foreach ($RFC_pagetitles as $abbreviation => $pagename) {
    $RFC_submissions[$abbreviation] .= "{{RFC list footer|" . $abbreviation . "|hide_instructions={{{hide_instructions}}} }}";

    $counter += 1;
    $rfclisting .= "| group" . $counter . " = " . str_replace("Wikipedia:Requests for comment/", "", $pagename) . "\n| list" . $counter . " = " . $RFC_dashboard[$abbreviation] . "\n";

    // Retrieving the old list from the database
    $query = mysql_query("SELECT * from `rfc` WHERE `category` = \"" . mysql_real_escape_string($abbreviation) . "\"");
    $row = $query ? mysql_fetch_assoc($query) : false;
    $oldlist = $row ? unserialize($row['pagetitles']) : false;
    if (!is_array($oldlist)) {
        // No stored row yet (or unreadable data): treat the old list as empty.
        // Otherwise array_diff() fails, nothing counts as added, and the row
        // for this category is never written.
        $oldlist = array();
    }

    $newlist = $RFC_listofentries[$abbreviation];

    // NOTE(review): titles are written to the summary as plain text; wiki-link
    // markup may have been lost when this source was copied from a wiki page.
    $justadded = array_diff($newlist, $oldlist);
    if (count($justadded) > 0) print_r($justadded);
    // Blank when nothing was added; otherwise ends with a space so that
    // "Removed: ..." can follow directly.
    $added = count($justadded) > 0 ? "Added: " . implode(", ", $justadded) . " " : "";

    $justremoved = array_diff($oldlist, $newlist);
    if (count($justremoved) > 0) print_r($justremoved);
    $removed = count($justremoved) > 0 ? "Removed: " . implode(", ", $justremoved) : "";

    $summary = $added . $removed;
    if ($summary == "") $summary = "Maintenance";

    // Step 8: Submission.
    // The stored list is only rewritten when it changed; the page is saved every run.
    if (count($justadded) > 0 || count($justremoved) > 0) {
        query("delete from `rfc` where `category` = \"" . $abbreviation . "\"");
        query("insert into `rfc` (`category`, `pagetitles`) values (\"" . $abbreviation . "\", \"" . mysql_real_escape_string(serialize($newlist)) . "\")");
    }

    $objwiki->edit($pagename, $RFC_submissions[$abbreviation], $summary, false, true);
}

$rfclisting .= "}}";

$objwiki->edit("Wikipedia:Dashboard/Requests for comment", $rfclisting, "Updating RFC listings", false, true);

// Step 9: Parsing WP:FRS to create FRS user list arrays.
// Reads the "Requests for comment" part of the Feedback request service page,
// splits it per category and fills $frs_users[category][n] = array(username, limit)
// with the users who pass the eligibility checks below.

$RFC_pagetitles["all"] = "Wikipedia:Requests for comment/All RFCs"; # This page does not actually exist. It is a hack to allow people to sign up to receive requests for all RFCs.

// Keep fetching until the page comes back non-empty.
do {

$frs = $objwiki->getpage("Wikipedia:Feedback request service");

} while ($frs == "");

$frs = preg_replace("/\n+/", "", $frs); # Get rid of the newlines. Who needs 'em anyway?

preg_match("/==Requests for comment==.*/i", $frs, $m); # This might be the one time in my life I actually want a greedy regex.

$frs = str_replace("==Requests for comment==", "", $m[0]);

// NOTE(review): the next two replacements look damaged. The first removes every
// "===", which leaves nothing for the explode("===") below to split on, and the
// second replaces an empty string. The original search strings were probably
// lost when this source was copied from a wiki page; restore them from the
// deployed bot before trusting this step.
$frs = str_replace("===", "", $frs);

$frs = str_replace("", "", $frs);

$temppool = explode("===", $frs);

$counter = 0;

$frs_users = array();

$ineligible = array();

foreach ($RFC_pagetitles as $abbreviation => $pagetitle) {

echo "Compiling user index for " . $abbreviation . "\n";

// Remove this category's heading text from the current chunk.
$temppool[$counter] = str_replace(str_replace("Wikipedia:Requests for comment/", "", $pagetitle), "", $temppool[$counter]);

$temppool[$counter] = str_replace("===", "", $temppool[$counter]);

// NOTE(review): $counter is advanced before the chunk is read, so the users are
// taken from the chunk after the one cleaned above. Confirm this matches the page layout.
$counter += 1;

// Each "{{frs user|...}}" entry ends with "}}", so splitting on it yields one piece per entry.
$prepool[$abbreviation] = explode("}}", $temppool[$counter]);

for ($i = 0; $i < count($prepool[$abbreviation]); $i++) {

// Skip entries already found ineligible.
// NOTE(review): $ineligible stores the cleaned entry (see the preg_replace calls
// below) while this compares the raw one, so the two may not match. Verify.
if (in_array($prepool[$abbreviation][$i], $ineligible)) {

continue;

}

// NOTE(review): lists of earlier categories have already been turned into
// array(username, limit) pairs by the loop at the end of this step, so this
// string lookup may rarely succeed. Verify.
foreach ($frs_users as $key => $item) {

if (in_array($prepool[$abbreviation][$i], $frs_users[$key])) {

$frs_users[$abbreviation][] = $prepool[$abbreviation][$i]; // Qualifying for one of them qualifies you for all of them. This is to save processing time.

continue 2;

}

}

// Strip the "* {{frs user|" prefix and normalise " | " separators, leaving "username" or "username|limit".
$prepool[$abbreviation][$i] = preg_replace("/\* ?\{\{frs user ?\s?\|\s?/i", "", $prepool[$abbreviation][$i]);

$prepool[$abbreviation][$i] = preg_replace("/\s+\|\s+/", "|", $prepool[$abbreviation][$i]);

// Finding reasons to disqualify users.

// $test_value is the entry with any numeric "|limit" removed and spaces turned into underscores, for use in API URLs.
$test_value = $prepool[$abbreviation][$i];

$test_value = preg_replace("/\|\d+/", "", $test_value);

$test_value = str_replace(" ", "_", $test_value);

// Both API lookups run for every entry, including blank ones, before the checks below.
$last_edit_check = $objwiki->query("?action=query&list=usercontribs&ucuser=" . $test_value . "&format=php");

// Seconds since the first contribution returned by the API.
$lastedit = time() - strtotime($last_edit_check["query"]["usercontribs"][0]["timestamp"]);

$block_check = $objwiki->query("?action=query&list=users&ususers=" . $test_value . "&usprop=blockinfo&format=php");

if ($prepool[$abbreviation][$i] != "" # A hack to deal with blank entries in the array.

&& $prepool[$abbreviation][$i] != " " # See above.

&& $prepool[$abbreviation][$i] != "Unsorted" # The bot thinks the different categories are users (because of how I wrote the bot), and as it were, there actually is a User:Unsorted who didn't sign up but is listed because of the Unsorted category.

&& isset($last_edit_check["query"]["usercontribs"][0]["timestamp"]) # To check if a user exists.

&& $lastedit < 2592000 # To check if a user has edited in the past 30 days.

&& !isset($block_check["query"]["users"][0]["blockedby"]) # To check if a user has an extant block.

) {

$frs_users[$abbreviation][] = $prepool[$abbreviation][$i];

}

else {

echo $test_value . " is not eligible.\n";

$ineligible[] = $prepool[$abbreviation][$i];

}

}

// Split each accepted "username|limit" entry into array(username, limit); a missing or literal "limit" value defaults to 1.
for ($i = 0; $i < count($frs_users[$abbreviation]); $i++) {

$frs_users[$abbreviation][$i] = explode("|", $frs_users[$abbreviation][$i]);

if ($frs_users[$abbreviation][$i][1] == "" || $frs_users[$abbreviation][$i][1] == "limit") {

$frs_users[$abbreviation][$i][1] = 1;

}

}

}

unset($temppool);

unset($prepool);

// Step 10: Determining exemption on the basis of reaching user-defined request limit in a given month.
// A separate script resets the number of requests on record at the beginning of each month.
//
// Array layout used here:
//   $frs_users[category abbreviation][n] = array(0 => username, 1 => limit)
//   $pool[category abbreviation][n]      = username still eligible for requests

foreach ($frs_users as $abbreviation => $members) {
    echo "Creating pool for " . $abbreviation . "\n";

    foreach ($members as $member) {
        $username = $member[0];
        $limit = $member[1];

        $query = mysql_query("SELECT * from `frsuser` WHERE `username` = \"" . mysql_real_escape_string($username) . "\"") or die(mysql_error());
        $row = mysql_fetch_assoc($query);

        if (isset($row['username'])) {
            // Known user: eligible only while below their own limit.
            if ($row['reqcount'] < $limit) {
                $pool[$abbreviation][] = $username;
            }
            continue;
        }

        // First time this user is seen: create their counter row and admit them.
        echo "Creating FRS user row for " . $username . "\n";
        query("insert into `frsuser` (`username`, `reqcount`) values (\"" . mysql_real_escape_string($username) . "\", 0)");
        $pool[$abbreviation][] = $username;
    }
}

unset($frs_users);

print_r($pool["all"]);

// Step 11: Feedback requests sent out.
// Everything's been building to this, kids. Every variable is defined. All's in the system.
//
// For every RFC in `frs` that has not had its initial round, pick one to three
// random users from the pools of the RFC's categories (plus the "all" pool),
// post the request on their talk pages and record who was contacted.

$query = mysql_query("SELECT * from `frs` WHERE `initround` = false") or die(mysql_error());

while ($row = mysql_fetch_assoc($query)) {
    // Only RFCs that are still listed get requests.
    if (!isset($listing[$row['id']])) {
        continue;
    }

    $rfctitle = $listing[$row['id']]['title'];
    echo "Processing article: " . $rfctitle . " with RFC ID: " . $row['id'] . "\n";

    // Creating a list specific to the RFC. For interdisciplinary RFCs, the contribution base is wider,
    // so an array for 'candidates' is made based on merging these arrays together and then weeding out
    // the redundant ones.
    $candidates = array();
    foreach ($listing[$row['id']]["category"] as $rfccategory) {
        if (isset($pool[$rfccategory]) && is_array($pool[$rfccategory])) {
            $candidates = array_merge($candidates, $pool[$rfccategory]);
        }
    }
    if (isset($pool["all"]) && count($pool["all"]) > 0) {
        $candidates = array_merge($candidates, $pool["all"]);
    }

    // Specific exemption for those who already participated.
    // This is anticipating that an RFC may go through more than one request round.
    // The stored list is comma-separated with a leading comma; ltrim() removes
    // only that comma, so a name is never truncated if the comma is absent.
    $contacted = (string) $row['contacted'];
    $already_did = explode(",", ltrim($contacted, ","));

    // array_unique() and array_diff() keep the original keys, which leaves gaps;
    // array_values() reindexes so that every position holds a user.
    $candidates = array_values(array_diff(array_unique($candidates), $already_did));

    // Finally, the bot randomly selects who will be contacted.
    $available = count($candidates);
    if ($available == 0) {
        // No users available for comment. Womp womp.
        // Skip only this RFC; the remaining RFCs may still have candidates.
        continue;
    }

    // $random_count is the number of people whose talk pages will be edited by the bot.
    if ($available <= 5) {
        $random_count = 1;
    }
    elseif ($available <= 15) {
        $random_count = 2;
    }
    else {
        $random_count = 3;
    }

    // Shuffling and taking the first entries guarantees distinct users, so
    // nobody receives two notifications for the same RFC.
    shuffle($candidates);
    $randomuser = array_slice($candidates, 0, $random_count);

    foreach ($randomuser as $selecteduser) {
        echo $selecteduser . " will be contacted for an RFC at " . $rfctitle . "\n";

        // Append the request to the end of the user's talk page.
        $randomuser_talkpage = $objwiki->getpage("User talk:" . $selecteduser);
        $objwiki->edit(
            "User talk:" . $selecteduser,
            $randomuser_talkpage . "\n\n{{subst:FRS message|title=" . $rfctitle . "|rfcid=" . $row['id'] . "}} ~~~~",
            "Please comment on [[" . $rfctitle . "]]",
            false,
            false
        );

        // Replace the RFC's row: record the newly contacted user and mark the initial round as done.
        $contacted .= "," . $selecteduser;
        query("delete from `frs` where `id` = \"" . mysql_real_escape_string($row['id']) . "\"");
        query("insert into `frs` (`id`, `contacted`, `initround`) values (\"" . mysql_real_escape_string($row['id']) . "\", \"" . mysql_real_escape_string($contacted) . "\", 1)");

        // Increase the user's request count for this month. Default to 1 so the
        // insert below is always valid even if no existing row is found.
        $currentcount = 1;
        $innerquery = mysql_query("select * from `frsuser` where `username` = \"" . mysql_real_escape_string($selecteduser) . "\"") or die(mysql_error());
        while ($innerrow = mysql_fetch_assoc($innerquery)) {
            $currentcount = $innerrow['reqcount'] + 1;
        }
        query("delete from `frsuser` where `username` = \"" . mysql_real_escape_string($selecteduser) . "\"");
        query("insert into `frsuser` (`username`, `reqcount`) values (\"" . mysql_real_escape_string($selecteduser) . "\", " . $currentcount . ")");
    }
}

// And that's it!

?>