<?php
# Source: User:IPLRecordsUpdateBot/Version 1.0 source/update.php

ini_set('display_errors', 0);

ini_set('max_execution_time', 2500);

# Log every error (except notices) to error_log.txt as one pipe-delimited
# record per line: code|message|file|line. The message is HTML-encoded so
# that '&', '|' and newlines inside it cannot break the record format.
# (Reconstructed: the published source had the entity replacements decoded,
# leaving identical search/replace arrays and a bare 'br />'.)
set_error_handler(
    function($code, $msg, $file, $line) {
        if ( strpos($msg, 'DOMDocument') !== false ) { # Do not log HTML parsing warnings
            return false;
        }
        file_put_contents(
            'error_log.txt',
            $code . '|' . (str_replace(['&', '|', "\r\n", "\n"], ['&amp;', '&#124;', '<br />', '<br />'], $msg)) . '|' . $file . '|' . $line . "\r\n",
            FILE_APPEND
        );
    }, E_ALL ^ E_NOTICE
);

  1. Delete the status and error logs and backup file if any (not if using resume)

if ( ! @$_GET['resume'] ) {

if ( file_exists('status.txt') ) {

unlink('status.txt');

}

if ( file_exists('error_log.txt') ) {

unlink('error_log.txt');

}

if ( file_exists('edit_failed_backup.txt') ) {

unlink('edit_failed_backup.txt');

}

}

/**
 * Send a single request to the English Wikipedia API.
 *
 * @param string $method   HTTP method ('GET' or 'POST')
 * @param array  $headers  Raw HTTP header lines; may contain an associative
 *                         'cookie' entry (see IPLRecordsUpdateBot_login)
 * @param array  $getdata  Query-string parameters
 * @param array  $postdata POST body parameters (used for POST requests only)
 * @return string|false    Raw response body, or false on failure
 */
function queryWikiAPI($method, $headers = [], $getdata = [], $postdata = []) {
    $wikiAPIPath = 'https://en.wikipedia.org/w/api.php';
    # Add a request ID so every request is unique (aids debugging, defeats caches)
    if ( $method == 'POST' ) {
        $postdata['requestid'] = mt_rand();
    }
    else {
        $getdata['requestid'] = mt_rand();
    }
    $streamContextOptions = [
        'http' => [
            'method' => $method,
        ]
    ];
    # Attach a request body (and the matching headers) for POST requests only;
    # the original built the body unconditionally and then unset it for GET
    if ( $method == 'POST' && $postdata ) {
        $requestBody = http_build_query($postdata);
        $headers[] = 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8';
        $headers[] = 'Content-Length: ' . strlen($requestBody);
        $streamContextOptions['http']['content'] = $requestBody;
    }
    $streamContextOptions['http']['header'] = implode("\r\n", $headers);
    $uri = $wikiAPIPath . ($getdata ? ('?' . http_build_query($getdata)) : '');
    # Second argument is $use_include_path and must be a boolean (was 0)
    $result = file_get_contents($uri, false, stream_context_create($streamContextOptions));
    sleep(3); # Throttle: stay well within the API rate limits
    return $result;
}

# Default headers sent with every API request
$wikiAPIRequestHeaders = [];
$wikiAPIRequestHeaders[] = 'Accept: text/xml';
$wikiAPIRequestHeaders[] = 'DNT: 1';
$wikiAPIRequestHeaders[] = 'User-Agent: '; # Sensitive information removed

# Script start time, used for the edit-summary timing and the resume timestamp
$startTime = time();

# Log in

# Log in to Wikipedia with the bot account, using the classic two-step
# MediaWiki login: the first 'login' POST returns a token and a session ID,
# the second resends the credentials together with that token. On success the
# session and user cookies are stored in the global $wikiAPIRequestHeaders so
# that every later API call is authenticated. Dies on any failure.
function IPLRecordsUpdateBot_login() {
    global $wikiAPIRequestHeaders, $wikiAPIEditToken, $username, $password;
    # Username and password
    $username = 'IPLRecordsUpdateBot';
    $password = ''; # Password removed
    # Step 1: send the credentials to obtain a login token and session ID
    $obtainLoginTokenResult = queryWikiAPI('POST', $wikiAPIRequestHeaders,
        [],
        [
            'format' => 'xml',
            'action' => 'login',
            'lgname' => $username,
            'lgpassword' => $password,
        ]
    );
    if ( $obtainLoginTokenResult === false ) {
        die('Failed to log in: Query to Wikipedia API failed');
    }
    $XMLDOMDoc = new DOMDocument();
    $XMLDOMDoc->loadXML($obtainLoginTokenResult);
    # Abort on an API-level <error> element in the response
    if ( $XMLDOMDoc->getElementsByTagName('error')->length ) {
        $errorCode = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('code');
        $errorMessage = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('info');
        die("[{$errorCode}] {$errorMessage}");
    }
    $loginInfo = $XMLDOMDoc->getElementsByTagName('login')->item(0);
    $cookiePrefix = $loginInfo->getAttribute('cookieprefix');
    $sessionID = $loginInfo->getAttribute('sessionid');
    $loginToken = $loginInfo->getAttribute('token');
    # Construct the sessionID cookie.
    # Use a unique 'cookie' key rather than a numeric key, so that additional
    # headers can be added to $wikiAPIRequestHeaders without deleting this one.
    # It does not break the implode() function used to assemble the headers.
    $wikiAPIRequestHeaders['cookie'] = "Cookie: {$cookiePrefix}_session={$sessionID}";
    # Step 2: send a second request with the login token
    $loginWithTokenResult = queryWikiAPI('POST', $wikiAPIRequestHeaders,
        [],
        [
            'format' => 'xml',
            'action' => 'login',
            'lgname' => $username,
            'lgpassword' => $password,
            'lgtoken' => $loginToken,
        ]
    );
    if ( $loginWithTokenResult === false ) {
        die('Failed to log in: Query to Wikipedia API failed');
    }
    $XMLDOMDoc = new DOMDocument();
    $XMLDOMDoc->loadXML($loginWithTokenResult);
    if ( $XMLDOMDoc->getElementsByTagName('error')->length ) {
        $errorCode = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('code');
        $errorMessage = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('info');
        die("[{$errorCode}] {$errorMessage}");
    }
    $loginInfo = $XMLDOMDoc->getElementsByTagName('login')->item(0);
    $loginResult = $loginInfo->getAttribute('result');
    if ( $loginResult != 'Success' ) {
        die("Login unsuccessful (result: {$loginResult})");
    }
    $loginUserName = $loginInfo->getAttribute('lgusername');
    $loginUserID = $loginInfo->getAttribute('lguserid');
    $loginToken = $loginInfo->getAttribute('lgtoken');
    # Set additional cookies after login (user name, user ID and login token)
    $wikiAPIRequestHeaders['cookie'] .= "; {$cookiePrefix}UserName={$loginUserName}; {$cookiePrefix}UserID={$loginUserID}; {$cookiePrefix}Token={$loginToken}";
}

IPLRecordsUpdateBot_login();

# Once logged in, automatically log out when the execution of the script terminates

register_shutdown_function(
    # Shutdown callback: end the API session so it is not left open.
    # Runs on every termination path, including die().
    function() {
        global $wikiAPIRequestHeaders;
        queryWikiAPI('GET', $wikiAPIRequestHeaders,
            [
                'format' => 'xml',
                'action' => 'logout',
            ]
        );
    }
);

  1. Get the text of the page, the latest revision timestamp and edit token

$PageTitle = 'List of Indian Premier League records and statistics';

function IPLRecordsUpdateBot_getPageInfo() {

global $wikiAPIRequestHeaders, $wikiAPIEditToken, $PageTitle, $PageText, $PageLatestRevisionTS, $username, $password;

# Before proceeding, check for any new messages on the user talk page

$hasNewMessagesResult = queryWikiAPI('GET', $wikiAPIRequestHeaders,

[

'format' => 'xml',

'action' => 'query',

'meta' => 'userinfo',

'uiprop' => 'hasmsg',

]

);

if ( $hasNewMessagesResult === false ) { # Don't stop the script here, only give a warning

trigger_error('Cannot get info about new talk page messages: Query to Wikipedia API failed', E_USER_WARNING);

}

$XMLDOMDoc = new DOMDocument();

$XMLDOMDoc->loadXML($hasNewMessagesResult);

if ( $XMLDOMDoc->getElementsByTagName('error')->length ) {

$errorCode = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('code');

$errorMessage = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('info');

trigger_error("Cannot get info about new talk page messages: Error: [{$errorCode}] {$errorMessage}", E_USER_WARNING);

}

elseif ( $XMLDOMDoc->getElementsByTagName('userinfo')->item(0)->hasAttribute('messages') ) {

die('New message on user talk page (view | '

. 'last edit)');

}

$getPageInfoResult = queryWikiAPI('GET', $wikiAPIRequestHeaders,

[

'action' => 'query',

'format' => 'xml',

'prop' => 'info|revisions',

'titles' => $PageTitle,

'intoken' => 'edit',

'rvprop' => 'content|timestamp'

]

);

if ( $getPageInfoResult === false ) {

die('Failed to obtain page text: Query to Wikipedia API failed');

}

$XMLDOMDoc = new DOMDocument();

$XMLDOMDoc->loadXML($getPageInfoResult);

if ( $XMLDOMDoc->getElementsByTagName('error')->length ) {

$errorCode = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('code');

$errorMessage = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('info');

die("[{$errorCode}] {$errorMessage}");

}

$pageInfo = $XMLDOMDoc->getElementsByTagName('pages')->item(0)->getElementsByTagName('page')->item(0);

# Stop if the page is missing

if ( $pageInfo->hasAttribute('missing') ) {

die('Failed to obtain page text (page does not exist or has been deleted)');

}

# Get the edit token

$wikiAPIEditToken = $pageInfo->getAttribute('edittoken');

if ( $wikiAPIEditToken == '+\\' || strpos($wikiAPIEditToken, '+\\') === false ) {

die('Bad edit token obtained');

}

$revisionInfo = $pageInfo->getElementsByTagName('rev')->item(0);

$PageText = $revisionInfo->childNodes->item(0)->nodeValue;

$PageLatestRevisionTS = $revisionInfo->getAttribute('timestamp');

}

IPLRecordsUpdateBot_getPageInfo();

# Stop the script if the page obtained is a redirect

# A redirect means updates would land on the wrong page, so refuse to edit.
# Flags: i = case-insensitive '#REDIRECT', s = dot matches newline, u = UTF-8.
if ( preg_match('/^#\s*+REDIRECT\s*+\[\[.*\]\]/isu', $PageText) ) {
    die('Redirect page obtained');
}

# Check for any {{bots}} or {{nobots}} templates

# Honour the bot-exclusion templates. Stop editing when:
if ( preg_match('/\{\{\s*+(?:[Nn]obots|[Bb]ots\s*+\|(?:.*?\|)?(?:deny\s*+\=\s*+all|allow\s*+\=\s*+none))/su', $PageText) # (a) {{nobots}}, {{bots|deny=all}} or {{bots|allow=none}} is present
    || preg_match('/\{\{\s*+[Bb]ots\s*+\|(?:.*?\|)?deny\s*+\=(?:[^\|]*?,)?\s*+IPLRecordsUpdateBot\s*+(?:,|\||\}\})/su', $PageText) # (b) this bot is named in a deny= list
    || (
        # (c) an allow= list exists ...
        preg_match('/\{\{\s*+[Bb]ots\s*+\|(?:.*?\|)?allow\s*+\=[^\|]*?(?:\||\}\})/su', $PageText)
        # ... and this bot is not in it
        && ! preg_match('/\{\{\s*+[Bb]ots\s*+\|(?:.*?\|)?allow\s*+\=(?:[^\|]*?,)?\s*+IPLRecordsUpdateBot\s*+(?:,|\||\}\})/su', $PageText)
    ) ) {
    die('A {{bots}} or {{nobots}} template does not allow IPLRecordsUpdateBot to edit this page');
}

  1. If the "resume" GET parameter is true, get the text of the backup file and use it to edit.
  2. This backup file is saved in the event of an edit conflict or other error when editing
  3. so that all updates do not have to be redone in the next attempt.

if ( @$_GET['resume'] ) {

$PageText = file_get_contents('edit_failed_backup.txt');

if ( $PageText === false ) {

die("Cannot find the backup file");

}

$PageLatestRevisionTS = date('Y:m:d\TH:i:s\Z', $startTime); # Set the edit confilct detection time to the start time of the script

IPLRecordsUpdateBot_editPage();

unlink('edit_failed_backup.txt');

exit;

}

  1. Encode areas wich should not be edited
  2. These will be decoded with html_entity_decode() before the wikitext is sent back to the server
  1. HTML comments

$PageText = preg_replace_callback('/\<\!--(.*?)--\>/us',

function($match) {

return '';

}, $PageText);

  1. Tags where wikitext is not parsed

$PageText = preg_replace_callback('/(\<(nowiki|pre|math|source|syntaxhighlight)(?(?=\s)[^\>]*+)\>)(.*?)\<\/\2\>/us', # Allow attributes only if there is a space after the tag name

function($match) {

return $match[1] . str_replace(['&', '<', '>', '{', '}', '|', '!', '='],

['&', '<', '>', '{', '}', '|', '!', '=' ],

$match[3]) . '' ;

}, $PageText);

  1. Characters in template calls which may conflict with header and table syntax

$PageText = preg_replace_callback('/\{\{(?:[^\{\}]++|(?

function($match) {

return str_replace(['&', '|', '!', '='], ['&', '|', '!', '='], $match[0]);

}, $PageText);

  1. Page text is obtained and encoded, now update it

$updateStartTime = time();

include 'StatsUpdateFunctions.php';

  1. Filter the stats GET parameter
  2. Remove non-existent function names and place valid ones in correct order

$StatsToUpdate = array_values(array_intersect(

array_keys($StatsUpdateFunctions),

explode('|', $_GET['stats'])

));

  1. Start updating

foreach ( $StatsToUpdate as $funcName ) {

try {

$funcCallResult = call_user_func($StatsUpdateFunctions[$funcName]);

}

catch ( Exception $error ) {

trigger_error('Exception thrown:

' . $error->getMessage() . "
in function {$funcName}", E_USER_WARNING);

$funcCallResult = false;

}

file_put_contents('status.txt', $funcName . '|' . ((int) $funcCallResult) . "\r\n", FILE_APPEND);

}

unset($funcName, $funcCallResult);

  1. Decode encoded comments, nowiki tags etc. before commiting the edit

$PageText = preg_replace_callback('/\{\{(?:[^\{\}]++|(?

function($match) {

return html_entity_decode($match[0], ENT_QUOTES | ENT_HTML5, 'UTF-8');

}, $PageText);

# Unparsed-content tags: decode their content and re-emit the closing tag.
# (Reconstructed: the closing-tag concatenation was lost in the published source.)
$PageText = preg_replace_callback('/(\<(syntaxhighlight|source|math|pre|nowiki)(?(?=\s)[^\>]*+)\>)(.*?)\<\/\2\>/us',
    function($match) {
        return $match[1] . html_entity_decode($match[3], ENT_QUOTES | ENT_HTML5, 'UTF-8') . '</' . $match[2] . '>';
    }, $PageText);

# HTML comments: restore '&lt;!-- ... --&gt;' back to '<!-- ... -->'.
# (Reconstructed: the published source matched raw comments and returned '',
# which would have deleted every comment from the page.)
$PageText = preg_replace_callback('/&lt;!--(.*?)--&gt;/us',
    function($match) {
        return '<!--' . html_entity_decode($match[1], ENT_QUOTES | ENT_HTML5, 'UTF-8') . '-->';
    }, $PageText);

# Updating finished, now edit

$endTime = time(); # end of the updating phase (used for the edit-summary timing)

# Commit the updated wikitext to the page.
# Builds an edit summary from the per-function results logged in status.txt,
# POSTs the edit with the edit token and base timestamp (edit-conflict
# detection), and echoes "#oldrevid|newrevid" on success. If the edit fails,
# the wikitext is saved to edit_failed_backup.txt so the run can be resumed
# with &resume=1 instead of redoing all the updates.
function IPLRecordsUpdateBot_editPage() {
    global $wikiAPIEditToken, $wikiAPIRequestHeaders, $PageTitle, $PageText, $PageLatestRevisionTS, $startTime, $endTime;
    # Get the update results (to be used in the edit summary)
    $updateResults = file('status.txt');
    if ( $updateResults !== false ) {
        # Each status line has the form "functionName|1" (success) or "functionName|0" (failure)
        $updateResults = array_map( function($line) {
            return explode('|', trim($line));
        }, $updateResults);
        $totalUpdates = count($updateResults); # not currently used in the summary text
        $successfulUpdates = count(array_filter($updateResults,
            function($result) {
                return $result[1] == 1;
            }
        ));
        $failedUpdates = count(array_filter($updateResults,
            function($result) {
                return $result[1] == 0;
            }
        ));
        # Elapsed wall-clock time formatted as m:ss
        $updateTime = ((int) (($endTime - $startTime) / 60)) . ':' . str_pad(($endTime - $startTime) % 60, 2, '0', STR_PAD_LEFT);
        $editSummary = "Bot: Updating statistics ({$successfulUpdates} updates successful, {$failedUpdates} failed, {$updateTime})";
    }
    else { # Use a generic edit summary if the status file is not available for some reason
        $editSummary = "Bot: Updating statistics";
    }
    # Edit the page
    $editPageResult = queryWikiAPI('POST', $wikiAPIRequestHeaders,
        [],
        [
            'format' => 'xml',
            'action' => 'edit',
            'title' => $PageTitle,
            'summary' => $editSummary,
            'text' => $PageText,
            'basetimestamp' => $PageLatestRevisionTS, # server rejects the edit if the page changed after this
            'nocreate' => true, # never (re)create the page
            'md5' => md5($PageText), # integrity check against transfer corruption
            'token' => $wikiAPIEditToken,
        ]
    );
    if ( $editPageResult === false ) {
        die('Failed to edit: Query to Wikipedia API failed');
    }
    $XMLDOMDoc = new DOMDocument();
    $XMLDOMDoc->loadXML($editPageResult);
    if ( $XMLDOMDoc->getElementsByTagName('error')->length ) {
        $errorCode = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('code');
        $errorMessage = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('info');
        # Save the wikitext to a backup file before ending. Can be retrieved by adding &resume=1 in the URL
        file_put_contents('edit_failed_backup.txt', $PageText);
        die("[{$errorCode}] {$errorMessage}");
    }
    $editInfo = $XMLDOMDoc->getElementsByTagName('edit')->item(0);
    if ( $editInfo->getAttribute('result') != 'Success' ) {
        file_put_contents('edit_failed_backup.txt', $PageText);
        die('Failed to edit: Unknown error');
    }
    $oldRevision = $editInfo->getAttribute('oldrevid');
    $newRevision = $editInfo->getAttribute('newrevid');
    # Report the revision IDs of the committed edit
    echo "#{$oldRevision}|{$newRevision}";
}

IPLRecordsUpdateBot_editPage();

?>