// User:Wmahan/despam.js
// ---- configuration ----

// title of the wiki page that hosts the despam tool; doDespam later
// overwrites this with the title of the page being scanned
var despamPage = 'User:Wmahan/despam';
// path of the query interface used to fetch history entries
var query_php = '/w/query.php';
// base URL that page titles are appended to
var wiki = 'http://en.wikipedia.org/wiki/';
// URL of index.php
var wPage = 'http://en.wikipedia.org/w/index.php';
// prefix used to indent nested log messages
var tab = ' ';
// number of history entries to fetch at a time
var revPage = 50;
// stop after searching this many revisions
var maxRevs = 5000;

// ---- state shared by the functions below ----

var http_request,   // request object reused for the history queries
    rvoffset,       // offset of the next batch of history entries
    despamUrl,      // link being searched for
    despamUserRe,   // regex matching the suspected user names
    logDiv;         // element that log() appends its messages to
// Parse the parameters passed in the query string of the current page.
//
// Returns an object mapping each decoded parameter name to its decoded
// value. Entries without an "=" or with an empty name are ignored.
// Names and values are decoded with unescape().
function parseParams() {
    var query = window.location.search.substring(1);
    // A plain object rather than an Array: storing a parameter named
    // "length" on an Array throws a RangeError.
    var params = {};
    var qlist = query.split('&');
    for (var i = 0; i < qlist.length; i++) {
        var pos = qlist[i].indexOf('=');
        if (pos > 0) {
            var key = unescape(qlist[i].substring(0, pos));
            var val = unescape(qlist[i].substring(pos + 1));
            params[key] = val;
        }
    }
    return params;
}
// status lines logged by removeLink: the first when the page text was
// changed, the second when nothing matched
var removeSuccess = 'link removed\n',
    removeFailure = 'unable to remove link\n';
// Submit the edit form through its wpSave button; an unload handler is
// installed first so that this window is closed when the page unloads.
function saveAndClose() {
    function closeSelf() {
        window.close();
    }
    window.onunload = closeSelf;
    var saveButton = document.getElementById('wpSave');
    saveButton.click();
}
// Press the wpDiff button of the edit form.
function doDiff() {
    var diffButton = document.getElementById('wpDiff');
    diffButton.click();
}
// On the edit page, remove every line of the edit box that contains a
// bracketed external link to "url", and note in the edit summary that
// the link was added by "user" on "timestamp".
//
// url       - link target to remove; it is regex-escaped below
// user      - name used for the Special:Contributions link in the summary
// timestamp - text placed after "on" in the summary
//
// Nothing happens unless the wpSummary field exists and is still empty.
// No value is returned.
function removeLink(url, user, timestamp) {
var es = document.getElementById('wpSummary');
if (es && es.value == '') {
// flag the edit as minor
document.getElementById('wpMinoredit').checked = true;
// remove the link;
var tb = document.getElementById('wpTextbox1');
var cont = document.getElementById('content');
// create the logging area that log() writes to; it is appended to the
// page content below
logDiv = document.createElement('div');
logDiv.id = 'despamLog';
// XXX
// NOTE(review): the string literal below appears to have lost its HTML
// markup (the literals are unterminated and do not parse as written).
// Presumably it held a heading and two buttons wired to saveAndClose()
// and doDiff() -- restore it from the original script.
logDiv.innerHTML = "
\n\n"
+ "
Removing link
\n"
+ "
+ " onclick='saveAndClose()' /> ***"
+ "
+ " onclick='doDiff()' />\n";
cont.appendChild(logDiv);
// turn url into a regex: put a backslash in front of each of the
// characters ! * + ? ^ \ $ ] [ . so that they match literally
var regexChars = new RegExp('([!*+?^\\\\$\\]\\[.])', 'g');
url = url.replace(regexChars, '\\$1');
//log('url regex: ' + url);
// now look for various external link styles
// replacement callback: logs the removed text and leaves a single
// newline in its place
var cb = function(a) {
log('deleted line: ' + a + '');
return '\n';
};
// *[http://www.example.com link] plus any following text
// The pattern spans from the newline before the line to the newline
// that ends it.
// NOTE(review): a link on the very first line has no preceding newline
// and is not matched; and since each match consumes its trailing
// newline, a matching line directly below a removed one is presumably
// skipped -- verify.
//log('regex1: \\n\\** *\\[' + url + '[^\\n]*?\\][^\\n]*?\\n');
var re1 = new RegExp('\\n\\**[^\\n]*\\[' + url + '[^\\n]*?\\][^\\n]*?\\n', 'g');
var newText = tb.value.replace(re1, cb);
// *http://www.example.com plus any following text
// (bare-URL variant, currently disabled)
//log('regex2: \\n\\** *' + url + '[^\\n]*?\\n');
//var re2 = new RegExp('\\n\\** *' + url + '[^\\n]*?\\n', 'g');
//newText = newText.replace(re2, '\n');
// only touch the edit box and the summary when something was removed
if (newText != tb.value) {
log(removeSuccess);
tb.value = newText;
es.value = 'rm linkspam by [[Special:Contributions/' + user + '|'
+ user + ']] on ' + timestamp;
}
else {
log(removeFailure);
}
// jump to the #despam fragment of the current page
document.location.href = '#despam';
}
}
// Append "msg" to the logging area as a new <div>. The message is
// assigned to innerHTML, so markup inside it is interpreted. When no
// logging area (logDiv) is set, the message is dropped.
function log(msg) {
    var entry = document.createElement('div');
    entry.innerHTML = msg; // XXX
    if (!logDiv) {
        return;
    }
    logDiv.appendChild(entry);
}
// Fetch "url" with a synchronous (blocking) GET request and return the
// response body as text. The URL is logged before the request is made.
function sync_fetch(url) {
    log('fetching ' + url + '...');
    var request = new XMLHttpRequest();
    var isAsync = false; // block until the response has arrived
    request.open("GET", url, isAsync);
    request.send(null);
    var body = request.responseText;
    return body;
}
// Check whether the link "url" is in "page" revision "afterid", but not
// in revision "beforeid", i.e. whether the edit between them added it.
//
// page     - page title, appended as-is to the wiki base URL
// url      - link text to look for in the raw wikitext
// beforeid - revision id of the older revision
// afterid  - revision id of the newer revision
//
// Returns true only when the link is absent before and present after.
// Each revision is fetched with a blocking request; the newer one is
// only fetched when the older one does not contain the link.
function wasLinkAdded(page, url, beforeid, afterid) {
    var qpage = wiki + page + '?action=raw&oldid=';
    var beforeText = sync_fetch(qpage + beforeid);
    if (beforeText.indexOf(url) != -1) {
        // link already existed; it was not added in the next edit
        return false;
    }
    var afterText = sync_fetch(qpage + afterid);
    // When the link is in neither revision there is no point searching
    // further back in the history, but the caller is not told so.
    // XXX doesn't consider vandalism
    return afterText.indexOf(url) != -1;
}
// Examine the list of history entries "results" for an edit by a user
// matching "userRe" that adds the link "url" to "page".
//
// results - parsed query result; the revisions of the first entry of
//           results['pages'] are examined
// url     - link to look for
// page    - page title passed on to wasLinkAdded
// userRe  - regex tested against each revision's user name
//
// Returns true when such an edit is found, and false when the search
// ends without one (no revisions, fewer than revPage entries, or more
// than maxRevs searched). When another batch is requested through
// fetchHistory() nothing is returned.
//
// NOTE(review): each revision is compared with the entry that follows
// it in the same batch, so the last entry of a batch is never checked
// as the "after" side -- confirm whether that gap is intended.
function processHistory(results, url, page, userRe) {
    // XXX hack to get first element of associative array
    var pages = results ? results['pages'] : undefined;
    var info;
    for (var pageKey in pages) {
        info = pages[pageKey];
        break;
    }
    // a missing page, or an entry without revisions, has nothing to scan
    var revs = info ? info['revisions'] : undefined;
    if (!revs) {
        log('No match found!');
        return false;
    }
    var found = false;
    var curuser;
    // "i" is the entry being examined, "prevI" the entry after it in the
    // list, which is used as the "before" revision
    var i = -1;
    for (var prevI in revs) {
        if (i == -1) {
            i = prevI;
            continue;
        }
        curuser = revs[i]['user'];
        if (curuser.match(userRe)) {
            // found a possible match
            // afterid and beforeid are deliberately left undeclared
            // (global), as in the original: other functions in this file
            // may read beforeid from the global scope
            afterid = revs[i]['revid'];
            beforeid = revs[prevI]['revid'];
            log('checking possible match: ' + curuser + ' on ' + revs[i]['timestamp']);
            if (wasLinkAdded(page, url, beforeid, afterid)) {
                found = true;
                break;
            }
            log(tab + 'no match');
        }
        i = prevI;
    }
    if (found) {
        // timestamp is left global as well, as in the original
        timestamp = revs[i]['timestamp'];
        // NOTE(review): "(diff, remove link)" is plain text here;
        // presumably these were links -- confirm against the original
        log('found addition of link by ' + curuser + ' on ' + timestamp
            + ' (diff, remove link)');
        return true;
    }
    rvoffset += revPage;
    if (rvoffset > maxRevs || revs.length < revPage) {
        // we reached the end without finding anything
        log('No match found!');
        return false;
    }
    // go on to the next page
    fetchHistory();
}
// Set everything up and start fetching pages of history entries.
//
// url   - the link to search for
// page  - title of the page whose history is scanned (spaces or
//         underscores are both accepted)
// users - colon-separated list of user names or IP addresses; a "*"
//         stands for one or more digits
//
// Returns false when the user list does not form a valid regex;
// otherwise starts the first history query and returns nothing.
function doDespam(url, page, users) {
    // escape regex characters, leaving "*" for the wildcard step below
    var regexCharsExceptStar = new RegExp('([!+?^\\\\$\\]\\[.])', 'g');
    users = users.replace(regexCharsExceptStar, '\\$1');
    // turn wildcards into regexes
    var starRe = new RegExp('\\*', 'g');
    users = users.replace(starRe, '\\d+');
    // remove extraneous spaces
    users = users.replace(/ +/g, ' ');
    users = users.replace(/(^ +| +$)/g, '');
    var userlist = users.split(':');
    // new RegExp() throws on an invalid pattern instead of returning a
    // falsy value, so the failure has to be caught
    var userRe;
    try {
        userRe = new RegExp('(' + userlist.join('|') + ')');
    } catch (e) {
        log('Error: invalid user list');
        log('debugging info: (' + userlist.join('|') + ')');
        return false;
    }
    var nicePage = page.replace(/_/g, ' ');
    page = page.replace(/ /g, '_');
    // NOTE(review): the markup that presumably surrounded this message
    // (heading and links) is missing from the source; only the visible
    // text is kept -- restore the markup from the original script
    log('Scanning history for ' + nicePage + ' (history)\n');
    // publish the search parameters for fetchHistory
    rvoffset = 0;
    despamPage = page;
    despamUrl = url;
    despamUserRe = userRe;
    http_request = new XMLHttpRequest();
    fetchHistory();
}
// Fetch the next batch of history entries for despamPage, starting at
// rvoffset, with an asynchronous request on the shared http_request
// object. When the response arrives it is parsed and handed to
// processHistory together with despamUrl, despamPage and despamUserRe.
//
// Returns true once the request has been sent. Problems with the
// response are reported through log().
function fetchHistory() {
    log('fetching history entries #' + rvoffset + ' through #' + (rvoffset + revPage));
    // fetch the query page
    // encodeURIComponent() rather than escape(): escape() emits Latin-1
    // and %uXXXX sequences for non-ASCII titles instead of UTF-8
    var qpage = query_php + '?what=revisions&format=json&rvlimit=' + revPage
        + '&rvoffset=' + rvoffset + '&titles=' + encodeURIComponent(despamPage);
    http_request.open("GET", qpage, true);
    http_request.onreadystatechange = function () {
        if (http_request.readyState != 4) {
            // response not complete yet
            return;
        }
        if (http_request.status != 200) {
            log('There was a problem querying the page history.');
            return;
        }
        // The response is parsed as data with JSON.parse(); it used to be
        // passed to eval(), which would execute any script contained in it.
        var results;
        try {
            results = JSON.parse(http_request.responseText);
        } catch (e) {
            log('There was a problem querying the page history.');
            return;
        }
        processHistory(results, despamUrl, despamPage, despamUserRe);
    };
    http_request.send(null);
    return true;
}
// Open the despam page in a new window for the link "url" on "page",
// passing along the user list typed into the despamUsers field.
//
// url  - the link to search for
// page - title of the page whose history should be scanned
//
// Shows an alert and opens nothing when the field is empty. Always
// returns false.
function despamClick(url, page) {
    var target = '/wiki/' + despamPage;
    var users = document.getElementById('despamUsers').value;
    if (users == '') {
        alert('No user name or IP address was entered');
        return false;
    }
    // escape() is kept on purpose: parseParams decodes these values
    // with unescape()
    var query = '?url=' + escape(url)
        + '&page=' + escape(page)
        + '&users=' + escape(users);
    window.open(target + query);
    return false;
}
// Page-load entry point. Depending on the current URL it does one of:
//   1. on a Linksearch page with a "target=" parameter: adds the
//      despamUsers text field and rewrites the result list
//   2. on the despam page itself (no "action=" in the URL): reads the
//      url, page and users parameters and starts the history scan
//   3. on an edit page opened with "action=edit&fakeaction=rmlink":
//      reads the url, user and timestamp parameters and removes the link
addOnloadHook( function() {
if ((location.href.indexOf(':Linksearch') != -1
|| location.href.indexOf('%3ALinksearch') != -1)
&& location.href.indexOf('target=') != -1)
{
// NOTE(review): this literal is split across two lines and does not
// parse; presumably a line-break tag was lost after the colon --
// restore it from the original script
var textLabel = 'IPs or usernames for despam (e.g. SpamUser, 192.168.0.*):
';
var cont = document.getElementById("content");
var html = cont.innerHTML; // XXX un-DOM
// NOTE(review): the pattern passed to RegExp is missing from the source
// (the literal is cut off after the opening quote) -- restore it
var re = new RegExp('
// matches single quotes and backslashes so that they can be escaped
var quoteChars = new RegExp('([\'\\\\])', 'g');
// replacement callback for "re": a is the whole match, b, c and d are
// its capture groups. Presumably c is the link and d the page title --
// the pattern is missing, so confirm.
var cb = function(a, b, c, d) {
// entries whose d contains "talk:" or "user:" (any case) are left alone
var skipPages = new RegExp('(talk|user):', 'i');
if (!d.match(skipPages)) {
// escape quotes
c = c.replace(quoteChars, '\\$1');
d = d.replace(quoteChars, '\\$1');
// NOTE(review): the returned markup is missing from the source (the
// literal is cut off after the opening quote) -- restore it
return '
}
else {
return a; // skip entry
}
};
// build the label and the text field for the user list
// (div, inp and bc are not declared, so they become globals)
div = document.createElement('div');
div.innerHTML = textLabel; // XXX
inp = document.createElement('input');
inp.type = 'text';
inp.id = 'despamUsers';
inp.size = 40;
div.appendChild(inp);
// rewrite the page content, then insert the field before the first
// <ol> of the document
html = html.replace(re, cb);
cont.innerHTML = html;
bc = document.getElementById('bodyContent');
bc.insertBefore(div, document.getElementsByTagName('ol')[0]);
}
else if (document.location.href.indexOf(despamPage) != -1
&& document.location.href.indexOf('action=') == -1)
{
// the despam page is expected to contain an element with id despamLog
logDiv = document.getElementById('despamLog');
var params = parseParams();
if (params['url'] && params['page'] && params['users']) {
doDespam(params['url'], params['page'], params['users']);
}
else {
log('The url, page, or users parameter was missing.');
}
}
else if (document.location.href.indexOf('action=edit&fakeaction=rmlink') != -1) {
var params = parseParams();
// nothing is done (and nothing is reported) when a parameter is missing
if (params['url'] && params['user'] && params['timestamp']) {
removeLink(params['url'], params['user'], params['timestamp']);
}
}
} );