Wikipedia:Odd links/SQL
This page contains information on how to regenerate the Wikipedia:Odd links report.
DROP TABLE IF EXISTS oddlinks;
CREATE TABLE oddlinks (
title varchar(255) BINARY NOT NULL,
link varchar(255) BINARY NOT NULL,
from_ns int(8) unsigned NOT NULL,
to_ns int(8) unsigned NOT NULL
);
DROP TABLE IF EXISTS namespaces;
CREATE TABLE namespaces (
ns int(8) unsigned NOT NULL,
prefix varchar(255) BINARY NOT NULL
);
INSERT INTO namespaces VALUES ( 0, '' );
INSERT INTO namespaces VALUES ( 1, 'Talk' );
INSERT INTO namespaces VALUES ( 2, 'User' );
INSERT INTO namespaces VALUES ( 3, 'User Talk' );
INSERT INTO namespaces VALUES ( 4, 'Wikipedia' );
INSERT INTO namespaces VALUES ( 5, 'Wikipedia Talk' );
INSERT INTO namespaces VALUES ( 6, 'File' );
INSERT INTO namespaces VALUES ( 7, 'File Talk' );
INSERT INTO namespaces VALUES ( 8, 'MediaWiki' );
INSERT INTO namespaces VALUES ( 9, 'MediaWiki Talk' );
INSERT INTO namespaces VALUES ( 10, 'Template' );
INSERT INTO namespaces VALUES ( 11, 'Template Talk' );
INSERT INTO namespaces VALUES ( 12, 'Help' );
INSERT INTO namespaces VALUES ( 13, 'Help Talk' );
INSERT INTO namespaces VALUES ( 14, 'Category' );
INSERT INTO namespaces VALUES ( 15, 'Category Talk' );
INSERT INTO namespaces VALUES ( 100, 'Portal' );
INSERT INTO namespaces VALUES ( 101, 'Portal Talk' );
// Capture links from namespace 0 to various other namespaces
// This is done in sections as each million rows take around 3 minutes to process
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 000000 AND 999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 1000000 AND 1999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 2000000 AND 2999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 3000000 AND 3999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 4000000 AND 4999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 5000000 AND 5999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 6000000 AND 6999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 7000000 AND 7999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 8000000 AND 8999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 9000000 AND 9999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 10000000 AND 10999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 11000000 AND 11999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 12000000 AND 12999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 13000000 AND 13999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 14000000 AND 14999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 15000000 AND 15999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 16000000 AND 16999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 17000000 AND 17999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 18000000 AND 18999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 19000000 AND 19999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 20000000 AND 20999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 21000000 AND 21999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 22000000 AND 22999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 23000000 AND 23999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 24000000 AND 24999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 25000000 AND 25999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 26000000 AND 26999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 27000000 AND 27999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 28000000 AND 28999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 29000000 AND 29999999;
// Do not consider pages with 'under construction' notices on them
DROP TABLE IF EXISTS pages_under_contruction;
CREATE TABLE pages_under_contruction
AS
SELECT p.page_title
FROM enwiki_p.page p
INNER JOIN enwiki_p.templatelinks t ON p.page_id = t.tl_from
WHERE t.tl_title IN ( 'Underconstruction','UnderConstruction','Under_construction','Under_Construction','Construction','Undercon','UnderCon' )
AND t.tl_namespace = 10;
SELECT concat( '* ', o.title, ' ? ', n.prefix, ':', o.link, '' )
FROM oddlinks o
INNER JOIN namespaces n ON o.to_ns = n.ns
WHERE title IN (
SELECT *
FROM pages_under_contruction
);
ORDER BY n.ns ASC, o.title ASC;
DELETE FROM oddlinks WHERE title IN (
SELECT *
FROM pages_under_contruction
);
// Do not consider pages which link to User:CorenSearchBot
SELECT *
FROM oddlinks
WHERE from_ns = 0
AND to_ns = 2
AND link = 'CorenSearchBot';
DELETE
FROM oddlinks
WHERE from_ns = 0
AND to_ns = 2
AND link = 'CorenSearchBot';
// Pick out signaturs in articles - paired links to User: and matching User_Talk: namespaces
DROP TABLE IF EXISTS sigs;
CREATE TABLE sigs AS
SELECT o1.title, o1.link
FROM oddlinks o1, oddlinks o2
WHERE o1.title = o2.title
AND o1.link = o2.link
AND o1.to_ns = 2
AND o2.to_ns = 3;
SELECT concat( '* ', title, ' ? User:', link, ' User talk:', link, '' )
FROM sigs
ORDER BY 1 ASC;
DELETE
FROM oddlinks
WHERE EXISTS
( SELECT *
FROM sigs
WHERE oddlinks.title = sigs.title
AND oddlinks.link = sigs.link )
AND oddlinks.to_ns IN ( 2, 3 );
// Report remaining odd links
SELECT concat( '* ', o.title, ' ? ', n.prefix, ':', o.link, '' )
FROM oddlinks o
INNER JOIN namespaces n ON o.to_ns = n.ns
ORDER BY n.ns ASC, o.title ASC;