Wikipedia:Odd links/SQL

This page contains information on how to regenerate the Wikipedia:Odd links report.

DROP TABLE IF EXISTS oddlinks;

CREATE TABLE oddlinks (

title varchar(255) BINARY NOT NULL,

link varchar(255) BINARY NOT NULL,

from_ns int(8) unsigned NOT NULL,

to_ns int(8) unsigned NOT NULL

);

DROP TABLE IF EXISTS namespaces;

CREATE TABLE namespaces (

ns int(8) unsigned NOT NULL,

prefix varchar(255) BINARY NOT NULL

);

INSERT INTO namespaces VALUES ( 0, '' );

INSERT INTO namespaces VALUES ( 1, 'Talk' );

INSERT INTO namespaces VALUES ( 2, 'User' );

INSERT INTO namespaces VALUES ( 3, 'User Talk' );

INSERT INTO namespaces VALUES ( 4, 'Wikipedia' );

INSERT INTO namespaces VALUES ( 5, 'Wikipedia Talk' );

INSERT INTO namespaces VALUES ( 6, 'File' );

INSERT INTO namespaces VALUES ( 7, 'File Talk' );

INSERT INTO namespaces VALUES ( 8, 'MediaWiki' );

INSERT INTO namespaces VALUES ( 9, 'MediaWiki Talk' );

INSERT INTO namespaces VALUES ( 10, 'Template' );

INSERT INTO namespaces VALUES ( 11, 'Template Talk' );

INSERT INTO namespaces VALUES ( 12, 'Help' );

INSERT INTO namespaces VALUES ( 13, 'Help Talk' );

INSERT INTO namespaces VALUES ( 14, 'Category' );

INSERT INTO namespaces VALUES ( 15, 'Category Talk' );

INSERT INTO namespaces VALUES ( 100, 'Portal' );

INSERT INTO namespaces VALUES ( 101, 'Portal Talk' );

// Capture links from namespace 0 to various other namespaces

// This is done in sections as each million rows take around 3 minutes to process

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 000000 AND 999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 1000000 AND 1999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 2000000 AND 2999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 3000000 AND 3999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 4000000 AND 4999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 5000000 AND 5999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 6000000 AND 6999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 7000000 AND 7999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 8000000 AND 8999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 9000000 AND 9999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 10000000 AND 10999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 11000000 AND 11999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 12000000 AND 12999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 13000000 AND 13999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 14000000 AND 14999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 15000000 AND 15999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 16000000 AND 16999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 17000000 AND 17999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 18000000 AND 18999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 19000000 AND 19999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 20000000 AND 20999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 21000000 AND 21999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 22000000 AND 22999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 23000000 AND 23999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 24000000 AND 24999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 25000000 AND 25999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 26000000 AND 26999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 27000000 AND 27999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 28000000 AND 28999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 29000000 AND 29999999;

// Do not consider pages with 'under construction' notices on them

DROP TABLE IF EXISTS pages_under_contruction;

CREATE TABLE pages_under_contruction

AS

SELECT p.page_title

FROM enwiki_p.page p

INNER JOIN enwiki_p.templatelinks t ON p.page_id = t.tl_from

WHERE t.tl_title IN ( 'Underconstruction','UnderConstruction','Under_construction','Under_Construction','Construction','Undercon','UnderCon' )

AND t.tl_namespace = 10;

SELECT concat( '* ', o.title, ' ? ', n.prefix, ':', o.link, '' )

FROM oddlinks o

INNER JOIN namespaces n ON o.to_ns = n.ns

WHERE title IN (

SELECT *

FROM pages_under_contruction

);

ORDER BY n.ns ASC, o.title ASC;

DELETE FROM oddlinks WHERE title IN (

SELECT *

FROM pages_under_contruction

);

// Do not consider pages which link to User:CorenSearchBot

SELECT *

FROM oddlinks

WHERE from_ns = 0

AND to_ns = 2

AND link = 'CorenSearchBot';

DELETE

FROM oddlinks

WHERE from_ns = 0

AND to_ns = 2

AND link = 'CorenSearchBot';

// Pick out signaturs in articles - paired links to User: and matching User_Talk: namespaces

DROP TABLE IF EXISTS sigs;

CREATE TABLE sigs AS

SELECT o1.title, o1.link

FROM oddlinks o1, oddlinks o2

WHERE o1.title = o2.title

AND o1.link = o2.link

AND o1.to_ns = 2

AND o2.to_ns = 3;

SELECT concat( '* ', title, ' ? User:', link, ' User talk:', link, '' )

FROM sigs

ORDER BY 1 ASC;

DELETE

FROM oddlinks

WHERE EXISTS

( SELECT *

FROM sigs

WHERE oddlinks.title = sigs.title

AND oddlinks.link = sigs.link )

AND oddlinks.to_ns IN ( 2, 3 );

// Report remaining odd links

SELECT concat( '* ', o.title, ' ? ', n.prefix, ':', o.link, '' )

FROM oddlinks o

INNER JOIN namespaces n ON o.to_ns = n.ns

ORDER BY n.ns ASC, o.title ASC;