Wikipedia:WikiProject Redirect/Inconsistent targets

A more frequently updated version of this report is available on Toolforge at [https://tb-dev.toolforge.org/ISR/ https://tb-dev.toolforge.org/ISR/].

Regenerating this report

-- First generate a list of applicable titles

DROP TABLE IF EXISTS redir_titles;

CREATE TABLE redir_titles (

rt_id int(8) unsigned NOT NULL,

rt_title varchar(255) BINARY NOT NULL,

rt_crushed varchar(255) BINARY,

PRIMARY KEY (rt_id)

);

INSERT INTO redir_titles ( rt_id, rt_title )

SELECT /* SLOW_OK */ page_id, page_title

FROM enwiki_p.page

WHERE page_namespace = 0

AND length( page_title ) >= 10

AND page_is_redirect = 1;

UPDATE redir_titles

SET rt_crushed = lcase( rt_title )

WHERE rt_title != lcase( rt_title );

DELETE FROM redir_titles WHERE rt_crushed IS NULL;

ALTER TABLE redir_titles ADD INDEX( rt_crushed );

-- Now find pairs of similar redirects

DROP TABLE IF EXISTS similar_redirects;

CREATE TABLE similar_redirects (

sr_src_id1 int(8) unsigned NOT NULL,

sr_src_title1 varchar(255) binary NOT NULL,

sr_targ_ns1 int(11),

sr_targ_title1 varchar(255) binary,

sr_src_id2 int(8) unsigned NOT NULL,

sr_src_title2 varchar(255) binary NOT NULL,

sr_targ_ns2 int(11),

sr_targ_title2 varchar(255) binary,

PRIMARY KEY (sr_src_id1,sr_src_id2)

);

-- This will have found most pairs in both directions (A v B / B v A) - eliminate duplicates of this type

-- by checking the A id is always less than the B.

INSERT INTO similar_redirects( sr_src_id1, sr_src_title1, sr_src_id2, sr_src_title2 )

SELECT /* SLOW_OK */ a.rt_id, a.rt_title, b.rt_id, b.rt_title

FROM redir_titles a

INNER JOIN redir_titles b ON a.rt_crushed = b.rt_crushed

WHERE a.rt_id < b.rt_id;

-- Fill in the redirect target for each of these and dismiss any with the same target

-- Doing it this way is ugly, but is SQL language neutral. Go standardisation!

UPDATE similar_redirects

SET sr_targ_ns1 = ( SELECT rd_namespace FROM enwiki_p.redirect WHERE rd_from = sr_src_id1 );

UPDATE similar_redirects

SET sr_targ_title1 = ( SELECT rd_title FROM enwiki_p.redirect WHERE rd_from = sr_src_id1 );

UPDATE similar_redirects

SET sr_targ_ns2 = ( SELECT rd_namespace FROM enwiki_p.redirect WHERE rd_from = sr_src_id2 );

UPDATE similar_redirects

SET sr_targ_title2 = ( SELECT rd_title FROM enwiki_p.redirect WHERE rd_from = sr_src_id2 );

DELETE

FROM similar_redirects

WHERE sr_targ_ns1 = sr_targ_ns2

AND sr_targ_title1 = sr_targ_title2;

-- See what we have left

SELECT count(*) FROM similar_redirects;

SELECT concat( '* ' , sr_src_title1, ' redirects to ', sr_targ_title1, ', but ', sr_src_title2, ' redirects to ', sr_targ_title2, '' )

FROM similar_redirects

WHERE sr_targ_ns1 = 0

AND sr_targ_ns2 = 0

ORDER by sr_src_title1 ASC

LIMIT 200;