// Wikipedia:AutoEd/unicodehex.js

// Purpose: Changes percent-encoded hex character codes in wikilinks to actual Unicode characters

//

// Examples: [[stra%C3%9Fe]] -> [[straße]]

// [[AutoEd#Version%20history|History]] ->

// [[AutoEd#Version history|History]]

//

// Note: Contributed by CharlotteWeb

//

// Comments (CharlotteWeb):

// To keep things simple we'll ignore all image links, because some people prefer

// underscores in the file name and the caption can contain god-knows-what.

// One easy way is to flag them with a character which should never be used,

// but if it is already present we have a problem, so let's just quit.

//

/**
 * Replaces percent-encoded character codes (e.g. "%C3%9F") in the target part
 * of wikilinks with the actual Unicode characters.
 *
 * Only the link target (everything before the first "|") is decoded; the label
 * is left untouched. Links that start with an Image/File/Media prefix are
 * skipped by tagging them with U+E000 (a Private Use Area character) so the
 * link pattern cannot match them; the tag is removed before returning. If the
 * input already contains U+E000 the text is returned unchanged, because the
 * tag could not be told apart from real content.
 *
 * A link whose target cannot be decoded (for example a byte sequence that is
 * not valid UTF-8) is left exactly as it was.
 *
 * @param {string} txt - Wikitext to process.
 * @returns {string} The wikitext with decodable link targets decoded.
 */
function autoEdUnicodeHex(txt) {
    if (txt.indexOf("\uE000") !== -1) {
        return txt; // marker character already in use: bail out untouched
    }

    // Decodes the percent-escapes in one piece (title or section) of a target.
    // Throws URIError when the escapes do not form valid UTF-8.
    function decodePiece(piece) {
        return decodeURIComponent(
            piece
                // A "%" that does not start a two-digit hex escape is literal
                // text; escape it so decodeURIComponent does not throw on it.
                .replace(/%(?![0-9A-F]{2})/gi, "%25")
                // Keep escapes for control characters and for < > [ ] { | }
                // encoded: their literal forms would break the link syntax
                // (e.g. "%7C" would become a label separator).
                .replace(/%(?=[01][0-9A-F]|3C|3E|5B|5D|7[BCDF])/gi, "%25")
        );
    }

    // Tag image/file/media links so the link pattern below skips them.
    txt = txt.replace(/(\[\[[\:\s*]*(?:Image|File|Media)\s*\:)/gi, "$1\uE000");

    // A function replacement edits each match in place and is inserted
    // literally, so a decoded "$&" or "$1" is not treated as a pattern.
    txt = txt.replace(/\[\[[^\[\]\n\uE000]+\]\]/g, function (wikilink) {
        var inner = wikilink.slice(2, -2);
        var pipePos = inner.indexOf("|");
        var target = pipePos === -1 ? inner : inner.slice(0, pipePos);
        var rest = pipePos === -1 ? "" : inner.slice(pipePos);

        // Split on the FIRST "#" only, so later "#" characters stay part of
        // the section instead of being dropped.
        var hashPos = target.indexOf("#");
        var decoded;
        try {
            if (hashPos === -1) {
                decoded = decodePiece(target);
            } else {
                decoded = decodePiece(target.slice(0, hashPos)) + "#" + decodePiece(
                    target.slice(hashPos + 1)
                        // change "&#" to "%" when followed by valid hex
                        // NOTE(review): "&#NN" is normally a decimal character
                        // reference; confirm treating the digits as hex is intended.
                        .replace(/&#([0-9A-F]{2})/gi, "%$1")
                );
            }
        } catch (e) {
            return wikilink; // undecodable: deliberately leave this link as-is
        }
        return "[[" + decoded + rest + "]]";
    });

    return txt.replace(/\uE000/g, "");
}