// User:Brighterorange/punctuation.js

/* */

// Release date of this version of the script.
var punctuationVersion = "19 April 2008";

// Last edit id handed out; puEditExt increments it before stamping each new edit chunk.
var punctuationID = 1;

// Chunk list produced by the most recent doPunctuation run; puUndo and puAllHide walk it.
var punctuationEdits = undefined;

// Edit-summary text captured by doPunctuation just before the automatic summary is written.
var punctuationOriginalSummary = undefined;

// Compared with punctuationOriginalSummary in puSummary to decide whether to tick "minor edit".
// NOTE(review): nothing in this part of the file assigns it -- confirm where it is set.
var punctuationPageOriginalSummary = undefined;

// Maximum number of context characters returned by puContextBefore / puContextAfter.
var puCONTEXT = 40;

// Edit kinds. Each value is an index into puDESCRIPTIONS and into the per-kind
// counters built by puSummary and puKindButtons.
var puENDASH = 0;

var puSPELL = 1;

var puEMDASH = 2;

var puCOMMA = 3;

var puPERCENT = 4;

var puBORN = 5;

var puLINKSPACE = 6;

var puDECADE = 7;

var puPAREN = 8;

var puXHTML = 9;

var puREF = 10;

var puSEMICOLON = 11;

var puCITYSTATE = 12;

// Human-readable name of each edit kind, in the same order as the constants above;
// puSummary uses these in the generated edit summary.
var puDESCRIPTIONS = ["en dash", "spelling", "em dash", "comma", "percent", "born", "link space", "decade", "paren", "xhtml", "ref", "semicolon", "city-state"];

// Number of edit kinds; must equal puDESCRIPTIONS.length.
var puNDESC = 13;

// TODO:

// The TODO list is maintained in the development version, at User:Brighterorange/punctuation2.js.

// Feel free to make new suggestions on my talk page.

// Entry point: runs every punctuation pass over the edit box.
//
// The text starts as a single raw chunk. Each pass turns some raw text into
// edit chunks. Passes run one per 50 ms timer tick so that the status message
// written to the 'siteSub' element (the "From Wikipedia" header) can be
// displayed between passes. After the last pass the textbox and summary are
// rewritten and the list of changes (with undo controls) is shown.
function doPunctuation() {
  var e = document.getElementById('siteSub');
  e.innerHTML = 'Running autopunctuation...';
  puDisableEditing(true);

  // A chunk is either raw text (no replacement suggested) or an edit
  // (replacement, reason, original text, hidden flag).
  var edits = new puCons(puRaw(document.editform.wpTextbox1.value), undefined);

  // [status message, pass]. Order matters: later passes only see text that
  // earlier passes left raw.
  var stages = [
    ['References...',  function (l) { return puRawMapConcat(puRef, l); }],
    ['Spelling...',    function (l) { return puSpell(l); }],
    ['Born style...',  function (l) { return puBorn(l); }],
    ['Em dashes...',   function (l) { return puRawMapConcat(puEmDash, l); }],
    ['En dashes...',   function (l) { return puRawMapConcat(puEnDash, l); }],
    ['Commas...',      function (l) { return puRawMapConcat(puComma, l); }],
    ['Semicolons...',  function (l) { return puRawMapConcat(puSemicolon, l); }],
    ['Link space...',  function (l) { return puRawMapConcat(puLinkSpace, l); }],
    ['Decade...',      function (l) { return puRawMapConcat(puDecade, l); }],
    ['Parens...',      function (l) { return puRawMapConcat(puParen, l); }],
    ['XHTML...',       function (l) { return puXhtml(l); }],
    ['City-State...',  function (l) { return puCityState(l); }]
  ];

  // Publish the result and show the interface for undos.
  function finish() {
    punctuationEdits = edits;
    punctuationOriginalSummary = document.editform.wpSummary.value;
    document.editform.wpTextbox1.value = puRewrite(edits);
    document.editform.wpSummary.value = puSummary(edits);
    puShowChanges("", edits);
  }

  // Announce stage k now, run it on the next timer tick, then chain onwards.
  function schedule(k) {
    e.innerHTML = stages[k][0];
    setTimeout(function () {
      edits = stages[k][1](edits);
      if (k + 1 < stages.length) schedule(k + 1);
      else finish();
    }, 50);
  }

  schedule(0);
};

// Dims (flag true) or restores (flag false) the edit textbox to signal that
// the script is working on it.
//
// Don't use the textbox's "disable" field, since it makes the form submit an
// empty textbox, blanking the article!
function puDisableEditing(flag) {
  var style = document.editform.wpTextbox1.style;
  if (flag) {
    style.opacity = "0.5";
    // legacy IE equivalent of opacity
    style.filter = "Alpha(Opacity=50)";
  } else {
    // Assigning undefined to a style property is coerced to the string
    // "undefined", which is not a valid value and is ignored, so the box
    // would stay dimmed. The empty string removes the inline declaration.
    style.opacity = "";
    style.filter = "";
  }
};

// Builds the edit-summary string for a chunk list.
//
// Counts the edit (non-raw) chunks of each kind and appends
// "(auto: N kind; M kind)" to the summary captured in
// punctuationOriginalSummary. With no edits at all the captured summary is
// returned unchanged. As a side effect, ticks the "minor edit" box when the
// captured summary equals punctuationPageOriginalSummary.
function puSummary(edits) {
  var tally = [];
  for (var k = 0; k < puNDESC; k++) tally[k] = 0;

  for (var cell = edits; cell != undefined; cell = cell.tail) {
    if (cell.head.israw) continue;
    tally[cell.head.what]++;
  }

  var parts = [];
  for (var kind = 0; kind < puNDESC; kind++) {
    if (tally[kind] > 0) parts.push(tally[kind] + " " + puDESCRIPTIONS[kind]);
  }

  if (parts.length == 0) return punctuationOriginalSummary;

  if (punctuationOriginalSummary == punctuationPageOriginalSummary) {
    // user never did anything except run punctuation, so minor
    document.editform.wpMinoredit.checked = true;
  }

  var gap = (punctuationOriginalSummary == "") ? "" : " ";
  return punctuationOriginalSummary + gap + "(auto: " + parts.join("; ") + ")";
};

// Builds the HTML for the per-kind controls shown above the change list
// (puShowSomeChanges prepends the result to its output).
//
// NOTE(review): the HTML string literals in this function have lost their
// markup (tags appear to have been stripped, leaving quotes split across
// lines), so the function is not valid JavaScript as it stands. The markup
// must be restored from an intact copy of the script before this can run.
function puKindButtons(edits) {

// one counter per edit kind, all starting at zero
var counts = new Array();

for(var i = 0; i < puNDESC; i ++) counts.push (0);

// count the edit (non-raw) chunks of each kind
for(var l = edits; l != undefined; l = l.tail) {

if (!l.head.israw) {

counts[l.head.what] ++;

}

}

// now for any edit kind we did do, give buttons for them.

var s = "

"

for(var j = 0; j < puNDESC; j ++) {

if (counts[j] > 0) {

s = s +

'

';

// onClick="puUndo(' + l.head.id +');"

}

}

s = s + '

' +

counts[j] + " " + puDESCRIPTIONS[j] + '

' +

'
ON ' +

'OFF ' +

'HIDE' +

'

';

return s;

};

// Returns the running "context before" string: the previous context `ol`
// followed by the new text `ne`, keeping only the last puCONTEXT characters.
function puContextBefore(ol, ne) {
  var joined = ol + ne;
  return (joined.length >= puCONTEXT)
    ? joined.substring(joined.length - puCONTEXT)
    : joined;
};

// Returns up to puCONTEXT characters of the text that the chunk list `l`
// produces: raw chunks contribute their text, edit chunks their replacement.
// Stops walking the list as soon as enough characters have been collected.
function puContextAfter(l) {
  var acc = "";
  var cell = l;
  while (cell != undefined) {
    acc += cell.head.israw ? cell.head.text : cell.head.rep;
    if (acc.length >= puCONTEXT) return acc.substr(0, puCONTEXT);
    cell = cell.tail;
  }
  return acc;
};

// Creates the menu for punctuation while in showchanges mode; the result is
// placed both before and after the change list by puShowSomeChanges.
// For now it is just a 'done' control.
//
// NOTE(review): the HTML in the string literal below has lost its markup
// (only the visible label text survives, split across lines), so this is not
// valid JavaScript as it stands. Presumably the control invoked puDoneClick
// -- confirm against an intact copy of the script.

function puMenu() {

return('

click this when done with changes
');

};

// Handler for the "done" control: re-enables the textbox and removes the list
// of shown changes from the 'siteSub' element.
function puDoneClick() {
  puDisableEditing(false);
  document.getElementById('siteSub').innerHTML = '';
};

// From a chunk list, writes an HTML summary with edit buttons into the
// 'siteSub' element.
//   c: context string made of some previous characters ("" at the start)
//   l: the chunk list; an empty list (undefined) shows a "no changes" message
//
// NOTE(review): the "no changes" string literal below has lost its HTML
// markup and is split across lines, so this is not valid JavaScript as it
// stands; restore the markup from an intact copy of the script.

function puShowChanges(c, l) {

var e = document.getElementById('siteSub');

// XXX actually, if all are deactivated too...

if (l == undefined) {

e.innerHTML = '

Punctuation: no changes.

';

} else {

e.innerHTML = puShowSomeChanges(c, l);

}

};

// Builds the HTML for a non-empty chunk list: the menu, the per-kind buttons,
// one entry per chunk, and the menu again.
//   c: context string preceding the first chunk
//   l: the chunk list
// Raw chunks are shown as "(...)", hidden edits as "(hidden)", and visible
// edits as "before-context, original → replacement, after-context" with a
// control that calls puUndo with the edit's id.
//
// NOTE(review): the HTML string literals in this function have lost their
// markup (tags stripped, quotes split across lines), so it is not valid
// JavaScript as it stands; restore the markup from an intact copy.
function puShowSomeChanges(c, l) {

var o = puMenu();

o = o + puKindButtons(l) + "
";

while (l != undefined) {

if (l.head.israw) {

// unchanged text: only advance the running context
var nc = puContextBefore(c, l.head.text);

o = o + '(...)';

c = nc;

} else if (l.head.hidden) {

// hidden edit: its replacement still counts as context for later entries
var nc = puContextBefore(c, l.head.rep);

o = o + '(hidden)'

c = nc;

} else {

// XXX hover could select in edit box??

var nc = puContextBefore(c, l.head.rep);

var ca = puContextAfter(l.tail);

// an edit may carry display-only overrides for its source / replacement
var src = (l.head.dispsrc == undefined)?l.head.orig:l.head.dispsrc;

var dst = (l.head.dispdst == undefined)?l.head.rep:l.head.dispdst;

o = o + '
(' + puHighlightContext(puEscape(c)) +

'

' onClick="puUndo(' + l.head.id +');">' +

puHighlight(puEscape(src)) + "→" + puHighlight(puEscape(dst)) + ''

+ puHighlightContext(puEscape(ca)) +

') ';

c = nc;

}

l = l.tail;

}

return (o + puMenu());

};

// Prepares the (already HTML-escaped) source or replacement text of an edit
// for display: spaces are shown as underscores, since many edits involve the
// deletion/insertion of spaces, and the __PUREF__ placeholder is shown as the
// text "<REF>".
//
// The result is inserted into innerHTML by puShowSomeChanges, so the
// placeholder text must be entity-escaped; a literal "<REF>" would be parsed
// as an (invisible) unknown tag.
function puHighlight(s) {
  // spaces first, or the replacement would mess up spaces in our html
  s = s.replace(/ /g, '_');
  return s.replace(/__PUREF__/g, '&lt;REF&gt;');
};

// Post-processes (already escaped) context text for display by applying a
// fixed list of replacements, in order.
//
// NOTE(review): the bracket, brace and pipe rules replace each character with
// itself, so they currently have no effect; presumably they once wrapped the
// character in highlighting markup -- confirm against an intact copy.
function puHighlightContext(s) {
  var rules = [
    [/\[/g, '['],
    [/\]/g, ']'],
    [/\{/g, '{'],
    [/\}/g, '}'],
    [/\|/g, '|'],
    // these occur next to false positives for en dashes, commonly
    [/issn/gi, 'ISSN'],
    [/isbn/gi, 'ISBN'],
    // template requires literal dash
    [/scotus/gi, 'SCOTUS']
  ];
  for (var k = 0; k < rules.length; k++) {
    s = s.replace(rules[k][0], rules[k][1]);
  }
  return s;
};

// Escapes text taken from the article so it can be inserted into innerHTML
// and displayed literally.
//
// "&" is escaped first so that the entities produced for "<" and ">" are not
// escaped a second time, and so that entities present in the article source
// (e.g. "&nbsp;") are displayed as written rather than interpreted.
function puEscape(s) {
  return s.replace(/&/g, "&amp;")
          .replace(/</g, "&lt;")
          .replace(/>/g, "&gt;");
};

// Called from generated html. Marks every chunk of kind k in punctuationEdits
// as hidden (the edits stay applied, they are just not displayed), then
// refreshes the summary field and redraws the change list.
function puAllHide(k) {
  var cell = punctuationEdits;
  while (cell != undefined) {
    if (cell.head.what == k) cell.head.hidden = true;
    cell = cell.tail;
  }

  // Hiding does not change the text, so only the summary and the display are
  // refreshed (the textbox itself is not rewritten).
  document.editform.wpSummary.value = puSummary(punctuationEdits);
  puShowChanges("", punctuationEdits);
  return;
};

// Called from generated html. Undoes the edit whose id is i: the chunk is
// turned back into a raw chunk holding its original text, the textarea and
// summary are rewritten, and the edit's element ('puEdit' + i) is greyed out.
// Alerts if no chunk has that id.
function puUndo(i) {
  var cell = punctuationEdits;
  while (cell != undefined && cell.head.id != i) cell = cell.tail;

  if (cell == undefined) {
    alert("Oops, can't undo? " + i + " ... " + punctuationEdits);
    return;
  }

  cell.head.text = cell.head.orig;
  cell.head.israw = true;

  // undo edit where it matters
  document.editform.wpTextbox1.value = puRewrite(punctuationEdits);
  document.editform.wpSummary.value = puSummary(punctuationEdits);

  var shown = document.getElementById('puEdit' + i).style;
  shown.border = "none";
  shown.opacity = "0.5";
  shown.filter = "Alpha(Opacity=50)";
};

// Generates the article text from a chunk list: raw chunks contribute their
// text, edit chunks their replacement. A chunk whose relevant field is
// missing contributes "???" so the problem is visible in the output.
function puRewrite(l) {
  var pieces = [];
  for (var cell = l; cell != undefined; cell = cell.tail) {
    var chunk = cell.head;
    var piece = chunk.israw ? chunk.text : chunk.rep;
    pieces.push(piece != undefined ? piece : "???");
  }
  return pieces.join("");
};

// Given a function (f : string -> chunk list) and (l : chunk list), builds a
// new list in which every raw chunk of l is replaced by the chunks of
// f(its text); edit chunks are carried over unchanged. f is applied to the
// raw chunks in list order.
function puRawMapConcat(f, l) {
  if (l == undefined) return l;

  // Collect the resulting chunks left to right...
  var heads = [];
  for (var cell = l; cell != undefined; cell = cell.tail) {
    if (cell.head.israw) {
      for (var sub = f(cell.head.text); sub != undefined; sub = sub.tail) {
        heads.push(sub.head);
      }
    } else {
      heads.push(cell.head);
    }
  }

  // ...then build the list from the right, so puCons can merge neighbouring
  // raw chunks exactly as it does when consing onto an existing tail.
  var out = undefined;
  for (var k = heads.length - 1; k >= 0; k--) out = puCons(heads[k], out);
  return out;
};

// Returns the chunks of l1 followed by l2. The cells of l1 are rebuilt with
// puCons (so a raw chunk ending l1 merges with a raw chunk starting l2);
// l2 itself is reused as the tail.
function puAppend (l1, l2) {
  var heads = [];
  for (var cell = l1; cell != undefined; cell = cell.tail) heads.push(cell.head);

  var out = l2;
  for (var k = heads.length - 1; k >= 0; k--) out = puCons(heads[k], out);
  return out;
};

// Lists are represented as head/tail cons cells, with nil = undefined.
//
// puCons(h, t) puts chunk h in front of list t. When h and the first chunk of
// t are both raw they are flattened into one new raw chunk holding the
// concatenated text (neither input chunk is modified).
function puCons(h, t) {
  var bothRaw = t != undefined && t.head.israw && h.israw;
  if (!bothRaw) {
    return { head: h, tail: t };
  }
  return {
    head: { israw: true, text: h.text + t.head.text },
    tail: t.tail
  };
}

// Makes a raw chunk (text with no replacement suggested) holding s.
function puRaw(s) {
  return { israw: true, text: s };
};

// puCleave(small, large)
// Splits `large` around the first occurrence of `small`. Returns a
// two-element array [text before the match, text after the match], or
// undefined when `small` does not occur in `large`.
function puCleave(small, large) {
  var at = large.indexOf(small);
  if (at < 0) return undefined;
  return [large.slice(0, at), large.slice(at + small.length)];
};

// "Born" pass: proposes replacing "(b. " with "(born " in every raw chunk.
function puBorn(edits) {
  var expandBorn = puSpellRep("(b. ", "(born ", puBORN);
  return puRawMapConcat(expandBorn, edits);
};

// XHTML pass: applies two literal replacements of kind puXHTML to every raw
// chunk, using the same machinery as the spelling pass.
//
// NOTE(review): the source and replacement strings below have lost their
// content (they appear to have been HTML tags, presumably line-break tag
// variants, stripped during extraction), leaving string literals split across
// lines. This is not valid JavaScript as it stands; restore the literals from
// an intact copy of the script.
function puXhtml(edits) {

edits = puRawMapConcat(puSpellRep("
", "
", puXHTML), edits);

edits = puRawMapConcat(puSpellRep("
", "
", puXHTML), edits);

return edits;

};

// Spelling pass: proposes a fixed list of common misspelling corrections.
// Each entry is [misspelled fragment, correction]; fragments rather than
// whole words are used so that inflected forms are caught too. The entries
// are applied in order, each one only to text earlier entries left raw.
function puSpell(edits) {
  var fixes = [
    ["seperat", "separat"],
    ["embarass", "embarrass"],
    ["existance", "existence"],
    ["supercede", "supersede"],
    ["accomodat", "accommodat"],
    ["foreward", "foreword"],
    ["liason", "liaison"],
    ["millenium", "millennium"],
    ["accomoda", "accommoda"],
    ["occassion", "occasion"],
    ["occurrance", "occurrence"],
    ["privelege", "privilege"],
    ["priviledge", "privilege"],
    ["withold", "withhold"]
  ];
  for (var k = 0; k < fixes.length; k++) {
    edits = puRawMapConcat(puSpellRep(fixes[k][0], fixes[k][1], puSPELL), edits);
  }
  return edits;
};

// Returns a function (string -> chunk list) that proposes replacing every
// occurrence of src with dst, recorded as edits of kind wh.
function puSpellRep(src, dst, wh) {
  return function (t) {
    // spelling is kinda slow, and most misspellings never appear at all,
    // so text without the fragment is wrapped as a single raw chunk
    return (t.indexOf(src) != -1)
      ? puSpellOne(t, src, dst, wh)
      : puCons(puRaw(t), undefined);
  };
};

// Splits t into a chunk list in which every occurrence of src becomes an edit
// (src -> dst, kind wh) and the text in between stays raw. Edits are created
// left to right.
function puSpellOne (t, src, dst, wh) {
  var parts = puCleave(src, t);
  if (parts == undefined) return puCons(puRaw(t), undefined);

  var fix = puEdit(src, dst, wh);
  var rest = puSpellOne(parts[1], src, dst, wh);
  return puCons(puRaw(parts[0]), puCons(fix, rest));
};

// City-state pass: runs the city/state link fix once for every US state, in
// alphabetical order. (Could do countries here, too.)
function puCityState(edits) {
  var states = [
    "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado",
    "Connecticut", "Delaware", "Florida", "Georgia", "Hawaii", "Idaho",
    "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana",
    "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota",
    "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada",
    "New Hampshire", "New Jersey", "New Mexico", "New York",
    "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon",
    "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota",
    "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington",
    "West Virginia", "Wisconsin", "Wyoming"
  ];
  // States whose link target differs from the plain state name.
  var linkFor = { "Georgia": "Georgia (U.S. state)|Georgia" };

  for (var k = 0; k < states.length; k++) {
    var name = states[k];
    var pass = linkFor.hasOwnProperty(name)
      ? puCityStateFn(name, linkFor[name])
      : puCityStateFn(name);
    edits = puRawMapConcat(pass, edits);
  }
  return edits;
};

// Returns a function (string -> chunk list) that applies the city-state fix
// for one state. statelink is optional and is passed through to
// puCityStateOne.
function puCityStateFn(state, statelink) {
  var needle = ', ' + state + ']]';
  return function (t) {
    // citystate is kind of slow and there are 50 states; only run a state
    // if it appears at all...
    return (t.indexOf(needle) != -1)
      ? puCityStateOne(t, state, statelink)
      : puCons(puRaw(t), undefined);
  };
};

// Splits s into [everything up to the last non-space character, the trailing
// run of spaces]. Only the space character counts as whitespace here. A
// string made entirely of spaces yields ["", s].
function puSplitWhiteEnd(s) {
  var cut = s.length;
  while (cut > 0 && s.charAt(cut - 1) == ' ') cut--;
  return [s.substr(0, cut), s.substring(cut)];
};

// Splits s into [the leading run of spaces, the rest]. Only the space
// character counts as whitespace here. A string made entirely of spaces
// yields [s, ""].
function puSplitWhiteStart(s) {
  var cut = 0;
  while (cut < s.length && s.charAt(cut) == ' ') cut++;
  return [s.substr(0, cut), s.substring(cut)];
};

// Returns the number-like token at the end of s: the trailing run of digits
// and hyphens, looking through (and dropping) any square brackets, since
// years are often linked. Returns "" when s does not end in such a token.
// XXX allow decimal places
function puNumberEnd(s) {
  var start = s.length;
  while (start > 0 && "0123456789-[]".indexOf(s.charAt(start - 1)) != -1) start--;
  return s.substring(start).replace(/[\[\]]/g, "");
};

// Returns the token at the start of s: everything up to the first space,
// newline or "|", with square brackets dropped. Despite the name the token
// need not be numeric.
function puNumberStart(s) {
  var end = 0;
  while (end < s.length && " \n|".indexOf(s.charAt(end)) == -1) end++;
  return s.substring(0, end).replace(/[\[\]]/g, "");
};

// Does this string end with a (partial) http link? True when the last "http"
// in s is not followed by a space or "]".
function puEndsHTTP (s) {
  // only "http" since we want to catch https too
  var at = s.lastIndexOf('http');
  if (at == -1) return false;

  // a space or ] after it terminates the link
  return s.lastIndexOf(' ') <= at && s.lastIndexOf(']') <= at;
};

// Are we inside an "&...;" sequence? True when the last "&" in s is not
// followed by a space or ";".
function puIsElement(s) {
  var at = s.lastIndexOf('&');
  if (at == -1) return false;

  // a space or ; after it terminates the sequence
  return s.lastIndexOf(' ') <= at && s.lastIndexOf(';') <= at;
};

// En dash pass for one raw string: proposes replacing a hyphen (plus any
// spaces around it) with an en dash when it sits between two number-like
// tokens, e.g. a range of years. Returns a chunk list covering all of t.
function puEnDash (t) {
  // split on the first dash
  var parts = puCleave("-", t);
  if (parts == undefined) return puCons(puRaw(t), undefined);

  var left = puSplitWhiteEnd(parts[0]);
  var right = puSplitWhiteStart(parts[1]);

  // tokens on either side of the dash, and their numeric values (NaN when
  // the token is not a plain number)
  var lo = puNumberEnd(left[0]);
  var hi = puNumberStart(right[1]);
  var loVal = lo * 1;
  var hiVal = hi * 1;

  // ISBNs and certain dates are excluded by making sure neither token has a
  // dash in it; links and URLs are left alone.
  var plausible = lo.length > 0 && hi.length > 0 &&
                  puEnDashBefOK(lo) && puEnDashAftOK(hi) &&
                  !(puInLink(parts[0], parts[1])) &&
                  !puEndsHTTP(left[0]);

  // ranges are usually lo-hi, but sometimes we see 1987-8
  var ordered = isNaN(loVal) || isNaN(hiVal) || loVal <= hiVal ||
                (loVal >= 1000 && loVal <= 9999 && hi <= 99);

  if (plausible && ordered) {
    // src has whitespace around dash, replacement does not
    // (note unicode en dash)
    var dash = puEdit(left[1] + "-" + right[0], "–", puENDASH);
    return puCons(puRaw(left[0]), puCons(dash, puEnDash(right[1])));
  }

  // No match. Dashes directly to the right must not be looked at again:
  // in ISBN 01-1234-6789, once the first dash is rejected we don't want to
  // then consider 1234-6789, which looks like a match.
  var skip = puEnSkip(right[1]);
  return puCons(puRaw(parts[0] + "-" + right[0] + skip[0]), puEnDash(skip[1]));
};

// The token before a dash is acceptable only if it contains no hyphen itself
// (as happens when considering the second dash in ISBN 01-1234-6789).
function puEnDashBefOK(s) {
  return s.indexOf('-') < 0;
};

// Sees if the position between a (text before) and b (text after) is inside
// a link target: in a {{ template }} name, in the name part of
// {{ template | with args }} (but not in the argument part), or in the target
// part of a [[wiki link|display]] (but not in the display portion).
//
// Decided from the nearest marker on each side.
// NOTE(review): the final `"" / ""` comparison looks unreachable, since the
// puFindAny* helpers return either one of the markers or undefined.
function puInLink(a,b) {
  var markers = ["}}", "]]", "{{", "[[", "|"];
  var opener = puFindAnyLeft(a, markers);
  var closer = puFindAnyRight(b, markers);

  if (opener == "{{") return (closer == "}}" || closer == "|");
  if (opener == "[[") return (closer == "|");
  return (opener == "" && closer == "");
};

// Returns whichever string in `finds` has its last occurrence furthest to the
// right in `str`, or undefined when none occurs. On a tie the earlier entry
// of `finds` wins.
function puFindAnyLeft(str, finds) {
  var best = undefined;
  var bestAt = -1;
  for (var k = 0; k < finds.length; k++) {
    var at = str.lastIndexOf(finds[k]);
    if (at <= bestAt) continue;
    best = finds[k];
    bestAt = at;
  }
  return best;
};

// Returns whichever string in `finds` has its first occurrence furthest to
// the left in `str`, or undefined when none occurs. On a tie the earlier
// entry of `finds` wins.
function puFindAnyRight(str, finds) {
  var earliest = undefined;
  var earliesti = str.length;
  for (var i = 0; i < finds.length; i ++) {
    var x = str.indexOf(finds[i]);
    // indexOf reports a miss as -1, which would otherwise compare as
    // "earlier" than every real position and make an absent string win.
    if (x != -1 && x < earliesti) {
      earliest = finds[i];
      earliesti = x;
    }
  }
  return earliest;
};

// Is s acceptable as the token following a dash in a range?
//  - a token starting with a digit is fine unless it contains another hyphen
//    or looks like a file name (.htm, .pdf, ...);
//  - otherwise it must start (case-insensitively) with a month name or one
//    of "today", "bc", "present".
function puEnDashAftOK(s) {
  var k;
  var first = s.charCodeAt(0);

  if (first >= '0'.charCodeAt(0) && first <= '9'.charCodeAt(0)) {
    // some prefix is a number, but we should avoid certain stuff...
    var banned = ['-', '.htm', '.pdf', '.png', '.jpg', '.gif', '.svg', '.stm'];
    for (k = 0; k < banned.length; k++) {
      if (s.indexOf(banned[k]) != -1) return false;
    }
    return true;
  }

  // otherwise something special:
  var lower = s.toLowerCase();
  var openers = [
    "january", "february", "march", "april", "may", "june", "july",
    "august", "september", "october", "november", "december",
    "today", "bc", "present"
  ];
  for (k = 0; k < openers.length; k++) {
    if (puStartswith(lower, openers[k])) return true;
  }
  return false;
};

// Does the string lng start with the string sht?
function puStartswith(lng, sht) {
  // searching backwards from position 0 can only match at the very start
  return lng.lastIndexOf(sht, 0) == 0;
};

// After a dash has been rejected for en dash replacement, splits s into
// [the part to skip, the rest to keep searching]. The skipped part is the
// leading run of digits, hyphens and square brackets.
function puEnSkip(s) {
  var cut = 0;
  while (cut < s.length && "0123456789-[]".indexOf(s.charAt(cut)) != -1) cut++;
  return [s.substr(0, cut), s.substring(cut)];
};

// Makes an edit chunk (src -> dst, of kind `what`) that is displayed using
// src and dst themselves, i.e. without display overrides.
function puEdit(src, dst, what) {
  var noOverride = undefined;
  return puEditExt(src, dst, what, noOverride, noOverride);
};

// Makes an edit chunk.
//   src, dst:         original text and its replacement
//   what:             edit kind (one of the pu* kind constants)
//   dispsrc, dispdst: optional text to display instead of src / dst
// Each chunk receives a fresh id: the global punctuationID counter is
// incremented first and its new value used.
function puEditExt(src, dst, what, dispsrc, dispdst) {
  punctuationID = punctuationID + 1;
  return {
    orig: src,
    rep: dst,
    israw: false,
    what: what,
    hidden: false,
    dispsrc: dispsrc,
    dispdst: dispdst,
    id: punctuationID
  };
};

/* Fix faux em dashes.
   "--" almost anywhere should almost always be a real em dash (unless it is
   part of a longer run of hyphens or of an html comment).
   Spaces around the "--" are removed together with it.
   TODO: " - " between words should usually be an em dash.
*/
function puEmDash(t) {
  var parts = puCleave("--", t);
  if (parts == undefined) return puCons(puRaw(t), undefined);

  var left = puSplitWhiteEnd(parts[0]);
  var right = puSplitWhiteStart(parts[1]);
  var before = left[0];
  var after = right[1];

  // must be preceded by a word and followed by a word
  var fixable = after.length > 0 && puEmOKChar(after.charAt(0)) &&
                before.length > 0 && puEmOKChar(before.charAt(before.length - 1));

  if (!fixable) {
    /* not an em dash. */
    return puCons(puRaw(parts[0] + "--"), puEmDash(parts[1]));
  }

  var dash = puEdit(left[1] + "--" + right[0], "—", puEMDASH);
  return puCons(puRaw(before), puCons(dash, puEmDash(after)));
};

// May character c sit directly next to a "--" that is turned into an em
// dash? The characters found around html comments ("<", "!", ">"), another
// hyphen, and the table/template pipe rule the replacement out.
function puEmOKChar(c) {
  switch (c) {
    case '>':
    case '!':
    case '<':
    case '-':
    case '|':
      return false;
    default:
      return true;
  }
};

// Is the first character of c an ASCII digit? (False for the empty string.)
function puIsDigit(c) {
  var code = c.charCodeAt(0);
  var zero = '0'.charCodeAt(0);
  var nine = '9'.charCodeAt(0);
  return (code >= zero && code <= nine);
};

// [[Pittsburgh, Pennsylvania]] to [[Pittsburgh, Pennsylvania|]], [[Pennsylvania]].
//
// Rewrites every ", <state>]]" in t so that the city link uses the pipe trick
// and is followed by a separate link to the state.
//   t:         raw text to scan
//   state:     state name as it appears inside the city link
//   statelink: optional contents of the state link when it differs from the
//              plain state name (e.g. "Georgia (U.S. state)|Georgia")
// Returns a chunk list covering all of t.
function puCityStateOne(t, state, statelink) {
  var a = puCleave(", " + state + "]]", t);

  // XXX could be improved by generating pipe trick expansion automatically
  // (pipe trick doesn't work in ref tags, etc.)
  // but that makes it a little trickier because we have to find "Pittsburgh"
  // in the above and might fail (because of other edits)
  // XXX when doing that should detect Image: and Category:
  if (a == undefined) return puCons(puRaw(t), undefined);

  var st = (statelink == undefined) ? state : statelink;

  // The state must be wrapped in its own [[...]]: without the brackets it is
  // left unlinked, and a piped statelink would show up as literal text.
  return puCons(puRaw(a[0]),
                puCons(puEdit(", " + state + "]]",
                              ", " + state + "|]], [[" + st + "]]",
                              puCITYSTATE),
                       puCityStateOne(a[1], state, statelink)));
};

// 1980's to 1980s (Wikipedia:Manual of Style (dates and numbers))

// note this isn't always a mistake:

// "1981 was a cold year compared to 1980's record temperatures" would be okay

// so some context awareness is appropriate (but it is almost always wrong)

function puDecade(t) {

var a = puCleave("0's", t);

if (a == undefined) return puCons(puRaw(t), undefined);

if (// date before? (only do it for 4 or 2 digit dates)

(

(a[0].length >= 4 &&

puIsDigit(a[0].charAt(a[0].length - 1)) &&

puIsDigit(a[0].charAt(a[0].length - 2)) &&

puIsDigit(a[0].charAt(a[0].length - 3)) &&

!puIsDigit(a[0].charAt(a[0].length - 4))) ||

(a[0].length >= 2 &&

puIsDigit(a[0].charAt(a[0].length - 1)) &&

!puIsDigit(a[0].charAt(a[0].length - 2)))

)

&&

// safe to correct?

a[1].length > 0 && puDecadeOKChar(a[1].charAt(0))) {

return puCons(puRaw(a[0]),

puCons(puEdit("0's", "0s", puDECADE),

puDecade(a[1])));

} else {

/* no problem. */

return puCons(puRaw(a[0] + "0's"), puDecade(a[1]));

}

};

// May character c directly follow a decade such as "1980's" that is being
// corrected? It should be the end of a word: whitespace, punctuation, a
// dash, or markup (including text in tables).
function puDecadeOKChar(c) {
  switch (c) {
    case '\n': case ' ': case ',': case '.':
    case '&': case '—': case '-': case '–':
    // text in tables?
    case '|': case '\t': case '<': case ')':
    case ';': case '!': case "'": case ':':
    case '/':
      return true;
    default:
      return false;
  }
};

// Spacing around closing parentheses: no space before ")", one space after.
// Closing parens are handled basically the same way as commas.
// Returns a chunk list covering all of t.
function puParen(t) {
  var parts = puCleave(")", t);
  if (parts == undefined) return puCons(puRaw(t), undefined);

  var left = puSplitWhiteEnd(parts[0]);
  var right = puSplitWhiteStart(parts[1]);
  var before = left[0];
  var after = right[1];

  // space before the paren, or none after it
  var needsFix = (left[1].length > 0 || right[0].length == 0);

  // must be preceded by a word and followed by a word
  var safe = after.length > 0 && puRParenOKChar(after.charAt(0)) &&
             before.length > 0 && puRParenOKChar(before.charAt(before.length - 1));

  if (needsFix && safe) {
    var fix = puEdit(left[1] + ")" + right[0], ") ", puPAREN);
    return puCons(puRaw(before), puCons(fix, puParen(after)));
  }

  /* no problem. */
  return puCons(puRaw(parts[0] + ")"), puParen(parts[1]));
};

// May character c sit directly next to a ")" whose spacing is being
// corrected? Returns false for characters where adding or removing a space
// would be wrong or would break markup, true otherwise.
// XXX perhaps should be okay-on-right and okay-on-left; this may be too conservative
function puRParenOKChar(c) {
  if (c == ")" || c == "(" || c == '|' ||
      // otherwise we undo our linkspace fix ;)
      c == ']' ||
      // title markup
      c == '=' ||
      // sometimes people do &nbsp;
      c == '&' ||
      // quotes, obviously
      c == '"' || c == '”' || c == '’' || c == "'" ||
      // [[History of Russia (1900-1950)#World War II]]
      c == "#" ||
      // other stuff
      c == '\n' || c == ':' || c == ';' || c == '.' || c == '-' || c == '—' || c == ',' ||
      // The "{" test must compare against c: a bare '{' literal is always
      // truthy and would make this function return false for every character.
      c == '}' || c == '{' || c == '<') return false;
  else return true;
};

// Comma pass for one raw string: spacing fixes around ",".
function puComma(t) {
  var mark = ',';
  return puCommaLike(mark, puCOMMA, t);
};

// Semicolon pass for one raw string: spacing fixes around ";".
function puSemicolon(t) {
  var mark = ';';
  return puCommaLike(mark, puSEMICOLON, t);
};

// Spacing fix shared by commas and semicolons: no space before the mark, one
// space after it.
//   ch:   the punctuation mark
//   what: edit kind to record
//   t:    raw text to scan
// Returns a chunk list covering all of t.
// TODO: very important to filter out URL hits, since comma appears in lots of news URLs
function puCommaLike(ch, what, t) {
  var parts = puCleave(ch, t);
  if (parts == undefined) return puCons(puRaw(t), undefined);

  var left = puSplitWhiteEnd(parts[0]);
  var right = puSplitWhiteStart(parts[1]);
  var before = left[0];
  var after = right[1];

  // space before the mark, or none after it
  var needsFix = (left[1].length > 0 || right[0].length == 0);

  // not inside a URL or an "&...;" sequence, and must be preceded by a word
  // and followed by a word
  var safe = !puEndsHTTP(before) &&
             !puIsElement(before) &&
             after.length > 0 && puCommaOKChar(after.charAt(0)) &&
             before.length > 0 && puCommaOKChar(before.charAt(before.length - 1));

  if (needsFix && safe) {
    var fix = puEdit(left[1] + ch + right[0], ch + ' ', what);
    return puCons(puRaw(before), puCons(fix, puCommaLike(ch, what, after)));
  }

  /* no problem. */
  return puCons(puRaw(parts[0] + ch), puCommaLike(ch, what, parts[1]));
};

// Link-space pass for one raw string: removes spaces before the closing "]]"
// of a link, e.g. "[[Foo ]]" -> "[[Foo]]". Returns a chunk list covering all
// of t.
function puLinkSpace(t) {
  var a = puCleave(" ]]", t);
  if (a == undefined) return puCons(puRaw(t), undefined);

  // maybe multiple spaces...
  var bef = puSplitWhiteEnd(a[0]);

  // Filter out the common idiom [[X| ]]. The character that matters is the
  // one before the whole run of spaces; testing the last character of a[0]
  // would see a space whenever two or more spaces precede "]]" and the
  // filter would be bypassed.
  if (a[0].length > 0 && bef[0].charAt(bef[0].length - 1) != '|') {
    return puCons(puRaw(bef[0]),
                  puCons(puEdit(bef[1] + " ]]", "]]", puLINKSPACE),
                         puLinkSpace(a[1])));
  } else {
    return puCons(puRaw(a[0] + " ]]"), puLinkSpace(a[1]));
  }
};

/// XXX not hooked up -- did I finish implementing this?
// Intended purpose: between number and %, remove space.
//
// As written this applies the comma-style spacing rule to "%": spaces before
// the sign are removed and exactly one space is left after it.
// NOTE(review): puPercentBeforeChar and puPercentAfterChar are not defined in
// the part of the file shown here -- confirm they exist before hooking this up.
function puPercent(t) {
  var parts = puCleave("%", t);
  if (parts == undefined) return puCons(puRaw(t), undefined);

  var left = puSplitWhiteEnd(parts[0]);
  var right = puSplitWhiteStart(parts[1]);
  var before = left[0];
  var after = right[1];

  // space before the sign, or none after it
  var needsFix = (left[1].length > 0 || right[0].length == 0);

  // must be preceded by a word and followed by a word
  var safe = after.length > 0 && puPercentBeforeChar(after.charAt(0)) &&
             before.length > 0 && puPercentAfterChar(before.charAt(before.length - 1));

  if (needsFix && safe) {
    var fix = puEdit(left[1] + "%" + right[0], "% ", puPERCENT);
    return puCons(puRaw(before), puCons(fix, puPercent(after)));
  }

  /* no problem. */
  return puCons(puRaw(parts[0] + "%"), puPercent(parts[1]));
};

// May character c sit directly next to a comma or semicolon whose spacing is
// being corrected? Returns false for digits and for characters next to which
// a space must not be added or removed, true otherwise.
function puCommaOKChar(c) {
  // definitely not inside numbers
  var code = c.charCodeAt(0);
  if (code >= '0'.charCodeAt(0) && code <= '9'.charCodeAt(0)) return false;

  switch (c) {
    // text in tables?
    case '|':
    // quotes, obviously
    case '"': case '”': case '’': case "'":
    // link w/ underscores instead of spaces
    case '_':
    case '\n': case '&': case ',':
    // ref tags
    case '{': case '<':
      return false;
    default:
      return true;
  }
};

// Should a space be inserted between a reference and a following character
// c? Returns false for characters that must stay attached (or that already
// provide separation), true otherwise.
function puRefSpaceOKChar(c) {
  switch (c) {
    // text in tables?
    case '|':
    // parenthetical
    case ')':
    // or space already...
    case ' ':
    // ending image: tags
    case ']':
    // ending template text
    case '}':
    // before em dashes (see MOS)
    case '—':
    // ending quotes...
    case '"': case '”': case '’': case "'":
    case '\n': case '&': case ',':
    // ref tags
    case '{': case '<':
      return false;
    default:
      return true;
  }
};

// For references, we want to find the ref tags, but they can appear in
// several common forms:
//   <ref>...</ref>
//   <ref name="foo">...</ref>
//   <ref name="foo" />
//
// This function returns a three-element array consisting of
// [the text before the first ref tag, the ref tag, the text following]
// (or it returns undefined if there are no ref tags to be found).
// For the bracketing forms "the ref tag" is the whole element, from the
// opening tag through the closing </ref>.
function puGetRef(t) {
  var m = '<ref';
  // but not this tag!
  var nm = '<references';

  for (var i = t.indexOf(m); i != -1; i = t.indexOf(m, i + 1)) {
    if (t.substr(i, nm.length) == nm) continue;

    // Now decide what kind of ref appearance this is. Keep looking at
    // characters until we see
    //   >   (bracketing)
    // or
    //   />  (unitary)
    for (var j = i + m.length; j < t.length; j ++) {
      var ch = t.charAt(j);

      if (ch == '/') {
        if (j < (t.length - 1) && t.charAt(j + 1) == '>') {
          return [t.substr(0, i), t.substr(i, (j + 2) - i), t.substr(j + 2)];
        }
        // A "/" that does not close the tag (e.g. inside an attribute
        // value): the scan gives up.
        // XXX report problem?
        return undefined;
      }

      if (ch == '>') {
        // Found bracketing ref tag, so now eat until </ref> is encountered.
        var a = puCleave('</ref>', t.substr(j));
        if (a == undefined) {
          // XXX warn: unclosed ref tag??
          return undefined;
        }
        return [t.substr(0, i), t.substr(i, j - i) + a[0] + '</ref>', a[1]];
      }
    }
    // Opening tag never terminated: try the next candidate, if any.
  }

  // none found...
  return undefined;
};

// If we find a ref tag, we need to ensure the following:

// 1. there should never be any space before the tag.

// 2. the ref tag should appear after punctuation (except dashes)

// UNLESS the reference is to a specific term rather than

// to the sentence or comma/semicolon-separated phrase

// (we'll leave it up to the user to reject these false positives)

// 3. there shouldn't be double punctuation before/after the ref

// 4. there should be space after the ref

// UNLESS the reference is followed by another reference

// (or a dash, or legal punctuation as above)

//

// (this is according to the manual of style at wikipedia:footnotes;

// and conforms to the Chicago Manual of Style)

//

// So, we grab any punctuation that follows the reference,

// erase all space before the reference,

// insert space after the ref if needed

// and insert any trailing punctuation before the reference,

// unless there is already punctuation there.

function puRef(t) {
    // Examines the first ref tag in t (as located by puGetRef) and, when the
    // spacing or punctuation around it needs fixing, emits an edit chunk for
    // it; then recurses on the text that follows.
    //   t: raw text to process.
    //   returns: the value built by puCons from raw chunks (puRaw) and
    //            edit chunks (puEditExt).

    var pieces = puGetRef(t);
    if (pieces == undefined) {
        // no ref tag at all: the whole text is one raw chunk
        return puCons(puRaw(t), undefined);
    }

    var lead = puSplitWhiteEnd(pieces[0]);
    var leadText = lead[0];     // text before the tag, minus trailing whitespace
    var leadSpace = lead[1];    // whitespace sitting directly before the tag
    var refTag = pieces[1];
    var trail = puSplitWhiteStart(pieces[2]);
    var gap = trail[0];         // whitespace directly after the tag
    var following = trail[1];   // whatever comes after that whitespace

    // insist on two newlines since people frequently put refs on their own lines.
    // (must be computed before any punctuation is stripped from "following")
    var endsParagraph = following.substr(0, 2) == '\n\n';

    // last character before the tag, if there is one
    var lastLead = leadText.length > 0 ? leadText.charAt(leadText.length - 1) : undefined;
    var lacksPuncBefore = lastLead == undefined || !puRefPuncChar(lastLead);
    var wantsPuncBefore = lacksPuncBefore && lastLead != undefined && puRefNeedsPunc(lastLead);

    // punctuation immediately after the ref (undefined if none); it is
    // peeled off "following" so it can be relocated or dropped.
    var puncAfter;
    if (following.length > 0 && puRefPuncChar(following.charAt(0))) {
        puncAfter = following.charAt(0);
        following = following.substr(1);
    }
    var hasPuncAfter = puncAfter != undefined;

    var wantsSpaceAfter = following.length > 0 && puRefSpaceOKChar(following.charAt(0));

    // no punctuation anywhere and the paragraph ends here
    var missingFinalPunc = endsParagraph && wantsPuncBefore;

    var somethingToFix =
        leadSpace.length > 0 ||                   // whitespace before?
        (gap.length == 0 && wantsSpaceAfter) ||   // missing necessary whitespace after?
        hasPuncAfter ||                           // punctuation after?
        missingFinalPunc;

    if (!somethingToFix) {
        // no change
        return puCons(puRaw(pieces[0] + refTag), puRef(pieces[2]));
    }

    // What goes in front of the tag in the replacement: extra punctuation
    // if needed, never whitespace.
    var movedPunc = '';
    if (missingFinalPunc) {
        // assume period at end of paragraph.
        // XXX note, this will put the period before only the last
        // reference in a series of references at the end of a paragraph
        movedPunc = '.';
    } else if (lacksPuncBefore && hasPuncAfter) {
        movedPunc = puncAfter;
    }

    var oldTail = hasPuncAfter ? puncAfter : '';
    var spacer = wantsSpaceAfter ? ' ' : '';

    // XXX: should elide contents of ref in display somehow.
    // Keep this order: raw chunk, then the edit, then the recursive call.
    var rawChunk = puRaw(leadText);
    var editChunk = puEditExt(
        // old:
        leadSpace + refTag + gap + oldTail,
        // new:
        movedPunc + refTag + spacer,
        puREF,
        // display versions elide the ref itself:
        leadSpace + '__PUREF__' + gap + oldTail,
        movedPunc + '__PUREF__' + spacer);
    var remainder = puRef(following);

    return puCons(rawChunk, puCons(editChunk, remainder));
}

function puRefPuncChar(c) {
    // True when c is one of the punctuation characters that may sit next to
    // a ref tag: . ; , ? ! :
    //   c: the character to classify.
    //   returns: true or false.
    var marks = ['.', ';', ',', '?', '!', ':'];

    for (var k = 0; k < marks.length; k++) {
        // loose comparison kept on purpose
        if (c == marks[k]) return true;
    }

    return false;
}

function puRefNeedsPunc(c) {
    // True when c is an ASCII letter, an ASCII digit, or ']'.
    //   c: a single character (puRef passes the last character before a ref).
    //   returns: true or false.
    //
    // The earlier version compared c.charCodeAt(0) (a number) against
    // one-character strings such as 'a'; those strings convert to NaN in a
    // numeric comparison, so every range test was false and only ']' could
    // match. Matching the character against a class avoids the coercion.
    return /^[A-Za-z0-9\]]$/.test(c);
}

// ----------------------------------------------

// install it..

addOnloadHook(function () {
    // Registers addPunctuation as a further onload hook, but only when the
    // page title contains "Editing " and does not contain "talk:".
    var title = document.title;
    var onTalkPage = title.indexOf("talk:") >= 0;   // not on talk pages...
    var isEditing = title.indexOf("Editing ") >= 0;

    if (!onTalkPage && isEditing) {
        addOnloadHook(addPunctuation);
    }
});

function addPunctuation() {
    // Records the edit summary as it stands now and adds the "punctuation"
    // tab whose link runs doPunctuation().

    // need to see later if user has done any editing...
    var summaryField = document.editform.wpSummary;
    punctuationPageOriginalSummary = summaryField.value;

    // NOTE(review): addTab and akeytt are defined outside this file's
    // visible part; the argument meanings are not established here.
    addTab("javascript:doPunctuation()",
           "punctuation",
           "ca-punctuation",
           "Punctuation",
           "");
    akeytt();
}

/* */