User:Quarl/diff.js

// User:Quarl/diff.js - utility functions for doing diffs

// quarl 2006-01-29 initial version

// requires: util.js (trimspaces)

//

// if more than this many words of changes, use overflow string

var diff_wikisummary_maxwords = 30;

var diff_wikisummary_overflow = "$1 words changed";

/*

* diff() and diffString() are based on

* http://ejohn.org/projects/javascript-diff-algorithm/

* Copyright John Resig

*/

function diff_split(s) {

//return trimspaces(s).split(/(?:\s|[.,;\'\"`])+/);

return trimspaces(s).split(/\s+/);

}

function diffString( o, n ) {

var out = diff( diff_split(o), diff_split(n) );

var str = "";

for ( var i = 0; i < out.n.length - 1; i++ ) {

if ( out.n[i].text == null ) {

if ( out.n[i].indexOf('"') == -1 && out.n[i].indexOf('<') == -1 )

str += " " + out.n[i] +"";

else

str += " " + out.n[i];

} else {

var pre = "";

if ( out.n[i].text.indexOf('"') == -1 && out.n[i].text.indexOf('<') == -1 ) {

var n = out.n[i].row + 1;

while ( n < out.o.length && out.o[n].text == null ) {

if ( out.o[n].indexOf('"') == -1 && out.o[n].indexOf('<') == -1 && out.o[n].indexOf(':') == -1 && out.o[n].indexOf(';') == -1 )

pre += " " + out.o[n] +" ";

n++;

}

}

str += " " + out.n[i].text + pre;

}

}

return str;

}

function diff( o, n ) {

var ns = {};

var os = {};

for ( var i = 0; i < n.length; i++ ) {

// note we have to check that it is in fact an object with "rows", in

// case ns[i] happens to match a javascript member function of class

// Array, e.g. "some"!

if ( ns[ n[i] ] == null || !ns[n[i]].rows )

ns[ n[i] ] = { rows: new Array(), o: null };

ns[ n[i] ].rows.push( i );

}

for ( var i = 0; i < o.length; i++ ) {

if ( os[ o[i] ] == null || !os[o[i]].rows )

os[ o[i] ] = { rows: new Array(), n: null };

os[ o[i] ].rows.push( i );

}

for ( var i in ns ) {

if ( ns[i].rows.length == 1 && typeof(os[i]) != "undefined" && os[i].rows.length == 1 ) {

n[ ns[i].rows[0] ] = { text: n[ ns[i].rows[0] ], row: os[i].rows[0] };

o[ os[i].rows[0] ] = { text: o[ os[i].rows[0] ], row: ns[i].rows[0] };

}

}

for ( var i = 0; i < n.length - 1; i++ ) {

if ( n[i].text != null && n[i+1].text == null &&

0 <= n[i].row+1 && n[i].row+1 < o.length &&

o[ n[i].row + 1 ].text == null &&

n[i+1] == o[ n[i].row + 1 ] )

{

n[i+1] = { text: n[i+1], row: n[i].row + 1 };

o[n[i].row+1] = { text: o[n[i].row+1], row: i + 1 };

}

}

for ( var i = n.length - 1; i > 0; i-- ) {

if ( n[i].text != null && n[i-1].text == null &&

0 <= n[i].row-1 && n[i].row-1 < o.length &&

o[ n[i].row - 1 ].text == null &&

n[i-1] == o[ n[i].row - 1 ] )

{

n[i-1] = { text: n[i-1], row: n[i].row - 1 };

o[n[i].row-1] = { text: o[n[i].row-1], row: i - 1 };

}

}

return { o: o, n: n };

}

function diffAggregate(words) {

var phrases = new Array();

var cur = null;

var wordcount = 0;

// start at virtual index -1 to check for removed words at beginning of

// text

for ( var i = -1; i < words.n.length; i++ ) {

if ( i!=-1 && words.n[i].text == null ) {

if (!cur) {

cur = { o: "", n: "" };

phrases.push(cur);

}

cur.n += " " + words.n[i];

wordcount ++;

} else {

var pre = "";

var j = i==-1 ? 0 : words.n[i].row + 1;

while ( j < words.o.length && words.o[j].text == null ) {

pre += " " + words.o[j];

j++;

wordcount ++;

}

if (pre) {

if (!cur) {

cur = { o: "", n: "" };

phrases.push(cur);

}

cur.o += pre;

}

if (pre && words.n[i+1] && words.n[i+1].text == null) {

// If there's an addition following, treat this as part of the

// same change.

} else {

cur = null;

}

}

}

for (var i in phrases) {

phrases[i].n = trimspaces(phrases[i].n);

phrases[i].o = trimspaces(phrases[i].o);

}

return { phrases: phrases, wordcount: wordcount };

}

function diffWikiQuote(s) {

if (!s) return s;

if (s.match(/^\{\{.*\}\}$/)) return s;

s = s.replace(/\"/g, "'");

return '"'+s+'"';

}

function reverse(s) {

var ret = '';

for (var i = s.length-1; i >= 0; --i) {

ret += s[i];

}

return ret;

}

// trim the equal chars from the front and back of o,n at a word boundary

function diffStringTrim(o, n) {

var r = diffStringTrim0(reverse(o), reverse(n));

return diffStringTrim0(reverse(r.o), reverse(r.n));

}

function diffStringTrim0(o, n) {

var i = 0;

while (i < o.length && i < n.length && o[i] == n[i]) {

++i;

}

// find index of last non-word character

var prefix = o.substr(0, i);

// if prefix ends with word characters and suffix starts with non-word,

// then erase entire prefix

if (prefix.match(/\w$/) &&

!o.substr(i, 1).match(/^\w/) && !n.substr(i, 1).match(/^\w/))

{

o = o.substr(i);

n = n.substr(i);

} else if (prefix.match(/.*\W/)) {

i = RegExp.lastMatch.length;

o = o.substr(i);

n = n.substr(i);

} else {

// keep entire prefix

}

return { o: o, n: n };

}

function diffSummary(o, n) {

if (o == n) return "";

if (!o) return "new";

if (!n) return "blank";

var words = diff( diff_split(o), diff_split(n) );

var r = diffAggregate(words);

if (!r.wordcount) return "";

if (r.wordcount > diff_wikisummary_maxwords) {

return diff_wikisummary_overflow.replace('$1', r.wordcount);

}

var phrases = r.phrases;

var str = [];

for (var i in phrases) {

var r = diffStringTrim(phrases[i].o, phrases[i].n);

var o = diffWikiQuote(r.o), n = diffWikiQuote(r.n);

if (o && n) {

str.push(o + ' → ' + n);

} else if (o) {

str.push('-' + o);

} else if (n) {

str.push('+' + n);

} else {

alert("## internal error 15e1b13f-bae3-4399-86c5-721786822fa2");

}

}

return str.join(", ");

}

//