//User:Makyen/Syntax highlighter.js

//Syntax highlighter with various advantages

//See User:Remember the dot/Syntax highlighter for more information

(function () {

"use strict";

//State shared by the functions in this closure; it survives between calls.

//Span bookkeeping: number of the last span available, used to tell if
//creating additional spans is necessary. -1 means none exist yet.
var maxSpanNumber = -1;

//The value of wpTextbox1 as captured by the most recent highlightSyntax() run.
var lastText;

//Page elements and nodes. They start out undefined here and are assigned
//outside this declaration block.
var wpTextbox1; //highlightSyntax() reads the text to highlight from its .value
var wpTextbox0;
var textboxContainer;
var syntaxStyleTextNode;

//NOTE(review): presumably the timer ID for the periodic "highlight if needed" check - confirm against the code that assigns it.
var highlightSyntaxIfNeededIntervalID;

/* Define context-specific regexes, one for every common token that ends the

current context.

An attempt has been made to search for the most common syntaxes first,

thus maximizing performance. Syntaxes that begin with the same character

are searched for at the same time.

Wiki syntaxes from most common to least common:

[[internal link]] [http:// named external link]

{{template}} {{{template parameter}}}

<tag> {| table |}

http:// bare external link

=Heading= * unordered list # ordered list : indent ; small heading pre ---- horizontal line

''italic'' '''bold'''

three tildes username four tildes signature five tildes timestamp

&entity;

The tag-matching regex follows the XML standard closely so that users

won't feel like they have to escape sequences that MediaWiki will never

consider to be tags.

Only entities for characters which need to be escaped or cannot be

unambiguously represented in a monospace font are highlighted, such as

Greek letters that strongly resemble Latin letters. Use of other entities

is discouraged as a matter of style. For the same reasons, numeric

entities should be in hexadecimal (giving character codes in decimal only

adds confusion).

Flags: g for a global search, m to make ^ match the beginning of each line

and $ match the end of each line

*/

//Source text of the context-independent "breaker" pattern: one top-level
//alternative per array entry, joined with "|". It is a string rather than a
//RegExp so that a context-specific stop token can be prepended to it.
//NOTE(review): in the tag-name class below, "\u203F-\u203F-\u2040" looks like a
//typo for "\u203F-\u2040"; without the "u" flag it matches the same characters
//because "-" is already listed in the class. Left as is.
var breakerRegexBase = [
    //"[[" wikilink, or "[" followed by a URL scheme (named external link)
    "\\[(?:\\[|(?:https?:|ftp:)?//|mailto:)",

    //"{{" template, "{{{" template parameter, or "{|" table
    "\\{(?:\\{\\{?|\\|)",

    //"<name" when followed by ">", "/>" or a space, or a whole "<!-- -->" comment
    "<(?:" +
        //first character of the tag name
        "[:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]" +
        //remaining characters of the tag name
        "[:\\w\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD-\\.\u00B7\u0300-\u036F\u203F-\u203F-\u2040]*" +
        "(?=/?>| )" +
        "|!--[^]*?-->" +
    ")",

    //bare external link; the final character class keeps trailing punctuation out of the match
    "(?:https?://|ftp://|mailto:)[^\\s\"<>[\\]{-}]*[^\\s\",\\.:;<>[\\]{-}]",

    //start-of-line markup: heading, list/indent markers, horizontal line
    "^(?:=|[*#:;]+|-{4,})",

    //two or three apostrophes (italic/bold); each one is preceded by a
    //backslash because the text being searched has its apostrophes escaped
    "\\\\'\\\\'(?:\\\\')?",

    //selected named entities and hexadecimal numeric entities
    "&(?:(?:n(?:bsp|dash)|m(?:dash|inus)|lt|e[mn]sp|thinsp|amp|quot|gt|shy|zwn?j|lrm|rlm|Alpha|Beta|Epsilon|Zeta|Eta|Iota|Kappa|[Mm]u|micro|Nu|[Oo]micron|[Rr]ho|Tau|Upsilon|Chi)|#x[0-9a-fA-F]+);",

    //three to five tildes (username, signature, timestamp)
    "~{3,5}"
].join("|");

/* Build the breaker regex for one parsing context.

prefix: regex source text for the token that ends the current context. It is
inserted as-is (not escaped) and wrapped in capturing group 1.

Returns a new RegExp with the "g" and "m" flags that matches either the stop
token or any of the alternatives in breakerRegexBase. */
function breakerRegexWithPrefix(prefix)
{
    //The stop token goes first so that it takes precedence over breakers that
    //are substrings of it. The trailing "\n*" pulls any following newlines into
    //the end token, which avoids creating spans containing nothing but newlines.
    var stopToken = "(" + prefix + ")\n*";
    var pattern = stopToken + "|" + breakerRegexBase;
    return new RegExp(pattern, "gm");
}

//Breaker regexes, one per parsing context. Every context except the default
//one puts its own stop token in front of the shared breaker alternatives.

//no stop token: only the shared breakers
var defaultBreakerRegex = new RegExp(breakerRegexBase, "gm");

//contexts that end at the end of the line
var headingBreakerRegex = breakerRegexWithPrefix("\n");

//contexts that end with closing brackets; a wikilink's stop token also takes
//in any ASCII letters directly after "]]"
var namedExternalLinkBreakerRegex = breakerRegexWithPrefix("]");
var wikilinkBreakerRegex = breakerRegexWithPrefix("]][a-zA-Z]*");

//contexts that end with closing braces
var templateBreakerRegex = breakerRegexWithPrefix("}}");
var parameterBreakerRegex = breakerRegexWithPrefix("}}}");
var tableBreakerRegex = breakerRegexWithPrefix("\\|}");

//NOTE(review): starts empty; presumably filled on demand with one breaker regex per tag name - confirm against the code that uses it.
var tagBreakerRegexCache = {};

//Flags that switch on browser-specific workarounds, derived from the layout
//engine (and, for WebKit, the platform) reported by $.client.profile().
var gecko = $.client.profile().layout === "gecko";
var presto = $.client.profile().layout === "presto";
var trident = $.client.profile().layout === "trident";

//the WebKit flag is set only when the platform is not reported as "win"
var webkit = $.client.profile().layout === "webkit" && $.client.profile().platform !== "win";

function highlightSyntax()

{

lastText = wpTextbox1.value;

/* Backslashes and apostrophes are CSS-escaped at the beginning and all

parsing regexes and functions are designed to match. On the other hand,

newlines are not escaped until written so that in the regexes ^ and $

work for both newlines and the beginning or end of the string. */

var text = lastText.replace(/['\\]/g, "\\$&") + "\n"; //add a newline to fix scrolling and parsing issues

var i = 0; //the location of the parser as it goes through var text

var css = "";

var spanNumber = 0;

var lastColor;

var before = true;

/* Highlighting bold or italic markup presents a special challenge

because the actual MediaWiki parser uses multiple passes to determine

which ticks represent start tags and which represent end tags.

Because that would be too slow for us here, we instead keep track of

what kinds of unclosed opening ticks have been encountered and use

that to make a good guess as to whether the next ticks encountered

are an opening tag or a closing tag.

The major downsides to this method are that 'apostrophe italic

and italic apostrophe' are not highlighted correctly, and bold

and italic are both highlighted in the same color. */

var assumedBold = false;

var assumedItalic = false;

//workaround for Opera

//there are two problems here:

//