/* Name: MSILCodeHighlighter
// Version: 0.1
// Date: 2007-02-18
// Author: Steven van Deursen ->  steven a t cuttingedge dot it
// License: GPL 2.0


Usage:
	var codeHighlighter = new MSILCodeHighlighter();
	codeHighlighter.ProcessTags('[name of tags to parse]', '[language to parse]', '[attribute name containing the custom types]');
	
Example:
	(HTML code)
	<pre class="msil" language="msil" customtypes="MyControl Control INamingContainer HtmlTextWriter">
		public class MyControl : Control, INamingContainer
		{
			[...]
			
			protected override void Render(HtmlTextWriter writer)
			{
				base.Render(writer);
			}			
		}
	</pre>
	
	(JSCode)
	// Creating a new MSILCodeHighlighter
	var codeHighlighter = new MSILCodeHighlighter();
	
	// Declare as custom regex replacement
	codeHighlighter.AddRegExReplacement(/\[\.\.\.\]/g,	"<img src='/codecollapsed.gif' alt='[collapsed code]' border='0' />");
	
	// Start highlighting
	codeHighlighter.ProcessTags('pre', 'msil', 'customTypes');
*/


// Syntax highlighter for MSIL listings.
//
// Public methods (assigned on `this`):
//   AddRegExReplacement(regExpr, replacement) - registers a post-processing replacement
//   Parse(code, typeNames)                    - returns the highlighted HTML for a code string
//   ProcessTags(tagName, language, attr)      - highlights matching elements of the document
//
// Everything else is private to the closure.
function MSILCodeHighlighter() {
	// Used for all dictionary lookups so that inherited members such as
	// 'length', 'join' or 'constructor' are never mistaken for entries.
	var hasOwn = Object.prototype.hasOwnProperty;

	var NormalCharacterDictionary = {};
	var KeywordsDictionary = {};

	// The only symbols that are allowed to span two characters.
	var MultiCharSymbols = ['//', '/*', '*/'];

	// Custom replacements registered through AddRegExReplacement.
	var RegExprDictionary = [];

	// Fills the word-character and keyword dictionaries.
	function Initialize()
	{
		var normalCharacters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789';
		var i;
		for (i = 0; i < normalCharacters.length; i++)
			NormalCharacterDictionary[normalCharacters.charAt(i)] = true;

		// MSIL keywords.
		// NOTE: '.' is not a word character, so the tokenizer never produces a
		// word containing a dot. The dotted entries below can therefore not
		// match a token by themselves; they are kept to document the intended
		// instruction set.
		var keywords = [
			'add',        'add.ovf',    'and',        'arglist',    'beq',
			'bge',        'bge.un',     'bgt',        'bgt.un',     'ble',
			'ble.un',     'blt',        'blt.un',     'bne.un',     'br',
			'break',      'brfalse',    'brtrue',     'call',       'calli',
			'ceq',        'cgt',        'cgt.un',     'ckfinite',   'class',
			'clt',        'clt.un',     'conv',       'conv.ovf',   'cpblk',
			'div',        'div.un',     'dup',        'endfilter',  'endfinally',
			'initblk',    'jmp',        'ldarg',      'ldarga',     'ldc',
			'ldftn',      'ldind',      'ldloc',      'ldloca',     'ldnull',
			'leave',      'localloc',   'mul',        'mul.ovf',    'neg',
			'nop',        'not',        'or',         'pop',        'rem',
			'rem.un',     'ret',        'shl',        'shr',        'shr.un',
			'starg',      'stind',      'stloc',      'sub',        'sub.ovf',
			'switch',     'xor',        'box',        'callvirt',   'cast',
			'cpobj',      'initobj',    'isinst',     'ldelem',     'ldelema',
			'ldfld',      'ldflda',     'ldlen',      'ldobj',      'ldsfld',
			'ldsflda',    'ldstr',      'ldtoken',    'ldvirtfn',   'mkrefany',
			'newarr',     'newobj',     'refanytype', 'refanyval',  'rethrow',
			'sizeof',     'stelem',     'stfld',      'stobj',      'stsfld',
			'throw',      'unbox',      'instance',   'void',       'finally',
			'handler',    'try',        'to',         'method',     'private',
			'hidebysig',  'static',     'string',     'cil',        'managed',
			'int32',      'int16',      'byte',       'int64',      'float',
			'entrypoint', 'maxstack',   'locals',     'init'
		];

		for (i = 0; i < keywords.length; i++)
			KeywordsDictionary[keywords[i]] = true;
	}

	Initialize();

	// Returns true when c is not a word character (letter, digit or underscore).
	function IsSymbol(c)
	{
		return !hasOwn.call(NormalCharacterDictionary, c);
	}

	// Returns true when str is one of the registered MSIL keywords.
	function IsKeyword(str)
	{
		return hasOwn.call(KeywordsDictionary, str);
	}

	// Token constructor (call with `new`).
	//   value       - the text of the token
	//   isSymbol    - true for symbol tokens, false for word tokens
	//   isFinalized - true when no more characters may be appended (symbols only)
	function CreateToken(value, isSymbol, isFinalized)
	{
		this.IsSymbol = isSymbol;
		this.IsFinalized = isFinalized;
		this.Value = value;
		this.IsComment = false;
		this.IsStringLiteral = false;
		this.ClassName = '';
	}

	// Returns true when value is an element of arr; false for a null/undefined arr.
	function IsValueInArray(value, arr)
	{
		if (arr == null)
			return false;

		for (var b = 0; b < arr.length; b++)
		{
			if (arr[b] === value)
				return true;
		}
		return false;
	}

	// Splits code into tokens. Consecutive word characters form one word token.
	// Every symbol character is a token of its own, except that a lone '/' or
	// '*' is joined with the next character when together they form one of the
	// MultiCharSymbols ('//', '/*' or '*/').
	function GetTokenList(code)
	{
		var tokens = [];

		for (var i = 0; i < code.length; i++)
		{
			var ch = code.charAt(i);
			var isSymbol = IsSymbol(ch);
			var last = tokens.length > 0 ? tokens[tokens.length - 1] : null;

			if (!isSymbol)
			{
				// Extend the current word or start a new one.
				if (last !== null && !last.IsSymbol)
					last.Value += ch;
				else
					tokens[tokens.length] = new CreateToken(ch, false, false);

				continue;
			}

			if (last !== null && last.IsSymbol && !last.IsFinalized &&
				IsValueInArray(last.Value + ch, MultiCharSymbols))
			{
				// Complete a two character symbol; nothing more can be added to it.
				last.Value += ch;
				last.IsFinalized = true;
				continue;
			}

			// Only '/' and '*' can still grow into a two character symbol.
			tokens[tokens.length] = new CreateToken(ch, true, ch !== '/' && ch !== '*');
		}

		return tokens;
	}

	// Marks the token at index as a comment and merges all following tokens
	// into it, up to and including the first token whose value is one of
	// breakTokens. Merged slots are set to null.
	// Returns the index following the last merged token (tokens.length when no
	// break token was found).
	function MarkAndMergeCommentTokens(tokens, index, breakTokens)
	{
		var baseToken = tokens[index];
		baseToken.IsComment = true;
		baseToken.IsSymbol = false;
		baseToken.ClassName = 'comment';

		for (var i = index + 1; i < tokens.length; i++)
		{
			var token = tokens[i];

			if (token == null)
				continue;

			var value = token.Value;

			baseToken.Value += value;
			tokens[i] = null;

			if (IsValueInArray(value, breakTokens))
				return i + 1;
		}
		return tokens.length;
	}

	// Returns true when the last character of text is preceded by an odd number
	// of consecutive escapeCharacter characters, i.e. when it is escaped.
	function IsLastCharacterEscaped(text, escapeCharacter)
	{
		var count = 0;

		for (var p = text.length - 2; p >= 0 && text.charAt(p) === escapeCharacter; p--)
			count++;

		return (count % 2) === 1;
	}

	// Marks the token at index (the opening quote) as a string literal and
	// merges all following tokens into it, up to and including the closing
	// endOfLiteralCharacter or, when allowMultiline is false, the first line
	// break. Merged slots are set to null.
	// When escapeCharacter differs from endOfLiteralCharacter, a quote preceded
	// by an odd number of escape characters does not close the literal. When
	// both are equal, a doubled quote is treated as an escaped quote.
	// Returns the index following the last merged token.
	function MarkAndMergeStringLiteralTokens(tokens, index, endOfLiteralCharacter, escapeCharacter, allowMultiline)
	{
		var baseToken = tokens[index];
		baseToken.IsStringLiteral = true;
		baseToken.IsSymbol = false;
		baseToken.ClassName = 'string';

		for (var i = index + 1; i < tokens.length; i++)
		{
			var token = tokens[i];

			if (token == null)
				continue;

			var value = token.Value;

			baseToken.Value += value;
			tokens[i] = null;

			if (endOfLiteralCharacter != escapeCharacter)
			{
				// Look behind: an escaped backslash directly before the quote
				// must not be mistaken for an escaped quote.
				if (value == endOfLiteralCharacter && !IsLastCharacterEscaped(baseToken.Value, escapeCharacter))
					return i + 1;
			}
			else if (value == endOfLiteralCharacter)
			{
				// Look ahead: the literal ends unless the quote is doubled.
				if ((i + 1) >= tokens.length || tokens[i + 1] == null || tokens[i + 1].Value != endOfLiteralCharacter)
					return i + 1;

				// Consume the second quote of the pair as well.
				baseToken.Value += tokens[i + 1].Value;
				tokens[i + 1] = null;
				i++;
			}

			if (allowMultiline == false && (value == '\n' || value == '\r'))
				return i + 1;
		}
		return tokens.length;
	}

	// Finds comments and string literals in one left-to-right pass, so that
	// whichever construct starts first wins: a '//' inside a string is not a
	// comment and a quote inside a comment does not start a string.
	function MarkCommentsAndStringLiterals(tokens)
	{
		var singleLineBreakTokens = ['\n'];
		var multiLineBreakTokens = ['*/'];
		var i = 0;

		// The merge functions return the index to continue from, so the loop
		// only ever visits slots that have not been merged (non-null).
		while (i < tokens.length)
		{
			var value = tokens[i].Value;

			if (value == '//')
				i = MarkAndMergeCommentTokens(tokens, i, singleLineBreakTokens);
			else if (value == '/*')
				i = MarkAndMergeCommentTokens(tokens, i, multiLineBreakTokens);
			else if (value == "'")
				i = MarkAndMergeStringLiteralTokens(tokens, i, '\'', '\\', false);
			else if (value == '"')
				i = MarkAndMergeStringLiteralTokens(tokens, i, '"', '\\', false);
			else if (value == '@' && (i + 1) < tokens.length && tokens[i + 1].Value == '"')
				// Multiline literal: '@' directly followed by a double quote.
				// The '@' token itself stays outside the string span.
				i = MarkAndMergeStringLiteralTokens(tokens, i + 1, '"', '"', true);
			else
				i++;
		}
	}

	// Builds a lookup object with one own property per string in
	// arrayOfStrings; a null/undefined argument yields an empty lookup.
	function CreateDictionary(arrayOfStrings)
	{
		var obj = {};

		if (arrayOfStrings == null)
			return obj;

		for (var i = 0; i < arrayOfStrings.length; i++)
			obj[arrayOfStrings[i]] = true;

		return obj;
	}

	// Sets the ClassName of word tokens that are keywords or custom type names.
	// A type name takes precedence over a keyword.
	function MarkKeywordsAndTypes(tokens, typeNames)
	{
		var typeNameDictionary = CreateDictionary(typeNames);

		for (var i = 0; i < tokens.length; i++)
		{
			var token = tokens[i];

			if (token == null || token.IsComment || token.IsStringLiteral || token.IsSymbol)
				continue;

			if (IsKeyword(token.Value))
			{
				var previousToken = i == 0 ? null : tokens[i - 1];
				var previousValue = previousToken == null ? '' : previousToken.Value;

				// A keyword directly preceded by '@' is escaped and stays plain.
				if (previousValue.length == 0 || previousValue.charAt(previousValue.length - 1) != '@')
					token.ClassName = 'keyword';
			}

			if (hasOwn.call(typeNameDictionary, token.Value))
				token.ClassName = 'type';
		}
	}

	// Escapes '<' and '>' in text and wraps it in a span with the given class.
	// An empty className returns the escaped text without a span.
	// '&' is deliberately left alone: Parse only decodes '&lt;' and '&gt;', so
	// any other entity in the input must pass through unchanged.
	function WrapWithSpan(text, className)
	{
		text = text.replace(/</g, '&lt;');
		text = text.replace(/>/g, '&gt;');
		return className == '' ? text : '<span class="' + className + '">' + text + '</span>';
	}

	// PUBLIC METHOD
	// Registers a replacement that Parse applies to the generated HTML, in
	// registration order, via String.replace(regExpr, replacement).
	this.AddRegExReplacement = function(regExpr, replacement)
	{
		var obj = {};
		obj.RegExpr = regExpr;
		obj.Replacement = replacement;

		RegExprDictionary[RegExprDictionary.length] = obj;
	};

	// PUBLIC METHOD
	// Returns the highlighted HTML for code.
	//   code      - source text; <br> tags are read as line breaks and the
	//               entities '&lt;' / '&gt;' are decoded before tokenizing
	//   typeNames - optional array of identifiers to highlight as 'type'
	// Comments, string literals, keywords and type names are wrapped in spans
	// with the class names 'comment', 'string', 'keyword' and 'type'.
	this.Parse = function(code, typeNames)
	{
		code = code == null ? '' : String(code);

		// Replace break tags with returns
		code = code.replace(/\<br[\s]*[\/]*\>/gi, '\n');
		code = code.replace(/&lt;/gi, '<');
		code = code.replace(/&gt;/gi, '>');

		var tokens = GetTokenList(code);

		MarkCommentsAndStringLiterals(tokens);
		MarkKeywordsAndTypes(tokens, typeNames);

		var highlightedCode = '';
		var i;

		// Merged slots are null; only the remaining tokens carry text.
		for (i = 0; i < tokens.length; i++)
		{
			if (tokens[i] != null)
				highlightedCode += WrapWithSpan(tokens[i].Value, tokens[i].ClassName);
		}

		// Extra replacements using custom regular expressions
		for (i = 0; i < RegExprDictionary.length; i++)
		{
			var expr = RegExprDictionary[i];

			highlightedCode = highlightedCode.replace(expr.RegExpr, expr.Replacement);
		}

		return highlightedCode;
	};

	// PUBLIC METHOD
	// Highlights every element with the given tag name whose 'language'
	// attribute equals language.
	//   tagName                 - tag name to look up in the document
	//   language                - required value of the 'language' attribute
	//   customTypeNameAttribute - name of the attribute holding a whitespace
	//                             separated list of custom type names; a
	//                             missing or empty attribute means no types
	this.ProcessTags = function(tagName, language, customTypeNameAttribute)
	{
		var preTags = document.getElementsByTagName(tagName);

		for (var i = 0; i < preTags.length; i++)
		{
			var pre = preTags[i];

			if (pre.getAttribute('language') == language)
			{
				// getAttribute returns null for a missing attribute; that must
				// not end up as the type name 'null'.
				var typeNamesValue = customTypeNameAttribute == null ? null : pre.getAttribute(customTypeNameAttribute);
				var typeNames = (typeNamesValue == null || typeNamesValue == '') ? [] : String(typeNamesValue).split(/\s+/);

				var innerHtml = this.Parse(pre.innerHTML, typeNames);

				// Workaround for IE <pre> innerHTML normalization quirk
				if ('outerHTML' in pre) {
					// Line breaks are kept as '\n' when the URL contains 'debug'.
					if (document.URL.indexOf('debug') == -1)
						innerHtml = innerHtml.replace(/\n/g,'<br />');

					// Rebuild the element: original start tag + new content + end tag.
					pre.outerHTML = pre.outerHTML.substring(0,pre.outerHTML.indexOf('>')+1) + innerHtml + '<' + '/' + pre.tagName.toLowerCase() + '>';
				} else {
					pre.innerHTML = innerHtml;
				}
			}
		}
	};
}
