/* Name: X86CodeHighlighter
// Version: 0.1
// Date: 2007-02-18
// Author: Steven van Deursen ->  steven a t cuttingedge dot it
// License: GPL 2.0


Usage:
	var codeHighlighter = new X86CodeHighlighter();
	codeHighlighter.ProcessTags('[name of tags to parse]', '[language to parse]', '[attribute name containing the custom types]');
	
Example:
	(HTML code)
	<pre class="x86" language="x86" customtypes="">
		00000073  nop              
		00000074  lea         esp,[ebp-0Ch] 
		00000077  pop         ebx  
		00000078  pop         esi  
		00000079  pop         edi  
		0000007a  pop         ebp  
		0000007b  ret              
		0000007c  mov         dword ptr [ebp-18h],0 
		00000083  jmp         00000073 
	</pre>
	
	(JSCode)
	// Creating a new X86CodeHighlighter
	var codeHighlighter = new X86CodeHighlighter();
	
	// Declare a custom regex replacement
	codeHighlighter.AddRegExReplacement(/\[\.\.\.\]/g,	"<img src='/codecollapsed.gif' alt='[collapsed code]' border='0' />");
	
	// Start highlighting
	codeHighlighter.ProcessTags('pre', 'x86', 'customTypes');
*/


// Syntax highlighter for x86 assembly listings.
//
// Create an instance with `new X86CodeHighlighter()`. The instance exposes:
//   AddRegExReplacement(regExpr, replacement) - registers a replacement that is
//       applied to the generated HTML at the end of every Parse call.
//   Parse(code, typeNames)                    - returns `code` as HTML in which
//       comments, string literals, keywords and custom type names are wrapped
//       in <span class="comment|string|keyword|type"> elements.
//   ProcessTags(tagName, language, customTypeNameAttribute) - highlights every
//       element of the document with the given tag name whose `language`
//       attribute equals `language`.
function X86CodeHighlighter() {
	// Lookup tables. They are plain objects queried through HasKey(), so members
	// inherited from a prototype (e.g. 'length', 'sort', 'constructor') are never
	// mistaken for entries.
	var NormalCharacterDictionary = {};
	var KeywordsDictionary = {};

	// Replacements registered through AddRegExReplacement, in registration order.
	var RegExprDictionary = [];

	// Returns true when `key` is an own property of `dictionary`.
	function HasKey(dictionary, key)
	{
		return Object.prototype.hasOwnProperty.call(dictionary, key);
	}

	// Returns true when `text` ends with `suffix`.
	function EndsWith(text, suffix)
	{
		return text.length >= suffix.length &&
			text.substring(text.length - suffix.length) == suffix;
	}

	// Fills the character and keyword lookup tables.
	function Initialize()
	{
		var normalCharacters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789';
		var i;
		for (i = 0; i < normalCharacters.length; i++)
			NormalCharacterDictionary[normalCharacters.charAt(i)] = true;

		// X86 keywords (mnemonics, registers and size specifiers). The list is not
		// exhaustive. Entries must consist of normal characters only, because a
		// token never contains whitespace or symbols.
		var keywords = [
			'adc',	'add',	'ah',	'al',	'andl',	'ax',	'bh',	'bl',
			'bound','bp',	'bx',	'call',	'ch',	'cl',	'cmp',	'cmps',
			'cpuid','cs',	'cx',	'dh',	'div',	'dl',	'do',	'ds',
			'dx',	'eax',	'ebp',	'ebx',	'ecx',	'edi',	'edx',	'enter',
			'es',	'esc',	'esi',	'esp',	'fs',	'gs',	'hlt',	'idiv',
			'imul',	'inc',	'int',	'intn',	'into',	'invlpg','iret','jae',
			'jb',	'jbe',	'jc',	'jcxz',	'je',	'jz',	'jecxz','jg',
			'jge',	'jl',	'jle',	'jna',	'jnae',	'jnb',	'jnbe',	'jnc',
			'jne',	'jng',	'jnge',	'jnl',	'jnle',	'jno',	'jnp',	'jns',
			'jnz',	'jo',	'jp',	'jpe',	'jpo',	'js',	'la',	'leal',
			'leave','lods',	'mov',	'movl',	'movs',	'mul',	'neg',	'nop',
			'notl',	'orl',	'pop',	'popa',	'popl',	'push',	'pusha','pushl',
			'rcl',	'rcr',	'rep',	'repe',	'repne','ret',	'rol',	'ror',
			'sal',	'sar',	'sbb',	'scas',	'shl',	'shr',	'si',	'sp',
			'ss',	'stos',	'testl','xchg',	'xlat',	'xlatb','xorl',	'xor',
			'dword','jmp',	'ptr',	'lea'
		];

		for (i = 0; i < keywords.length; i++)
			KeywordsDictionary[keywords[i]] = true;
	}

	Initialize();

	// A character is a symbol when it is not a letter, digit or underscore.
	function IsSymbol(c)
	{
		return !HasKey(NormalCharacterDictionary, c);
	}

	function IsKeyword(str)
	{
		return HasKey(KeywordsDictionary, str);
	}

	// Token constructor (call with `new`).
	//   value       - text of the token
	//   isSymbol    - true when the token consists of symbol characters
	//   isFinalized - true when no further characters may be appended
	function CreateToken(value, isSymbol, isFinalized)
	{
		this.IsSymbol = isSymbol;
		this.IsFinalized = isFinalized;
		this.Value = value;
		this.IsComment = false;
		this.IsStringLiteral = false;
		this.ClassName = '';
	}

	// Splits `code` into tokens:
	//   - a run of normal characters forms one token;
	//   - every symbol character is a token of its own, except that consecutive
	//     '/' and '*' characters are collected into one token until a '/*' or
	//     '*/' pair is completed (yielding e.g. '//', '/*', '*/' or '**/').
	function GetTokenList(code)
	{
		var tokens = [];

		for (var i = 0; i < code.length; i++)
		{
			var ch = code.charAt(i);
			var isSymbol = IsSymbol(ch);
			var last = tokens.length == 0 ? null : tokens[tokens.length - 1];

			if (!isSymbol)
			{
				// Extend the current word, or start a new one.
				if (last != null && !last.IsSymbol)
					last.Value += ch;
				else
					tokens[tokens.length] = new CreateToken(ch, false, false);

				continue;
			}

			var isCommentChar = (ch == '/' || ch == '*');

			if (isCommentChar && last != null && last.IsSymbol && !last.IsFinalized)
			{
				// Only tokens made of '/' and '*' are ever left unfinalized, so the
				// previous character is one of those two. Completing a '/*' or '*/'
				// pair closes the token.
				var previousChar = last.Value.charAt(last.Value.length - 1);

				if ((previousChar == '/' && ch == '*') || (previousChar == '*' && ch == '/'))
					last.IsFinalized = true;

				last.Value += ch;
			}
			else
			{
				// Any symbol other than '/' and '*' is complete on its own. Leaving
				// it open would glue a following '/' or '*' onto it (e.g. ' */'),
				// which hides comment delimiters from Parse.
				tokens[tokens.length] = new CreateToken(ch, true, !isCommentChar);
			}
		}

		return tokens;
	}

	// Turns tokens[index] into a comment token and merges the following tokens
	// into it, up to and including the first token whose value ends with one of
	// `breakTokens`. Merged tokens are replaced by null.
	// Returns the index of the last merged token (the last index of `tokens`
	// when no break token is found), so a caller's loop can continue with the
	// next index.
	function MarkAndMergeCommentTokens(tokens, index, breakTokens)
	{
		var baseToken = tokens[index];
		baseToken.IsComment = true;
		baseToken.IsSymbol = false;
		baseToken.ClassName = 'comment';

		for (var i = index + 1; i < tokens.length; i++)
		{
			var token = tokens[i];

			if (token == null)
				continue;

			var value = token.Value;

			baseToken.Value += value;
			tokens[i] = null;

			// "Ends with" rather than "equals", so that '**/' closes a comment too.
			for (var b = 0; b < breakTokens.length; b++)
			{
				if (EndsWith(value, breakTokens[b]))
					return i;
			}
		}

		return tokens.length - 1;
	}

	// Turns tokens[index] into a string literal token and merges the following
	// tokens into it until the literal ends. Merged tokens are replaced by null.
	//   endOfLiteralCharacter - character that closes the literal
	//   escapeCharacter       - character that escapes the closing character; when
	//                           it equals endOfLiteralCharacter, a doubled closing
	//                           character is treated as an escaped one
	//   allowMultiline        - when false, a '\n' or '\r' token also ends the literal
	// Returns the index of the last merged token (the last index of `tokens`
	// when the literal is never closed).
	function MarkAndMergeStringLiteralTokens(tokens, index, endOfLiteralCharacter, escapeCharacter, allowMultiline)
	{
		var baseToken = tokens[index];
		baseToken.IsStringLiteral = true;
		baseToken.IsSymbol = false;
		baseToken.ClassName = 'string';

		for (var i = index + 1; i < tokens.length; i++)
		{
			var token = tokens[i];

			if (token == null)
				continue;

			var value = token.Value;

			baseToken.Value += value;
			tokens[i] = null;

			if (endOfLiteralCharacter != escapeCharacter)
			{
				// Look behind: the closing character ends the literal unless the
				// character right before it is the escape character.
				var baseValue = baseToken.Value;

				if (value == endOfLiteralCharacter &&
					(baseValue.length > 1 && baseValue.charAt(baseValue.length - 2) != escapeCharacter))
					return i;
			}
			else if (value == endOfLiteralCharacter)
			{
				// Look ahead: a doubled closing character is an escaped one.
				if ((i + 1) >= tokens.length || tokens[i + 1] == null || tokens[i + 1].Value != endOfLiteralCharacter)
					return i;

				// Consume the second character of the pair as well.
				baseToken.Value += tokens[i + 1].Value;
				tokens[i + 1] = null;
				i++;
			}

			if (allowMultiline == false && (value == '\n' || value == '\r'))
				return i;
		}

		return tokens.length - 1;
	}

	// Builds a lookup object that has every string of `arrayOfStrings` as a key.
	function CreateDictionary(arrayOfStrings)
	{
		var obj = {};

		for (var i = 0; i < arrayOfStrings.length; i++)
			obj[arrayOfStrings[i]] = true;

		return obj;
	}

	// Escapes '<' and '>' in `text` and, unless `className` is empty, wraps the
	// result in a <span> carrying that class.
	function WrapWithSpan(text, className)
	{
		text = text.replace(/</gi, '&lt;');
		text = text.replace(/>/gi, '&gt;');
		return className == '' ? text : '<span class="' + className + '">' + text + '</span>';
	}

	// PUBLIC METHOD
	// Registers a replacement that Parse applies to the generated HTML with
	// String.prototype.replace(regExpr, replacement).
	this.AddRegExReplacement = function(regExpr, replacement)
	{
		var obj = {};
		obj.RegExpr = regExpr;
		obj.Replacement = replacement;

		RegExprDictionary[RegExprDictionary.length] = obj;
	};

	// PUBLIC METHOD
	// Returns the highlighted HTML for `code`.
	//   code      - source text; <br> tags are read as line breaks and the
	//               entities &lt; and &gt; are decoded before tokenizing
	//   typeNames - optional array of identifiers to mark with class "type"
	this.Parse = function(code, typeNames) {
		// Replace break tags with returns
		code = code.replace(/\<br[\s]*[\/]*\>/gi, '\n');
		code = code.replace(/&lt;/gi, '<');
		code = code.replace(/&gt;/gi, '>');

		var tokens = GetTokenList(code);

		var singleLineBreakTokens = ['\n'];
		var multiLineBreakTokens = ['*/'];

		var i;
		var token;

		// The merge helpers return the index of the last token they consumed, so
		// the loop increment below continues with the token right after it.

		// Find single line // comments and merge their tokens
		for (i = 0; i < tokens.length; i++)
		{
			token = tokens[i];

			if (token != null && token.IsComment == false && token.Value.indexOf('//') != -1)
				i = MarkAndMergeCommentTokens(tokens, i, singleLineBreakTokens);
		}

		// Find multiline /* */ comments and merge their tokens
		for (i = 0; i < tokens.length; i++)
		{
			token = tokens[i];

			if (token != null && token.IsComment == false && token.Value == '/*')
				i = MarkAndMergeCommentTokens(tokens, i, multiLineBreakTokens);
		}

		// Find string literals
		for (i = 0; i < tokens.length; i++)
		{
			token = tokens[i];

			if (token == null || token.IsStringLiteral || token.IsComment)
				continue;

			var value = token.Value;

			if (value == "'")
			{
				// Process a single quoted string literal
				i = MarkAndMergeStringLiteralTokens(tokens, i, '\'', '\\', false);
			}
			else if (value == '@')
			{
				if (tokens[i + 1] != null && tokens[i + 1].Value == '"')
				{
					// Process a multiline string literal (@"..." with "" as the escaped quote)
					i = MarkAndMergeStringLiteralTokens(tokens, i + 1, '"', '"', true);
				}
			}
			else if (value == '"')
			{
				// Process a double quoted string literal
				i = MarkAndMergeStringLiteralTokens(tokens, i, '"', '\\', false);
			}
		}

		// Set ClassNames for keywords and types
		var typeNameDictionary = CreateDictionary(typeNames == null ? [] : typeNames);

		for (i = 0; i < tokens.length; i++)
		{
			token = tokens[i];

			if (token == null || token.IsComment || token.IsStringLiteral || token.IsSymbol)
				continue;

			if (IsKeyword(token.Value))
			{
				var previousToken = i == 0 ? null : tokens[i - 1];
				var previousValue = previousToken == null ? '' : previousToken.Value;

				// A keyword directly preceded by an @ sign is escaped and stays unmarked
				if (previousValue.length == 0 || previousValue.charAt(previousValue.length - 1) != '@')
					token.ClassName = 'keyword';
			}

			// A custom type name takes precedence over a keyword
			if (HasKey(typeNameDictionary, token.Value))
				token.ClassName = 'type';
		}

		// Generate the HTML
		var highlightedCode = '';

		for (i = 0; i < tokens.length; i++)
		{
			token = tokens[i];

			if (token != null)
				highlightedCode += WrapWithSpan(token.Value, token.ClassName);
		}

		// Extra replacements using custom regular expressions
		for (i = 0; i < RegExprDictionary.length; i++)
		{
			var expr = RegExprDictionary[i];

			highlightedCode = highlightedCode.replace(expr.RegExpr, expr.Replacement);
		}

		return highlightedCode;
	};

	// PUBLIC METHOD
	// Highlights every element with tag name `tagName` whose `language` attribute
	// equals `language`. The attribute named `customTypeNameAttribute` may hold a
	// space separated list of identifiers to mark as types.
	this.ProcessTags = function(tagName, language, customTypeNameAttribute)
	{
		var preTags = document.getElementsByTagName(tagName);

		for (var i = 0; i < preTags.length; i++)
		{
			var pre = preTags[i];

			if (pre.getAttribute('language') != language)
				continue;

			// getAttribute yields null for a missing attribute; that must not be
			// turned into the type name 'null'.
			var customTypes = pre.getAttribute(customTypeNameAttribute);
			var typeNames = (customTypes == null || customTypes == '') ? [] : (customTypes + '').split(' ');

			var innerHtml = this.Parse(pre.innerHTML, typeNames);

			// Workaround for IE <pre> innerHTML normalization quirk
			if ('outerHTML' in pre) {
				// Line breaks are kept as-is when the page URL contains 'debug'
				if (document.URL.indexOf('debug') == -1)
					innerHtml = innerHtml.replace(/\n/g,'<br />');

				// Rebuild the element: original opening tag + new content + closing tag
				pre.outerHTML = pre.outerHTML.substring(0,pre.outerHTML.indexOf('>')+1) + innerHtml + '<' + '/' + pre.tagName.toLowerCase() + '>';
			} else {
				pre.innerHTML = innerHtml;
			}
		}
	};
}
