Changeset 385
- Timestamp:
- 06/29/07 19:12:50 (5 years ago)
- Location:
- branches/bermi
- Files:
-
- 4 added
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/bermi/wymeditor/jquery.wymeditor.js
r270 r385 17 17 * Volker Mische (vmx@gmx.de) 18 18 * Scott Lewis (scott@bright-crayon.com) 19 * Bermi Ferrer (wymeditor a-t bermi dotorg) 19 20 */ 20 21 … … 75 76 var sWYM_DIALOG_IMAGE = "Image"; 76 77 var sWYM_DIALOG_TABLE = "Table"; 78 var sWYM_BOLD = "Bold"; 79 var sWYM_ITALIC = "Italic"; 77 80 var sWYM_CREATE_LINK = "CreateLink"; 78 81 var sWYM_INSERT_IMAGE = "InsertImage"; … … 80 83 var sWYM_TOGGLE_HTML = "ToggleHtml"; 81 84 var sWYM_FORMAT_BLOCK = "FormatBlock"; 85 var sWYM_PREVIEW = "Preview"; 82 86 83 87 var sWYM_DEFAULT_SKIN = "default"; … … 204 208 {'name': 'InsertImage', 'title': 'Image', 'css': 'wym_tools_image'}, 205 209 {'name': 'InsertTable', 'title': 'Table', 'css': 'wym_tools_table'}, 206 {'name': 'ToggleHtml', 'title': 'HTML', 'css': 'wym_tools_html'} 210 {'name': 'ToggleHtml', 'title': 'HTML', 'css': 'wym_tools_html'}, 211 {'name': 'Preview', 'title': 'Preview', 'css': 'wym_tools_preview'} 207 212 ], 208 213 … … 283 288 sSubmitSelector: ".wym_submit", 284 289 sCancelSelector: ".wym_cancel", 285 286 sDialogLinkSelector: ".wym_dialog_link", 287 sDialogImageSelector: ".wym_dialog_image", 288 sDialogTableSelector: ".wym_dialog_table", 290 sPreviewSelector: "", 291 292 sDialogLinkSelector: ".wym_dialog_link", 293 sDialogImageSelector: ".wym_dialog_image", 294 sDialogTableSelector: ".wym_dialog_table", 295 sDialogPreviewSelector: ".wym_dialog_preview", 289 296 290 297 sUpdateSelector: ".wymupdate", … … 300 307 + sWYM_DIALOG_TITLE 301 308 + "</title>" 309 + "<style type='text/css'></style>" 302 310 + "<script type='text/javascript'" 303 311 + " src='" … … 365 373 + "value='{Cancel}' />" 366 374 + "</p></body>", 375 376 sDialogPreviewHtml: "<body class='wym_dialog wym_dialog_preview'" 377 + " onload='fWYM_INIT_DIALOG(" + sWYM_INDEX + ")'" 378 + "></body>", 379 380 aDialogCss: [], 367 381 368 382 sSkin: sWYM_DEFAULT_SKIN, … … 625 639 this.update(); 626 640 this.toggleHtml(); 641 break; 642 643 case sWYM_PREVIEW: 644 this.dialog(sWYM_PREVIEW); 627 645 break; 628 646 … … 838 856 sBodyHtml = this._options.sDialogTableHtml; 839 857 break; 858 case(sWYM_PREVIEW): 859 sBodyHtml = this._options.sDialogPreviewHtml; 860 break; 840 861 } 841 862 … … 888 909 }; 889 910 911 Wymeditor.prototype.addCssRules = function(doc, aCss) { 912 var styles = doc.styleSheets[0]; 913 if(styles) { 914 for(var i = 0; i < aCss.length; i++) { 915 var oCss = aCss[i]; 916 if(oCss.name && oCss.css) this.addCssRule(styles, oCss); 917 } 918 } 919 }; 920 890 921 /********** CONFIGURATION **********/ 891 922 … … 974 1005 975 1006 var wym = window.opener.aWYM_INSTANCES[index]; 1007 var doc = window.document; 976 1008 var oSel = wym.selected(); 977 1009 var sStamp = wym.uniqueStamp(); … … 980 1012 if($j.isFunction(wym._options.fPreInitDialog)) 981 1013 wym._options.fPreInitDialog(wym,window); 1014 1015 //add css rules from options 1016 var styles = doc.styleSheets[0]; 1017 var aCss = eval(wym._options.aDialogCss); 1018 1019 wym.addCssRules(doc, aCss); 982 1020 983 1021 if(oSel) { … … 1044 1082 }); 1045 1083 1084 $j(wym._options.sDialogPreviewSelector + " " 1085 + wym._options.sPreviewSelector) 1086 .html(wym.xhtml()); 1087 1088 //cancel button 1046 1089 $j(wym._options.sCancelSelector).mousedown(function() { 1047 1090 window.close(); … … 1120 1163 return(null); 1121 1164 }; 1165 -
branches/bermi/wymeditor/jquery.wymeditor.mozilla.js
r276 r385 16 16 * Jean-Francois Hovinne (jf.hovinne@wymeditor.org) 17 17 * Volker Mische (vmx@gmx.de) 18 * Bermi Ferrer (wymeditor a-t bermi dotorg) 18 19 */ 19 20 … … 30 31 31 32 //add css rules from options 33 32 34 var styles = this._doc.styleSheets[0]; 33 35 var aCss = eval(this._options.aEditorCss); 34 35 for(var i = 0; i < aCss.length; i++) { 36 var oCss = aCss[i]; 37 if(oCss.name && oCss.css) 38 styles.insertRule(oCss.name + " {" + oCss.css + "}", 39 styles.cssRules.length); 40 } 36 37 this.addCssRules(this._doc, aCss); 41 38 42 39 this._doc.title = this._wym._index; … … 55 52 this._wym.bindEvents(); 56 53 57 // bidn key down events for enabling keyboar shortcuts54 //bind editor keydown events 58 55 $j(this._doc).bind("keydown", this.keydown); 59 56 60 //bind editor events57 //bind editor keyup events 61 58 $j(this._doc).bind("keyup", this.keyup); 62 59 … … 120 117 }; 121 118 119 WymClassMozilla.prototype.addCssRule = function(styles, oCss) { 120 121 styles.insertRule(oCss.name + " {" + oCss.css + "}", 122 styles.cssRules.length); 123 }; 122 124 123 125 /* @name xhtml … … 215 217 }; 216 218 217 //key up handler, mainly used for cleanups218 WymClassMozilla.prototype.key up= function(evt) {219 219 //keydown handler, mainly used for keyboard shortcuts 220 WymClassMozilla.prototype.keydown = function(evt) { 221 220 222 //'this' is the doc 221 223 var wym = aWYM_INSTANCES[this.title]; 222 224 223 wym._selected_image = null;224 225 if(evt.keyCode == 13 && !evt.shiftKey) {226 227 //RETURN key228 //cleanup <br><br> between paragraphs229 $j(wym._doc.body).children(sWYM_BR).remove();230 }231 232 else if(evt.keyCode != 8 && evt.keyCode != 46233 && !evt.metaKey234 && evt.keyCode != 224235 && evt.keyCode!=17 && !evt.ctrlKey) {236 //NOT BACKSPACE, NOT DELETE, NOT CTRL237 //text nodes replaced by P238 239 var container = wym.selected();240 var name = container.tagName.toLowerCase();241 242 //fix forbidden main containers243 if(244 name == "strong" ||245 name == "b" ||246 name == "em" ||247 name == "i" ||248 name == "sub" ||249 name == "sup" ||250 name == "a"251 252 ) name = container.parentNode.tagName.toLowerCase();253 254 if(name == sWYM_BODY) wym._exec(sWYM_FORMAT_BLOCK, sWYM_P);255 }256 };257 258 //keydown handler,used for keyboard shortcuts259 WymClassMozilla.prototype.keydown = function(evt) {260 261 //'this' is the doc262 var wym = aWYM_INSTANCES[this.title];263 264 225 if(evt.ctrlKey){ 265 226 if(evt.keyCode == 66){ 266 wym._exec('Bold'); 227 //CTRL+b => STRONG 228 wym._exec(sWYM_BOLD); 267 229 return false; 268 230 } 269 231 if(evt.keyCode == 73){ 270 wym._exec('Italic'); 232 //CTRL+i => EMPHASIS 233 wym._exec(sWYM_ITALIC); 271 234 return false; 272 235 } 273 236 } 274 } 237 }; 238 239 //keyup handler, mainly used for cleanups 240 WymClassMozilla.prototype.keyup = function(evt) { 241 242 //'this' is the doc 243 var wym = aWYM_INSTANCES[this.title]; 244 245 wym._selected_image = null; 246 247 if(evt.keyCode == 13 && !evt.shiftKey) { 248 249 //RETURN key 250 //cleanup <br><br> between paragraphs 251 $j(wym._doc.body).children(sWYM_BR).remove(); 252 } 253 254 else if(evt.keyCode != 8 255 && evt.keyCode != 17 256 && evt.keyCode != 46 257 && evt.keyCode != 224 258 && !evt.metaKey 259 && !evt.ctrlKey) { 260 261 //NOT BACKSPACE, NOT DELETE, NOT CTRL, NOT COMMAND 262 //text nodes replaced by P 263 264 var container = wym.selected(); 265 var name = container.tagName.toLowerCase(); 266 267 //fix forbidden main containers 268 if( 269 name == "strong" || 270 name == "b" || 271 name == "em" || 272 name == "i" || 273 name == "sub" || 274 name == "sup" || 275 name == "a" 276 277 ) name = container.parentNode.tagName.toLowerCase(); 278 279 if(name == sWYM_BODY) wym._exec(sWYM_FORMAT_BLOCK, sWYM_P); 280 } 281 }; 275 282 276 283 WymClassMozilla.prototype.setFocusToNode = function(node) { -
branches/bermi/wymeditor/vendors/parser.js
r286 r385 1 1 2 Object.prototype.extends = function (oSuper) { 2 3 for (sProperty in oSuper) { … … 8 9 return this.replace(/^(\s*)|(\s*)$/gm,''); 9 10 } 11 12 13 Array.prototype.contains = function (elem) { 14 for (var i = 0; i < this.length; i++) { 15 if (this[i] === elem) { 16 return true; 17 } 18 } 19 return false; 20 }; 21 10 22 11 23 /** … … 84 96 if (this._regex == null) { 85 97 for (i = 0, count = this._patterns.length; i < count; i++) { 86 this._patterns[i] = '(' + this._ patterns[i].replace(/([\/\(\)])/g,'\\$1') + ')';98 this._patterns[i] = '(' + this._untokenizeRegex(this._tokenizeRegex(this._patterns[i]).replace(/([\/\(\)])/g,'\\$1')) + ')'; 87 99 } 88 100 this._regex = new RegExp(this._patterns.join("|") ,this._getPerlMatchingFlags()); … … 91 103 return this._regex; 92 104 } 105 106 /** 107 * Escape lookahead/lookbehind blocks 108 */ 109 ParallelRegex.prototype._tokenizeRegex = function(regex) { 110 return regex. 111 replace(/\(\?(i|m|s|x|U)\)/, '~~~~~~Tk1\$1~~~~~~'). 112 replace(/\(\?(\-[i|m|s|x|U])\)/, '~~~~~~Tk2\$1~~~~~~'). 113 replace(/\(\?\=(.*)\)/, '~~~~~~Tk3\$1~~~~~~'). 114 replace(/\(\?\!(.*)\)/, '~~~~~~Tk4\$1~~~~~~'). 115 replace(/\(\?\<\=(.*)\)/, '~~~~~~Tk5\$1~~~~~~'). 116 replace(/\(\?\<\!(.*)\)/, '~~~~~~Tk6\$1~~~~~~'). 117 replace(/\(\?\:(.*)\)/, '~~~~~~Tk7\$1~~~~~~'); 118 } 119 120 /** 121 * Unscape lookahead/lookbehind blocks 122 */ 123 ParallelRegex.prototype._untokenizeRegex = function(regex) { 124 return regex. 125 replace(/~~~~~~Tk1(.{1})~~~~~~/, "(?\$1)"). 126 replace(/~~~~~~Tk2(.{2})~~~~~~/, "(?\$1)"). 127 replace(/~~~~~~Tk3(.*)~~~~~~/, "(?=\$1)"). 128 replace(/~~~~~~Tk4(.*)~~~~~~/, "(?!\$1)"). 129 replace(/~~~~~~Tk5(.*)~~~~~~/, "(?<=\$1)"). 130 replace(/~~~~~~Tk6(.*)~~~~~~/, "(?<!\$1)"). 131 replace(/~~~~~~Tk7(.*)~~~~~~/, "(?:\$1)"); 132 } 133 93 134 94 135 /** … … 286 327 * @access public 287 328 */ 288 Lexer.prototype.parse = function (raw) { 329 Lexer.prototype.parse = function (raw) { 330 289 331 if (this._parser == undefined) { 290 332 return false; 291 333 } 334 292 335 length = raw.length; 293 336 //parsed = this._reduce(raw); -
branches/bermi/wymeditor/vendors/xhtml_parser.js
r286 r385 1 /** 2 * Breas XHTML into SAX events. 3 * @package Parser 4 * @subpackage WebTester 1 /* Copyright (c) 2007 Bermi Ferrer (http://bermi.org) 2 * Dual licensed under the MIT (http://www.opensource.org/licenses/mit-license.php) 3 * and GPL (http://www.opensource.org/licenses/gpl-license.php) licenses. 5 4 * 6 * Sets up the lexer with case insensitive matching 7 * and adds the HTML handlers. 8 * @param SaxParser parser Handling strategy by 9 * reference. 10 * @access public 11 */ 12 function XhtmlLexer(parser) { 13 this.extends(new Lexer(parser, 'text')); 14 this.mapHandler('text', 'acceptTextToken'); 15 this._addSkipping(); 16 var parsed_tags = this._getParsedTags(); 17 for (i = 0, count = parsed_tags.length; i < count; i++) { 18 this._addTag(parsed_tags[i]); 19 } 20 this._addInTagTokens(); 21 } 22 23 /** 24 * List of parsed tags. Others are ignored. 25 * @return array List of searched for tags. 5 * Version: 1.0 6 */ 7 8 Object.prototype.extends = function (oSuper) { 9 for (sProperty in oSuper) { 10 this[sProperty] = oSuper[sProperty]; 11 } 12 } 13 14 String.prototype.trim = function () { 15 return this.replace(/^(\s*)|(\s*)$/gm,''); 16 } 17 18 19 Array.prototype.contains = function (elem) { 20 for (var i = 0; i < this.length; i++) { 21 if (this[i] === elem) { 22 return true; 23 } 24 } 25 return false; 26 }; 27 28 29 /* 30 * @name xml 31 * @description Use these methods to generate XML and XHTML compliant tags and 32 * escape tag attributes correctly 33 * @author Bermi Ferrer - http://bermi.org 34 * @author David Heinemeier Hansson http://loudthinking.com 35 */ 36 function XmlHelper() 37 { 38 this._entitiesDiv = document.createElement('div'); 39 } 40 41 42 /* 43 * @name tag 44 * @description 45 * Returns an empty HTML tag of type *name* which by default is XHTML 46 * compliant. Setting *open* to true will create an open tag compatible 47 * with HTML 4.0 and below. Add HTML attributes by passing an attributes 48 * array to *options*. For attributes with no value like (disabled and 49 * readonly), give it a value of true in the *options* array. 50 * 51 * Examples: 52 * 53 * this.tag('br') 54 * # => <br /> 55 * this.tag ('br', false, true) 56 * # => <br> 57 * this.tag ('input', $j({type:'text',disabled:true }) ) 58 * # => <input type="text" disabled="disabled" /> 59 */ 60 XmlHelper.prototype.tag = function(name, options, open) 61 { 62 options = options || false; 63 open = open || false; 64 return '<'+name+(options ? this.tagOptions(options) : '')+(open ? '>' : ' />'); 65 } 66 67 /* 68 * @name contentTag 69 * @description 70 * Returns a XML block tag of type *name* surrounding the *content*. Add 71 * XML attributes by passing an attributes array to *options*. For attributes 72 * with no value like (disabled and readonly), give it a value of true in 73 * the *options* array. You can use symbols or strings for the attribute names. 74 * 75 * this.contentTag ('p', 'Hello world!' ) 76 * # => <p>Hello world!</p> 77 * this.contentTag('div', this.contentTag('p', "Hello world!"), $j({class : "strong"})) 78 * # => <div class="strong"><p>Hello world!</p></div> 79 * this.contentTag("select", options, $j({multiple : true})) 80 * # => <select multiple="multiple">...options...</select> 81 */ 82 XmlHelper.prototype.contentTag = function(name, content, options) 83 { 84 options = options || false; 85 return '<'+name+(options ? this.tagOptions(options) : '')+'>'+content+'</'+name+'>'; 86 } 87 88 /* 89 * @name cdataSection 90 * @description 91 * Returns a CDATA section for the given +content+. CDATA sections 92 * are used to escape blocks of text containing characters which would 93 * otherwise be recognized as markup. CDATA sections begin with the string 94 * <tt><![CDATA[</tt> and } with (and may not contain) the string 95 * <tt>]]></tt>. 96 */ 97 XmlHelper.prototype.cdataSection = function(content) 98 { 99 return '<![CDATA['+content+']]>'; 100 } 101 102 103 /* 104 * @name escapeOnce 105 * @description 106 * Returns the escaped +xml+ without affecting existing escaped entities. 107 * 108 * this.escapeOnce( "1 > 2 & 3") 109 * # => "1 > 2 & 3" 110 */ 111 XmlHelper.prototype.escapeOnce = function(xml) 112 { 113 return this._fixDoubleEscape(this.escapeEntities(xml)); 114 } 115 116 /* 117 * @name _fixDoubleEscape 118 * @description 119 * Fix double-escaped entities, such as &amp;, &#123;, etc. 120 */ 121 XmlHelper.prototype._fixDoubleEscape = function(escaped) 122 { 123 return escaped.replace(/&([a-z]+|(#\d+));/i, "&$1;"); 124 } 125 126 /* 127 * @name tagOptions 128 * @description 129 * Takes an array like the one generated by Tag.parseAttributes 130 * [["src", "http://www.editam.com/?a=b&c=d&f=g"], ["title", "Editam, <Simplified> CMS"]] 131 * or an object like {src:"http://www.editam.com/?a=b&c=d&f=g", title:"Editam, <Simplified> CMS"} 132 * and returns a string properly escaped like 133 * ' src = "http://www.editam.com/?a=b&c=d&f=g" title = "Editam, <Simplified> CMS"' 134 * which is valid for strict XHTML 135 */ 136 XmlHelper.prototype.tagOptions = function(options) 137 { 138 var xml = this; 139 xml._formated_options = ''; 140 141 for (key in options) { 142 var formated_options = ''; 143 value = options[key]; 144 if(typeof value != 'function' && value.length > 0) { 145 if(parseInt(key) == key && typeof value == 'object'){ 146 key = value.shift(); 147 value = value.pop(); 148 } 149 if(key != '' && value != ''){ 150 xml._formated_options += ' '+key+'="'+xml.escapeOnce(value)+'"'; 151 } 152 } 153 } 154 return xml._formated_options; 155 } 156 157 /* 158 * @name escapeEntities 159 * @description 160 * Escapes XML/HTML entities <, >, & and ". If seccond parameter is set to false it 161 * will not escape ". If set to true it will also escape ' 162 */ 163 XmlHelper.prototype.escapeEntities = function(string, escape_quotes) 164 { 165 this._entitiesDiv.textContent = string; 166 var result = this._entitiesDiv.innerHTML; 167 if(typeof escape_quotes == 'undefined'){ 168 if(escape_quotes != false) result = result.replace('"', '"'); 169 if(escape_quotes == true) result = result.replace('"', '''); 170 } 171 return result; 172 } 173 174 /* 175 * Parses a string conatining tag attributes and values an returns an array formated like 176 * [["src", "http://www.editam.com"], ["title", "Editam, Simplified CMS"]] 177 */ 178 XmlHelper.prototype.parseAttributes = function(tag_attributes) 179 { 180 // Use a compounded regex to match single quoted, double quoted and unquoted attribute pairs 181 var result = []; 182 var matches = tag_attributes.split(/((=\s*")(")("))|((=\s*\')(\')(\'))|((=\s*[^>\s]*))/g); 183 if(matches.toString() != tag_attributes){ 184 for (k in matches) { 185 var v = matches[k]; 186 if(typeof v != 'function' && v.length != 0){ 187 var re = new RegExp('(\\w+)\\s*'+v); 188 if(match = tag_attributes.match(re) ){ 189 var value = v.replace(/^[\s=]+/, ""); 190 var delimiter = value.charAt(0); 191 delimiter = delimiter == '"' ? '"' : (delimiter=="'"?"'":''); 192 if(delimiter != ''){ 193 value = delimiter == '"' ? value.replace(/^"|"+$/g, '') : value.replace(/^'|'+$/g, ''); 194 } 195 tag_attributes = tag_attributes.replace(match[0],''); 196 result.push([match[1] , value]); 197 } 198 } 199 } 200 } 201 return result; 202 } 203 204 205 206 /** 207 * Compounded regular expression. Any of 208 * the contained patterns could match and 209 * when one does, it's label is returned. 210 * 211 * Constructor. Starts with no patterns. 212 * @param boolean case True for case sensitive, false 213 * for insensitive. 214 * @access public 215 * @author Marcus Baker (http://lastcraft.com) 216 * @author Bermi Ferrer (http://bermi.org) 217 */ 218 function ParallelRegex(case_sensitive) 219 { 220 this._case = case_sensitive; 221 this._patterns = []; 222 this._labels = []; 223 this._regex = null; 224 } 225 226 227 /** 228 * Adds a pattern with an optional label. 229 * @param string pattern Perl style regex, but ( and ) 230 * lose the usual meaning. 231 * @param string label Label of regex to be returned 232 * on a match. 233 * @access public 234 */ 235 ParallelRegex.prototype.addPattern = function(pattern, label) 236 { 237 label = label || true; 238 var count = this._patterns.length; 239 this._patterns[count] = pattern; 240 this._labels[count] = label; 241 this._regex = null; 242 } 243 244 /** 245 * Attempts to match all patterns at once against 246 * a string. 247 * @param string subject String to match against. 248 * 249 * @return boolean True on success. 250 * @return string match First matched portion of 251 * subject. 252 * @access public 253 */ 254 ParallelRegex.prototype.match = function(subject) 255 { 256 if (this._patterns.length == 0) { 257 return [false, '']; 258 } 259 matches = subject.match(this._getCompoundedRegex()); 260 261 if(!matches){ 262 return [false, '']; 263 } 264 var match = matches[0]; 265 for (i = 1; i < matches.length; i++) { 266 if (matches[i]) { 267 return [this._labels[i-1], match]; 268 } 269 } 270 return [true, matches[0]]; 271 } 272 273 /** 274 * Compounds the patterns into a single 275 * regular expression separated with the 276 * "or" operator. Caches the regex. 277 * Will automatically escape (, ) and / tokens. 278 * @param array patterns List of patterns in order. 26 279 * @access private 27 280 */ 28 XhtmlLexer.prototype._getParsedTags = function () { 29 return ["a", "abbr", "acronym", "address", "area", "b", 30 "base", "bdo", "big", "blockquote", "body", "br", "button", 31 "caption", "cite", "code", "col", "colgroup", "dd", "del", "div", 32 "dfn", "dl", "dt", "em", "fieldset", "form", "head", "h1", "h2", 33 "h3", "h4", "h5", "h6", "hr", "html", "i", "img", "input", "ins", 34 "kbd", "label", "legend", "li", "link", "map", "meta", "noscript", 35 "object", "ol", "optgroup", "option", "p", "param", "pre", "q", 36 "samp", "script", "select", "small", "span", "strong", "style", 37 "sub", "sup", "table", "tbody", "td", "textarea", "tfoot", "th", 281 ParallelRegex.prototype._getCompoundedRegex = function() 282 { 283 if (this._regex == null) { 284 for (i = 0, count = this._patterns.length; i < count; i++) { 285 this._patterns[i] = '(' + this._untokenizeRegex(this._tokenizeRegex(this._patterns[i]).replace(/([\/\(\)])/g,'\\$1')) + ')'; 286 } 287 this._regex = new RegExp(this._patterns.join("|") ,this._getPerlMatchingFlags()); 288 } 289 return this._regex; 290 } 291 292 /** 293 * Escape lookahead/lookbehind blocks 294 */ 295 ParallelRegex.prototype._tokenizeRegex = function(regex) 296 { 297 return regex. 298 replace(/\(\?(i|m|s|x|U)\)/, '~~~~~~Tk1\$1~~~~~~'). 299 replace(/\(\?(\-[i|m|s|x|U])\)/, '~~~~~~Tk2\$1~~~~~~'). 300 replace(/\(\?\=(.*)\)/, '~~~~~~Tk3\$1~~~~~~'). 301 replace(/\(\?\!(.*)\)/, '~~~~~~Tk4\$1~~~~~~'). 302 replace(/\(\?\<\=(.*)\)/, '~~~~~~Tk5\$1~~~~~~'). 303 replace(/\(\?\<\!(.*)\)/, '~~~~~~Tk6\$1~~~~~~'). 304 replace(/\(\?\:(.*)\)/, '~~~~~~Tk7\$1~~~~~~'); 305 } 306 307 /** 308 * Unscape lookahead/lookbehind blocks 309 */ 310 ParallelRegex.prototype._untokenizeRegex = function(regex) 311 { 312 return regex. 313 replace(/~~~~~~Tk1(.{1})~~~~~~/, "(?\$1)"). 314 replace(/~~~~~~Tk2(.{2})~~~~~~/, "(?\$1)"). 315 replace(/~~~~~~Tk3(.*)~~~~~~/, "(?=\$1)"). 316 replace(/~~~~~~Tk4(.*)~~~~~~/, "(?!\$1)"). 317 replace(/~~~~~~Tk5(.*)~~~~~~/, "(?<=\$1)"). 318 replace(/~~~~~~Tk6(.*)~~~~~~/, "(?<!\$1)"). 319 replace(/~~~~~~Tk7(.*)~~~~~~/, "(?:\$1)"); 320 } 321 322 323 /** 324 * Accessor for perl regex mode flags to use. 325 * @return string Perl regex flags. 326 * @access private 327 */ 328 ParallelRegex.prototype._getPerlMatchingFlags = function() 329 { 330 return (this._case ? "m" : "mi"); 331 } 332 333 334 335 /** 336 * States for a stack machine. 337 * 338 * Constructor. Starts in named state. 339 * @param string start Starting state name. 340 * @access public 341 * @author Marcus Baker (http://lastcraft.com) 342 * @author Bermi Ferrer (http://bermi.org) 343 */ 344 function StateStack( start) 345 { 346 this._stack = [start]; 347 } 348 349 /** 350 * Accessor for current state. 351 * @return string State. 352 * @access public 353 */ 354 StateStack.prototype.getCurrent = function () 355 { 356 return this._stack[this._stack.length - 1]; 357 } 358 359 /** 360 * Adds a state to the stack and sets it 361 * to be the current state. 362 * @param string state New state. 363 * @access public 364 */ 365 StateStack.prototype.enter = function (state) 366 { 367 this._stack.push(state); 368 } 369 370 /** 371 * Leaves the current state and reverts 372 * to the previous one. 373 * @return boolean False if we drop off 374 * the bottom of the list. 375 * @access public 376 */ 377 StateStack.prototype.leave = function () 378 { 379 if (this._stack.length == 1) { 380 return false; 381 } 382 this._stack.pop(); 383 return true; 384 } 385 386 387 388 var LEXER_ENTER = 1; 389 var LEXER_MATCHED = 2; 390 var LEXER_UNMATCHED = 3; 391 var LEXER_EXIT = 4; 392 var LEXER_SPECIAL = 5; 393 394 395 /** 396 * Accepts text and breaks it into tokens. 397 * Some optimisation to make the sure the 398 * content is only scanned by the PHP regex 399 * parser once. Lexer modes must not start 400 * with leading underscores. 401 * 402 * Sets up the lexer in case insensitive matching 403 * by default. 404 * @param Parser parser Handling strategy by reference. 405 * @param string start Starting handler. 406 * @param boolean case True for case sensitive. 407 * @access public 408 * @author Marcus Baker (http://lastcraft.com) 409 * @author Bermi Ferrer (http://bermi.org) 410 */ 411 function Lexer(parser, start, case_sensitive) 412 { 413 start = start || 'accept'; 414 this._case = case_sensitive || false; 415 this._regexes = {}; 416 this._parser = parser; 417 this._mode = new StateStack(start); 418 this._mode_handlers = {}; 419 this._mode_handlers[start] = start; 420 } 421 422 /** 423 * Adds a token search pattern for a particular 424 * parsing mode. The pattern does not change the 425 * current mode. 426 * @param string pattern Perl style regex, but ( and ) 427 * lose the usual meaning. 428 * @param string mode Should only apply this 429 * pattern when dealing with 430 * this type of input. 431 * @access public 432 */ 433 Lexer.prototype.addPattern = function (pattern, mode) 434 { 435 var mode = mode || "accept"; 436 if (this._regexes[mode] == undefined) { 437 this._regexes[mode] = new ParallelRegex(this._case); 438 } 439 this._regexes[mode].addPattern(pattern); 440 if (this._mode_handlers[mode] == undefined) { 441 this._mode_handlers[mode] = mode; 442 } 443 } 444 445 /** 446 * Adds a pattern that will enter a new parsing 447 * mode. Useful for entering parenthesis, strings, 448 * tags, etc. 449 * @param string pattern Perl style regex, but ( and ) 450 * lose the usual meaning. 451 * @param string mode Should only apply this 452 * pattern when dealing with 453 * this type of input. 454 * @param string new_mode Change parsing to this new 455 * nested mode. 456 * @access public 457 */ 458 Lexer.prototype.addEntryPattern = function (pattern, mode, new_mode) 459 { 460 if (this._regexes[mode] == undefined) { 461 this._regexes[mode] = new ParallelRegex(this._case); 462 } 463 this._regexes[mode].addPattern(pattern, new_mode); 464 if (this._mode_handlers[new_mode] == undefined) { 465 this._mode_handlers[new_mode] = new_mode; 466 } 467 } 468 469 /** 470 * Adds a pattern that will exit the current mode 471 * and re-enter the previous one. 472 * @param string pattern Perl style regex, but ( and ) 473 * lose the usual meaning. 474 * @param string mode Mode to leave. 475 * @access public 476 */ 477 Lexer.prototype.addExitPattern = function (pattern, mode) 478 { 479 if (this._regexes[mode] == undefined) { 480 this._regexes[mode] = new ParallelRegex(this._case); 481 } 482 this._regexes[mode].addPattern(pattern, "__exit"); 483 if (this._mode_handlers[mode] == undefined) { 484 this._mode_handlers[mode] = mode; 485 } 486 } 487 488 /** 489 * Adds a pattern that has a special mode. Acts as an entry 490 * and exit pattern in one go, effectively calling a special 491 * parser handler for this token only. 492 * @param string pattern Perl style regex, but ( and ) 493 * lose the usual meaning. 494 * @param string mode Should only apply this 495 * pattern when dealing with 496 * this type of input. 497 * @param string special Use this mode for this one token. 498 * @access public 499 */ 500 Lexer.prototype.addSpecialPattern = function (pattern, mode, special) 501 { 502 if (this._regexes[mode] == undefined) { 503 this._regexes[mode] = new ParallelRegex(this._case); 504 } 505 this._regexes[mode].addPattern(pattern, '_'+special); 506 if (this._mode_handlers[special] == undefined) { 507 this._mode_handlers[special] = special; 508 } 509 } 510 511 /** 512 * Adds a mapping from a mode to another handler. 513 * @param string mode Mode to be remapped. 514 * @param string handler New target handler. 515 * @access public 516 */ 517 Lexer.prototype.mapHandler = function (mode, handler) 518 { 519 this._mode_handlers[mode] = handler; 520 } 521 522 /** 523 * Splits the page text into tokens. Will fail 524 * if the handlers report an error or if no 525 * content is consumed. If successful then each 526 * unparsed and parsed token invokes a call to the 527 * held listener. 528 * @param string raw Raw HTML text. 529 * @return boolean True on success, else false. 530 * @access public 531 */ 532 Lexer.prototype.parse = function (raw) 533 { 534 if (this._parser == undefined) { 535 return false; 536 } 537 var length = raw.length; 538 while (typeof (parsed = this._reduce(raw)) == 'object') { 539 var raw = parsed[0]; 540 var unmatched = parsed[1]; 541 var matched = parsed[2]; 542 var mode = parsed[3]; 543 544 if (! this._dispatchTokens(unmatched, matched, mode)) { 545 return false; 546 } 547 548 if (raw == '') { 549 return true; 550 } 551 if (raw.length == length) { 552 return false; 553 } 554 length = raw.length; 555 } 556 if (! parsed ) { 557 return false; 558 } 559 return this._invokeParser(raw, LEXER_UNMATCHED); 560 } 561 562 /** 563 * Sends the matched token and any leading unmatched 564 * text to the parser changing the lexer to a new 565 * mode if one is listed. 566 * @param string unmatched Unmatched leading portion. 567 * @param string matched Actual token match. 568 * @param string mode Mode after match. A boolean 569 * false mode causes no change. 570 * @return boolean False if there was any error 571 * from the parser. 572 * @access private 573 */ 574 Lexer.prototype._dispatchTokens = function (unmatched, matched, mode) 575 { 576 mode = mode || false; 577 if (! this._invokeParser(unmatched, LEXER_UNMATCHED)) { 578 return false; 579 } 580 if (typeof mode == 'boolean') { 581 return this._invokeParser(matched, LEXER_MATCHED); 582 } 583 if (this._isModeEnd(mode)) { 584 if (! this._invokeParser(matched, LEXER_EXIT)) { 585 return false; 586 } 587 return this._mode.leave(); 588 } 589 if (this._isSpecialMode(mode)) { 590 this._mode.enter(this._decodeSpecial(mode)); 591 if (! this._invokeParser(matched, LEXER_SPECIAL)) { 592 return false; 593 } 594 return this._mode.leave(); 595 } 596 this._mode.enter(mode); 597 return this._invokeParser(matched, LEXER_ENTER); 598 } 599 600 /** 601 * Tests to see if the new mode is actually to leave 602 * the current mode and pop an item from the matching 603 * mode stack. 604 * @param string mode Mode to test. 605 * @return boolean True if this is the exit mode. 606 * @access private 607 */ 608 Lexer.prototype._isModeEnd = function (mode) 609 { 610 return (mode === "__exit"); 611 } 612 613 /** 614 * Test to see if the mode is one where this mode 615 * is entered for this token only and automatically 616 * leaves immediately afterwoods. 617 * @param string mode Mode to test. 618 * @return boolean True if this is the exit mode. 619 * @access private 620 */ 621 Lexer.prototype._isSpecialMode = function (mode) 622 { 623 return (mode[0] == "_"); 624 } 625 626 /** 627 * Strips the magic underscore marking single token 628 * modes. 629 * @param string mode Mode to decode. 630 * @return string Underlying mode name. 631 * @access private 632 */ 633 Lexer.prototype._decodeSpecial = function (mode) 634 { 635 return mode.substring(1); 636 } 637 638 /** 639 * Calls the parser method named after the current 640 * mode. Empty content will be ignored. The lexer 641 * has a parser handler for each mode in the lexer. 642 * @param string content Text parsed. 643 * @param boolean is_match Token is recognised rather 644 * than unparsed data. 645 * @access private 646 */ 647 Lexer.prototype._invokeParser = function (content, is_match) 648 { 649 650 if (!/ +/.test(content) && ((content === '') || (content == false))) { 651 return true; 652 } 653 var current = this._mode.getCurrent(); 654 var handler = this._mode_handlers[current]; 655 var result; 656 eval('result = this._parser.' + handler + '(content, is_match);') 657 return result; 658 } 659 660 /** 661 * Tries to match a chunk of text and if successful 662 * removes the recognised chunk and any leading 663 * unparsed data. Empty strings will not be matched. 664 * @param string raw The subject to parse. This is the 665 * content that will be eaten. 666 * @return array/boolean Three item list of unparsed 667 * content followed by the 668 * recognised token and finally the 669 * action the parser is to take. 670 * True if no match, false if there 671 * is a parsing error. 672 * @access private 673 */ 674 Lexer.prototype._reduce = function (raw) 675 { 676 var matched = this._regexes[this._mode.getCurrent()].match(raw); 677 var match = matched[1]; 678 var action = matched[0]; 679 if (action) { 680 unparsed_character_count = raw.indexOf(match); 681 unparsed = raw.substr(0, unparsed_character_count); 682 raw = raw.substring(unparsed_character_count + match.length); 683 return [raw, unparsed, match, action]; 684 } 685 return true; 686 } 687 688 689 690 /** 691 * This are the rules for breaking the XHTML code into events 692 * handled by the provided parser. 693 * 694 * @author Marcus Baker (http://lastcraft.com) 695 * @author Bermi Ferrer (http://bermi.org) 696 */ 697 function XhtmlLexer(parser) 698 { 699 this.extends(new Lexer(parser, 'Text')); 700 this.mapHandler('Text', 'Text'); 701 702 this.addCommentTokens('Text'); 703 this.addScriptTokens('Text'); 704 this.addCssTokens('Text'); 705 this.addTagTokens('Text'); 706 this.init(); 707 } 708 709 XhtmlLexer.prototype.init = function() 710 { 711 } 712 713 XhtmlLexer.prototype.addCommentTokens = function(scope) 714 { 715 this.addEntryPattern("<!--", scope, 'Comment'); 716 this.addExitPattern("-.", 'Comment'); 717 } 718 719 XhtmlLexer.prototype.addScriptTokens = function(scope) 720 { 721 this.addEntryPattern("<script", scope, 'Script'); 722 this.addExitPattern("</script>", 'Script'); 723 } 724 725 XhtmlLexer.prototype.addCssTokens = function(scope) 726 { 727 this.addEntryPattern("<style", scope, 'Css'); 728 this.addExitPattern("</style>", 'Css'); 729 } 730 731 XhtmlLexer.prototype.addTagTokens = function(scope) 732 { 733 this.addSpecialPattern("<\s*[a-z]+\s*>", scope, 'OpeningTag'); 734 this.addEntryPattern("<[a-z]+"+'[\\\/ \\\>]+', scope, 'OpeningTag'); 735 this.addInTagDeclarationTokens('OpeningTag'); 736 737 this.addSpecialPattern("</\s*[a-z]+\s*>", scope, 'ClosingTag'); 738 739 } 740 741 XhtmlLexer.prototype.addInTagDeclarationTokens = function(scope) 742 { 743 this.addSpecialPattern('\s+', scope, 'Ignore'); 744 745 this.addAttributeTokens(scope); 746 747 this.addExitPattern('/>', scope); 748 this.addExitPattern('>', scope); 749 750 } 751 752 XhtmlLexer.prototype.addAttributeTokens = function(scope) 753 { 754 this.addSpecialPattern("\s*[a-z-_0-9]+\s*(?=\=)\s*", scope, 'TagAttributes'); 755 756 this.addEntryPattern('=\s*"', scope, 'DoubleQuotedAttribute'); 757 this.addPattern("\\\\\"", 'DoubleQuotedAttribute'); 758 this.addExitPattern('"', 'DoubleQuotedAttribute'); 759 760 this.addEntryPattern("=\s*'", scope, 'SingleQuotedAttribute'); 761 this.addPattern("\\\\'", 'SingleQuotedAttribute'); 762 this.addExitPattern("'", 'SingleQuotedAttribute'); 763 764 this.addSpecialPattern('=\s*[^>\s]*', scope, 'UnquotedAttribute'); 765 } 766 767 768 769 /** 770 * XHTML Parser. 771 * 772 * This XHTML parser will trigger the events available on on 773 * current SaxListener 774 * 775 * @author Bermi Ferrer (http://bermi.org) 776 */ 777 function XhtmlParser(Listener, mode) 778 { 779 var mode = mode || 'Text'; 780 this._Lexer = new XhtmlLexer(this); 781 this._Listener = Listener; 782 this._mode = mode; 783 this._matches = []; 784 this._last_match = ''; 785 this._current_match = ''; 786 } 787 788 XhtmlParser.prototype.parse = function(raw) 789 { 790 this._Lexer.parse(this.beforeParsing(raw)); 791 return this.afterParsing(this._Listener.getResult()); 792 } 793 794 XhtmlParser.prototype.beforeParsing = function(raw) 795 { 796 return this._Listener.beforeParsing(raw); 797 } 798 799 XhtmlParser.prototype.afterParsing = function(parsed) 800 { 801 return this._Listener.afterParsing(parsed); 802 } 803 804 805 XhtmlParser.prototype.Ignore = function(match, state) 806 { 807 return true; 808 } 809 810 XhtmlParser.prototype.Text = function(text) 811 { 812 this._Listener.addContent(text); 813 return true; 814 } 815 816 XhtmlParser.prototype.Comment = function(match, status) 817 { 818 return this._addNonTagBlock(match, status, 'addComment'); 819 } 820 821 XhtmlParser.prototype.Script = function(match, status) 822 { 823 return this._addNonTagBlock(match, status, 'addScript'); 824 } 825 826 XhtmlParser.prototype.Css = function(match, status) 827 { 828 return this._addNonTagBlock(match, status, 'addCss'); 829 } 830 831 XhtmlParser.prototype._addNonTagBlock = function(match, state, type) 832 { 833 switch (state){ 834 case LEXER_ENTER: 835 this._non_tag = match; 836 break; 837 case LEXER_UNMATCHED: 838 this._non_tag += match; 839 break; 840 case LEXER_EXIT: 841 switch(type) { 842 case 'addComment': 843 this._Listener.addComment(this._non_tag+match); 844 break; 845 case 'addScript': 846 this._Listener.addScript(this._non_tag+match); 847 break; 848 case 'addCss': 849 this._Listener.addCss(this._non_tag+match); 850 break; 851 } 852 } 853 return true; 854 } 855 856 XhtmlParser.prototype.OpeningTag = function(match, state) 857 { 858 switch (state){ 859 case LEXER_ENTER: 860 this._tag = this.normalizeTag(match); 861 this._tag_attributes = {}; 862 break; 863 case LEXER_SPECIAL: 864 this._callOpenTagListener(this.normalizeTag(match)); 865 break; 866 case LEXER_EXIT: 867 this._callOpenTagListener(this._tag, this._tag_attributes); 868 } 869 return true; 870 } 871 872 XhtmlParser.prototype.ClosingTag = function(match, state) 873 { 874 this._callCloseTagListener(this.normalizeTag(match)); 875 return true; 876 } 877 878 XhtmlParser.prototype._callOpenTagListener = function(tag, attributes) 879 { 880 var attributes = attributes || {}; 881 this.autoCloseUnclosedBeforeNewOpening(tag); 882 883 this._Listener.last_tag = tag; 884 this._Listener.last_tag_attributes = attributes; 885 886 if(this._Listener.block_tags.contains(tag)){ 887 this._Listener.openBlockTag(tag, attributes); 888 this._increaseOpenTagCounter(tag); 889 }else if(this._Listener.inline_tags.contains(tag)){ 890 this._Listener.inlineTag(tag, attributes); 891 }else{ 892 this._Listener.openUnknownTag(tag, attributes); 893 } 894 } 895 896 XhtmlParser.prototype._callCloseTagListener = function(tag) 897 { 898 if(this._decreaseOpenTagCounter(tag)){ 899 this.autoCloseUnclosedBeforeTagClosing(tag); 900 if(this._Listener.block_tags.contains(tag)){ 901 this._Listener.closeBlockTag(tag); 902 }else{ 903 this._Listener.closeUnknownTag(tag); 904 } 905 }else{ 906 this._Listener.closeUnopenedTag(tag); 907 } 908 } 909 910 XhtmlParser.prototype._increaseOpenTagCounter = function(tag) 911 { 912 this._Listener._open_tags[tag] = this._Listener._open_tags[tag] || 0; 913 this._Listener._open_tags[tag]++; 914 } 915 916 XhtmlParser.prototype._decreaseOpenTagCounter = function(tag) 917 { 918 if(this._Listener._open_tags[tag]){ 919 this._Listener._open_tags[tag]--; 920 if(this._Listener._open_tags[tag] == 0){ 921 this._Listener._open_tags[tag] = undefined; 922 } 923 return true; 924 } 925 return false; 926 } 927 928 XhtmlParser.prototype.autoCloseUnclosedBeforeNewOpening = function(new_tag) 929 { 930 this._autoCloseUnclosed(new_tag, false); 931 } 932 933 XhtmlParser.prototype.autoCloseUnclosedBeforeTagClosing = function(tag) 934 { 935 this._autoCloseUnclosed(tag, true); 936 } 937 938 XhtmlParser.prototype._autoCloseUnclosed = function(new_tag, closing) 939 { 940 var closing = closing || false; 941 if(this._Listener._open_tags){ 942 for (tag in this._Listener._open_tags) { 943 counter = this._Listener._open_tags[tag]; 944 if(counter > 0 && this._Listener.shouldCloseTagAutomatically(tag, new_tag, closing)){ 945 this._callCloseTagListener(tag, true); 946 } 947 } 948 } 949 } 950 951 XhtmlParser.prototype.getTagReplacements = function() 952 { 953 return this._Listener.getTagReplacements(); 954 } 955 956 XhtmlParser.prototype.normalizeTag = function(tag) 957 { 958 tag = tag.replace(/^([\s<\/>]*)|([\s<\/>]*)$/gm,'').toLowerCase(); 959 tags = this._Listener.getTagReplacements(); 960 if(tags[tag]){ 961 return tags[tag]; 962 } 963 return tag; 964 } 965 966 XhtmlParser.prototype.TagAttributes = function(match, state) 967 { 968 if(LEXER_SPECIAL == state){ 969 this._current_attribute = match; 970 } 971 return true; 972 } 973 974 XhtmlParser.prototype.DoubleQuotedAttribute = function(match, state) 975 { 976 if(LEXER_UNMATCHED == state){ 977 this._tag_attributes[this._current_attribute] = match; 978 } 979 return true; 980 } 981 XhtmlParser.prototype.SingleQuotedAttribute = function(match, state) 982 { 983 if(LEXER_UNMATCHED == state){ 984 this._tag_attributes[this._current_attribute] = match; 985 } 986 return true; 987 } 988 XhtmlParser.prototype.UnquotedAttribute = function(match, state) 989 { 990 if(LEXER_UNMATCHED == state){ 991 this._tag_attributes[this._current_attribute] = match; 992 } 993 return true; 994 } 995 996 997 998 /** 999 * XHTML Sax parser. 1000 * 1001 * @author Bermi Ferrer (http://bermi.org) 1002 */ 1003 function XhtmlSaxListener() 1004 { 1005 this.xhtml = ''; 1006 this.helper = new XmlHelper(); 1007 this._open_tags = {}; 1008 1009 this.entities = { 1010 ' ':' ','¡':'¡','¢':'¢', 1011 '£':'£','¤':'¤','¥':'¥', 1012 '¦':'¦','§':'§','¨':'¨', 1013 '©':'©','ª':'ª','«':'«', 1014 '¬':'¬','­':'­','®':'®', 1015 '¯':'¯','°':'°','±':'±', 1016 '²':'²','³':'³','´':'´', 1017 'µ':'µ','¶':'¶','·':'·', 1018 '¸':'¸','¹':'¹','º':'º', 1019 '»':'»','¼':'¼','½':'½', 1020 '¾':'¾','¿':'¿','À':'À', 1021 'Á':'Á','Â':'Â','Ã':'Ã', 1022 'Ä':'Ä','Å':'Å','Æ':'Æ', 1023 'Ç':'Ç','È':'È','É':'É', 1024 'Ê':'Ê','Ë':'Ë','Ì':'Ì', 1025 'Í':'Í','Î':'Î','Ï':'Ï', 1026 'Ð':'Ð','Ñ':'Ñ','Ò':'Ò', 1027 'Ó':'Ó','Ô':'Ô','Õ':'Õ', 1028 'Ö':'Ö','×':'×','Ø':'Ø', 1029 'Ù':'Ù','Ú':'Ú','Û':'Û', 1030 'Ü':'Ü','Ý':'Ý','Þ':'Þ', 1031 'ß':'ß','à':'à','á':'á', 1032 'â':'â','ã':'ã','ä':'ä', 1033 'å':'å','æ':'æ','ç':'ç', 1034 'è':'è','é':'é','ê':'ê', 1035 'ë':'ë','ì':'ì','í':'í', 1036 'î':'î','ï':'ï','ð':'ð', 1037 'ñ':'ñ','ò':'ò','ó':'ó', 1038 'ô':'ô','õ':'õ','ö':'ö', 1039 '÷':'÷','ø':'ø','ù':'ù', 1040 'ú':'ú','û':'û','ü':'ü', 1041 'ý':'ý','þ':'þ','ÿ':'ÿ', 1042 'Œ':'Œ','œ':'œ','Š':'Š', 1043 'š':'š','Ÿ':'Ÿ','ƒ':'ƒ', 1044 'ˆ':'ˆ','˜':'˜','Α':'Α', 1045 'Β':'Β','Γ':'Γ','Δ':'Δ', 1046 'Ε':'Ε','Ζ':'Ζ','Η':'Η', 1047 'Θ':'Θ','Ι':'Ι','Κ':'Κ', 1048 'Λ':'Λ','Μ':'Μ','Ν':'Ν', 1049 'Ξ':'Ξ','Ο':'Ο','Π':'Π', 1050 'Ρ':'Ρ','Σ':'Σ','Τ':'Τ', 1051 'Υ':'Υ','Φ':'Φ','Χ':'Χ', 1052 'Ψ':'Ψ','Ω':'Ω','α':'α', 1053 'β':'β','γ':'γ','δ':'δ', 1054 'ε':'ε','ζ':'ζ','η':'η', 1055 'θ':'θ','ι':'ι','κ':'κ', 1056 'λ':'λ','μ':'μ','ν':'ν', 1057 'ξ':'ξ','ο':'ο','π':'π', 1058 'ρ':'ρ','ς':'ς','σ':'σ', 1059 'τ':'τ','υ':'υ','φ':'φ', 1060 'χ':'χ','ψ':'ψ','ω':'ω', 1061 'ϑ':'ϑ','ϒ':'ϒ','ϖ':'ϖ', 1062 ' ':' ',' ':' ',' ':' ', 1063 '‌':'‌','‍':'‍','‎':'‎', 1064 '‏':'‏','–':'–','—':'—', 1065 '‘':'‘','’':'’','‚':'‚', 1066 '“':'“','”':'”','„':'„', 1067 '†':'†','‡':'‡','•':'•', 1068 '…':'…','‰':'‰','′':'′', 1069 '″':'″','‹':'‹','›':'›', 1070 '‾':'‾','⁄':'⁄','€':'€', 1071 'ℑ':'ℑ','℘':'℘','ℜ':'ℜ', 1072 '™':'™','ℵ':'ℵ','←':'←', 1073 '↑':'↑','→':'→','↓':'↓', 1074 '↔':'↔','↵':'↵','⇐':'⇐', 1075 '⇑':'⇑','⇒':'⇒','⇓':'⇓', 1076 '⇔':'⇔','∀':'∀','∂':'∂', 1077 '∃':'∃','∅':'∅','∇':'∇', 1078 '∈':'∈','∉':'∉','∋':'∋', 1079 '∏':'∏','∑':'∑','−':'−', 1080 '∗':'∗','√':'√','∝':'∝', 1081 '∞':'∞','∠':'∠','∧':'∧', 1082 '∨':'∨','∩':'∩','∪':'∪', 1083 '∫':'∫','∴':'∴','∼':'∼', 1084 '≅':'≅','≈':'≈','≠':'≠', 1085 '≡':'≡','≤':'≤','≥':'≥', 1086 '⊂':'⊂','⊃':'⊃','⊄':'⊄', 1087 '⊆':'⊆','⊇':'⊇','⊕':'⊕', 1088 '⊗':'⊗','⊥':'⊥','⋅':'⋅', 1089 '⌈':'⌈','⌉':'⌉','⌊':'⌊', 1090 '⌋':'⌋','⟨':'〈','⟩':'〉', 1091 '◊':'◊','♠':'♠','♣':'♣', 1092 '♥':'♥','♦':'♦'}; 1093 1094 this.block_tags = ["a", "abbr", "acronym", "address", "area", "b", 1095 "base", "bdo", "big", "blockquote", "body", "button", 1096 "caption", "cite", "code", "col", "colgroup", "dd", "del", "div", 1097 "dfn", "dl", "dt", "em", "fieldset", "form", "head", "h1", "h2", 1098 "h3", "h4", "h5", "h6", "html", "i", "ins", 1099 "kbd", "label", "legend", "li", "map", "noscript", 1100 "object", "ol", "optgroup", "option", "p", "param", "pre", "q", 1101 "samp", "script", "select", "small", "span", "strong", "style", 1102 "sub", "sup", "table", "tbody", "td", "textarea", "tfoot", "th", 38 1103 "thead", "title", "tr", "tt", "ul", "var", "extends"]; 39 } 40 41 /** 42 * The lexer has to skip certain sections such 43 * as server code, client code and styles. 44 * @access private 45 */ 46 XhtmlLexer.prototype._addSkipping = function() { 47 this.mapHandler('css', 'ignore'); 48 this.addEntryPattern('<style', 'text', 'css'); 49 this.addExitPattern('</style>', 'css'); 50 this.mapHandler('js', 'ignore'); 51 this.addEntryPattern('<script', 'text', 'js'); 52 this.addExitPattern('</script>', 'js'); 53 this.mapHandler('comment', 'ignore'); 54 this.addEntryPattern('<!--', 'text', 'comment'); 55 this.addExitPattern('-->', 'comment'); 56 } 57 58 /** 59 * Pattern matches to start and end a tag. 60 * @param string tag Name of tag to scan for. 61 * @access private 62 */ 63 XhtmlLexer.prototype._addTag = function(tag) { 64 this.addSpecialPattern('</'+tag+'>', 'text', 'acceptEndToken'); 65 this.addEntryPattern('<'+tag, 'text', 'tag'); 66 } 67 68 /** 69 * Pattern matches to parse the inside of a tag 70 * including the attributes and their quoting. 71 * @access private 72 */ 73 XhtmlLexer.prototype._addInTagTokens = function() { 74 this.mapHandler('tag', 'acceptStartToken'); 75 this.addSpecialPattern('\\s+', 'tag', 'ignore'); 76 this._addAttributeTokens(); 77 this.addExitPattern('/>', 'tag'); 78 this.addExitPattern('>', 'tag'); 79 } 80 81 /** 82 * Matches attributes that are either single quoted, 83 * double quoted or unquoted. 84 * @access private 85 */ 86 XhtmlLexer.prototype._addAttributeTokens = function() { 87 this.mapHandler('dq_attribute', 'acceptAttributeToken'); 88 this.addEntryPattern('=\\s*"', 'tag', 'dq_attribute'); 89 this.addPattern("\\\\\"", 'dq_attribute'); 90 this.addExitPattern('"', 'dq_attribute'); 91 this.mapHandler('sq_attribute', 'acceptAttributeToken'); 92 this.addEntryPattern("=\\s*'", 'tag', 'sq_attribute'); 93 this.addPattern("\\\\'", 'sq_attribute'); 94 this.addExitPattern("'", 'sq_attribute'); 95 this.mapHandler('uq_attribute', 'acceptAttributeToken'); 96 this.addSpecialPattern('=\\s*[^>\\s]*', 'tag', 'uq_attribute'); 97 } 98 99 100 /** 101 * Converts HTML tokens into selected SAX evnts. 102 * @package Parser 103 * @subpackage WebTester 104 * 105 * Sets the listener. 106 * @param SaxListener listener SAX evnt handler. 107 * @access public 108 */ 109 function XhtmlSaxParser(listener) { 110 this._listener = listener; 111 this._lexer = this.createLexer(this); 112 this._tag = ''; 113 this._attributes = {}; 114 this._current_attribute = ''; 115 } 116 117 /** 118 * Runs the content through the lexer which 119 * should call back to the acceptors. 120 * @param string $raw Page text to parse. 121 * @return boolean False if parse error. 122 * @access public 123 */ 124 XhtmlSaxParser.prototype.parse = function (raw) { 125 return this._lexer.parse(raw); 126 } 127 128 /** 129 * Sets up the matching lexer. Starts in 'text' mode. 130 * @param SaxParser $parser evnt generator, usually $self. 131 * @return Lexer Lexer suitable for this parser. 132 * @access public 133 * @static 134 */ 135 XhtmlSaxParser.prototype.createLexer = function (parser) { 136 return new XhtmlLexer(parser); 137 } 138 139 /** 140 * Accepts a token from the tag mode. If the 141 * starting element completes then the element 142 * is dispatched and the current attributes 143 * set back to empty. The element or attribute 144 * name is converted to lower case. 145 * @param string token Incoming characters. 146 * @param integer evnt Lexer evnt type. 147 * @return boolean False if parse error. 148 * @access public 149 */ 150 XhtmlSaxParser.prototype.acceptStartToken = function (token, evnt) { 151 if (evnt == LEXER_ENTER) { 152 this._tag = token.toLowerCase().substring(1); 153 return true; 154 } 155 if (evnt == LEXER_EXIT) { 156 success = this._listener.startElement(this._tag, this._attributes); 157 this._tag = ''; 158 this._attributes = {}; 159 return success; 160 } 161 if (token != '=') { 162 this._current_attribute = this.decodeHtml(token).toLowerCase(); 163 this._attributes[this._current_attribute] = ''; 164 } 165 return true; 166 } 167 168 /** 169 * Accepts a token from the end tag mode. 170 * The element name is converted to lower case. 171 * @param string token Incoming characters. 172 * @param integer evnt Lexer evnt type. 173 * @return boolean False if parse error. 174 * @access public 175 */ 176 XhtmlSaxParser.prototype.acceptEndToken = function (token, evnt) { 177 matches = token.match(/<\/(.*)>/); 178 if (matches.length == 0) { 179 return false; 180 } 181 return this._listener.endElement(matches[1].toLowerCase()); 182 } 183 184 /** 185 * Part of the tag data. 186 * @param string token Incoming characters. 187 * @param integer evnt Lexer evnt type. 188 * @return boolean False if parse error. 189 * @access public 190 */ 191 XhtmlSaxParser.prototype.acceptAttributeToken = function (token, evnt) { 192 if (evnt == LEXER_UNMATCHED) { 193 this._attributes[this._current_attribute] += this.decodeHtml(token); 194 } 195 if (evnt == LEXER_SPECIAL) { 196 this._attributes[this._current_attribute] += this.decodeHtml(token).replace(/^=\s*/ , ''); 197 } 198 return true; 199 } 200 201 /** 202 * A character entity. 203 * @param string token Incoming characters. 204 * @param integer evnt Lexer evnt type. 205 * @return boolean False if parse error. 206 * @access public 207 */ 208 XhtmlSaxParser.prototype.acceptEntityToken = function (token, evnt) { 209 } 210 211 /** 212 * Character data between tags regarded as 213 * important. 214 * @param string $token Incoming characters. 215 * @param integer $evnt Lexer evnt type. 216 * @return boolean False if parse error. 217 * @access public 218 */ 219 XhtmlSaxParser.prototype.acceptTextToken = function (token, evnt) { 220 return this._listener.addContent(token); 221 } 222 223 /** 224 * Incoming data to be ignored. 225 * @param string $token Incoming characters. 226 * @param integer $evnt Lexer evnt type. 227 * @return boolean False if parse error. 228 * @access public 229 */ 230 XhtmlSaxParser.prototype.ignore = function (token, evnt) { 231 return true; 232 } 233 234 /** 235 * Decodes any HTML entities. 236 * @param string $html Incoming HTML. 237 * @return string Outgoing plain text. 238 * @access public 239 * @static 240 */ 241 XhtmlSaxParser.prototype.decodeHtml = function (html) { 242 var entities = [' ','¡','¢','£','¤','¥','¦','§', 243 '¨','©','ª','«','¬','­','®', 244 '¯','°','±','²','³','´','µ', 245 '¶','·','¸','¹','º','»','¼', 246 '½','¾','¿','À','Á','Â','Ã', 247 'Ä','Å','Æ','Ç','È','É','Ê', 248 'Ë','Ì','Í','Î','Ï','Ð','Ñ', 249 'Ò','Ó','Ô','Õ','Ö','×','Ø', 250 'Ù','Ú','Û','Ü','Ý','Þ','ß', 251 'à','á','â','ã','ä','å','æ', 252 'ç','è','é','ê','ë','ì','í', 253 'î','ï','ð','ñ','ò','ó','ô', 254 'õ','ö','÷','ø','ù','ú','û', 255 'ü','ý','þ','ÿ','"','<','>', 256 '&']; 257 258 var chars = ["\xa0","\xa1","\xa2","\xa3","\xa4","\xa5","\xa6","\xa7","\xa8","\xa9","\xaa", 259 "\xab","\xac","\xad","\xae","\xaf","\xb0","\xb1","\xb2","\xb3","\xb4", 260 "\xb5","\xb6","\xb7","\xb8","\xb9","\xba","\xbb","\xbc","\xbd","\xbe", 261 "\xbf","\xc0","\xc1","\xc2","\xc3","\xc4","\xc5","\xc6","\xc7","\xc8", 262 "\xc9","\xca","\xcb","\xcc","\xcd","\xce","\xcf","\xd0","\xd1","\xd2", 263 "\xd3","\xd4","\xd5","\xd6","\xd7","\xd8","\xd9","\xda","\xdb","\xdc", 264 "\xdd","\xde","\xdf","\xe0","\xe1","\xe2","\xe3","\xe4","\xe5","\xe6", 265 "\xe7","\xe8","\xe9","\xea","\xeb","\xec","\xed","\xee","\xef","\xf0", 266 "\xf1","\xf2","\xf3","\xf4","\xf5","\xf6","\xf7","\xf8","\xf9","\xfa", 267 "\xfb","\xfc","\xfd","\xfe","\xff","\x22","\x3c","\x3e","\x26"]; 268 269 for(var i = 0; i < entities.length; i++){ 270 html = html.replace(entities[i], chars[i]); 271 } 272 return html; 273 } 274 275 /** 276 * Turns HTML into text browser visible text. Images 277 * are converted to their alt text and tags are supressed. 278 * Entities are converted to their visible representation. 279 * @param string $html HTML to convert. 280 * @return string Plain text. 281 * @access public 282 * @static 283 */ 284 XhtmlSaxParser.prototype.normalise = function (html) { 285 return this.decodeHtml( 286 html.replace(/<!--.*?-./, ''). 287 replace(/<img.*?alt\s*=\s*"(.*?)".*?>/, ' \$1 '). 288 replace(/<img.*?alt\s*=\s*\'(.*?)\'.*?>/, ' \$1 '). 289 replace(/<img.*?alt\s*=\s*([a-zA-Z_]+).*?>/, ' \$1 '). 290 replace(/<.*?>/, '') 291 ). 292 replace(/\s+/, ' ').trim();; 293 } 294 295 /** 296 * SAX evnt handler. 297 * @package Parser 298 * @subpackage WebTester 299 * @abstract 300 * 301 * Sets the document to write to. 302 * @access public 303 */ 304 function SaxListener() { 305 } 306 307 /** 308 * Start of element evnt. 309 * @param string $name Element name. 310 * @param hash $attributes Name value pairs. 311 * Attributes without content 312 * are marked as true. 313 * @return boolean False on parse error. 314 * @access public 315 */ 316 SaxListener.prototype.startElement = function (name, attributes) { 317 console.log('start '+name); 318 console.log(attributes); 319 } 320 321 /** 322 * End of element evnt. 323 * @param string $name Element name. 324 * @return boolean False on parse error. 325 * @access public 326 */ 327 SaxListener.prototype.endElement = function (name) { 328 console.log('end '+name); 329 } 330 331 /** 332 * Unparsed, but relevant data. 333 * @param string $text May include unparsed tags. 334 * @return boolean False on parse error. 335 * @access public 336 */ 337 SaxListener.prototype.addContent = function (text) { 338 } 339 340 //var Parser = new XhtmlSaxParser(new SaxListener()); 341 //Parser.parse(html); 1104 1105 1106 this.inline_tags = ["br", "hr", "img", "input"]; 1107 } 1108 1109 XhtmlSaxListener.prototype.shouldCloseTagAutomatically = function(tag, now_on_tag, closing) 1110 { 1111 var closing = closing || false; 1112 if(tag == 'li'){ 1113 if((closing && (now_on_tag == 'ul' || now_on_tag == 'ol')) || (!closing && now_on_tag == 'li')){ 1114 return true; 1115 } 1116 } 1117 if(tag == 'td'){ 1118 if((closing && now_on_tag == 'tr') || (!closing && now_on_tag == 'td')){ 1119 return true; 1120 } 1121 } 1122 if(tag == 'option'){ 1123 if((closing && now_on_tag == 'select') || (!closing && now_on_tag == 'option')){ 1124 return true; 1125 } 1126 } 1127 return false; 1128 } 1129 1130 XhtmlSaxListener.prototype.beforeParsing = function(raw) 1131 { 1132 return raw; 1133 } 1134 1135 XhtmlSaxListener.prototype.afterParsing = function(xhtml) 1136 { 1137 xhtml = this.replaceNamedEntities(xhtml); 1138 xhtml = this.joinRepeatedEntities(xhtml); 1139 return xhtml; 1140 } 1141 1142 XhtmlSaxListener.prototype.replaceNamedEntities = function(xhtml) 1143 { 1144 for (entity in this.entities) { 1145 xhtml = xhtml.replace(entity, this.entities[entity]); 1146 } 1147 return xhtml; 1148 } 1149 1150 XhtmlSaxListener.prototype.joinRepeatedEntities = function(xhtml) 1151 { 1152 var tags = 'em|strong|sub|sup|acronym|pre|del|blockquote|address'; 1153 return xhtml.replace(new RegExp('<\/('+tags+')><\\1>' ,''),''). 1154 replace(new RegExp('(\s*<('+tags+')>\s*){2}(.*)(\s*<\/\\2>\s*){2}' ,''),'<\$2>\$3<\$2>'); 1155 } 1156 1157 XhtmlSaxListener.prototype.getResult = function() 1158 { 1159 return this.xhtml; 1160 } 1161 1162 XhtmlSaxListener.prototype.getTagReplacements = function() 1163 { 1164 return {'b':'strong', 'i':'em'}; 1165 } 1166 1167 XhtmlSaxListener.prototype.addContent = function(text) 1168 { 1169 this.xhtml += text; 1170 } 1171 1172 XhtmlSaxListener.prototype.addComment = function(text) 1173 { 1174 if(!this.remove_comments){ 1175 this.xhtml += text; 1176 } 1177 } 1178 1179 XhtmlSaxListener.prototype.addScript = function(text) 1180 { 1181 if(!this.remove_scripts){ 1182 this.xhtml += text; 1183 } 1184 } 1185 1186 XhtmlSaxListener.prototype.addCss = function(text) 1187 { 1188 if(!this.remove_embeded_styles){ 1189 this.xhtml += text; 1190 } 1191 } 1192 XhtmlSaxListener.prototype.openBlockTag = function(tag, attributes) 1193 { 1194 this.xhtml += this.helper.tag(tag, attributes, true); 1195 } 1196 1197 XhtmlSaxListener.prototype.inlineTag = function(tag, attributes) 1198 { 1199 this.xhtml += this.helper.tag(tag, attributes); 1200 } 1201 1202 XhtmlSaxListener.prototype.openUnknownTag = function(tag, attributes) 1203 { 1204 this.xhtml += this.helper.tag(tag, attributes, true); 1205 } 1206 1207 XhtmlSaxListener.prototype.closeBlockTag = function(tag) 1208 { 1209 this.xhtml += "</"+tag+">"; 1210 } 1211 1212 XhtmlSaxListener.prototype.closeUnknownTag = function(tag) 1213 { 1214 this.xhtml += "</"+tag+">"; 1215 } 1216 1217 XhtmlSaxListener.prototype.closeUnopenedTag = function(tag) 1218 { 1219 this.xhtml += "</"+tag+">"; 1220 } 1221
Note: See TracChangeset
for help on using the changeset viewer.