| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425 |
- /**
- * SaxParser.js
- *
- * Copyright, Moxiecode Systems AB
- * Released under LGPL License.
- *
- * License: http://www.tinymce.com/license
- * Contributing: http://www.tinymce.com/contributing
- */
- /*eslint max-depth:[2, 9] */
- /**
- * This class parses HTML code using pure JavaScript and executes various events for each item it finds. It will
- * always execute the events in the right order for tag soup code like <b><p></b></p>. It will also remove elements
- * and attributes that don't fit the schema if the validate setting is enabled.
- *
- * @example
- * var parser = new tinymce.html.SaxParser({
- * validate: true,
- *
- * comment: function(text) {
- * console.log('Comment:', text);
- * },
- *
- * cdata: function(text) {
- * console.log('CDATA:', text);
- * },
- *
- * text: function(text, raw) {
- * console.log('Text:', text, 'Raw:', raw);
- * },
- *
- * start: function(name, attrs, empty) {
- * console.log('Start:', name, attrs, empty);
- * },
- *
- * end: function(name) {
- * console.log('End:', name);
- * },
- *
- * pi: function(name, text) {
- * console.log('PI:', name, text);
- * },
- *
- * doctype: function(text) {
- * console.log('DocType:', text);
- * }
- * }, schema);
- * @class tinymce.html.SaxParser
- * @version 3.4
- */
- define("tinymce/html/SaxParser", [
- "tinymce/html/Schema",
- "tinymce/html/Entities",
- "tinymce/util/Tools"
- ], function(Schema, Entities, Tools) {
- var each = Tools.each;
- /**
- * Constructs a new SaxParser instance.
- *
- * @constructor
- * @method SaxParser
- * @param {Object} settings Name/value collection of settings. comment, cdata, text, start and end are callbacks.
- * @param {tinymce.html.Schema} schema HTML Schema class to use when parsing.
- */
- return function(settings, schema) {
- var self = this;
- function noop() {}
- settings = settings || {};
- self.schema = schema = schema || new Schema();
- if (settings.fix_self_closing !== false) {
- settings.fix_self_closing = true;
- }
- // Add handler functions from settings and setup default handlers
- each('comment cdata text start end pi doctype'.split(' '), function(name) {
- if (name) {
- self[name] = settings[name] || noop;
- }
- });
- /**
- * Parses the specified HTML string and executes the callbacks for each item it finds.
- *
- * @example
- * new SaxParser({...}).parse('<b>text</b>');
- * @method parse
- * @param {String} html Html string to sax parse.
- */
- self.parse = function(html) {
- var self = this, matches, index = 0, value, endRegExp, stack = [], attrList, i, text, name;
- var isInternalElement, removeInternalElements, shortEndedElements, fillAttrsMap, isShortEnded;
- var validate, elementRule, isValidElement, attr, attribsValue, validAttributesMap, validAttributePatterns;
- var attributesRequired, attributesDefault, attributesForced;
- var anyAttributesRequired, selfClosing, tokenRegExp, attrRegExp, specialElements, attrValue, idCount = 0;
- var decode = Entities.decode, fixSelfClosing, filteredUrlAttrs = Tools.makeMap('src,href,data,background,formaction,poster');
- var scriptUriRegExp = /((java|vb)script|mhtml):/i, dataUriRegExp = /^data:/i;
- function processEndTag(name) {
- var pos, i;
- // Find position of parent of the same type
- pos = stack.length;
- while (pos--) {
- if (stack[pos].name === name) {
- break;
- }
- }
- // Found parent
- if (pos >= 0) {
- // Close all the open elements
- for (i = stack.length - 1; i >= pos; i--) {
- name = stack[i];
- if (name.valid) {
- self.end(name.name);
- }
- }
- // Remove the open elements from the stack
- stack.length = pos;
- }
- }
- function parseAttribute(match, name, value, val2, val3) {
- var attrRule, i, trimRegExp = /[\s\u0000-\u001F]+/g;
- name = name.toLowerCase();
- value = name in fillAttrsMap ? name : decode(value || val2 || val3 || ''); // Handle boolean attribute than value attribute
- // Validate name and value pass through all data- attributes
- if (validate && !isInternalElement && name.indexOf('data-') !== 0) {
- attrRule = validAttributesMap[name];
- // Find rule by pattern matching
- if (!attrRule && validAttributePatterns) {
- i = validAttributePatterns.length;
- while (i--) {
- attrRule = validAttributePatterns[i];
- if (attrRule.pattern.test(name)) {
- break;
- }
- }
- // No rule matched
- if (i === -1) {
- attrRule = null;
- }
- }
- // No attribute rule found
- if (!attrRule) {
- return;
- }
- // Validate value
- if (attrRule.validValues && !(value in attrRule.validValues)) {
- return;
- }
- }
- // Block any javascript: urls or non image data uris
- if (filteredUrlAttrs[name] && !settings.allow_script_urls) {
- var uri = value.replace(trimRegExp, '');
- try {
- // Might throw malformed URI sequence
- uri = decodeURIComponent(uri);
- } catch (ex) {
- // Fallback to non UTF-8 decoder
- uri = unescape(uri);
- }
- if (scriptUriRegExp.test(uri)) {
- return;
- }
- if (!settings.allow_html_data_urls && dataUriRegExp.test(uri) && !/^data:image\//i.test(uri)) {
- return;
- }
- }
- // Add attribute to list and map
- attrList.map[name] = value;
- attrList.push({
- name: name,
- value: value
- });
- }
- // Precompile RegExps and map objects
- tokenRegExp = new RegExp('<(?:' +
- '(?:!--([\\w\\W]*?)-->)|' + // Comment
- '(?:!\\[CDATA\\[([\\w\\W]*?)\\]\\]>)|' + // CDATA
- '(?:!DOCTYPE([\\w\\W]*?)>)|' + // DOCTYPE
- '(?:\\?([^\\s\\/<>]+) ?([\\w\\W]*?)[?/]>)|' + // PI
- '(?:\\/([^>]+)>)|' + // End element
- '(?:([A-Za-z0-9\\-\\:\\.]+)((?:\\s+[^"\'>]+(?:(?:"[^"]*")|(?:\'[^\']*\')|[^>]*))*|\\/|\\s+)>)' + // Start element
- ')', 'g');
- attrRegExp = /([\w:\-]+)(?:\s*=\s*(?:(?:\"((?:[^\"])*)\")|(?:\'((?:[^\'])*)\')|([^>\s]+)))?/g;
- // Setup lookup tables for empty elements and boolean attributes
- shortEndedElements = schema.getShortEndedElements();
- selfClosing = settings.self_closing_elements || schema.getSelfClosingElements();
- fillAttrsMap = schema.getBoolAttrs();
- validate = settings.validate;
- removeInternalElements = settings.remove_internals;
- fixSelfClosing = settings.fix_self_closing;
- specialElements = schema.getSpecialElements();
- while ((matches = tokenRegExp.exec(html))) {
- // Text
- if (index < matches.index) {
- self.text(decode(html.substr(index, matches.index - index)));
- }
- if ((value = matches[6])) { // End element
- value = value.toLowerCase();
- // IE will add a ":" in front of elements it doesn't understand like custom elements or HTML5 elements
- if (value.charAt(0) === ':') {
- value = value.substr(1);
- }
- processEndTag(value);
- } else if ((value = matches[7])) { // Start element
- value = value.toLowerCase();
- // IE will add a ":" in front of elements it doesn't understand like custom elements or HTML5 elements
- if (value.charAt(0) === ':') {
- value = value.substr(1);
- }
- isShortEnded = value in shortEndedElements;
- // Is self closing tag for example an <li> after an open <li>
- if (fixSelfClosing && selfClosing[value] && stack.length > 0 && stack[stack.length - 1].name === value) {
- processEndTag(value);
- }
- // Validate element
- if (!validate || (elementRule = schema.getElementRule(value))) {
- isValidElement = true;
- // Grab attributes map and patters when validation is enabled
- if (validate) {
- validAttributesMap = elementRule.attributes;
- validAttributePatterns = elementRule.attributePatterns;
- }
- // Parse attributes
- if ((attribsValue = matches[8])) {
- isInternalElement = attribsValue.indexOf('data-mce-type') !== -1; // Check if the element is an internal element
- // If the element has internal attributes then remove it if we are told to do so
- if (isInternalElement && removeInternalElements) {
- isValidElement = false;
- }
- attrList = [];
- attrList.map = {};
- attribsValue.replace(attrRegExp, parseAttribute);
- } else {
- attrList = [];
- attrList.map = {};
- }
- // Process attributes if validation is enabled
- if (validate && !isInternalElement) {
- attributesRequired = elementRule.attributesRequired;
- attributesDefault = elementRule.attributesDefault;
- attributesForced = elementRule.attributesForced;
- anyAttributesRequired = elementRule.removeEmptyAttrs;
- // Check if any attribute exists
- if (anyAttributesRequired && !attrList.length) {
- isValidElement = false;
- }
- // Handle forced attributes
- if (attributesForced) {
- i = attributesForced.length;
- while (i--) {
- attr = attributesForced[i];
- name = attr.name;
- attrValue = attr.value;
- if (attrValue === '{$uid}') {
- attrValue = 'mce_' + idCount++;
- }
- attrList.map[name] = attrValue;
- attrList.push({name: name, value: attrValue});
- }
- }
- // Handle default attributes
- if (attributesDefault) {
- i = attributesDefault.length;
- while (i--) {
- attr = attributesDefault[i];
- name = attr.name;
- if (!(name in attrList.map)) {
- attrValue = attr.value;
- if (attrValue === '{$uid}') {
- attrValue = 'mce_' + idCount++;
- }
- attrList.map[name] = attrValue;
- attrList.push({name: name, value: attrValue});
- }
- }
- }
- // Handle required attributes
- if (attributesRequired) {
- i = attributesRequired.length;
- while (i--) {
- if (attributesRequired[i] in attrList.map) {
- break;
- }
- }
- // None of the required attributes where found
- if (i === -1) {
- isValidElement = false;
- }
- }
- // Invalidate element if it's marked as bogus
- if (attrList.map['data-mce-bogus']) {
- isValidElement = false;
- }
- }
- if (isValidElement) {
- self.start(value, attrList, isShortEnded);
- }
- } else {
- isValidElement = false;
- }
- // Treat script, noscript and style a bit different since they may include code that looks like elements
- if ((endRegExp = specialElements[value])) {
- endRegExp.lastIndex = index = matches.index + matches[0].length;
- if ((matches = endRegExp.exec(html))) {
- if (isValidElement) {
- text = html.substr(index, matches.index - index);
- }
- index = matches.index + matches[0].length;
- } else {
- text = html.substr(index);
- index = html.length;
- }
- if (isValidElement) {
- if (text.length > 0) {
- self.text(text, true);
- }
- self.end(value);
- }
- tokenRegExp.lastIndex = index;
- continue;
- }
- // Push value on to stack
- if (!isShortEnded) {
- if (!attribsValue || attribsValue.indexOf('/') != attribsValue.length - 1) {
- stack.push({name: value, valid: isValidElement});
- } else if (isValidElement) {
- self.end(value);
- }
- }
- } else if ((value = matches[1])) { // Comment
- // Padd comment value to avoid browsers from parsing invalid comments as HTML
- if (value.charAt(0) === '>') {
- value = ' ' + value;
- }
- if (!settings.allow_conditional_comments && value.substr(0, 3) === '[if') {
- value = ' ' + value;
- }
- self.comment(value);
- } else if ((value = matches[2])) { // CDATA
- self.cdata(value);
- } else if ((value = matches[3])) { // DOCTYPE
- self.doctype(value);
- } else if ((value = matches[4])) { // PI
- self.pi(value, matches[5]);
- }
- index = matches.index + matches[0].length;
- }
- // Text
- if (index < html.length) {
- self.text(decode(html.substr(index)));
- }
- // Close any open elements
- for (i = stack.length - 1; i >= 0; i--) {
- value = stack[i];
- if (value.valid) {
- self.end(value.name);
- }
- }
- };
- };
- });
|