// Serializer.js (12 KB)
  /**
   * Serializer.js
   *
   * Copyright, Moxiecode Systems AB
   * Released under LGPL License.
   *
   * License: http://www.tinymce.com/license
   * Contributing: http://www.tinymce.com/contributing
   */
  /**
   * This class is used to serialize DOM trees into a string. Consult the TinyMCE Wiki API for
   * more details and examples on how to use this class.
   *
   * @class tinymce.dom.Serializer
   */
  define("tinymce/dom/Serializer", [
    "tinymce/dom/DOMUtils",
    "tinymce/html/DomParser",
    "tinymce/html/Entities",
    "tinymce/html/Serializer",
    "tinymce/html/Node",
    "tinymce/html/Schema",
    "tinymce/Env",
    "tinymce/util/Tools"
  ], function(DOMUtils, DomParser, Entities, Serializer, Node, Schema, Env, Tools) {
    var each = Tools.each, trim = Tools.trim;
    var DOM = DOMUtils.DOM;

    /**
     * Constructs a new DOM serializer class.
     *
     * Note that the passed settings object is modified in place: defaults for
     * entity_encoding ('named') and remove_trailing_brs (true) are written back to it.
     *
     * @constructor
     * @method Serializer
     * @param {Object} settings Serializer settings object. Optional, defaults to an empty object.
     * @param {tinymce.Editor} editor Optional editor to bind events to and get schema/dom from.
     */
    return function(settings, editor) {
      var dom, schema, htmlParser;

      // Allow construction without a settings object instead of throwing on the first property read
      settings = settings || {};

      if (editor) {
        dom = editor.dom;
        schema = editor.schema;
      }

      // Default DOM and Schema if they are undefined
      dom = dom || DOM;
      schema = schema || new Schema(settings);
      settings.entity_encoding = settings.entity_encoding || 'named';
      settings.remove_trailing_brs = "remove_trailing_brs" in settings ? settings.remove_trailing_brs : true;

      htmlParser = new DomParser(settings, schema);

      // Convert data-mce-tabindex back to a real tabindex attribute when serializing contents
      htmlParser.addAttributeFilter('data-mce-tabindex', function(nodes, name) {
        var i = nodes.length, node;

        while (i--) {
          node = nodes[i];
          node.attr('tabindex', node.attributes.map['data-mce-tabindex']);
          node.attr(name, null);
        }
      });

      // Move data-mce-src, data-mce-href and data-mce-style back onto the real attributes,
      // or normalize the real attribute value when no internal counterpart exists
      htmlParser.addAttributeFilter('src,href,style', function(nodes, name) {
        var i = nodes.length, node, value, internalName = 'data-mce-' + name;
        var urlConverter = settings.url_converter, urlConverterScope = settings.url_converter_scope, undef;

        while (i--) {
          node = nodes[i];
          value = node.attributes.map[internalName];

          if (value !== undef) {
            // Set external name to internal value and remove internal
            node.attr(name, value.length > 0 ? value : null);
            node.attr(internalName, null);
          } else {
            // No internal attribute found then convert the value we have in the DOM
            value = node.attributes.map[name];

            if (name === "style") {
              value = dom.serializeStyle(dom.parseStyle(value), node.name);
            } else if (urlConverter) {
              value = urlConverter.call(urlConverterScope, value, name, node.name);
            }

            // The converted value may be null/undefined (e.g. a converter that rejects the URL);
            // treat that like an empty value and drop the attribute instead of throwing
            node.attr(name, value && value.length > 0 ? value : null);
          }
        }
      });

      // Remove internal mce-item-* classes
      htmlParser.addAttributeFilter('class', function(nodes) {
        var i = nodes.length, node, value;

        while (i--) {
          node = nodes[i];
          value = node.attr('class');

          // The attribute may already have been removed by a filter that ran earlier
          if (typeof value === 'string') {
            value = value.replace(/(?:^|\s)mce-item-\w+(?!\S)/g, '');
            node.attr('class', value.length > 0 ? value : null);
          }
        }
      });

      // Remove bookmark elements, unless the caller asked for a cleanup pass
      htmlParser.addAttributeFilter('data-mce-type', function(nodes, name, args) {
        var i = nodes.length, node;

        while (i--) {
          node = nodes[i];

          if (node.attributes.map['data-mce-type'] === 'bookmark' && !args.cleanup) {
            node.remove();
          }
        }
      });

      // Remove expando attributes
      htmlParser.addAttributeFilter('data-mce-expando', function(nodes, name) {
        var i = nodes.length;

        while (i--) {
          nodes[i].attr(name, null);
        }
      });

      // Entity-decode the first child of noscript elements
      htmlParser.addNodeFilter('noscript', function(nodes) {
        var i = nodes.length, node;

        while (i--) {
          node = nodes[i].firstChild;

          if (node) {
            node.value = Entities.decode(node.value);
          }
        }
      });

      // Force script into CDATA sections and remove the mce- prefix also add comments around styles
      htmlParser.addNodeFilter('script,style', function(nodes, name) {
        var i = nodes.length, node, value;

        // Strips any existing CDATA/comment wrappers from script or style contents.
        // Deliberately shadows the outer Tools.trim within this filter.
        function trim(value) {
          /*jshint maxlen:255 */
          /*eslint max-len:0 */
          return value.replace(/(<!--\[CDATA\[|\]\]-->)/g, '\n')
            .replace(/^[\r\n]*|[\r\n]*$/g, '')
            .replace(/^\s*((<!--)?(\s*\/\/)?\s*<!\[CDATA\[|(<!--\s*)?\/\*\s*<!\[CDATA\[\s*\*\/|(\/\/)?\s*<!--|\/\*\s*<!--\s*\*\/)\s*[\r\n]*/gi, '')
            .replace(/\s*(\/\*\s*\]\]>\s*\*\/(-->)?|\s*\/\/\s*\]\]>(-->)?|\/\/\s*(-->)?|\]\]>|\/\*\s*-->\s*\*\/|\s*-->\s*)\s*$/g, '');
        }

        while (i--) {
          node = nodes[i];
          value = node.firstChild ? node.firstChild.value : '';

          if (name === "script") {
            // Remove mce- prefix from script elements and remove default text/javascript mime type (HTML5)
            var type = (node.attr('type') || 'text/javascript').replace(/^mce\-/, '');
            node.attr('type', type === 'text/javascript' ? null : type);

            if (value.length > 0) {
              node.firstChild.value = '// <![CDATA[\n' + trim(value) + '\n// ]]>';
            }
          } else {
            if (value.length > 0) {
              node.firstChild.value = '<!--\n' + trim(value) + '\n-->';
            }
          }
        }
      });

      // Convert comments to cdata and handle protected comments
      htmlParser.addNodeFilter('#comment', function(nodes) {
        var i = nodes.length, node;

        while (i--) {
          node = nodes[i];

          if (node.value.indexOf('[CDATA[') === 0) {
            node.name = '#cdata';
            node.type = 4;
            node.value = node.value.replace(/^\[CDATA\[|\]\]$/g, '');
          } else if (node.value.indexOf('mce:protected ') === 0) {
            node.name = "#text";
            node.type = 3;
            node.raw = true;
            // 14 is the length of the 'mce:protected ' marker that is cut off after unescaping
            node.value = unescape(node.value).substr(14);
          }
        }
      });

      // Remove xml:namespace/input nodes of type 7 and give type-less input elements type="text"
      htmlParser.addNodeFilter('xml:namespace,input', function(nodes, name) {
        var i = nodes.length, node;

        while (i--) {
          node = nodes[i];

          if (node.type === 7) {
            node.remove();
          } else if (node.type === 1) {
            if (name === "input" && !("type" in node.attributes.map)) {
              node.attr('type', 'text');
            }
          }
        }
      });

      // Fix list elements, TODO: Replace this later
      if (settings.fix_list_elements) {
        // Move a list that is a direct child of another list into the preceding li
        htmlParser.addNodeFilter('ul,ol', function(nodes) {
          var i = nodes.length, node, parentNode;

          while (i--) {
            node = nodes[i];
            parentNode = node.parent;

            // A node without a parent has nothing to be fixed against
            if (parentNode && (parentNode.name === 'ul' || parentNode.name === 'ol')) {
              if (node.prev && node.prev.name === 'li') {
                node.prev.append(node);
              }
            }
          }
        });
      }

      // Remove internal data attributes
      htmlParser.addAttributeFilter('data-mce-src,data-mce-href,data-mce-style,data-mce-selected', function(nodes, name) {
        var i = nodes.length;

        while (i--) {
          nodes[i].attr(name, null);
        }
      });

      // Return public methods
      return {
        /**
         * Schema instance that was used when the Serializer was constructed.
         *
         * @field {tinymce.html.Schema} schema
         */
        schema: schema,

        /**
         * Adds a node filter function to the parser used by the serializer, the parser will collect the specified nodes by name
         * and then execute the callback once it has finished parsing the document.
         *
         * @example
         * parser.addNodeFilter('p,h1', function(nodes, name) {
         *   for (var i = 0; i < nodes.length; i++) {
         *     console.log(nodes[i].name);
         *   }
         * });
         * @method addNodeFilter
         * @param {String} name Comma separated list of nodes to collect.
         * @param {function} callback Callback function to execute once it has collected nodes.
         */
        addNodeFilter: htmlParser.addNodeFilter,

        /**
         * Adds an attribute filter function to the parser used by the serializer, the parser will
         * collect nodes that have the specified attributes
         * and then execute the callback once it has finished parsing the document.
         *
         * @example
         * parser.addAttributeFilter('src,href', function(nodes, name) {
         *   for (var i = 0; i < nodes.length; i++) {
         *     console.log(nodes[i].name);
         *   }
         * });
         * @method addAttributeFilter
         * @param {String} name Comma separated list of attributes to collect nodes by.
         * @param {function} callback Callback function to execute once it has collected nodes.
         */
        addAttributeFilter: htmlParser.addAttributeFilter,

        /**
         * Serializes the specified browser DOM node into a HTML string.
         *
         * The node is cloned first, so the live DOM is not modified. While serializing, the
         * DOMUtils document may temporarily be swapped for a scratch document; it is always
         * restored before this method returns or throws.
         *
         * @method serialize
         * @param {DOMNode} node DOM node to serialize.
         * @param {Object} args Arguments option that gets passed to event handlers.
         * @return {String} Serialized HTML, also available as args.content.
         */
        serialize: function(node, args) {
          var self = this, impl, doc, oldDoc, htmlSerializer, content;

          // Explorer won't clone contents of script and style and the
          // selected index of select elements are cleared on a clone operation.
          // NOTE(review): this query is not scoped to node, so it matches anywhere in the dom document.
          if (Env.ie && dom.select('script,style,select,map').length > 0) {
            content = node.innerHTML;
            node = node.cloneNode(false);
            dom.setHTML(node, content);
          } else {
            node = node.cloneNode(true);
          }

          // Nodes needs to be attached to something in WebKit/Opera
          // This fix will make DOM ranges and make Sizzle happy!
          impl = node.ownerDocument.implementation;
          if (impl.createHTMLDocument) {
            // Create an empty HTML document
            doc = impl.createHTMLDocument("");

            // Add the element or its children if it's a body element to the new document
            each(node.nodeName == 'BODY' ? node.childNodes : [node], function(node) {
              doc.body.appendChild(doc.importNode(node, true));
            });

            // Grab first child or body element for serialization
            if (node.nodeName != 'BODY') {
              node = doc.body.firstChild;
            } else {
              node = doc.body;
            }

            // set the new document in DOMUtils so createElement etc works
            oldDoc = dom.doc;
            dom.doc = doc;
          }

          args = args || {};
          args.format = args.format || 'html';

          // Don't wrap content if we want selected html
          if (args.selection) {
            args.forced_root_block = '';
          }

          try {
            // Pre process
            if (!args.no_events) {
              args.node = node;
              self.onPreProcess(args);
            }

            // Setup serializer
            htmlSerializer = new Serializer(settings, schema);

            // Parse and serialize HTML
            args.content = htmlSerializer.serialize(
              htmlParser.parse(trim(args.getInner ? node.innerHTML : dom.getOuterHTML(node)), args)
            );

            // Replace all BOM characters for now until we can find a better solution
            if (!args.cleanup) {
              args.content = args.content.replace(/\uFEFF/g, '');
            }

            // Post process
            if (!args.no_events) {
              self.onPostProcess(args);
            }

            return args.content;
          } finally {
            // Restore the old document if it was changed, even when a handler or the parser threw,
            // so the shared DOMUtils instance is never left pointing at the scratch document
            if (oldDoc) {
              dom.doc = oldDoc;
            }

            args.node = null;
          }
        },

        /**
         * Adds valid elements rules to the serializers schema instance this enables you to specify things
         * like what elements should be outputted and what attributes specific elements might have.
         * Consult the Wiki for more details on this format.
         *
         * @method addRules
         * @param {String} rules Valid elements rules string to add to schema.
         */
        addRules: function(rules) {
          schema.addValidElements(rules);
        },

        /**
         * Sets the valid elements rules to the serializers schema instance this enables you to specify things
         * like what elements should be outputted and what attributes specific elements might have.
         * Consult the Wiki for more details on this format.
         *
         * @method setRules
         * @param {String} rules Valid elements rules string.
         */
        setRules: function(rules) {
          schema.setValidElements(rules);
        },

        /**
         * Fires the PreProcess event on the bound editor, if there is one.
         *
         * @method onPreProcess
         * @param {Object} args Serialization arguments passed along to the event handlers.
         */
        onPreProcess: function(args) {
          if (editor) {
            editor.fire('PreProcess', args);
          }
        },

        /**
         * Fires the PostProcess event on the bound editor, if there is one.
         *
         * @method onPostProcess
         * @param {Object} args Serialization arguments passed along to the event handlers.
         */
        onPostProcess: function(args) {
          if (editor) {
            editor.fire('PostProcess', args);
          }
        }
      };
    };
  });