Entities.js 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. /**
  2. * Entities.js
  3. *
  4. * Copyright, Moxiecode Systems AB
  5. * Released under LGPL License.
  6. *
  7. * License: http://www.tinymce.com/license
  8. * Contributing: http://www.tinymce.com/contributing
  9. */
  10. /*jshint bitwise:false */
  11. /*eslint no-bitwise:0 */
  12. /**
  13. * Entity encoder class.
  14. *
  15. * @class tinymce.html.Entities
  16. * @static
  17. * @version 3.4
  18. */
  19. define("tinymce/html/Entities", [
  20. "tinymce/util/Tools"
  21. ], function(Tools) {
  // Local alias for the Tools helper; getEncodeFunc uses it to parse the encoder name list
  var makeMap = Tools.makeMap;

  // Named entity lookup, filled in once buildEntitiesLookup has been defined
  var namedEntities;

  // Characters encoded inside attribute values: & < > " and back tick, every code unit from
  // U+007E to U+D7FF and from U+E000 to U+FFEF, plus surrogate pairs (matched as one unit)
  var attrsCharsRegExp = /[&<>\"\u0060\u007E-\uD7FF\uE000-\uFFEF]|[\uD800-\uDBFF][\uDC00-\uDFFF]/g;

  // Characters encoded inside text nodes: as above, minus the double quote and the back tick
  var textCharsRegExp = /[<>&\u007E-\uD7FF\uE000-\uFFEF]|[\uD800-\uDBFF][\uDC00-\uDFFF]/g;

  // Characters replaced by encodeAllRaw
  var rawCharsRegExp = /[<>&\"\']/g;

  // Matches "&name;", "&#123;" and "&#x7b;"; group 1 is the numeric prefix, group 2 the value
  var entityRegExp = /&(#x|#)?([\w]+);/g;

  // Characters substituted for numeric entities in the 128-159 range when decoding
  // (the assignments follow the Windows-1252 code page)
  var asciiMap = {
    128: "\u20AC", 130: "\u201A", 131: "\u0192", 132: "\u201E", 133: "\u2026", 134: "\u2020",
    135: "\u2021", 136: "\u02C6", 137: "\u2030", 138: "\u0160", 139: "\u2039", 140: "\u0152",
    142: "\u017D", 145: "\u2018", 146: "\u2019", 147: "\u201C", 148: "\u201D", 149: "\u2022",
    150: "\u2013", 151: "\u2014", 152: "\u02DC", 153: "\u2122", 154: "\u0161", 155: "\u203A",
    156: "\u0153", 158: "\u017E", 159: "\u0178"
  };

  // Raw entities: characters that every encoder replaces, mapped to their entity
  var baseEntities = {
    '\"': '&quot;', // Needs to be escaped since the YUI compressor would otherwise break the code
    "'": '&#39;',
    '<': '&lt;',
    '>': '&gt;',
    '&': '&amp;',
    '\u0060': '&#96;'
  };

  // Reverse lookup table for raw entities, consulted first when decoding named entities
  var reverseEntities = {
    '&lt;': '<',
    '&gt;': '>',
    '&amp;': '&',
    '&quot;': '"',
    '&apos;': "'"
  };
  /**
   * Decodes text by handing it to the browser: the text is assigned as the markup of a
   * detached div and the resulting text content is read back.
   *
   * @private
   * @param {String} text Text to decode.
   * @return {String} The div's textContent, else its innerText, else the input unchanged.
   */
  function nativeDecode(text) {
    var container = document.createElement("div");
    var decoded;

    container.innerHTML = text;

    // Prefer the standard property and fall back to innerText when it yields nothing
    decoded = container.textContent;
    if (!decoded) {
      decoded = container.innerText;
    }

    // Nothing usable came back, so hand the input back untouched
    if (!decoded) {
      decoded = text;
    }

    return decoded;
  }
  /**
   * Builds a two way lookup table from a comma separated "code,name,code,name,..." list.
   * Each pair adds two keys to the result: the character mapped to "&name;" and
   * "&name;" mapped back to the character. Characters already present in baseEntities
   * are left out so the base table always takes precedence.
   *
   * Malformed pairs (a code that does not parse in the given radix, an empty name or a
   * trailing code without a name) are skipped instead of registering bogus mappings
   * such as "&undefined;".
   *
   * @private
   * @param {String} items Comma separated list of character code and entity name pairs.
   * @param {Number} radix Optional radix the character codes are written in, defaults to 10.
   * @return {Object} Lookup table, or undefined when no items were passed so that callers
   * can fall back to a default table.
   */
  function buildEntitiesLookup(items, radix) {
    var i, code, name, chr, entity, lookup = {};

    if (items) {
      items = items.split(',');
      radix = radix || 10;

      // Walk complete code/name pairs only; a dangling last item has no name to map to
      for (i = 0; i + 1 < items.length; i += 2) {
        code = parseInt(items[i], radix);

        // Tolerate "160, nbsp" style lists (parseInt already ignores leading white space)
        name = items[i + 1].replace(/^\s+|\s+$/g, '');

        if (isNaN(code) || !name) {
          continue;
        }

        chr = String.fromCharCode(code);

        // Only add non base entities
        if (!baseEntities[chr]) {
          entity = '&' + name + ';';
          lookup[chr] = entity;
          lookup[entity] = chr;
        }
      }

      return lookup;
    }
  }
  // Built-in named entities, packed as "code,name" pairs. The character codes are written
  // in radix 32 to reduce the size; the fragments are glued together before unpacking.
  namedEntities = buildEntitiesLookup([
    '50,nbsp,51,iexcl,52,cent,53,pound,54,curren,55,yen,56,brvbar,57,sect,58,uml,59,copy,',
    '5a,ordf,5b,laquo,5c,not,5d,shy,5e,reg,5f,macr,5g,deg,5h,plusmn,5i,sup2,5j,sup3,5k,acute,',
    '5l,micro,5m,para,5n,middot,5o,cedil,5p,sup1,5q,ordm,5r,raquo,5s,frac14,5t,frac12,5u,frac34,',
    '5v,iquest,60,Agrave,61,Aacute,62,Acirc,63,Atilde,64,Auml,65,Aring,66,AElig,67,Ccedil,',
    '68,Egrave,69,Eacute,6a,Ecirc,6b,Euml,6c,Igrave,6d,Iacute,6e,Icirc,6f,Iuml,6g,ETH,6h,Ntilde,',
    '6i,Ograve,6j,Oacute,6k,Ocirc,6l,Otilde,6m,Ouml,6n,times,6o,Oslash,6p,Ugrave,6q,Uacute,',
    '6r,Ucirc,6s,Uuml,6t,Yacute,6u,THORN,6v,szlig,70,agrave,71,aacute,72,acirc,73,atilde,74,auml,',
    '75,aring,76,aelig,77,ccedil,78,egrave,79,eacute,7a,ecirc,7b,euml,7c,igrave,7d,iacute,7e,icirc,',
    '7f,iuml,7g,eth,7h,ntilde,7i,ograve,7j,oacute,7k,ocirc,7l,otilde,7m,ouml,7n,divide,7o,oslash,',
    '7p,ugrave,7q,uacute,7r,ucirc,7s,uuml,7t,yacute,7u,thorn,7v,yuml,ci,fnof,sh,Alpha,si,Beta,',
    'sj,Gamma,sk,Delta,sl,Epsilon,sm,Zeta,sn,Eta,so,Theta,sp,Iota,sq,Kappa,sr,Lambda,ss,Mu,',
    'st,Nu,su,Xi,sv,Omicron,t0,Pi,t1,Rho,t3,Sigma,t4,Tau,t5,Upsilon,t6,Phi,t7,Chi,t8,Psi,',
    't9,Omega,th,alpha,ti,beta,tj,gamma,tk,delta,tl,epsilon,tm,zeta,tn,eta,to,theta,tp,iota,',
    'tq,kappa,tr,lambda,ts,mu,tt,nu,tu,xi,tv,omicron,u0,pi,u1,rho,u2,sigmaf,u3,sigma,u4,tau,',
    'u5,upsilon,u6,phi,u7,chi,u8,psi,u9,omega,uh,thetasym,ui,upsih,um,piv,812,bull,816,hellip,',
    '81i,prime,81j,Prime,81u,oline,824,frasl,88o,weierp,88h,image,88s,real,892,trade,89l,alefsym,',
    '8cg,larr,8ch,uarr,8ci,rarr,8cj,darr,8ck,harr,8dl,crarr,8eg,lArr,8eh,uArr,8ei,rArr,8ej,dArr,',
    '8ek,hArr,8g0,forall,8g2,part,8g3,exist,8g5,empty,8g7,nabla,8g8,isin,8g9,notin,8gb,ni,8gf,prod,',
    '8gh,sum,8gi,minus,8gn,lowast,8gq,radic,8gt,prop,8gu,infin,8h0,ang,8h7,and,8h8,or,8h9,cap,8ha,cup,',
    '8hb,int,8hk,there4,8hs,sim,8i5,cong,8i8,asymp,8j0,ne,8j1,equiv,8j4,le,8j5,ge,8k2,sub,8k3,sup,8k4,',
    'nsub,8k6,sube,8k7,supe,8kl,oplus,8kn,otimes,8l5,perp,8m5,sdot,8o8,lceil,8o9,rceil,8oa,lfloor,8ob,',
    'rfloor,8p9,lang,8pa,rang,9ea,loz,9j0,spades,9j3,clubs,9j5,hearts,9j6,diams,ai,OElig,aj,oelig,b0,',
    'Scaron,b1,scaron,bo,Yuml,m6,circ,ms,tilde,802,ensp,803,emsp,809,thinsp,80c,zwnj,80d,zwj,80e,lrm,',
    '80f,rlm,80j,ndash,80k,mdash,80o,lsquo,80p,rsquo,80q,sbquo,80s,ldquo,80t,rdquo,80u,bdquo,810,dagger,',
    '811,Dagger,81g,permil,81p,lsaquo,81q,rsaquo,85c,euro'
  ].join(''), 32);
  /**
   * Converts one match of the encoder regexps into a decimal numeric entity. A match is
   * either a single UTF-16 code unit or a surrogate pair; a pair is folded into the
   * single code point it represents so that it produces one entity.
   *
   * @private
   * @param {String} chr One or two UTF-16 code units.
   * @return {String} Numeric entity, for example "&#233;".
   */
  function toNumericEntity(chr) {
    var codePoint = chr.charCodeAt(0);

    if (chr.length > 1) {
      codePoint = ((codePoint - 0xD800) * 0x400) + (chr.charCodeAt(1) - 0xDC00) + 0x10000;
    }

    return '&#' + codePoint + ';';
  }

  var Entities = {
    /**
     * Encodes the specified string using raw entities. This means only the required XML base
     * entities will be encoded; every other character is passed through unchanged.
     *
     * @method encodeRaw
     * @param {String} text Text to encode.
     * @param {Boolean} attr Optional flag to specify if the text is attribute contents.
     * @return {String} Entity encoded text.
     */
    encodeRaw: function(text, attr) {
      return text.replace(attr ? attrsCharsRegExp : textCharsRegExp, function(chr) {
        return baseEntities[chr] || chr;
      });
    },

    /**
     * Encodes the specified text with both the attribute and the text entities. This function
     * will produce larger text contents since it doesn't know if the context is within an
     * attribute or a text node. This was added for compatibility and is exposed as the
     * DOMUtils.encode function. The input is converted to a string first.
     *
     * @method encodeAllRaw
     * @param {String} text Text to encode.
     * @return {String} Entity encoded text.
     */
    encodeAllRaw: function(text) {
      return ('' + text).replace(rawCharsRegExp, function(chr) {
        return baseEntities[chr] || chr;
      });
    },

    /**
     * Encodes the specified string using numeric entities. The core entities will be
     * encoded as named ones but all non lower ascii characters will be encoded into
     * numeric entities. A surrogate pair becomes a single entity.
     *
     * @method encodeNumeric
     * @param {String} text Text to encode.
     * @param {Boolean} attr Optional flag to specify if the text is attribute contents.
     * @return {String} Entity encoded text.
     */
    encodeNumeric: function(text, attr) {
      return text.replace(attr ? attrsCharsRegExp : textCharsRegExp, function(chr) {
        return baseEntities[chr] || toNumericEntity(chr);
      });
    },

    /**
     * Encodes the specified string using named entities. The core entities will be encoded
     * as named ones and all other matched characters are encoded when the lookup has a
     * name for them; characters without a name are passed through unchanged.
     *
     * @method encodeNamed
     * @param {String} text Text to encode.
     * @param {Boolean} attr Optional flag to specify if the text is attribute contents.
     * @param {Object} entities Optional lookup with entities to use instead of the built in set.
     * @return {String} Entity encoded text.
     */
    encodeNamed: function(text, attr, entities) {
      entities = entities || namedEntities;

      return text.replace(attr ? attrsCharsRegExp : textCharsRegExp, function(chr) {
        return baseEntities[chr] || entities[chr] || chr;
      });
    },

    /**
     * Returns an encode function based on the name(s) and its optional entities.
     *
     * @method getEncodeFunc
     * @param {String} name Comma separated list of encoders for example named,numeric.
     * @param {String} entities Optional parameter with entities to use instead of the built in set.
     * @return {function} Encode function to be used.
     */
    getEncodeFunc: function(name, entities) {
      // Falls back to the built in table when no custom list was passed
      entities = buildEntitiesLookup(entities) || namedEntities;

      // Named entity where one exists, numeric entity for everything else. Surrogate pairs
      // go through toNumericEntity so they produce one entity for the whole code point
      // rather than an entity for the high surrogate alone.
      function encodeNamedAndNumeric(text, attr) {
        return text.replace(attr ? attrsCharsRegExp : textCharsRegExp, function(chr) {
          return baseEntities[chr] || entities[chr] || toNumericEntity(chr);
        });
      }

      function encodeCustomNamed(text, attr) {
        return Entities.encodeNamed(text, attr, entities);
      }

      // Replace + with , to be compatible with previous TinyMCE versions
      name = makeMap(name.replace(/\+/g, ','));

      // Named and numeric encoder
      if (name.named && name.numeric) {
        return encodeNamedAndNumeric;
      }

      // Named encoder; entities already holds either the custom or the built in table
      if (name.named) {
        return encodeCustomNamed;
      }

      // Numeric
      if (name.numeric) {
        return Entities.encodeNumeric;
      }

      // Raw encoder
      return Entities.encodeRaw;
    },

    /**
     * Decodes the specified string, this will replace entities with raw UTF characters.
     * Numeric references that do not describe a valid code point (unparsable digits or a
     * value above U+10FFFF) are left in the text as they are.
     *
     * @method decode
     * @param {String} text Text to entity decode.
     * @return {String} Entity decoded string.
     */
    decode: function(text) {
      return text.replace(entityRegExp, function(all, numeric, value) {
        var radix, digits, code;

        if (numeric) {
          radix = numeric.length === 2 ? 16 : 10;
          digits = value;

          // entityRegExp only captures a lower case "#x" prefix; with "&#X41;" the "X"
          // ends up at the start of the value, so treat that as hexadecimal as well
          if (radix === 10 && /^x/i.test(value)) {
            radix = 16;
            digits = value.substr(1);
          }

          code = parseInt(digits, radix);

          // Not a code point: keep the original text instead of emitting U+0000 or
          // an invalid surrogate sequence
          if (isNaN(code) || code > 0x10FFFF) {
            return all;
          }

          // Support upper UTF by splitting the code point into a surrogate pair
          if (code > 0xFFFF) {
            code -= 0x10000;

            return String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
          }

          return asciiMap[code] || String.fromCharCode(code);
        }

        return reverseEntities[all] || namedEntities[all] || nativeDecode(all);
      });
    }
  };
  229. return Entities;
  230. });