javascriptpacker.php 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741
  1. <?php
  2. /* 9 April 2008. version 1.1
  3. *
  4. * This is the php version of the Dean Edwards JavaScript's Packer,
  5. * Based on :
  6. *
  7. * ParseMaster, version 1.0.2 (2005-08-19) Copyright 2005, Dean Edwards
  8. * a multi-pattern parser.
  9. * KNOWN BUG: erroneous behavior when using escapeChar with a replacement
  10. * value that is a function
  11. *
  12. * packer, version 2.0.2 (2005-08-19) Copyright 2004-2005, Dean Edwards
  13. *
  14. * License: http://creativecommons.org/licenses/LGPL/2.1/
  15. *
  16. * Ported to PHP by Nicolas Martin.
  17. *
  18. * ----------------------------------------------------------------------
  19. * changelog:
  20. * 1.1 : correct a bug, '\0' packed then unpacked becomes '\'.
  21. * ----------------------------------------------------------------------
  22. *
  23. * examples of usage :
  24. * $myPacker = new JavaScriptPacker($script, 62, true, false);
  25. * $packed = $myPacker->pack();
  26. *
  27. * or
  28. *
  29. * $myPacker = new JavaScriptPacker($script, 'Normal', true, false);
  30. * $packed = $myPacker->pack();
  31. *
  32. * or (default values)
  33. *
  34. * $myPacker = new JavaScriptPacker($script);
  35. * $packed = $myPacker->pack();
  36. *
  37. *
  38. * params of the constructor :
  39. * $script: the JavaScript to pack, string.
  40. * $encoding: level of encoding, int or string :
  41. * 0,10,62,95 or 'None', 'Numeric', 'Normal', 'High ASCII'.
  42. * default: 62.
  43. * $fastDecode: include the fast decoder in the packed result, boolean.
  44. * default : true.
  45. * $specialChars: set to true if you have flagged your private and local
  46. * variables in the script (see the usage page linked below), boolean.
  47. * default: false.
  48. *
  49. * The pack() method returns the compressed JavaScript, as a string.
  50. *
  51. * see http://dean.edwards.name/packer/usage/ for more information.
  52. *
  53. * Notes :
  54. * # need PHP 5 . Tested with PHP 5.1.2, 5.1.3, 5.1.4, 5.2.3
  55. *
  56. * # The packed result may differ from the one produced by the Dean Edwards
  57. * version, but it has the same length. The reason is that the PHP
  58. * function usort does not necessarily preserve the
  59. * original order of two equal members. The JavaScript sort function
  60. * does in fact preserve this order (although that is not required by the
  61. * ECMAScript standard). So the order of the encoded keywords can
  62. * differ between the two results.
  63. *
  64. * # Be careful with the 'High ASCII' Level encoding if you use
  65. * UTF-8 in your files...
  66. */
  /**
   * PHP port of Dean Edwards' JavaScript packer.
   *
   * The packer runs up to three passes over the script, each implemented with
   * a ParseMaster instance:
   *   1. _basicCompression   - strips comments and white-space;
   *   2. _encodeSpecialChars - shortens "$name" / "_name" identifiers
   *                            (only when $_specialChars is true);
   *   3. _encodeKeywords     - replaces every word by a short code and wraps
   *                            the result in an eval() boot-strap
   *                            (only when the encoding level is non-zero).
   *
   * Usage:
   *   $packer = new JavaScriptPacker($script, 62, true, false);
   *   $packed = $packer->pack();
   */
  class JavaScriptPacker
  {
      /** Replacement value telling ParseMaster to keep the match untouched. */
      const IGNORE = '$1';

      // ------------------------------------------------------------------
      // JavaScript snippets emitted into the packed output.
      // The functions are written with explicit braces around loop and
      // conditional bodies. Each constant is named "JSFUNCTION" followed by
      // the name of the matching PHP method, see _getJSFunction().
      // ------------------------------------------------------------------

      // Unpacking (boot-strap) function; the packed data is passed to it as
      // arguments in the generated code. NOTE: no trailing ';'.
      const JSFUNCTION_unpack =
'function($packed, $ascii, $count, $keywords, $encode, $decode) {
    while ($count--) {
        if ($keywords[$count]) {
            $packed = $packed.replace(new RegExp(\'\\\\b\' + $encode($count) + \'\\\\b\', \'g\'), $keywords[$count]);
        }
    }
    return $packed;
}';

      // Code snippet inserted into the unpacker to speed up decoding.
      const JSFUNCTION_decodeBody =
' if (!\'\'.replace(/^/, String)) {
        // decode all the values we need
        while ($count--) {
            $decode[$encode($count)] = $keywords[$count] || $encode($count);
        }
        // global replacement function
        $keywords = [function ($encoded) {return $decode[$encoded]}];
        // generic match
        $encode = function () {return \'\\\\w+\'};
        // reset the loop counter - we are now doing a global replace
        $count = 1;
    }
';

      // Zero encoding; characters: 0123456789
      const JSFUNCTION_encode10 =
'function($charCode) {
    return $charCode;
}';

      // Inherent base36 support; characters: 0-9a-z
      const JSFUNCTION_encode36 =
'function($charCode) {
    return $charCode.toString(36);
}';

      // Base36 plus the upper case alpha characters; characters: 0-9a-zA-Z
      const JSFUNCTION_encode62 =
'function($charCode) {
    return ($charCode < _encoding ? \'\' : arguments.callee(parseInt($charCode / _encoding))) +
    (($charCode = $charCode % _encoding) > 35 ? String.fromCharCode($charCode + 29) : $charCode.toString(36));
}';

      // High-ascii values; character codes start at 161
      const JSFUNCTION_encode95 =
'function($charCode) {
    return ($charCode < _encoding ? \'\' : arguments.callee($charCode / _encoding)) +
        String.fromCharCode($charCode % _encoding + 161);
}';

      /** The script to pack (a "\n" is appended by the constructor). */
      private $_script = '';
      /** Numeric encoding level, capped at 95 by the constructor. */
      private $_encoding = 62;
      /** Whether the fast decoder is included in the packed result. */
      private $_fastDecode = true;
      /** Whether "$name" / "_name" identifiers are shortened. */
      private $_specialChars = false;
      /** Maps the symbolic encoding names to their numeric level. */
      private $LITERAL_ENCODING = array(
          'None' => 0,
          'Numeric' => 10,
          'Normal' => 62,
          'High ASCII' => 95
      );
      /** Names of the parsing methods that pack() will run, in order. */
      private $_parsers = array();
      /** word => number of occurrences; filled by _analyze(), read by _sortWords(). */
      private $_count = array();
      /** Scratch value handed to the preg_replace_callback helpers. */
      private $buffer;

      /**
       * @param string     $_script       the JavaScript to pack
       * @param int|string $_encoding     0, 10, 62, 95 or 'None', 'Numeric',
       *                                  'Normal', 'High ASCII'
       * @param bool       $_fastDecode   include the fast decoder in the output
       * @param bool       $_specialChars shorten flagged private/local variables
       */
      public function __construct($_script, $_encoding = 62, $_fastDecode = true, $_specialChars = false)
      {
          $this->_script = $_script . "\n";
          // Translate a symbolic level ('Normal', ...) into its number.
          if (array_key_exists($_encoding, $this->LITERAL_ENCODING)) {
              $_encoding = $this->LITERAL_ENCODING[$_encoding];
          }
          $this->_encoding = min((int)$_encoding, 95);
          $this->_fastDecode = $_fastDecode;
          $this->_specialChars = $_specialChars;
      }

      /**
       * Packs the script given to the constructor.
       *
       * @return string the packed JavaScript
       */
      public function pack()
      {
          // Start from an empty pipeline: without this, calling pack() a
          // second time on the same instance would queue every pass twice.
          $this->_parsers = array();

          $this->_addParser('_basicCompression');
          if ($this->_specialChars) {
              $this->_addParser('_encodeSpecialChars');
          }
          if ($this->_encoding) {
              $this->_addParser('_encodeKeywords');
          }
          return $this->_pack($this->_script);
      }

      /**
       * Applies every queued parsing routine, feeding each one the output
       * of the previous one.
       */
      private function _pack($script)
      {
          foreach ($this->_parsers as $parser) {
              $script = call_user_func(array($this, $parser), $script);
          }
          return $script;
      }

      /** Queues the name of a parsing method for _pack(). */
      private function _addParser($parser)
      {
          $this->_parsers[] = $parser;
      }

      /**
       * Zero encoding - just removal of white-space and comments.
       */
      private function _basicCompression($script)
      {
          $parser = new ParseMaster();
          // make safe
          $parser->escapeChar = '\\';
          // protect strings
          $parser->add('/\'[^\'\\n\\r]*\'/', self::IGNORE);
          $parser->add('/"[^"\\n\\r]*"/', self::IGNORE);
          // remove comments
          $parser->add('/\\/\\/[^\\n\\r]*[\\n\\r]/', ' ');
          $parser->add('/\\/\\*[^*]*\\*+([^\\/][^*]*\\*+)*\\//', ' ');
          // protect regular expressions
          $parser->add('/\\s+(\\/[^\\/\\n\\r\\*][^\\/\\n\\r]*\\/g?i?)/', '$2'); // IGNORE
          $parser->add('/[^\\w\\x24\\/\'"*)\\?:]\\/[^\\/\\n\\r\\*][^\\/\\n\\r]*\\/g?i?/', self::IGNORE);
          // remove: ;;; doSomething();
          if ($this->_specialChars) {
              $parser->add('/;;;[^\\n\\r]+[\\n\\r]/');
          }
          // remove redundant semi-colons
          $parser->add('/\\(;;\\)/', self::IGNORE); // protect for (;;) loops
          $parser->add('/;+\\s*([};])/', '$2');
          // apply the above
          $script = $parser->exec($script);

          // remove white-space; the patterns above stay registered, so the
          // second run applies both sets of patterns
          $parser->add('/(\\b|\\x24)\\s+(\\b|\\x24)/', '$2 $3');
          $parser->add('/([+\\-])\\s+([+\\-])/', '$2 $3');
          $parser->add('/\\s+/', '');
          return $parser->exec($script);
      }

      /**
       * Shortens flagged identifiers: $name -> n, $$name -> na, and
       * _name -> _0 (words starting with a double underscore are ignored).
       */
      private function _encodeSpecialChars($script)
      {
          $parser = new ParseMaster();
          // replace: $name -> n, $$name -> na
          $parser->add(
              '/((\\x24+)([a-zA-Z$_]+))(\\d*)/',
              array('fn' => '_replace_name')
          );
          // replace: _name -> _0, double-underscore (__name) is ignored
          $regexp = '/\\b_[A-Za-z\\d]\\w*/';
          // build the word list
          $keywords = $this->_analyze($script, $regexp, '_encodePrivate');
          $parser->add(
              $regexp,
              array(
                  'fn' => '_replace_encoded',
                  'data' => $keywords['encoded']
              )
          );
          return $parser->exec($script);
      }

      /**
       * Replaces every word of the script by its code and wraps the result
       * in the boot-strap produced by _bootStrap().
       */
      private function _encodeKeywords($script)
      {
          // escape high-ascii values already in the script (i.e. in strings)
          if ($this->_encoding > 62) {
              $script = $this->_escape95($script);
          }
          $parser = new ParseMaster();
          $encode = $this->_getEncoder($this->_encoding);
          // for high-ascii, don't encode single character low-ascii
          $regexp = ($this->_encoding > 62) ? '/\\w\\w+/' : '/\\w+/';
          // build the word list
          $keywords = $this->_analyze($script, $regexp, $encode);
          $parser->add(
              $regexp,
              array(
                  'fn' => '_replace_encoded',
                  'data' => $keywords['encoded']
              )
          );
          if (empty($script)) {
              return $script;
          }
          return $this->_bootStrap($parser->exec($script), $keywords);
      }

      /**
       * Builds the word list of a script.
       *
       * @param string $script the script to analyse
       * @param string $regexp pattern matching one word
       * @param string $encode name of the method turning an index into a code
       * @return array with the keys
       *         'sorted'    => index => word,
       *         'encoded'   => word => code,
       *         'protected' => index => true for words equal to a code
       */
      private function _analyze($script, $regexp, $encode)
      {
          $all = array();
          preg_match_all($regexp, $script, $all);
          $_sorted = array();    // list of words sorted by frequency
          $_encoded = array();   // dictionary of word->encoding
          $_protected = array(); // instances of "protected" words
          $all = $all[0];        // only the full matches are needed

          if (!empty($all)) {
              $unsorted = array();  // same list, not sorted
              $protected = array(); // "protected" words ("$code" -> index)
              $values = array();    // index -> code
              $this->_count = array();
              $j = 0;

              // Count the occurrences (used for sorting later), walking the
              // matches from last to first. Words are prefixed with '$' so
              // that numeric words are never turned into integer array keys.
              for ($i = count($all) - 1; $i >= 0; $i--) {
                  $word = '$' . $all[$i];
                  if (!isset($this->_count[$word])) {
                      $this->_count[$word] = 0;
                      $unsorted[$j] = $word;
                      // remember every code: a word of the script that is
                      // identical to a code could be mistaken for it
                      $values[$j] = call_user_func(array($this, $encode), $j);
                      $protected['$' . $values[$j]] = $j;
                      $j++;
                  }
                  $this->_count[$word]++;
              }

              // Words that are also used as codes get the code equal to the
              // word itself, e.g. if "do" falls within the encoding range
              // then "do" is encoded as "do". Their counter is zeroed so
              // they sort to the end of the list.
              for ($i = count($unsorted) - 1; $i >= 0; $i--) {
                  $word = $unsorted[$i];
                  if (isset($protected[$word])) {
                      $_sorted[$protected[$word]] = substr($word, 1);
                      $_protected[$protected[$word]] = true;
                      $this->_count[$word] = 0;
                  }
              }

              // Sort the words by frequency. Words with equal counts may
              // come out in a different order than in the JavaScript
              // version of the packer; the packed result is equivalent.
              usort($unsorted, array($this, '_sortWords'));

              // Fill the slots left free around the "protected" words.
              $j = 0;
              $total = count($unsorted);
              for ($i = 0; $i < $total; $i++) {
                  if (!isset($_sorted[$i])) {
                      $_sorted[$i] = substr($unsorted[$j++], 1);
                  }
                  $_encoded[$_sorted[$i]] = $values[$i];
              }
          }

          return array(
              'sorted' => $_sorted,
              'encoded' => $_encoded,
              'protected' => $_protected
          );
      }

      /** usort() comparator: most frequent word first. */
      private function _sortWords($match1, $match2)
      {
          return $this->_count[$match2] - $this->_count[$match1];
      }

      /**
       * Builds the boot function used for loading and decoding.
       *
       * @param string $packed   the encoded script
       * @param array  $keywords the result of _analyze()
       * @return string the "eval(...)" statement, terminated by "\n"
       */
      private function _bootStrap($packed, $keywords)
      {
          $ENCODE = $this->_safeRegExp('$encode\\($count\\)');

          // $packed: the packed script
          $packed = "'" . $this->_escape($packed) . "'";

          // $ascii: base for encoding
          $ascii = min(count($keywords['sorted']), $this->_encoding);
          if ($ascii == 0) {
              $ascii = 1;
          }

          // $count: number of words contained in the script
          $count = count($keywords['sorted']);

          // $keywords: list of words contained in the script; protected
          // words are blanked because their code is the word itself
          foreach ($keywords['protected'] as $i => $value) {
              $keywords['sorted'][$i] = '';
          }
          // convert from an array to the JavaScript expression building it
          ksort($keywords['sorted']);
          $keywords = "'" . implode('|', $keywords['sorted']) . "'.split('|')";

          $encode = ($this->_encoding > 62) ? '_encode95' : $this->_getEncoder($ascii);
          $encode = $this->_getJSFunction($encode);
          $encode = preg_replace('/_encoding/', '$ascii', $encode);
          $encode = preg_replace('/arguments\\.callee/', '$encode', $encode);
          $inline = '\\$count' . ($ascii > 10 ? '.toString(\\$ascii)' : '');

          // $decode: code snippet to speed up decoding
          $decode = '';
          if ($this->_fastDecode) {
              // create the decoder
              $decode = $this->_getJSFunction('_decodeBody');
              if ($this->_encoding > 62) {
                  $decode = preg_replace('/\\\\w/', '[\\xa1-\\xff]', $decode);
              } elseif ($ascii < 36) {
                  // perform the encoding inline for lower ascii values
                  $decode = preg_replace($ENCODE, $inline, $decode);
              }
              // special case: when $count==0 there are no keywords. The
              // basic shape of the unpacking function is kept and only the
              // loop counter reset is changed.
              if ($count == 0) {
                  $decode = preg_replace($this->_safeRegExp('($count)\\s*=\\s*1'), '$1=0', $decode, 1);
              }
          }

          // boot function
          $unpack = $this->_getJSFunction('_unpack');
          if ($this->_fastDecode) {
              // insert the decoder right after the first "{"
              $this->buffer = $decode;
              $unpack = preg_replace_callback('/\\{/', array($this, '_insertFastDecode'), $unpack, 1);
          }
          $unpack = preg_replace('/"/', "'", $unpack);
          if ($this->_encoding > 62) { // high-ascii
              // get rid of the word-boundaries for regexp matches
              $unpack = preg_replace('/\'\\\\\\\\b\'\s*\\+|\\+\s*\'\\\\\\\\b\'/', '', $unpack);
          }
          if ($ascii > 36 || $this->_encoding > 62 || $this->_fastDecode) {
              // insert the encode function right after the first "{"
              $this->buffer = $encode;
              $unpack = preg_replace_callback('/\\{/', array($this, '_insertFastEncode'), $unpack, 1);
          } else {
              // perform the encoding inline
              $unpack = preg_replace($ENCODE, $inline, $unpack);
          }

          // pack the boot function too
          $unpackPacker = new JavaScriptPacker($unpack, 0, false, true);
          $unpack = $unpackPacker->pack();

          // arguments
          $params = array($packed, $ascii, $count, $keywords);
          if ($this->_fastDecode) {
              $params[] = 0;
              $params[] = '{}';
          }
          $params = implode(',', $params);

          // the whole thing
          return 'eval(' . $unpack . '(' . $params . "))\n";
      }

      /** preg_replace_callback helper: "{" -> "{<decoder>;". */
      private function _insertFastDecode($match)
      {
          return '{' . $this->buffer . ';';
      }

      /** preg_replace_callback helper: "{" -> "{$encode=<encoder>;". */
      private function _insertFastEncode($match)
      {
          return '{$encode=' . $this->buffer . ';';
      }

      /** Returns the name of the encoding method matching a base. */
      private function _getEncoder($ascii)
      {
          if ($ascii > 62) {
              return '_encode95';
          }
          if ($ascii > 36) {
              return '_encode62';
          }
          if ($ascii > 10) {
              return '_encode36';
          }
          return '_encode10';
      }

      /** Zero encoding; characters: 0123456789 */
      private function _encode10($charCode)
      {
          return $charCode;
      }

      /** Inherent base36 support; characters: 0-9a-z */
      private function _encode36($charCode)
      {
          return base_convert($charCode, 10, 36);
      }

      /**
       * Base36 plus the upper case alpha characters; characters: 0-9a-zA-Z.
       * Digits above 35 map to chr(digit + 29), i.e. 36 -> 'A'.
       */
      private function _encode62($charCode)
      {
          $res = '';
          if ($charCode >= $this->_encoding) {
              $res = $this->_encode62((int)($charCode / $this->_encoding));
          }
          $charCode = $charCode % $this->_encoding;
          if ($charCode > 35) {
              return $res . chr($charCode + 29);
          }
          return $res . base_convert($charCode, 10, 36);
      }

      /**
       * High-ascii encoding: every digit becomes the byte chr(digit + 161).
       */
      private function _encode95($charCode)
      {
          $res = '';
          if ($charCode >= $this->_encoding) {
              // Integer quotient, as in _encode62(): recursing with a float
              // made the "%" below convert a fractional float to int.
              $res = $this->_encode95((int)($charCode / $this->_encoding));
          }
          return $res . chr(($charCode % $this->_encoding) + 161);
      }

      /** Turns a string containing "$" into a regexp matching it literally. */
      private function _safeRegExp($string)
      {
          return '/'.preg_replace('/\$/', '\\\$', $string).'/';
      }

      /** Encoder used for private names: index -> "_index". */
      private function _encodePrivate($charCode)
      {
          return "_" . $charCode;
      }

      /** Prefixes backslashes and single quotes with a backslash. */
      private function _escape($script)
      {
          return preg_replace('/([\\\\\'])/', '\\\$1', $script);
      }

      /** Protects the high-ascii bytes (0xa1-0xff) already in the script. */
      private function _escape95($script)
      {
          return preg_replace_callback(
              '/[\\xa1-\\xff]/',
              array($this, '_escape95Bis'),
              $script
          );
      }

      /**
       * preg_replace_callback helper: matched byte -> "\x" + hex code.
       * $match is the array of matches, the byte itself is $match[0].
       */
      private function _escape95Bis($match)
      {
          return '\x'.((string)dechex(ord($match[0])));
      }

      /**
       * Returns the JavaScript source stored in the class constant named
       * "JSFUNCTION" . $aName, or '' when there is no such constant.
       */
      private function _getJSFunction($aName)
      {
          if (defined('self::JSFUNCTION'.$aName)) {
              return constant('self::JSFUNCTION'.$aName);
          }
          return '';
      }
  }
  /**
   * Multi-pattern parser (PHP port of Dean Edwards' ParseMaster).
   *
   * Patterns are registered with add() and applied together by exec():
   * all expressions are joined into one alternation, each wrapped in its
   * own capturing group, and a single global replacement is performed.
   *
   * A replacement may be:
   *   - a plain string;
   *   - "$N" alone: the N-th sub-expression of the pattern is kept
   *     ($1 is the whole match of the pattern, $2 its first group, ...);
   *   - a string containing several "$N" references;
   *   - array('fn' => methodName [, 'data' => mixed]): the named private
   *     method of this class is called with (matches, offset).
   */
  class ParseMaster
  {
      public $ignoreCase = false;
      public $escapeChar = '';

      // indexes into a stored pattern
      const EXPRESSION = 0;
      const REPLACEMENT = 1;
      const LENGTH = 2;

      // used to determine nesting levels
      private $GROUPS = '/\\(/';
      private $SUB_REPLACE = '/\\$\\d/';
      private $INDEXED = '/^\\$\\d+$/';
      private $ESCAPE = '/\\\./';
      private $DELETED = '/\\x01[^\\x01]*\\x01/';

      /** Characters removed by _escape(), in order of appearance. */
      private $_escaped = array();
      /** Registered patterns: list of array(expression, replacement, length). */
      private $_patterns = array();
      /**
       * Scratch value shared with the callbacks, because no additional data
       * can be passed through preg_replace_callback.
       */
      private $buffer;

      /**
       * Registers a pattern.
       *
       * @param string       $expression  a "/"-delimited regular expression
       *                                  without trailing modifiers
       * @param string|array $replacement see the class description
       */
      public function add($expression, $replacement = '')
      {
          // count the number of sub-expressions
          // - add one because each pattern is itself a sub-expression
          $length = 1 + preg_match_all($this->GROUPS, $this->_internalEscape((string)$expression), $out);

          // only string replacements need pre-processing
          if (is_string($replacement) && preg_match($this->SUB_REPLACE, $replacement)) {
              if (preg_match($this->INDEXED, $replacement)) {
                  // a simple lookup (e.g. "$2"): store the zero-based index
                  $replacement = (int)(substr($replacement, 1)) - 1;
              } else {
                  // a complicated lookup (e.g. "Hello $2 $1")
                  $replacement = array(
                      'fn' => '_backReferences',
                      'data' => array(
                          'replacement' => $replacement,
                          'length' => $length
                      )
                  );
              }
          }

          if (!empty($expression)) {
              $this->_add($expression, $replacement, $length);
          } else {
              $this->_add('/^$/', $replacement, $length);
          }
      }

      /**
       * Applies every registered pattern to a string.
       *
       * @param string $string
       * @return string the string after replacement
       */
      public function exec($string)
      {
          $this->_escaped = array();

          // one capturing group per pattern, delimiters stripped
          $alternatives = array();
          foreach ($this->_patterns as $pattern) {
              $alternatives[] = '(' . substr($pattern[self::EXPRESSION], 1, -1) . ')';
          }

          $string = $this->_escape($string, $this->escapeChar);
          // With no pattern registered there is nothing to replace; building
          // the alternation anyway would yield the invalid expression "/".
          if (!empty($alternatives)) {
              $regexp = '/' . implode('|', $alternatives) . '/'
                  . ($this->ignoreCase ? 'i' : '');
              $string = preg_replace_callback(
                  $regexp,
                  array($this, '_replacement'),
                  $string
              );
          }
          $string = $this->_unescape($string, $this->escapeChar);
          // drop the spans that _replacement() flagged for deletion
          return preg_replace($this->DELETED, '', $string);
      }

      /** Clears the patterns collection so that this object may be re-used. */
      public function reset()
      {
          $this->_patterns = array();
      }

      /** Stores its arguments (expression, replacement, length) as a pattern. */
      private function _add()
      {
          $arguments = func_get_args();
          $this->_patterns[] = $arguments;
      }

      /**
       * Global replace callback: finds which pattern produced the match and
       * returns the replacement for it.
       *
       * @param array $arguments the matches given by preg_replace_callback
       */
      private function _replacement($arguments)
      {
          if (empty($arguments)) {
              return '';
          }
          // $i is the index of the group wrapping the current pattern
          $i = 1;
          foreach ($this->_patterns as $pattern) {
              if (isset($arguments[$i]) && $arguments[$i] != '') {
                  $replacement = $pattern[self::REPLACEMENT];
                  if (is_array($replacement) && isset($replacement['fn'])) {
                      if (isset($replacement['data'])) {
                          $this->buffer = $replacement['data'];
                      }
                      return call_user_func(array($this, $replacement['fn']), $arguments, $i);
                  }
                  if (is_int($replacement)) {
                      // Trailing groups that did not take part in the match
                      // are absent from $arguments: treat them as empty.
                      $index = $replacement + $i;
                      return isset($arguments[$index]) ? $arguments[$index] : '';
                  }
                  // A match containing the escape character is kept between
                  // \x01 markers; exec() removes such spans afterwards.
                  $delete = ($this->escapeChar == '' ||
                      strpos($arguments[$i], $this->escapeChar) === false)
                      ? '' : "\x01" . $arguments[$i] . "\x01";
                  return $delete . $replacement;
              }
              // skip over references to sub-expressions
              $i += $pattern[self::LENGTH];
          }
          return '';
      }

      /**
       * Expands the "$N" references of a replacement string.
       * Expects $this->buffer to hold the 'replacement' and 'length' data
       * prepared by add().
       */
      private function _backReferences($match, $offset)
      {
          $pairs = array();
          for ($i = $this->buffer['length']; $i > 0; $i--) {
              $index = $offset + $i - 1;
              $pairs['$' . $i] = isset($match[$index]) ? $match[$index] : '';
          }
          // strtr() substitutes in one pass (longest key first), so captured
          // text that itself contains "$N" is not expanded a second time.
          return strtr($this->buffer['replacement'], $pairs);
      }

      /**
       * Callback shortening "$name" style identifiers: keeps as many leading
       * characters of the name as there are "$" signs, followed by the
       * trailing digits.
       */
      private function _replace_name($match, $offset)
      {
          $length = strlen($match[$offset + 2]);
          $start = $length - max($length - strlen($match[$offset + 3]), 0);
          return substr($match[$offset + 1], $start, $length) . $match[$offset + 4];
      }

      /**
       * Callback looking the matched word up in the dictionary stored in
       * $this->buffer. A word missing from the dictionary is left unchanged.
       */
      private function _replace_encoded($match, $offset)
      {
          $word = $match[$offset];
          return isset($this->buffer[$word]) ? $this->buffer[$word] : $word;
      }

      /**
       * Encodes escaped characters: every "<escapeChar><char>" pair is
       * reduced to the escape character alone, the character being stored
       * in $_escaped for _unescape().
       */
      private function _escape($string, $escapeChar)
      {
          if (!$escapeChar) {
              return $string;
          }
          $this->buffer = $escapeChar;
          return preg_replace_callback(
              '/' . preg_quote($escapeChar, '/') . '(.)/',
              array($this, '_escapeBis'),
              $string
          );
      }

      private function _escapeBis($match)
      {
          $this->_escaped[] = $match[1];
          return $this->buffer;
      }

      /**
       * Decodes escaped characters: gives every escape character back the
       * character stored for it by _escape(), in order.
       */
      private function _unescape($string, $escapeChar)
      {
          if (!$escapeChar) {
              return $string;
          }
          $this->buffer = array('escapeChar' => $escapeChar, 'i' => 0);
          return preg_replace_callback(
              '/' . preg_quote($escapeChar, '/') . '/',
              array($this, '_unescapeBis'),
              $string
          );
      }

      private function _unescapeBis()
      {
          $index = $this->buffer['i'];
          $stored = (isset($this->_escaped[$index]) && $this->_escaped[$index] != '')
              ? $this->_escaped[$index]
              : '';
          $this->buffer['i']++;
          return $this->buffer['escapeChar'] . $stored;
      }

      /** Removes backslash-escaped characters from an expression. */
      private function _internalEscape($string)
      {
          return preg_replace($this->ESCAPE, '', $string);
      }
  }
  659. ?>