iconv.php 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665
  1. <?php
  2. // +----------------------------------------------------------------------
  3. // | Fanwe 方维直播系统
  4. // +----------------------------------------------------------------------
  5. // | Copyright (c) 2011 http://www.fanwe.com All rights reserved.
  6. // +----------------------------------------------------------------------
  7. // | Author: 云淡风轻(1956838968@qq.com)
  8. // +----------------------------------------------------------------------
  9. class Chinese
  10. {
  11. /**
  12. * 存放 GB <-> UNICODE 对照表的内容
  13. * @变量类型
  14. * @访问 内部
  15. */
  16. var $unicode_table = array();
  17. /**
  18. * 访问中文繁简互换表的文件指针
  19. *
  20. * @变量类型 对象
  21. * @访问 内部
  22. */
  23. var $ctf;
  24. /**
  25. * 等待转换的字符串
  26. * @变量类型
  27. * @访问 内部
  28. */
  29. var $SourceText = '';
  30. /**
  31. * Chinese 的运行配置
  32. *
  33. * @变量类型 数组
  34. * @访问 公开
  35. */
  36. var $config = array(
  37. 'codetable_dir' => '', // 存放各种语言互换表的目录
  38. 'source_lang' => '', // 字符的原编码
  39. 'target_lang' => '', // 转换后的编码
  40. 'GBtoBIG5_table' => 'gb-big5.table', // 简体中文转换为繁体中文的对照表
  41. 'BIG5toGB_table' => 'big5-gb.table', // 繁体中文转换为简体中文的对照表
  42. 'GBtoUTF8_table' => 'gb_utf8.php', // 简体中文转换为UTF-8的对照表
  43. 'BIG5toUTF8_table' => 'big5_utf8.php' // 繁体中文转换为UTF-8的对照表
  44. );
  45. var $iconv_enabled = false; // 是否存在 ICONV 模块,默认为否
  46. var $mbstring_enabled = false; // 是否存在 MBSTRING 模块,默认为否
  47. /**
  48. * Chinese 的悉构函数
  49. *
  50. * 详细说明
  51. * @形参 字符串 $source_lang 为需要转换的字符串的原编码
  52. * 字符串 $target_lang 为转换的目标编码
  53. * 字符串 $SourceText 为等待转换的字符串
  54. * @访问 公开
  55. * @返回值 无
  56. * @throws
  57. */
  58. function Chinese()
  59. {
  60. $this->config['codetable_dir'] = "codetable/";
  61. if (function_exists('iconv'))
  62. {
  63. $this->iconv_enabled = true;
  64. }
  65. if (PHP_VERSION >= '5.0' && function_exists('mb_convert_encoding') && function_exists('mb_list_encodings'))
  66. {
  67. $encodings = mb_list_encodings();
  68. if (in_array('UTF-8', $encodings) == true && in_array('BIG-5', $encodings) == true && in_array('CP936', $encodings) == true) // CP936 就是 GBK 字符集的别名
  69. {
  70. $this->mbstring_enabled = true;
  71. }
  72. }
  73. }
  74. function Convert($source_lang, $target_lang, $source_string = '')
  75. {
  76. /* 如果字符串为空或者字符串不需要转换,直接返回 */
  77. if ($source_string == '' || preg_match("/[\x80-\xFF]+/", $source_string) == 0)
  78. {
  79. return $source_string;
  80. }
  81. if ($source_lang)
  82. {
  83. $this->config['source_lang'] = $this->_lang($source_lang);
  84. }
  85. if ($target_lang)
  86. {
  87. $this->config['target_lang'] = $this->_lang($target_lang);
  88. }
  89. /* 如果编码相同,直接返回 */
  90. if ($this->config['source_lang'] == $this->config['target_lang'])
  91. {
  92. return $source_string;
  93. }
  94. $this->SourceText = $source_string;
  95. if (($this->iconv_enabled || $this->mbstring_enabled) && !($this->config['source_lang'] == 'GBK' && $this->config['target_lang'] == 'BIG-5'))
  96. {
  97. if ($this->config['target_lang'] != 'UNICODE')
  98. {
  99. $string = $this->_convert_iconv_mbstring($this->SourceText, $this->config['target_lang'], $this->config['source_lang']);
  100. /* 如果正确转换 */
  101. if ($string)
  102. {
  103. return $string;
  104. }
  105. }
  106. else
  107. {
  108. $string = '';
  109. $text = $SourceText;
  110. while ($text)
  111. {
  112. if (ord(substr($text, 0, 1)) > 127)
  113. {
  114. if ($this->config['source_lang'] != 'UTF-8')
  115. {
  116. $char = $this->_convert_iconv_mbstring(substr($text, 0, 2), 'UTF-8', $this->config['source_lang']);
  117. }
  118. else
  119. {
  120. $char = substr($text, 0, 3);
  121. }
  122. /* 如果转换出错 */
  123. if ($char == '')
  124. {
  125. $string = '';
  126. break;
  127. }
  128. switch (strlen($char))
  129. {
  130. case 1:
  131. $uchar = ord($char);
  132. break;
  133. case 2:
  134. $uchar = (ord($char[0]) & 0x3f) << 6;
  135. $uchar += ord($char[1]) & 0x3f;
  136. break;
  137. case 3:
  138. $uchar = (ord($char[0]) & 0x1f) << 12;
  139. $uchar += (ord($char[1]) & 0x3f) << 6;
  140. $uchar += ord($char[2]) & 0x3f;
  141. break;
  142. case 4:
  143. $uchar = (ord($char[0]) & 0x0f) << 18;
  144. $uchar += (ord($char[1]) & 0x3f) << 12;
  145. $uchar += (ord($char[2]) & 0x3f) << 6;
  146. $uchar += ord($char[3]) & 0x3f;
  147. break;
  148. }
  149. $string .= '&#x' . dechex($uchar) . ';';
  150. if ($this->config['source_lang'] != 'UTF-8')
  151. {
  152. $text = substr($text, 2);
  153. }
  154. else
  155. {
  156. $text = substr($text, 3);
  157. }
  158. }
  159. else
  160. {
  161. $string .= substr($text, 0, 1);
  162. $text = substr($text, 1);
  163. }
  164. }
  165. /* 如果正确转换 */
  166. if ($string)
  167. {
  168. return $string;
  169. }
  170. }
  171. }
  172. $this->OpenTable();
  173. // 判断是否为中文繁、简转换
  174. if (($this->config['source_lang'] == 'GBK' || $this->config['source_lang'] == 'BIG-5') && ($this->config['target_lang'] == 'GBK' || $this->config['target_lang'] == 'BIG-5'))
  175. {
  176. return $this->GBtoBIG5();
  177. }
  178. // 判断是否为简体、繁体中文与UTF8转换
  179. if (($this->config['source_lang'] == 'GBK' || $this->config['source_lang'] == 'BIG-5' || $this->config['source_lang'] == 'UTF-8') && ($this->config['target_lang'] == 'UTF-8' || $this->config['target_lang'] == 'GBK' || $this->config['target_lang'] == 'BIG-5'))
  180. {
  181. return $this->CHStoUTF8();
  182. }
  183. // 判断是否为简体、繁体中文与UNICODE转换
  184. if (($this->config['source_lang'] == 'GBK' || $this->config['source_lang'] == 'BIG-5') && $this->config['target_lang'] == 'UNICODE')
  185. {
  186. return $this->CHStoUNICODE();
  187. }
  188. }
  189. function _lang($lang)
  190. {
  191. $lang = strtoupper($lang);
  192. if (substr($lang, 0, 2) == 'GB')
  193. {
  194. return 'GBK';
  195. }
  196. else
  197. {
  198. switch(substr($lang, 0, 3))
  199. {
  200. case 'BIG':
  201. return 'BIG-5';
  202. case 'UTF':
  203. return 'UTF-8';
  204. case 'UNI':
  205. return 'UNICODE';
  206. default:
  207. return '';
  208. }
  209. }
  210. }
  211. function _convert_iconv_mbstring($string, $target_lang, $source_lang)
  212. {
  213. if ($this->iconv_enabled)
  214. {
  215. $return_string = @iconv($source_lang, $target_lang, $string);
  216. if ($return_string !== false)
  217. {
  218. return $return_string;
  219. }
  220. }
  221. if ($this->mbstring_enabled)
  222. {
  223. if ($source_lang == 'GBK')
  224. {
  225. $source_lang = 'CP936';
  226. }
  227. if ($target_lang == 'GBK')
  228. {
  229. $target_lang = 'CP936';
  230. }
  231. $return_string = @mb_convert_encoding($string, $target_lang, $source_lang);
  232. if ($return_string !== false)
  233. {
  234. return $return_string;
  235. }
  236. else
  237. {
  238. return false;
  239. }
  240. }
  241. }
  242. /**
  243. * 将 16 进制转换为 2 进制字符
  244. *
  245. * 详细说明
  246. * @形参 $hexdata 为16进制的编码
  247. * @访问 内部
  248. * @返回 字符串
  249. * @throws
  250. */
  251. function _hex2bin($hexdata)
  252. {
  253. $bindata = '';
  254. for ($i = 0, $count = strlen($hexdata); $i < $count; $i += 2)
  255. {
  256. $bindata .= chr(hexdec($hexdata{$i} . $hexdata{$i + 1}));
  257. }
  258. return $bindata;
  259. }
  260. /**
  261. * 打开对照表
  262. *
  263. * 详细说明
  264. * @形参
  265. * @访问 内部
  266. * @返回 无
  267. * @throws
  268. */
  269. function OpenTable()
  270. {
  271. static $gb_utf8_table = NULL;
  272. static $gb_unicode_table = NULL;
  273. static $utf8_gb_table = NULL;
  274. static $big5_utf8_table = NULL;
  275. static $big5_unicode_table = NULL;
  276. static $utf8_big5_table = NULL;
  277. // 假如原编码为简体中文的话
  278. if ($this->config['source_lang'] == 'GBK')
  279. {
  280. // 假如转换目标编码为繁体中文的话
  281. if ($this->config['target_lang'] == 'BIG-5')
  282. {
  283. $this->ctf = @fopen($this->config['codetable_dir'] . $this->config['GBtoBIG5_table'], 'rb');
  284. if (is_null($this->ctf))
  285. {
  286. echo '打开打开转换表文件失败!';
  287. exit;
  288. }
  289. }
  290. // 假如转换目标编码为 UTF8 的话
  291. if ($this->config['target_lang'] == 'UTF-8')
  292. {
  293. if ($gb_utf8_table === NULL)
  294. {
  295. require_once($this->config['codetable_dir'] . $this->config['GBtoUTF8_table']);
  296. }
  297. $this->unicode_table = $gb_utf8_table;
  298. }
  299. // 假如转换目标编码为 UNICODE 的话
  300. if ($this->config['target_lang'] == 'UNICODE')
  301. {
  302. if ($gb_unicode_table === NULL)
  303. {
  304. if (isset($gb_utf8_table) === false)
  305. {
  306. require_once($this->config['codetable_dir'] . $this->config['GBtoUTF8_table']);
  307. }
  308. foreach ($gb_utf8_table AS $key => $value)
  309. {
  310. $gb_unicode_table[$key] = substr($value, 2);
  311. }
  312. }
  313. $this->unicode_table = $gb_unicode_table;
  314. }
  315. }
  316. // 假如原编码为繁体中文的话
  317. if ($this->config['source_lang'] == 'BIG-5')
  318. {
  319. // 假如转换目标编码为简体中文的话
  320. if ($this->config['target_lang'] == 'GBK')
  321. {
  322. $this->ctf = @fopen($this->config['codetable_dir'] . $this->config['BIG5toGB_table'], 'rb');
  323. if (is_null($this->ctf))
  324. {
  325. echo '打开打开转换表文件失败!';
  326. exit;
  327. }
  328. }
  329. // 假如转换目标编码为 UTF8 的话
  330. if ($this->config['target_lang'] == 'UTF-8')
  331. {
  332. if ($big5_utf8_table === NULL)
  333. {
  334. require_once($this->config['codetable_dir'] . $this->config['BIG5toUTF8_table']);
  335. }
  336. $this->unicode_table = $big5_utf8_table;
  337. }
  338. // 假如转换目标编码为 UNICODE 的话
  339. if ($this->config['target_lang'] == 'UNICODE')
  340. {
  341. if ($big5_unicode_table === NULL)
  342. {
  343. if (isset($big5_utf8_table) === false)
  344. {
  345. require_once($this->config['codetable_dir'] . $this->config['BIG5toUTF8_table']);
  346. }
  347. foreach ($big5_utf8_table AS $key => $value)
  348. {
  349. $big5_unicode_table[$key] = substr($value, 2);
  350. }
  351. }
  352. $this->unicode_table = $big5_unicode_table;
  353. }
  354. }
  355. // 假如原编码为 UTF8 的话
  356. if ($this->config['source_lang'] == 'UTF-8')
  357. {
  358. // 假如转换目标编码为 GBK 的话
  359. if ($this->config['target_lang'] == 'GBK')
  360. {
  361. if ($utf8_gb_table === NULL)
  362. {
  363. if (isset($gb_utf8_table) === false)
  364. {
  365. require_once($this->config['codetable_dir'] . $this->config['GBtoUTF8_table']);
  366. }
  367. foreach ($gb_utf8_table AS $key => $value)
  368. {
  369. $utf8_gb_table[hexdec($value)] = '0x' . dechex($key);
  370. }
  371. }
  372. $this->unicode_table = $utf8_gb_table;
  373. }
  374. // 假如转换目标编码为 BIG5 的话
  375. if ($this->config['target_lang'] == 'BIG-5')
  376. {
  377. if ($utf8_big5_table === NULL)
  378. {
  379. if (isset($big5_utf8_table) === false)
  380. {
  381. require_once($this->config['codetable_dir'] . $this->config['BIG5toUTF8_table']);
  382. }
  383. foreach ($big5_utf8_table AS $key => $value)
  384. {
  385. $utf8_big5_table[hexdec($value)] = '0x' . dechex($key);
  386. }
  387. }
  388. $this->unicode_table = $utf8_big5_table;
  389. }
  390. }
  391. }
  392. /**
  393. * 将简体、繁体中文的 UNICODE 编码转换为 UTF8 字符
  394. *
  395. * 详细说明
  396. * @形参 数字 $c 简体中文汉字的UNICODE编码的10进制
  397. * @访问 内部
  398. * @返回 字符串
  399. * @throws
  400. */
  401. function CHSUtoUTF8($c)
  402. {
  403. $str='';
  404. if ($c < 0x80)
  405. {
  406. $str .= $c;
  407. }
  408. elseif ($c < 0x800)
  409. {
  410. $str .= (0xC0 | $c >> 6);
  411. $str .= (0x80 | $c & 0x3F);
  412. }
  413. elseif ($c < 0x10000)
  414. {
  415. $str .= (0xE0 | $c >> 12);
  416. $str .= (0x80 | $c >> 6 & 0x3F);
  417. $str .= (0x80 | $c & 0x3F);
  418. }
  419. elseif ($c < 0x200000)
  420. {
  421. $str .= (0xF0 | $c >> 18);
  422. $str .= (0x80 | $c >> 12 & 0x3F);
  423. $str .= (0x80 | $c >> 6 & 0x3F);
  424. $str .= (0x80 | $c & 0x3F);
  425. }
  426. return $str;
  427. }
  428. /**
  429. * 简体、繁体中文 <-> UTF8 互相转换的函数
  430. *
  431. * 详细说明
  432. * @形参
  433. * @访问 内部
  434. * @返回 字符串
  435. * @throws
  436. */
  437. function CHStoUTF8()
  438. {
  439. if ($this->config['source_lang'] == 'BIG-5' || $this->config['source_lang'] == 'GBK')
  440. {
  441. $ret = '';
  442. while ($this->SourceText)
  443. {
  444. if (ord($this->SourceText{0}) > 127)
  445. {
  446. if ($this->config['source_lang'] == 'BIG-5')
  447. {
  448. $utf8 = $this->CHSUtoUTF8(hexdec(@$this->unicode_table[hexdec(bin2hex($this->SourceText{0} . $this->SourceText{1}))]));
  449. }
  450. if ($this->config['source_lang'] == 'GBK')
  451. {
  452. $utf8 = $this->CHSUtoUTF8(hexdec(@$this->unicode_table[hexdec(bin2hex($this->SourceText{0} . $this->SourceText{1})) - 0x8080]));
  453. }
  454. for ($i = 0, $count = strlen($utf8); $i < $count; $i += 3)
  455. {
  456. $ret .= chr(substr($utf8, $i, 3));
  457. }
  458. $this->SourceText = substr($this->SourceText, 2, strlen($this->SourceText));
  459. }
  460. else
  461. {
  462. $ret .= $this->SourceText{0};
  463. $this->SourceText = substr($this->SourceText, 1, strlen($this->SourceText));
  464. }
  465. }
  466. $this->unicode_table = array();
  467. $this->SourceText = '';
  468. return $ret;
  469. }
  470. if ($this->config['source_lang'] == 'UTF-8')
  471. {
  472. $i = 0;
  473. $out = '';
  474. $len = strlen($this->SourceText);
  475. while ($i < $len)
  476. {
  477. $c = ord($this->SourceText{$i++});
  478. switch($c >> 4)
  479. {
  480. case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
  481. // 0xxxxxxx
  482. $out .= $this->SourceText{$i - 1};
  483. break;
  484. case 12: case 13:
  485. // 110x xxxx 10xx xxxx
  486. $char2 = ord($this->SourceText{$i++});
  487. $char3 = @$this->unicode_table[(($c & 0x1F) << 6) | ($char2 & 0x3F)];
  488. if ($this->config['target_lang'] == 'GBK')
  489. {
  490. $out .= $this->_hex2bin(dechex($char3 + 0x8080));
  491. }
  492. elseif ($this->config['target_lang'] == 'BIG-5')
  493. {
  494. $out .= $this->_hex2bin(dechex($char3 + 0x0000));
  495. }
  496. break;
  497. case 14:
  498. // 1110 xxxx 10xx xxxx 10xx xxxx
  499. $char2 = ord($this->SourceText{$i++});
  500. $char3 = ord($this->SourceText{$i++});
  501. $char4 = @$this->unicode_table[(($c & 0x0F) << 12) | (($char2 & 0x3F) << 6) | (($char3 & 0x3F) << 0)];
  502. if ($this->config['target_lang'] == 'GBK')
  503. {
  504. $out .= $this->_hex2bin(dechex($char4 + 0x8080));
  505. } elseif ($this->config['target_lang'] == 'BIG-5')
  506. {
  507. $out .= $this->_hex2bin(dechex($char4 + 0x0000));
  508. }
  509. break;
  510. }
  511. }
  512. // 返回结果
  513. return $out;
  514. }
  515. }
  516. /**
  517. * 简体、繁体中文转换为 UNICODE编码
  518. *
  519. * 详细说明
  520. * @形参
  521. * @访问 内部
  522. * @返回 字符串
  523. * @throws
  524. */
  525. function CHStoUNICODE()
  526. {
  527. $utf = '';
  528. while ($this->SourceText)
  529. {
  530. if (ord($this->SourceText{0}) > 127)
  531. {
  532. if ($this->config['source_lang'] == 'GBK')
  533. {
  534. $utf .= '&#x' . $this->unicode_table[hexdec(bin2hex($this->SourceText{0} . $this->SourceText{1})) - 0x8080] . ';';
  535. }
  536. elseif ($this->config['source_lang'] == 'BIG-5')
  537. {
  538. $utf .= '&#x' . $this->unicode_table[hexdec(bin2hex($this->SourceText{0} . $this->SourceText{1}))] . ';';
  539. }
  540. $this->SourceText = substr($this->SourceText, 2, strlen($this->SourceText));
  541. }
  542. else
  543. {
  544. $utf .= $this->SourceText{0};
  545. $this->SourceText = substr($this->SourceText, 1, strlen($this->SourceText));
  546. }
  547. }
  548. return $utf;
  549. }
  550. /**
  551. * 简体中文 <-> 繁体中文 互相转换的函数
  552. *
  553. * 详细说明
  554. * @访问 内部
  555. * @返回值 经过编码的utf8字符
  556. * @throws
  557. */
  558. function GBtoBIG5()
  559. {
  560. // 获取等待转换的字符串的总长度
  561. $max = strlen($this->SourceText) - 1;
  562. for ($i = 0; $i < $max; $i++)
  563. {
  564. $h = ord($this->SourceText{$i});
  565. if ($h >= 160)
  566. {
  567. $l = ord($this->SourceText{$i + 1});
  568. if ($h == 161 && $l == 64)
  569. {
  570. $gb = ' ';
  571. }
  572. else
  573. {
  574. fseek($this->ctf, ($h - 160) * 510 + ($l - 1) * 2);
  575. $gb = fread($this->ctf, 2);
  576. }
  577. $this->SourceText{$i} = $gb{0};
  578. $this->SourceText{$i + 1} = $gb{1};
  579. $i++;
  580. }
  581. }
  582. fclose($this->ctf);
  583. // 将转换后的结果赋予 $result;
  584. $result = $this->SourceText;
  585. // 清空 $thisSourceText
  586. $this->SourceText = '';
  587. // 返回转换结果
  588. return $result;
  589. }
  590. }
  591. ?>