<?php
// +----------------------------------------------------------------------
// | Fanwe 方维直播系统
// +----------------------------------------------------------------------
// | Copyright (c) 2011 http://www.fanwe.com All rights reserved.
// +----------------------------------------------------------------------
// | Author: 云淡风轻(1956838968@qq.com)
// +----------------------------------------------------------------------
/**
 * Converts strings between Simplified Chinese (GBK), Traditional Chinese
 * (BIG-5), UTF-8 and "UNICODE" (ASCII text with &#x....; hexadecimal
 * numeric character references).
 *
 * iconv / mbstring are used when available; otherwise the class falls back
 * to code tables loaded from $config['codetable_dir'].
 *
 * Typical use:
 *     $converter = new Chinese();
 *     $utf8 = $converter->Convert('gbk', 'utf-8', $gbkText);
 */
class Chinese
{
    /**
     * Lookup table used by the table-driven conversion that is currently
     * running. Filled by OpenTable().
     *
     * @var array
     */
    public $unicode_table = array();

    /**
     * File handle of the Simplified <-> Traditional conversion table.
     * Opened by OpenTable() and closed by GBtoBIG5().
     *
     * @var resource|false|null
     */
    public $ctf;

    /**
     * The string waiting to be converted.
     *
     * @var string
     */
    public $SourceText = '';

    /**
     * Runtime configuration.
     *
     * @var array
     */
    public $config = array(
        'codetable_dir'    => '',              // directory holding the conversion tables
        'source_lang'      => '',              // normalised source encoding
        'target_lang'      => '',              // normalised target encoding
        'GBtoBIG5_table'   => 'gb-big5.table', // Simplified -> Traditional table
        'BIG5toGB_table'   => 'big5-gb.table', // Traditional -> Simplified table
        'GBtoUTF8_table'   => 'gb_utf8.php',   // Simplified Chinese -> UTF-8 table
        'BIG5toUTF8_table' => 'big5_utf8.php'  // Traditional Chinese -> UTF-8 table
    );

    /** @var bool Whether the iconv extension is available. */
    public $iconv_enabled = false;

    /** @var bool Whether mbstring is available with UTF-8, BIG-5 and CP936 support. */
    public $mbstring_enabled = false;

    /**
     * Sets the code table directory and detects the available conversion
     * extensions.
     *
     * A class-named method is no longer treated as a constructor on PHP 8,
     * so the initialisation lives in __construct().
     */
    public function __construct()
    {
        $this->config['codetable_dir'] = "codetable/";

        if (function_exists('iconv'))
        {
            $this->iconv_enabled = true;
        }

        // version_compare() instead of comparing version strings with >=.
        if (version_compare(PHP_VERSION, '5.0', '>=')
            && function_exists('mb_convert_encoding')
            && function_exists('mb_list_encodings'))
        {
            $encodings = mb_list_encodings();
            // CP936 is the mbstring name of the GBK character set.
            if (in_array('UTF-8', $encodings, true)
                && in_array('BIG-5', $encodings, true)
                && in_array('CP936', $encodings, true))
            {
                $this->mbstring_enabled = true;
            }
        }
    }

    /**
     * Legacy PHP4-style entry point, kept so that explicit calls such as
     * parent::Chinese() keep working. It simply re-runs the initialisation.
     *
     * @return void
     */
    public function Chinese()
    {
        $this->__construct();
    }

    /**
     * Converts $source_string from $source_lang to $target_lang.
     *
     * Empty strings, pure-ASCII strings and conversions between identical
     * encodings are returned unchanged. When a language argument is empty
     * the value already stored in $config is reused.
     *
     * @param string $source_lang   Source encoding name (normalised by _lang()).
     * @param string $target_lang   Target encoding name (normalised by _lang()).
     * @param string $source_string Text to convert.
     * @return string|null Converted text, or null when no conversion route
     *                     exists for the requested pair.
     */
    public function Convert($source_lang, $target_lang, $source_string = '')
    {
        // Nothing to do for empty or pure-ASCII input.
        if ($source_string == '' || preg_match("/[\x80-\xFF]+/", $source_string) == 0)
        {
            return $source_string;
        }

        if ($source_lang)
        {
            $this->config['source_lang'] = $this->_lang($source_lang);
        }
        if ($target_lang)
        {
            $this->config['target_lang'] = $this->_lang($target_lang);
        }

        $source = $this->config['source_lang'];
        $target = $this->config['target_lang'];

        // Same encoding on both sides: return the input untouched.
        if ($source == $target)
        {
            return $source_string;
        }

        $this->SourceText = $source_string;
        $has_extension = ($this->iconv_enabled || $this->mbstring_enabled);

        if ($target == 'UNICODE')
        {
            // Bring the text to UTF-8 first, then turn every non-ASCII
            // character into a numeric character reference. UTF-8 input
            // needs no extension at all.
            $utf8 = false;
            if ($source == 'UTF-8')
            {
                $utf8 = $this->SourceText;
            }
            elseif ($has_extension)
            {
                $utf8 = $this->_convert_iconv_mbstring($this->SourceText, 'UTF-8', $source);
            }

            if (is_string($utf8) && $utf8 !== '')
            {
                $entities = $this->_utf8_to_entities($utf8);
                if ($entities !== false)
                {
                    return $entities;
                }
            }
        }
        elseif ($has_extension && !($source == 'GBK' && $target == 'BIG-5'))
        {
            // GBK -> BIG-5 is deliberately excluded here and always goes
            // through the dedicated table below.
            $converted = $this->_convert_iconv_mbstring($this->SourceText, $target, $source);
            if (is_string($converted) && $converted !== '')
            {
                return $converted;
            }
        }

        // Extension route unavailable or failed: fall back to the tables.
        $this->OpenTable();

        $chinese = array('GBK', 'BIG-5');

        // Simplified <-> Traditional Chinese.
        if (in_array($source, $chinese, true) && in_array($target, $chinese, true))
        {
            return $this->GBtoBIG5();
        }

        // Simplified / Traditional Chinese <-> UTF-8.
        if (in_array($source, array('GBK', 'BIG-5', 'UTF-8'), true)
            && in_array($target, array('UTF-8', 'GBK', 'BIG-5'), true))
        {
            return $this->CHStoUTF8();
        }

        // Simplified / Traditional Chinese -> UNICODE references.
        if (in_array($source, $chinese, true) && $target == 'UNICODE')
        {
            return $this->CHStoUNICODE();
        }

        // No route for this pair.
        return null;
    }

    /**
     * Normalises an encoding name to one of 'GBK', 'BIG-5', 'UTF-8' or
     * 'UNICODE', based on the leading characters of the upper-cased name.
     *
     * @param string $lang Encoding name, any case.
     * @return string Normalised name, or '' when it is not recognised.
     */
    public function _lang($lang)
    {
        $lang = strtoupper($lang);

        if (substr($lang, 0, 2) == 'GB')
        {
            return 'GBK';
        }

        switch (substr($lang, 0, 3))
        {
            case 'BIG':
                return 'BIG-5';
            case 'UTF':
                return 'UTF-8';
            case 'UNI':
                return 'UNICODE';
            default:
                return '';
        }
    }

    /**
     * Converts $string with iconv, falling back to mbstring.
     *
     * @param string $string      Text to convert.
     * @param string $target_lang Target encoding.
     * @param string $source_lang Source encoding.
     * @return string|false Converted text, or false when no enabled
     *                      extension could convert it.
     */
    public function _convert_iconv_mbstring($string, $target_lang, $source_lang)
    {
        if ($this->iconv_enabled)
        {
            // iconv raises a notice on illegal sequences; the false return
            // value is what is checked here.
            $return_string = @iconv($source_lang, $target_lang, $string);
            if ($return_string !== false)
            {
                return $return_string;
            }
        }

        if ($this->mbstring_enabled)
        {
            // mbstring knows GBK under the name CP936.
            if ($source_lang == 'GBK')
            {
                $source_lang = 'CP936';
            }
            if ($target_lang == 'GBK')
            {
                $target_lang = 'CP936';
            }

            $return_string = @mb_convert_encoding($string, $target_lang, $source_lang);
            if ($return_string !== false)
            {
                return $return_string;
            }
        }

        return false;
    }

    /**
     * Turns a hexadecimal string into the bytes it describes, two digits
     * per byte from the left. A trailing single digit becomes one byte.
     *
     * @param string $hexdata Hexadecimal digits.
     * @return string Binary string.
     */
    public function _hex2bin($hexdata)
    {
        $bindata = '';
        for ($i = 0, $count = strlen($hexdata); $i < $count; $i += 2)
        {
            $bindata .= chr(hexdec(substr($hexdata, $i, 2)));
        }
        return $bindata;
    }

    /**
     * Parses a hexadecimal table value, with or without a leading "0x".
     *
     * Hexadecimal strings are not numeric since PHP 7, so table values
     * must be parsed explicitly before doing arithmetic on them.
     *
     * @param mixed $hex Hexadecimal string such as '0x4e2d' or '4e2d'.
     * @return int|float Numeric value; 0 for an empty value.
     */
    private function _hex_to_int($hex)
    {
        $hex = (string) $hex;
        if (strncasecmp($hex, '0x', 2) == 0)
        {
            $hex = (string) substr($hex, 2);
        }
        return ($hex === '') ? 0 : hexdec($hex);
    }

    /**
     * Rewrites a UTF-8 string so that every non-ASCII character becomes a
     * hexadecimal numeric character reference (&#x4e2d;). ASCII bytes are
     * copied through unchanged.
     *
     * @param string $utf8 UTF-8 encoded text.
     * @return string|false Rewritten text, or false when the input holds a
     *                      malformed or truncated UTF-8 sequence.
     */
    private function _utf8_to_entities($utf8)
    {
        $out = '';
        $len = strlen($utf8);
        $i   = 0;

        while ($i < $len)
        {
            $lead = ord($utf8[$i]);

            if ($lead < 0x80)
            {
                $out .= $utf8[$i];
                $i++;
                continue;
            }

            // The lead byte tells how many continuation bytes follow.
            if (($lead & 0xE0) == 0xC0)
            {
                $extra = 1;
                $code  = $lead & 0x1F;
            }
            elseif (($lead & 0xF0) == 0xE0)
            {
                $extra = 2;
                $code  = $lead & 0x0F;
            }
            elseif (($lead & 0xF8) == 0xF0)
            {
                $extra = 3;
                $code  = $lead & 0x07;
            }
            else
            {
                return false;
            }

            if ($i + $extra >= $len)
            {
                return false;
            }

            for ($k = 1; $k <= $extra; $k++)
            {
                $cont = ord($utf8[$i + $k]);
                if (($cont & 0xC0) != 0x80)
                {
                    return false;
                }
                $code = ($code << 6) | ($cont & 0x3F);
            }

            $out .= '&#x' . dechex($code) . ';';
            $i   += $extra + 1;
        }

        return $out;
    }

    /**
     * Loads the code table needed for the configured source/target pair.
     *
     * For GBK <-> BIG-5 the binary table file is opened into $this->ctf.
     * For the other pairs $this->unicode_table is filled from the PHP
     * table files. Those files are included in this method's scope and are
     * presumably expected to assign $gb_utf8_table / $big5_utf8_table
     * (assumption from how the variables are used here; the files are not
     * part of this source). Derived tables are cached in static variables.
     *
     * Prints a message and terminates the script when a binary table file
     * cannot be opened.
     *
     * @return void
     */
    public function OpenTable()
    {
        static $gb_utf8_table      = NULL;
        static $gb_unicode_table   = NULL;
        static $utf8_gb_table      = NULL;
        static $big5_utf8_table    = NULL;
        static $big5_unicode_table = NULL;
        static $utf8_big5_table    = NULL;

        $source = $this->config['source_lang'];
        $target = $this->config['target_lang'];
        $dir    = $this->config['codetable_dir'];

        // Source is Simplified Chinese.
        if ($source == 'GBK')
        {
            if ($target == 'BIG-5')
            {
                $this->ctf = @fopen($dir . $this->config['GBtoBIG5_table'], 'rb');
                // fopen() reports failure with false, not null.
                if ($this->ctf === false)
                {
                    echo '打开打开转换表文件失败!';
                    exit;
                }
            }

            if ($target == 'UTF-8')
            {
                if ($gb_utf8_table === NULL)
                {
                    require_once($dir . $this->config['GBtoUTF8_table']);
                }
                $this->unicode_table = $gb_utf8_table;
            }

            if ($target == 'UNICODE')
            {
                if ($gb_unicode_table === NULL)
                {
                    if (isset($gb_utf8_table) === false)
                    {
                        require_once($dir . $this->config['GBtoUTF8_table']);
                    }
                    foreach ($gb_utf8_table AS $key => $value)
                    {
                        // Drop the first two characters of the value
                        // (presumably a "0x" prefix).
                        $gb_unicode_table[$key] = substr($value, 2);
                    }
                }
                $this->unicode_table = $gb_unicode_table;
            }
        }

        // Source is Traditional Chinese.
        if ($source == 'BIG-5')
        {
            if ($target == 'GBK')
            {
                $this->ctf = @fopen($dir . $this->config['BIG5toGB_table'], 'rb');
                if ($this->ctf === false)
                {
                    echo '打开打开转换表文件失败!';
                    exit;
                }
            }

            if ($target == 'UTF-8')
            {
                if ($big5_utf8_table === NULL)
                {
                    require_once($dir . $this->config['BIG5toUTF8_table']);
                }
                $this->unicode_table = $big5_utf8_table;
            }

            if ($target == 'UNICODE')
            {
                if ($big5_unicode_table === NULL)
                {
                    if (isset($big5_utf8_table) === false)
                    {
                        require_once($dir . $this->config['BIG5toUTF8_table']);
                    }
                    foreach ($big5_utf8_table AS $key => $value)
                    {
                        $big5_unicode_table[$key] = substr($value, 2);
                    }
                }
                $this->unicode_table = $big5_unicode_table;
            }
        }

        // Source is UTF-8: build the reverse (code point -> native code) table.
        if ($source == 'UTF-8')
        {
            if ($target == 'GBK')
            {
                if ($utf8_gb_table === NULL)
                {
                    if (isset($gb_utf8_table) === false)
                    {
                        require_once($dir . $this->config['GBtoUTF8_table']);
                    }
                    foreach ($gb_utf8_table AS $key => $value)
                    {
                        $utf8_gb_table[hexdec($value)] = '0x' . dechex($key);
                    }
                }
                $this->unicode_table = $utf8_gb_table;
            }

            if ($target == 'BIG-5')
            {
                if ($utf8_big5_table === NULL)
                {
                    if (isset($big5_utf8_table) === false)
                    {
                        require_once($dir . $this->config['BIG5toUTF8_table']);
                    }
                    foreach ($big5_utf8_table AS $key => $value)
                    {
                        $utf8_big5_table[hexdec($value)] = '0x' . dechex($key);
                    }
                }
                $this->unicode_table = $utf8_big5_table;
            }
        }
    }

    /**
     * Encodes a Unicode code point as UTF-8 and returns the bytes as a
     * string of concatenated DECIMAL byte values (0x4E2D gives
     * "228184173"). Every byte of a multi-byte sequence is at least 128
     * and therefore exactly three digits, which is what lets callers split
     * the result into 3-character chunks.
     *
     * @param int $c Unicode code point.
     * @return string Decimal byte values; '' for code points >= 0x200000.
     */
    public function CHSUtoUTF8($c)
    {
        $str = '';
        if ($c < 0x80)
        {
            $str .= $c;
        }
        elseif ($c < 0x800)
        {
            $str .= (0xC0 | ($c >> 6));
            $str .= (0x80 | ($c & 0x3F));
        }
        elseif ($c < 0x10000)
        {
            $str .= (0xE0 | ($c >> 12));
            $str .= (0x80 | (($c >> 6) & 0x3F));
            $str .= (0x80 | ($c & 0x3F));
        }
        elseif ($c < 0x200000)
        {
            $str .= (0xF0 | ($c >> 18));
            $str .= (0x80 | (($c >> 12) & 0x3F));
            $str .= (0x80 | (($c >> 6) & 0x3F));
            $str .= (0x80 | ($c & 0x3F));
        }
        return $str;
    }

    /**
     * Table-driven conversion between Simplified / Traditional Chinese and
     * UTF-8, in either direction, using $this->unicode_table.
     *
     * Characters missing from the table are emitted the way a table value
     * of zero would be (a NUL byte, or 0x80 0x80 for a GBK target).
     *
     * @return string|null Converted text; null when the configured source
     *                     encoding is not GBK, BIG-5 or UTF-8.
     */
    public function CHStoUTF8()
    {
        $source = $this->config['source_lang'];

        if ($source == 'BIG-5' || $source == 'GBK')
        {
            $text   = $this->SourceText;
            $len    = strlen($text);
            // GBK table keys are stored with 0x8080 subtracted.
            $offset = ($source == 'GBK') ? 0x8080 : 0;
            $ret    = '';
            $i      = 0;

            // Index loop: a plain while ($text) test stops early on "0".
            while ($i < $len)
            {
                if (ord($text[$i]) > 127)
                {
                    $key  = hexdec(bin2hex(substr($text, $i, 2))) - $offset;
                    $code = isset($this->unicode_table[$key])
                        ? $this->_hex_to_int($this->unicode_table[$key])
                        : 0;

                    // CHSUtoUTF8() yields decimal byte values, 3 digits each.
                    $decimal = $this->CHSUtoUTF8($code);
                    for ($j = 0, $count = strlen($decimal); $j < $count; $j += 3)
                    {
                        $ret .= chr((int) substr($decimal, $j, 3));
                    }
                    $i += 2;
                }
                else
                {
                    $ret .= $text[$i];
                    $i++;
                }
            }

            $this->unicode_table = array();
            $this->SourceText    = '';
            return $ret;
        }

        if ($source == 'UTF-8')
        {
            $text = $this->SourceText;
            $len  = strlen($text);
            $out  = '';
            $i    = 0;

            while ($i < $len)
            {
                $c = ord($text[$i++]);
                switch ($c >> 4)
                {
                    case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                        // 0xxxxxxx
                        $out .= $text[$i - 1];
                        break;

                    case 12: case 13:
                        // 110x xxxx 10xx xxxx
                        $char2 = ($i < $len) ? ord($text[$i]) : 0;
                        $i++;
                        $out .= $this->_encode_table_char((($c & 0x1F) << 6) | ($char2 & 0x3F));
                        break;

                    case 14:
                        // 1110 xxxx 10xx xxxx 10xx xxxx
                        $char2 = ($i < $len) ? ord($text[$i]) : 0;
                        $i++;
                        $char3 = ($i < $len) ? ord($text[$i]) : 0;
                        $i++;
                        $out .= $this->_encode_table_char(
                            (($c & 0x0F) << 12) | (($char2 & 0x3F) << 6) | ($char3 & 0x3F)
                        );
                        break;

                    // Other lead bytes (stray continuation bytes, 4-byte
                    // sequences) produce no output.
                }
            }

            return $out;
        }

        return null;
    }

    /**
     * Looks a Unicode code point up in $this->unicode_table and returns
     * the bytes of the character in the configured target encoding.
     *
     * @param int $codepoint Unicode code point.
     * @return string Encoded bytes; '' when the target is neither GBK nor BIG-5.
     */
    private function _encode_table_char($codepoint)
    {
        $value = isset($this->unicode_table[$codepoint])
            ? $this->_hex_to_int($this->unicode_table[$codepoint])
            : 0;

        if ($this->config['target_lang'] == 'GBK')
        {
            return $this->_hex2bin(dechex($value + 0x8080));
        }
        if ($this->config['target_lang'] == 'BIG-5')
        {
            return $this->_hex2bin(dechex($value));
        }
        return '';
    }

    /**
     * Table-driven conversion of Simplified / Traditional Chinese text to
     * hexadecimal numeric character references, using $this->unicode_table.
     * ASCII bytes are copied through unchanged; a character missing from
     * the table yields an empty reference.
     *
     * @return string Converted text.
     */
    public function CHStoUNICODE()
    {
        $source = $this->config['source_lang'];
        $text   = $this->SourceText;
        $len    = strlen($text);
        $utf    = '';
        $i      = 0;

        while ($i < $len)
        {
            if (ord($text[$i]) > 127)
            {
                $pair = hexdec(bin2hex(substr($text, $i, 2)));
                if ($source == 'GBK')
                {
                    $key  = $pair - 0x8080;
                    $utf .= '&#x' . (isset($this->unicode_table[$key]) ? $this->unicode_table[$key] : '') . ';';
                }
                elseif ($source == 'BIG-5')
                {
                    $utf .= '&#x' . (isset($this->unicode_table[$pair]) ? $this->unicode_table[$pair] : '') . ';';
                }
                $i += 2;
            }
            else
            {
                $utf .= $text[$i];
                $i++;
            }
        }

        // The input has been fully consumed.
        $this->SourceText = '';
        return $utf;
    }

    /**
     * Converts between Simplified and Traditional Chinese, in whichever
     * direction OpenTable() opened the table for. Each two-byte character
     * is replaced in place with the two bytes read from $this->ctf.
     *
     * @return string Converted text.
     */
    public function GBtoBIG5()
    {
        // The last byte can never start a two-byte character.
        $max = strlen($this->SourceText) - 1;

        for ($i = 0; $i < $max; $i++)
        {
            $h = ord($this->SourceText[$i]);
            if ($h >= 160)
            {
                $l = ord($this->SourceText[$i + 1]);
                if ($h == 161 && $l == 64)
                {
                    // Two bytes are being replaced, so the replacement
                    // must be two spaces.
                    $gb = '  ';
                }
                else
                {
                    fseek($this->ctf, ($h - 160) * 510 + ($l - 1) * 2);
                    $gb = fread($this->ctf, 2);
                }

                // Keep the original bytes when the table has no full entry.
                if (is_string($gb) && strlen($gb) == 2)
                {
                    $this->SourceText[$i]     = $gb[0];
                    $this->SourceText[$i + 1] = $gb[1];
                }
                $i++;
            }
        }

        fclose($this->ctf);

        $result = $this->SourceText;
        $this->SourceText = '';
        return $result;
    }
}
?>