| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200 | <?php// +----------------------------------------------------------------------// | ThinkPHP [ WE CAN DO IT JUST THINK IT ]// +----------------------------------------------------------------------// | Copyright (c) 2009 http://thinkphp.cn All rights reserved.// +----------------------------------------------------------------------// | Licensed ( http://www.apache.org/licenses/LICENSE-2.0 )// +----------------------------------------------------------------------// | Author: liu21st <liu21st@gmail.com>// +----------------------------------------------------------------------namespace Org\Util;class CodeSwitch {    // 错误信息    static private $error = array();    // 提示信息    static private $info = array();    // 记录错误    static private function error($msg) {        self::$error[]   =  $msg;    }    // 记录信息    static private function info($info) {        self::$info[]     = $info;    }	/**     * 编码转换函数,对整个文件进行编码转换	 * 支持以下转换	 * GB2312、UTF-8 WITH BOM转换为UTF-8	 * UTF-8、UTF-8 WITH BOM转换为GB2312     * @access public     * @param string $filename		文件名	 * @param string $out_charset	转换后的文件编码,与iconv使用的参数一致     * @return void     */	static function DetectAndSwitch($filename,$out_charset) {		$fpr = fopen($filename,"r");		$char1 = fread($fpr,1);		$char2 = fread($fpr,1);		$char3 = fread($fpr,1);		$originEncoding = "";		if($char1==chr(239) && $char2==chr(187) && $char3==chr(191))//UTF-8 WITH BOM			$originEncoding = "UTF-8 WITH BOM";		elseif($char1==chr(255) && $char2==chr(254))//UNICODE LE		{			self::error("不支持从UNICODE LE转换到UTF-8或GB编码");			fclose($fpr);			return;		}elseif($char1==chr(254) && $char2==chr(255)){//UNICODE BE			self::error("不支持从UNICODE BE转换到UTF-8或GB编码");			fclose($fpr);			return;		}else{//没有文件头,可能是GB或UTF-8			if(rewind($fpr)===false){//回到文件开始部分,准备逐字节读取判断编码				self::error($filename."文件指针后移失败");				fclose($fpr);				return;			}			while(!feof($fpr)){				$char = fread($fpr,1);				//对于英文,GB和UTF-8都是单字节的ASCII码小于128的值				if(ord($char)<128)					continue;				//对于汉字GB编码第一个字节是110*****第二个字节是10******(有特例,比如联字)				//UTF-8编码第一个字节是1110****第二个字节是10******第三个字节是10******				//按位与出来结果要跟上面非星号相同,所以应该先判断UTF-8				//因为使用GB的掩码按位与,UTF-8的111得出来的也是110,所以要先判断UTF-8				if((ord($char)&224)==224) {					//第一个字节判断通过					$char = fread($fpr,1);					if((ord($char)&128)==128) {						//第二个字节判断通过						$char = fread($fpr,1);						if((ord($char)&128)==128) {							$originEncoding = "UTF-8";							break;						}					}				}				if((ord($char)&192)==192) {					//第一个字节判断通过					$char = fread($fpr,1);					if((ord($char)&128)==128) {						//第二个字节判断通过						$originEncoding = "GB2312";						break;					}				}			}		}		if(strtoupper($out_charset)==$originEncoding) {			self::info("文件".$filename."转码检查完成,原始文件编码".$originEncoding);			fclose($fpr);		}else {			//文件需要转码			$originContent = "";			if($originEncoding == "UTF-8 WITH BOM") {				//跳过三个字节,把后面的内容复制一遍得到utf-8的内容				fseek($fpr,3);				$originContent = fread($fpr,filesize($filename)-3);				fclose($fpr);			}elseif(rewind($fpr)!=false){//不管是UTF-8还是GB2312,回到文件开始部分,读取内容				$originContent = fread($fpr,filesize($filename));				fclose($fpr);			}else{				self::error("文件编码不正确或指针后移失败");				fclose($fpr);				return;			}			//转码并保存文件			$content = iconv(str_replace(" WITH BOM","",$originEncoding),strtoupper($out_charset),$originContent);			$fpw = fopen($filename,"w");			fwrite($fpw,$content);			fclose($fpw);			if($originEncoding!="")				self::info("对文件".$filename."转码完成,原始文件编码".$originEncoding.",转换后文件编码".strtoupper($out_charset));			elseif($originEncoding=="")				self::info("文件".$filename."中没有出现中文,但是可以断定不是带BOM的UTF-8编码,没有进行编码转换,不影响使用");		}	}	/**     * 目录遍历函数     * @access public     * @param string $path		要遍历的目录名     * @param string $mode		遍历模式,一般取FILES,这样只返回带路径的文件名     * @param array $file_types		文件后缀过滤数组	 * @param int $maxdepth		遍历深度,-1表示遍历到最底层     * @return void     */	static function searchdir($path,$mode = "FULL",$file_types = array(".html",".php"),$maxdepth = -1,$d = 0) {	   if(substr($path,strlen($path)-1) != '/')		   $path .= '/';	   $dirlist = array();	   if($mode != "FILES")			$dirlist[] = $path;	   if($handle = @opendir($path)) {		   while(false !== ($file = readdir($handle)))		   {			   if($file != '.' && $file != '..')			   {				   $file = $path.$file ;				   if(!is_dir($file))				   {						if($mode != "DIRS")						{							$extension = "";							$extpos = strrpos($file, '.');							if($extpos!==false)								$extension = substr($file,$extpos,strlen($file)-$extpos);							$extension=strtolower($extension);							if(in_array($extension, $file_types))								$dirlist[] = $file;						}				   }				   elseif($d >= 0 && ($d < $maxdepth || $maxdepth < 0))				   {					   $result = self::searchdir($file.'/',$mode,$file_types,$maxdepth,$d + 1) ;					   $dirlist = array_merge($dirlist,$result);				   }			   }		   }		   closedir ( $handle ) ;	   }	   if($d == 0)		   natcasesort($dirlist);	   return($dirlist) ;	}	/**     * 对整个项目目录中的PHP和HTML文件行进编码转换     * @access public     * @param string $app		要遍历的项目路径     * @param string $mode		遍历模式,一般取FILES,这样只返回带路径的文件名     * @param array $file_types		文件后缀过滤数组     * @return void     */	static function CodingSwitch($app = "./",$charset='UTF-8',$mode = "FILES",$file_types = array(".html",".php")) {		self::info("注意: 程序使用的文件编码检测算法可能对某些特殊字符不适用");		$filearr = self::searchdir($app,$mode,$file_types);		foreach($filearr as $file)			self::DetectAndSwitch($file,$charset);	}    static public function getError() {        return self::$error;    }    static public function getInfo() {        return self::$info;    }}
 |