<?php
/**
 * Convert text of unknown encoding to the requested target encoding.
 *
 * The input is treated as UTF-8 when it is a well-formed UTF-8 byte sequence.
 * Otherwise mbstring picks the source encoding from the candidate list
 * GBK, GB2312, Big5 before converting.
 *
 * @param string $string   Text to convert.
 * @param string $encoding Target encoding name passed to mb_convert_encoding()
 *                         (defaults to 'gbk').
 *
 * @return string The text in the target encoding; returned unchanged when it is
 *                already valid UTF-8 and the target is UTF-8.
 */
function detect_encoding($string, $encoding = 'gbk')
{
    // mb_check_encoding() validates in linear time. A backtracking regex makes
    // preg_match() return false on long inputs once PCRE limits are hit, which
    // would misclassify valid UTF-8 as GBK and corrupt the text.
    $isUtf8 = mb_check_encoding((string) $string, 'UTF-8');

    if (!$isUtf8) {
        // Not UTF-8: let mbstring choose among the legacy Chinese encodings.
        return mb_convert_encoding($string, $encoding, 'gbk,gb2312,big5');
    }

    // Accept the common spellings of the UTF-8 target so no redundant
    // UTF-8 -> UTF-8 conversion is performed.
    $targetIsUtf8 = in_array(strtolower((string) $encoding), array('utf8', 'utf-8'), true);

    return $targetIsUtf8
        ? $string
        : mb_convert_encoding($string, $encoding, 'UTF-8');
}
?>
如果给的是字节(Byte)数组 buf,那么可以根据开头的 BOM(字节序标记)这样判断:
UTF-8:
(buf[0] == 0xEF) && (buf[1] == 0xBB) && (buf[2] == 0xBF)
UTF-32/32LE:
(buf[0] == 0xFF) && (buf[1] == 0xFE) && (buf[2] == 0x00) && (buf[3] == 0x00)
UTF-16/16LE:
(buf[0] == 0xFF) && (buf[1] == 0xFE) && ((buf[2] != 0x00) || (buf[3] != 0x00))
UTF-16BE:
(buf[0] == 0xFE) && (buf[1] == 0xFF)
UTF-32BE:
(buf[0] == 0x00) && (buf[1] == 0x00) && (buf[2] == 0xFE) && (buf[3] == 0xFF)
剩下的是ANSI类的编码。