<?php
/**
 * Truncate a UTF-8 or GBK string to a maximum display width without cutting
 * a multi-byte character in half.
 *
 * Width is measured in columns, not in characters: tab, newline and printable
 * ASCII each count as 1 column, and every multi-byte character counts as 2.
 * Other bytes (control bytes, stray continuation bytes) are skipped at zero
 * width in the UTF-8 branch.
 *
 * In the non-UTF-8 branch a double-byte character that starts on the last
 * allowed column is kept whole, so the result can be one column wider than
 * $length.
 *
 * @param string $string  Text to truncate.
 * @param int    $length  Maximum display width in columns (not a count of
 *                        Chinese or English characters).
 * @param string $charset 'utf-8' (case-insensitive) selects the UTF-8 scanner;
 *                        any other value is scanned as a double-byte encoding
 *                        such as GBK. Defaults to 'utf-8'.
 * @param string $dot     Suffix appended when text was actually cut off.
 *                        Defaults to '...'.
 * @return string The input unchanged when it fits, otherwise the truncated
 *                text followed by $dot.
 */
function cutstr($string, $length, $charset = 'utf-8', $dot = '...')
{
    // Byte length is never smaller than display width, so a string this
    // short always fits.
    if (strlen($string) <= $length) {
        return $string;
    }

    // Wrap each special character in chr(1) markers so that a cut landing
    // inside a wrapped sequence can be detected and repaired afterwards.
    $marker  = chr(1);
    $plain   = ['&', '"', '<', '>'];
    $wrapped = [
        $marker . '&' . $marker,
        $marker . '"' . $marker,
        $marker . '<' . $marker,
        $marker . '>' . $marker,
    ];
    $work  = str_replace($plain, $wrapped, $string);
    $total = strlen($work);

    $n     = 0; // byte offset of the cut point inside $work
    $width = 0; // display columns consumed so far

    if (strtolower((string) $charset) === 'utf-8') {
        $step = 0; // byte size of the most recently counted character
        while ($n < $total && $width < $length) {
            $t = ord($work[$n]);
            if ($t == 9 || $t == 10 || (32 <= $t && $t <= 126)) {
                $step = 1;
                $cost = 1;
            } elseif (194 <= $t && $t <= 223) {
                $step = 2;
                $cost = 2;
            } elseif (224 <= $t && $t <= 239) {
                $step = 3;
                $cost = 2;
            } elseif (240 <= $t && $t <= 247) {
                $step = 4;
                $cost = 2;
            } elseif (248 <= $t && $t <= 251) {
                $step = 5;
                $cost = 2;
            } elseif ($t == 252 || $t == 253) {
                $step = 6;
                $cost = 2;
            } else {
                // Marker, control, continuation or invalid lead byte:
                // step over it without consuming any width.
                $n++;
                continue;
            }
            $n     += $step;
            $width += $cost;
        }
        // The last character overshot the limit: give it back.
        if ($width > $length) {
            $n -= $step;
        }
    } else {
        while ($n < $total && $width < $length) {
            $byte = $work[$n];
            if ($byte === $marker) {
                // Markers are bookkeeping only and must not use up width.
                $n++;
            } elseif (ord($byte) > 127) {
                // Lead byte of a double-byte character: keep both bytes.
                $n     += 2;
                $width += 2;
            } else {
                $n++;
                $width++;
            }
        }
    }

    // If the cut sits directly before a marker, take it along. A closing
    // marker completes the wrapped character so it is restored below; an
    // opening marker is left dangling and removed by the repair step.
    if ($n < $total && $work[$n] === $marker) {
        $n++;
    }

    // Everything fitted, so nothing was cut and no suffix is due.
    if ($n >= $total) {
        return $string;
    }

    $strcut = substr($work, 0, $n);
    // Unwrap the special characters that survived intact.
    $strcut = str_replace($wrapped, $plain, $strcut);

    // Any marker still present belongs to a sequence the cut split in two:
    // drop it together with whatever follows it.
    $pos = strrpos($strcut, $marker);
    if ($pos !== false) {
        $strcut = substr($strcut, 0, $pos);
    }

    return $strcut . $dot;
}
/**
 * Demo data: print one sample sentence truncated to several display widths.
 */
header('Content-type:text/html; charset=utf-8');
$str = '我爱中华1213我爱中华人民共和国';
// Each width is a number of display columns, not a count of Chinese or
// English characters. The separator literal spans two lines on purpose so
// the emitted markup stays byte-identical.
echo implode('', array_map(
    function ($columns) use ($str) {
        return cutstr($str, $columns) . '<br
/>';
    },
    array(6, 10, 14)
));
?>