前三个版本都或多或少解决了 UTF-8 下截取字符串时的乱码问题,但功能不是很全面;这次贴出的版本在功能上已经和 substr 不相上下了,代码如下:
<?php
/**
 * Character-aware counting and slicing for UTF-8 encoded byte strings.
 *
 * A "character" is delimited purely by its lead byte: a byte >= 240 starts a
 * 4-byte sequence, >= 224 a 3-byte sequence, >= 192 a 2-byte sequence, and
 * every other byte counts as a character on its own. The input is not
 * validated; a sequence cut short by the end of the string is treated as one
 * (shorter) character.
 */
class splite_utf8
{
    /**
     * Width in bytes of the character introduced by the given lead byte.
     *
     * @param int $lead_byte Byte value (0-255) of the first byte of a character.
     * @return int Number of bytes (1-4) the character occupies.
     */
    private function utf8_char_width($lead_byte)
    {
        if ($lead_byte >= 240) {
            return 4;
        }
        if ($lead_byte >= 224) {
            return 3;
        }
        if ($lead_byte >= 192) {
            return 2;
        }
        return 1;
    }

    /**
     * Move a byte offset forward by up to $chars characters.
     *
     * @param string    $str    Byte string being scanned.
     * @param int       $offset Byte offset to start from.
     * @param int|float $chars  Number of characters to skip; values <= 0 skip nothing.
     * @return int Resulting byte offset, never larger than strlen($str).
     */
    private function skip_chars($str, $offset, $chars)
    {
        $total = strlen($str);
        for ($i = 0; $i < $chars && $offset < $total; $i++) {
            $offset += $this->utf8_char_width(ord($str[$offset]));
        }
        // A truncated trailing sequence may push the offset past the end.
        return min($offset, $total);
    }

    /**
     * Count the characters in a UTF-8 string.
     *
     * Walks the string iteratively by byte offset, so an empty string
     * contributes zero characters and long input does not build up a deep
     * recursion with one string copy per character.
     *
     * @param string $str    String to measure.
     * @param int    $length Initial value of the counter (defaults to 0).
     * @return int $length plus the number of characters found in $str.
     */
    public function count_word($str, $length = 0)
    {
        $str = (string) $str;
        $total = strlen($str);
        $offset = 0;
        while ($offset < $total) {
            $offset += $this->utf8_char_width(ord($str[$offset]));
            $length += 1;
        }
        return $length;
    }

    /**
     * Extract a run of characters from a UTF-8 string.
     *
     * @param string $str    Source string.
     * @param int    $start  Index of the first character to return. A negative
     *                       value counts back from the end of the string; if it
     *                       reaches before the beginning, extraction starts at
     *                       the first character.
     * @param int    $length Number of characters to return. The default -1
     *                       means "everything from $start to the end"; any
     *                       other non-positive value yields an empty string.
     * @return string The selected characters; '' when $start lies at or past
     *                the end of the string.
     */
    public function splite_mulit_utf8_word($str, $start = 0, $length = -1)
    {
        $str = (string) $str;
        if ($start < 0) {
            $start = $this->count_word($str) + $start;
        }
        $begin = $this->skip_chars($str, 0, $start);
        if ($length == -1) {
            // The cast covers PHP < 8, where substr() returns false at end of string.
            return (string) substr($str, $begin);
        }
        $end = $this->skip_chars($str, $begin, $length);
        return (string) substr($str, $begin, $end - $begin);
    }
}
// Demo: count the characters of a mixed CJK/ASCII string, then extract the
// two characters that begin six characters before the end.
$utf = new splite_utf8();
$text = '的萨芬dfdf!@#$%^&*I()';
$length = $utf->count_word($text);
// "\n" (not "/n") so the count is printed on a line of its own.
echo $length . "\n";
$word = $utf->splite_mulit_utf8_word($text, -6, 2);
var_dump($word);
?>
屏幕输出的就是
18
string(2) "^&"
<?php
/**
 * Character-aware counting and slicing for UTF-8 encoded byte strings.
 *
 * A "character" is delimited purely by its lead byte: a byte >= 240 starts a
 * 4-byte sequence, >= 224 a 3-byte sequence, >= 192 a 2-byte sequence, and
 * every other byte counts as a character on its own. The input is not
 * validated; a sequence cut short by the end of the string is treated as one
 * (shorter) character.
 */
class splite_utf8
{
    /**
     * Width in bytes of the character introduced by the given lead byte.
     *
     * @param int $lead_byte Byte value (0-255) of the first byte of a character.
     * @return int Number of bytes (1-4) the character occupies.
     */
    private function utf8_char_width($lead_byte)
    {
        if ($lead_byte >= 240) {
            return 4;
        }
        if ($lead_byte >= 224) {
            return 3;
        }
        if ($lead_byte >= 192) {
            return 2;
        }
        return 1;
    }

    /**
     * Move a byte offset forward by up to $chars characters.
     *
     * @param string    $str    Byte string being scanned.
     * @param int       $offset Byte offset to start from.
     * @param int|float $chars  Number of characters to skip; values <= 0 skip nothing.
     * @return int Resulting byte offset, never larger than strlen($str).
     */
    private function skip_chars($str, $offset, $chars)
    {
        $total = strlen($str);
        for ($i = 0; $i < $chars && $offset < $total; $i++) {
            $offset += $this->utf8_char_width(ord($str[$offset]));
        }
        // A truncated trailing sequence may push the offset past the end.
        return min($offset, $total);
    }

    /**
     * Count the characters in a UTF-8 string.
     *
     * Walks the string iteratively by byte offset, so an empty string
     * contributes zero characters and long input does not build up a deep
     * recursion with one string copy per character.
     *
     * @param string $str    String to measure.
     * @param int    $length Initial value of the counter (defaults to 0).
     * @return int $length plus the number of characters found in $str.
     */
    public function count_word($str, $length = 0)
    {
        $str = (string) $str;
        $total = strlen($str);
        $offset = 0;
        while ($offset < $total) {
            $offset += $this->utf8_char_width(ord($str[$offset]));
            $length += 1;
        }
        return $length;
    }

    /**
     * Extract a run of characters from a UTF-8 string.
     *
     * @param string $str    Source string.
     * @param int    $start  Index of the first character to return. A negative
     *                       value counts back from the end of the string; if it
     *                       reaches before the beginning, extraction starts at
     *                       the first character.
     * @param int    $length Number of characters to return. The default -1
     *                       means "everything from $start to the end"; any
     *                       other non-positive value yields an empty string.
     * @return string The selected characters; '' when $start lies at or past
     *                the end of the string.
     */
    public function splite_mulit_utf8_word($str, $start = 0, $length = -1)
    {
        $str = (string) $str;
        if ($start < 0) {
            $start = $this->count_word($str) + $start;
        }
        $begin = $this->skip_chars($str, 0, $start);
        if ($length == -1) {
            // The cast covers PHP < 8, where substr() returns false at end of string.
            return (string) substr($str, $begin);
        }
        $end = $this->skip_chars($str, $begin, $length);
        return (string) substr($str, $begin, $end - $begin);
    }
}
// Demo: count the characters of a mixed CJK/ASCII string, then extract the
// two characters that begin six characters before the end.
$utf = new splite_utf8();
$text = '的萨芬dfdf!@#$%^&*I()';
$length = $utf->count_word($text);
// "\n" (not "/n") so the count is printed on a line of its own.
echo $length . "\n";
$word = $utf->splite_mulit_utf8_word($text, -6, 2);
var_dump($word);
?>
屏幕输出的就是
18
string(2) "^&"