`
hegz
  • 浏览: 436072 次
  • 性别: Icon_minigender_1
  • 来自: 茂名
社区版块
存档分类
最新评论

中文字符串截取(支持gb2312、gbk、utf-8、big5)

    博客分类:
  • PHP
 
阅读更多
    /**
     * Cut a slice out of a multi-byte (e.g. Chinese) string, counted in characters.
     *
     * When the mbstring extension is available the work is delegated to
     * mb_strlen()/mb_substr() and $charset is handed to them unchanged. Otherwise
     * the string is split into characters with a per-charset byte pattern; that
     * fallback only knows utf-8, gb2312, gbk and big5 (matched case-insensitively).
     *
     * The suffix is appended only when characters remain after the returned
     * slice, i.e. when the tail of the string was actually cut off.
     *
     * @param string   $str         String to cut.
     * @param int      $start       Start position in characters; a negative value counts from the end.
     * @param int|null $length      Maximum number of characters to return; null means "up to the end".
     * @param string   $charset     Encoding of $str: utf-8 | gb2312 | gbk | big5.
     * @param string   $suffixChars Text appended when the tail was cut off.
     * @param bool     $suffix      Whether to append $suffixChars at all.
     *
     * @return string The slice, followed by $suffixChars if $suffix is true and the tail was cut off.
     *
     * @throws \InvalidArgumentException If mbstring is unavailable and $charset is not one of the four supported names.
     */
    public function csubstr($str, $start = 0, $length = null, $charset = 'gb2312', $suffixChars = '…', $suffix = true)
    {
        if (function_exists('mb_substr')) {
            $total = mb_strlen($str, $charset);
            $slice = mb_substr($str, $start, $length, $charset);
            $taken = mb_strlen($slice, $charset);
        } else {
            // One alternative per character width; the patterns work on raw bytes.
            $re = array(
                'utf-8'  => "/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xff][\x80-\xbf]{3}/",
                'gb2312' => "/[\x01-\x7f]|[\xb0-\xf7][\xa0-\xfe]/",
                'gbk'    => "/[\x01-\x7f]|[\x81-\xfe][\x40-\xfe]/",
                // Big5 trail byte is 0x40-0x7E or 0xA1-0xFE.
                'big5'   => "/[\x01-\x7f]|[\x81-\xfe][\x40-\x7e\xa1-\xfe]/",
            );
            $key = strtolower($charset);
            if (!isset($re[$key])) {
                throw new \InvalidArgumentException('Unsupported charset: ' . $charset);
            }
            preg_match_all($re[$key], $str, $match);
            $total  = count($match[0]);
            $picked = array_slice($match[0], $start, $length);
            $taken  = count($picked);
            $slice  = implode('', $picked);
        }

        // Resolve $start to the absolute character offset the slice began at,
        // so we can tell whether anything is left after the slice.
        $offset    = $start < 0 ? max($total + $start, 0) : min($start, $total);
        $truncated = ($offset + $taken) < $total;

        if ($suffix && $truncated) {
            return $slice . $suffixChars;
        }
        return $slice;
    }
 


再来一个:

       /**
        * Truncate a UTF-8 string to a maximum display width without splitting a character.
        *
        * Width is counted as 1 for tab, newline and printable ASCII, and 2 for every
        * multi-byte sequence. The four HTML entities &amp; &quot; &lt; &gt; are decoded
        * before measuring (so each counts as one character) and re-encoded in the result.
        *
        * @param string $string Text to shorten; may contain the four entities above.
        * @param int    $length Maximum width of the result, in the units described above.
        * @param string $dot    Marker appended when the text was actually cut.
        *
        * @return string $string unchanged when nothing had to be cut, otherwise the cut text followed by $dot.
        */
       function cutstr($string, $length, $dot = ' ...')
       {
           // Fast path: the byte length is an upper bound on the width.
           if (strlen($string) <= $length) {
               return $string;
           }

           $entityToChar = array('&amp;' => '&', '&quot;' => '"', '&lt;' => '<', '&gt;' => '>');

           // strtr() replaces in a single pass, so "&amp;lt;" becomes "&lt;" and is
           // not decoded a second time into "<".
           $string = strtr($string, $entityToChar);
           $total  = strlen($string);

           // $n: byte offset, $tn: byte size of the last counted character, $noc: width so far.
           $n = $tn = $noc = 0;
           while ($n < $total) {
               $t = ord($string[$n]);
               if ($t == 9 || $t == 10 || (32 <= $t && $t <= 126)) {
                   $tn = 1;
                   $n++;
                   $noc++;
               } elseif (194 <= $t && $t <= 223) {
                   $tn = 2;
                   $n += 2;
                   $noc += 2;
               } elseif (224 <= $t && $t <= 239) {
                   // 0xE0-0xEF inclusive: 0xEF leads U+F000-U+FFFF, e.g. full-width punctuation.
                   $tn = 3;
                   $n += 3;
                   $noc += 2;
               } elseif (240 <= $t && $t <= 247) {
                   $tn = 4;
                   $n += 4;
                   $noc += 2;
               } elseif (248 <= $t && $t <= 251) {
                   $tn = 5;
                   $n += 5;
                   $noc += 2;
               } elseif ($t == 252 || $t == 253) {
                   $tn = 6;
                   $n += 6;
                   $noc += 2;
               } else {
                   // Any other byte is skipped without adding to the width.
                   $n++;
               }

               if ($noc >= $length) {
                   break;
               }
           }

           // The last character overshot the limit: drop it again.
           if ($noc > $length) {
               $n -= $tn;
           }

           $strcut    = substr($string, 0, $n);
           $truncated = $n < $total;
           $strcut    = strtr($strcut, array_flip($entityToChar));

           return $truncated ? $strcut . $dot : $strcut;
       }
    
     
      分享到:
      评论

      相关推荐

      Global site tag (gtag.js) - Google Analytics