获取中文字符串中的数字并转换成阿拉伯数字
<?php
/**
 * Extract the numeric characters (ASCII digits and Chinese numerals) from a
 * string and convert them to a string of Arabic digits.
 *
 * Recognised characters:
 *   - unit digits 1-9: "1一壹", "2二贰两", ... "9九玖"
 *   - zero: "0零"
 *   - multipliers: 十拾 (10), 百佰 (100), 千仟 (1000), 万 (10^4), 亿 (10^8)
 * Every other character is ignored.
 *
 * Conversion rules:
 *   - If the collected numerals contain at least one unit digit and no
 *     multiplier, they are read positionally ("七零七" => "707").
 *   - Otherwise they are read as a Chinese number ("七千零七十七" => "7077").
 *     万 and 亿 scale the whole section that precedes them
 *     ("三十万" => "300000", "一亿二千万" => "120000000").
 *   - A unit digit that directly follows another unit digit starts a new
 *     number; finished numbers are concatenated
 *     ("七千零七十七" + "12" => "707712").
 *
 * Known limitations (kept for backward compatibility):
 *   - In discrete mode the boundary between separate numbers is lost, so
 *     "一千四百八十" followed by "12" yields "14812", not "148012".
 *   - In the Chinese-number reading, a zero discards a unit digit that is
 *     still waiting for its multiplier.
 *   - Input without any numeral (or made only of zeros) yields "0".
 *
 * @param mixed $str            Text to scan; cast to string, expected to be UTF-8.
 *                              Invalid UTF-8 is treated as containing no numerals.
 * @param bool  $solve_discrete true: collect numerals from the whole string;
 *                              false: stop at the first non-numeral character
 *                              after the first numeral (first contiguous run only).
 *
 * @return string The converted digits.
 */
function charN2intN($str, $solve_discrete = true)
{
    $unitValues = [
        '1' => 1, '一' => 1, '壹' => 1,
        '2' => 2, '二' => 2, '贰' => 2, '两' => 2,
        '3' => 3, '三' => 3, '叁' => 3,
        '4' => 4, '四' => 4, '肆' => 4,
        '5' => 5, '五' => 5, '伍' => 5,
        '6' => 6, '六' => 6, '陆' => 6,
        '7' => 7, '七' => 7, '柒' => 7,
        '8' => 8, '八' => 8, '捌' => 8,
        '9' => 9, '九' => 9, '玖' => 9,
    ];
    $zeroChars = ['0' => true, '零' => true];
    // Explicit values: deriving them from an array index made 亿 equal to 10^5.
    $multiplierValues = [
        '十' => 10, '拾' => 10,
        '百' => 100, '佰' => 100,
        '千' => 1000, '仟' => 1000,
        '万' => 10000,
        '亿' => 100000000,
    ];

    // Split into UTF-8 characters in one pass; false means invalid UTF-8.
    $chars = preg_split('//u', (string) $str, -1, PREG_SPLIT_NO_EMPTY);
    if ($chars === false) {
        $chars = [];
    }

    // Tokenise the numerals, skipping (or stopping at) everything else.
    $tokens = [];
    $hasUnit = false;
    $hasMultiplier = false;
    foreach ($chars as $char) {
        if (isset($unitValues[$char])) {
            $tokens[] = ['type' => 'unit', 'value' => $unitValues[$char]];
            $hasUnit = true;
        } elseif (isset($zeroChars[$char])) {
            $tokens[] = ['type' => 'zero', 'value' => 0];
        } elseif (isset($multiplierValues[$char])) {
            $tokens[] = ['type' => 'multiplier', 'value' => $multiplierValues[$char]];
            $hasMultiplier = true;
        } elseif (!$solve_discrete && count($tokens) > 0) {
            // Contiguous mode: the first run of numerals has ended.
            break;
        }
    }

    // Positional reading: only digits, at least one of them non-zero-class.
    if ($hasUnit && !$hasMultiplier) {
        $ans = '';
        foreach ($tokens as $token) {
            $ans .= $token['value'];
        }
        return $ans;
    }

    // Chinese-number reading.
    $ans = '';
    $total = 0;    // value of the sections already closed by 万 / 亿
    $section = 0;  // value accumulated below the next 万 / 亿
    $unit = -1;    // unit digit still waiting for a multiplier; -1 = none
    foreach ($tokens as $token) {
        $value = $token['value'];
        if ('unit' === $token['type']) {
            if ($unit > -1) {
                // Two unit digits in a row: the previous number is complete.
                $ans .= ($total + $section + $unit);
                $total = 0;
                $section = 0;
            }
            $unit = $value;
        } elseif ('zero' === $token['type']) {
            $unit = -1;
        } else {
            if ($value < 10000) {
                // 十 / 百 / 千: a bare multiplier counts as 1 x value ("十" => 10).
                $section += ($unit > -1 ? $unit : 1) * $value;
            } elseif (10000 === $value) {
                // 万 scales the whole current section; bare 万 counts as 10000.
                $lead = $section + ($unit > -1 ? $unit : 0);
                $total += ($lead > 0 ? $lead : 1) * $value;
                $section = 0;
            } else {
                // 亿 scales everything read so far; bare 亿 counts as 10^8.
                $lead = $total + $section + ($unit > -1 ? $unit : 0);
                $total = ($lead > 0 ? $lead : 1) * $value;
                $section = 0;
            }
            $unit = -1;
        }
    }

    if ($unit > -1) {
        $ans .= ($total + $section + $unit);
    } elseif ('' === $ans || ($total + $section) > 0) {
        // Also emit a trailing value that ends on a multiplier after an
        // earlier number was flushed (it used to be dropped silently).
        $ans .= ($total + $section);
    }
    return $ans;
}
// Demo driver: for every sample sentence print
// "<converted digits><tabs><sentence>\n", first with the default discrete
// mode and then with $solve_discrete = false.
// Each case is [sentence, number of tab characters used as separator].
$normal_cases = [
    ['从七开始', 3],
    ['从十开始', 3],
    ['从十七开始', 3],
    ['从七十开始', 3],
    ['从七十七开始', 3],
    ['从七佰零七开始', 3],
    ['从七佰一十七开始', 3],
    ['从七佰七十七开始', 3],
    ['从七千零七开始', 2],
    ['从七千零七十七开始', 2],
    ['从七千七佰零七开始', 2],
    ['从七千七佰七十七开始', 2],
];
$demo_runs = [
    [
        'intro' => [],
        'discrete' => true,
        'groups' => [
            "通过的正常格式\n" => $normal_cases,
            "通过的非正常格式\n" => [
                ['从七千零七十七开始到12', 2],
                ['从一千四百八十七开始到12', 2],
                ['从五七开始到七零七零零', 2],
                ['从五七开始到七零七00', 2],
                ['从203开始到七零七', 2],
                ['从203开始到707', 2],
                ['从一千四百八十开始到12', 2],
                ['从七千零七十七开始到七千零七十七', 1],
            ],
        ],
    ],
    [
        'intro' => ["solve_discrete:false,只处理连续的数\n", "\n"],
        'discrete' => false,
        'groups' => [
            "通过的正常格式\n" => $normal_cases,
            "通过的非正常格式\n" => [
                ['从七千零七十七开始到12', 2],
                ['从一千四百八十七开始到12', 2],
                ['从五七开始', 3],
                ['从五七开始到七零七零零', 3],
                ['从五七开始到七零七00', 3],
                ['从203开始到七零七', 3],
                ['从203开始到707', 3],
            ],
        ],
    ],
];
foreach ($demo_runs as $run) {
    foreach ($run['intro'] as $line) {
        echo $line;
    }
    foreach ($run['groups'] as $heading => $cases) {
        echo $heading;
        foreach ($cases as $case) {
            $temp = $case[0];
            echo charN2intN($temp, $run['discrete']) . str_repeat("\t", $case[1]) . $temp;
            echo "\n";
        }
        // Blank line closing each group.
        echo "\n";
    }
}
echo "\n";
测试结果
结果 | 参数 |
---|---|
正常格式 | |
7 | 从七开始 |
10 | 从十开始 |
17 | 从十七开始 |
70 | 从七十开始 |
77 | 从七十七开始 |
707 | 从七佰零七开始 |
717 | 从七佰一十七开始 |
777 | 从七佰七十七开始 |
7007 | 从七千零七开始 |
7077 | 从七千零七十七开始 |
7707 | 从七千七佰零七开始 |
7777 | 从七千七佰七十七开始 |
非正常格式 | |
707712 | 从七千零七十七开始到12 |
148712 | 从一千四百八十七开始到12 |
5770700 | 从五七开始到七零七零零 |
5770700 | 从五七开始到七零七00 |
203707 | 从203开始到七零七 |
203707 | 从203开始到707 |
70777077 | 从七千零七十七开始到七千零七十七 |
不能通过的非正常格式 | |
14812 | 从一千四百八十开始到12 |
| |
$solve_discrete:false,只处理连续的数 | |
通过的正常格式 | |
7 | 从七开始 |
10 | 从十开始 |
17 | 从十七开始 |
70 | 从七十开始 |
77 | 从七十七开始 |
707 | 从七佰零七开始 |
717 | 从七佰一十七开始 |
777 | 从七佰七十七开始 |
7007 | 从七千零七开始 |
7077 | 从七千零七十七开始 |
7707 | 从七千七佰零七开始 |
7777 | 从七千七佰七十七开始 |
通过的非正常格式 | |
7077 | 从七千零七十七开始到12 |
1487 | 从一千四百八十七开始到12 |
57 | 从五七开始 |
57 | 从五七开始到七零七零零 |
57 | 从五七开始到七零七00 |
203 | 从203开始到七零七 |
203 | 从203开始到707 |