函数描述及例子 PHP采集程序中常用的函数 查询关键字 PHP采集程序中常用的函数
001.
/**
 * Returns the URL (path plus query string) of the current request.
 *
 * Uses $_SERVER["REQUEST_URI"] when it is non-empty; otherwise rebuilds the
 * URL from $_SERVER["PHP_SELF"] and, when non-empty, $_SERVER["QUERY_STRING"].
 *
 * @return string Request URI, or script path with optional "?query" suffix.
 */
function get_php_url(){
    if (!empty($_SERVER["REQUEST_URI"])) {
        return $_SERVER["REQUEST_URI"];
    }

    // Fallback for servers that do not populate REQUEST_URI.
    $scriptName = isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : "";
    if (empty($_SERVER["QUERY_STRING"])) {
        return $scriptName;
    }
    return $scriptName."?".$_SERVER["QUERY_STRING"];
}
013.
/**
 * Converts full-width digits to ASCII digits and reduces the input to a
 * bare number string.
 *
 * After the digit conversion every character other than 0-9 and "." is
 * removed, and a run of zeros at the very start of the string is dropped.
 *
 * @param string $fnum Text that may contain full-width (U+FF10..U+FF19) digits.
 * @return string|int The cleaned digit string, or integer 0 when nothing is left.
 */
function GetAlabNum($fnum){
    // Full-width forms must be literal here; ASCII-to-ASCII would be a no-op.
    $fullWidth = array("０","１","２","３","４","５","６","７","８","９");
    $halfWidth = array("0","1","2","3","4","5","6","7","8","9");
    $fnum = str_replace($fullWidth, $halfWidth, $fnum);

    // ereg_replace() was removed in PHP 7; this is the equivalent PCRE pattern.
    $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);

    if ($fnum === null || $fnum === "") {
        return 0;
    }
    return $fnum;
}
022.
/**
 * Converts plain text into HTML-safe text with visible line breaks.
 *
 * Angle brackets are turned into &lt; / &gt; and every run of CR/LF
 * characters becomes a single "<br/>" followed by CRLF. Ampersands are not
 * escaped, so entities already present in the text pass through unchanged.
 *
 * @param string $txt Plain text.
 * @return string Text suitable for embedding in HTML.
 */
function Text2Html($txt){
    // NOTE(review): the scraped listing shows a space-for-space no-op here.
    // Restored as "two ASCII spaces -> one ideographic space (U+3000)", which
    // is what the upstream DedeCMS helper of the same name is believed to do
    // -- confirm against the original source.
    $txt = str_replace("  ", "\xE3\x80\x80", $txt);

    $txt = str_replace(array("<", ">"), array("&lt;", "&gt;"), $txt);

    // Greedy on purpose: with the former U (ungreedy) flag each CR and LF was
    // replaced separately, so one CRLF produced two <br/> tags.
    $txt = preg_replace("/[\r\n]+/", "<br/>\r\n", $txt);
    return $txt;
}
031.
/**
 * Neutralises HTML markup by encoding angle brackets as entities.
 *
 * Only "<" and ">" are touched; quotes and ampersands are left as they are.
 *
 * @param string $str Text that may contain HTML tags.
 * @return string The text with "<" as "&lt;" and ">" as "&gt;".
 */
function ClearHtml($str){
    // The entity forms are required: "<" -> "<" would change nothing.
    return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
}
037.
/**
 * Rewrites root-relative href/src attributes to absolute URLs.
 *
 * The scheme and host are taken from $feed_url. Only double-quoted
 * attributes whose value starts with a single "/" are rewritten;
 * protocol-relative values ("//host/...") and document-relative values are
 * left alone. When $feed_url has no http/https/ftp scheme or no host, the
 * content is returned unchanged.
 *
 * @param string $content  HTML fragment to rewrite.
 * @param string $feed_url URL the content was fetched from.
 * @return string The rewritten HTML.
 */
function relative_to_absolute($content, $feed_url) {
    if (!preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol)) {
        return $content;
    }

    // Strip the scheme, then everything from the first "/" on, leaving the host.
    $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url);
    $server_url = preg_replace("/\/.*/", "", $server_url);
    if ($server_url == '') {
        return $content;
    }

    // Escape "\" and "$" so the base cannot be read as a backreference.
    $base = addcslashes($protocol[0].$server_url, '\\$');

    // (?!\/) skips protocol-relative URLs, which are already absolute.
    return preg_replace('/(href|src)="\/(?!\/)/', '$1="'.$base.'/', $content);
}
053.
/**
 * Extracts every anchor's target and link text from an HTML fragment.
 *
 * Matching is case-insensitive. The href value may be double-quoted,
 * single-quoted or unquoted, and may be preceded by other attributes.
 * Anchors whose text contains ">" (for example nested tags) are not matched.
 *
 * @param string $code HTML source.
 * @return array array('name' => link texts, 'url' => href values); the two
 *               lists are index-aligned.
 */
function get_all_url($code){
    // (?:[^>]*?\s)?? lazily allows attributes before href.
    // ["\'] is a plain quote class; the former ["|\'] also accepted "|".
    $pattern = '/<a\s+(?:[^>]*?\s)??href=["\']?([^>"\' ]+)["\']?\s*[^>]*>([^>]+)<\/a>/i';
    preg_match_all($pattern, $code, $arr);
    return array('name'=>$arr[2],'url'=>$arr[1]);
}
058.
/**
 * Returns the text found between a start marker and an end marker.
 *
 * The text after the first occurrence of $start is taken, up to the next
 * occurrence of $start if there is one, and cut at the first $end inside it.
 * When $end does not occur there, that whole segment is returned.
 *
 * @param string $str   Text to search.
 * @param string $start Opening marker.
 * @param string $end   Closing marker.
 * @return string|null The enclosed text, '' when $start is not found, or
 *                     null when either marker is empty.
 */
function get_tag_data($str, $start, $end){
    if ( $start == '' || $end == '' ){
        return null;
    }

    $afterStart = explode($start, $str);
    // Without this guard a missing $start read an undefined offset.
    if (!isset($afterStart[1])) {
        return '';
    }

    $beforeEnd = explode($end, $afterStart[1]);
    return $beforeEnd[0];
}
067.
/**
 * Converts each row of an HTML table into one CSV-style string.
 *
 * Every cell is wrapped in double quotes and cells are joined with commas,
 * e.g. '"a","b"'. All other tags, line breaks, "&nbsp;" entities and ASCII
 * spaces are removed from the content. Quotes and commas inside a cell are
 * not escaped.
 *
 * @param string $table HTML of a table built from <tr>/<td> elements.
 * @return array One string per row that is terminated by </tr>.
 */
function get_tr_array($table) {
    // Opening <td ...> becomes the opening quote of a cell.
    $table = preg_replace('/<td[^>]*?>/si', '"', $table);
    // Closing tags become the cell terminator and the row marker. They are
    // matched case-insensitively, like the opening tags above.
    $table = str_ireplace(array("</td>", "</tr>"), array('",', "{tr}"), $table);

    // Drop every remaining HTML tag.
    $table = preg_replace('/<[\/!]*?[^<>]*?>/si', "", $table);

    // Drop line breaks together with any whitespace that follows them.
    $table = preg_replace('/[\r\n]+\s*/', "", $table);
    // NOTE(review): the listing shows two identical space replaces; one is
    // assumed to have been "&nbsp;" before entity decoding -- confirm.
    $table = str_replace(array("&nbsp;", " "), "", $table);

    $rows = explode(",{tr}", $table);
    // Whatever follows the last row marker is not a row.
    array_pop($rows);
    return $rows;
}
082.
/**
 * Converts an HTML table into a two-dimensional array of cell texts.
 *
 * All tags, line breaks, "&nbsp;" entities and ASCII spaces are removed
 * from the cell content.
 *
 * @param string $table HTML of a table built from <tr>/<td> elements.
 * @return array List of rows, each a list of cell strings; an empty array
 *               when no row terminated by </tr> is found.
 */
function get_td_array($table) {
    // Remove the opening structural tags; only the closing ones carry layout.
    $table = preg_replace('/<table[^>]*?>/si', "", $table);
    $table = preg_replace('/<tr[^>]*?>/si', "", $table);
    $table = preg_replace('/<td[^>]*?>/si', "", $table);
    // Closing tags become row and cell markers. They are matched
    // case-insensitively, like the opening tags above.
    $table = str_ireplace(array("</tr>", "</td>"), array("{tr}", "{td}"), $table);

    // Drop every remaining HTML tag.
    $table = preg_replace('/<[\/!]*?[^<>]*?>/si', "", $table);

    // Drop line breaks together with any whitespace that follows them.
    $table = preg_replace('/[\r\n]+\s*/', "", $table);
    // NOTE(review): the listing shows two identical space replaces; one is
    // assumed to have been "&nbsp;" before entity decoding -- confirm.
    $table = str_replace(array("&nbsp;", " "), "", $table);

    $rows = explode('{tr}', $table);
    // Whatever follows the last row marker is not a row.
    array_pop($rows);

    // Initialised up front so an empty table returns an array.
    $td_array = array();
    foreach ($rows as $tr) {
        $cells = explode('{td}', $tr);
        // Same trailing-fragment rule, per row.
        array_pop($cells);
        $td_array[] = $cells;
    }
    return $td_array;
}
105.
/**
 * Returns every run of ASCII letters found in a string, sorted ascending.
 *
 * @param string $str      Text to scan.
 * @param bool   $distinct When truthy (the default) duplicates are removed;
 *                         the comparison is case-sensitive.
 * @return array Sorted, zero-indexed list of words.
 */
function split_en_str($str,$distinct=true) {
    preg_match_all('/([a-zA-Z]+)/', $str, $match);
    $words = $match[1];
    if ($distinct) {
        $words = array_unique($words);
    }
    // sort() also re-indexes, closing the gaps array_unique() leaves behind.
    sort($words);
    return $words;
}
114.
115.
函数描述及例子
116.
117.
PHP采集程序中常用的函数
118.
119.
查询关键字
120.
121.
PHP采集程序中常用的函数
122.
<?php
123.
/**
 * Returns the URL (path plus query string) of the current request.
 *
 * Uses $_SERVER["REQUEST_URI"] when it is non-empty; otherwise rebuilds the
 * URL from $_SERVER["PHP_SELF"] and, when non-empty, $_SERVER["QUERY_STRING"].
 *
 * @return string Request URI, or script path with optional "?query" suffix.
 */
function get_php_url(){
    if (!empty($_SERVER["REQUEST_URI"])) {
        return $_SERVER["REQUEST_URI"];
    }

    // Fallback for servers that do not populate REQUEST_URI.
    $scriptName = isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : "";
    if (empty($_SERVER["QUERY_STRING"])) {
        return $scriptName;
    }
    return $scriptName."?".$_SERVER["QUERY_STRING"];
}
135.
/**
 * Converts full-width digits to ASCII digits and reduces the input to a
 * bare number string.
 *
 * After the digit conversion every character other than 0-9 and "." is
 * removed, and a run of zeros at the very start of the string is dropped.
 *
 * @param string $fnum Text that may contain full-width (U+FF10..U+FF19) digits.
 * @return string|int The cleaned digit string, or integer 0 when nothing is left.
 */
function GetAlabNum($fnum){
    // Full-width forms must be literal here; ASCII-to-ASCII would be a no-op.
    $fullWidth = array("０","１","２","３","４","５","６","７","８","９");
    $halfWidth = array("0","1","2","3","4","5","6","7","8","9");
    $fnum = str_replace($fullWidth, $halfWidth, $fnum);

    // ereg_replace() was removed in PHP 7; this is the equivalent PCRE pattern.
    $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);

    if ($fnum === null || $fnum === "") {
        return 0;
    }
    return $fnum;
}
144.
/**
 * Converts plain text into HTML-safe text with visible line breaks.
 *
 * Angle brackets are turned into &lt; / &gt; and every run of CR/LF
 * characters becomes a single "<br/>" followed by CRLF. Ampersands are not
 * escaped, so entities already present in the text pass through unchanged.
 *
 * @param string $txt Plain text.
 * @return string Text suitable for embedding in HTML.
 */
function Text2Html($txt){
    // NOTE(review): the scraped listing shows a space-for-space no-op here.
    // Restored as "two ASCII spaces -> one ideographic space (U+3000)", which
    // is what the upstream DedeCMS helper of the same name is believed to do
    // -- confirm against the original source.
    $txt = str_replace("  ", "\xE3\x80\x80", $txt);

    $txt = str_replace(array("<", ">"), array("&lt;", "&gt;"), $txt);

    // Greedy on purpose: with the former U (ungreedy) flag each CR and LF was
    // replaced separately, so one CRLF produced two <br/> tags.
    $txt = preg_replace("/[\r\n]+/", "<br/>\r\n", $txt);
    return $txt;
}
153.
/**
 * Neutralises HTML markup by encoding angle brackets as entities.
 *
 * Only "<" and ">" are touched; quotes and ampersands are left as they are.
 *
 * @param string $str Text that may contain HTML tags.
 * @return string The text with "<" as "&lt;" and ">" as "&gt;".
 */
function ClearHtml($str){
    // The entity forms are required: "<" -> "<" would change nothing.
    return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
}
159.
/**
 * Rewrites root-relative href/src attributes to absolute URLs.
 *
 * The scheme and host are taken from $feed_url. Only double-quoted
 * attributes whose value starts with a single "/" are rewritten;
 * protocol-relative values ("//host/...") and document-relative values are
 * left alone. When $feed_url has no http/https/ftp scheme or no host, the
 * content is returned unchanged.
 *
 * @param string $content  HTML fragment to rewrite.
 * @param string $feed_url URL the content was fetched from.
 * @return string The rewritten HTML.
 */
function relative_to_absolute($content, $feed_url) {
    if (!preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol)) {
        return $content;
    }

    // Strip the scheme, then everything from the first "/" on, leaving the host.
    $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url);
    $server_url = preg_replace("/\/.*/", "", $server_url);
    if ($server_url == '') {
        return $content;
    }

    // Escape "\" and "$" so the base cannot be read as a backreference.
    $base = addcslashes($protocol[0].$server_url, '\\$');

    // (?!\/) skips protocol-relative URLs, which are already absolute.
    return preg_replace('/(href|src)="\/(?!\/)/', '$1="'.$base.'/', $content);
}
175.
/**
 * Extracts every anchor's target and link text from an HTML fragment.
 *
 * Matching is case-insensitive. The href value may be double-quoted,
 * single-quoted or unquoted, and may be preceded by other attributes.
 * Anchors whose text contains ">" (for example nested tags) are not matched.
 *
 * @param string $code HTML source.
 * @return array array('name' => link texts, 'url' => href values); the two
 *               lists are index-aligned.
 */
function get_all_url($code){
    // (?:[^>]*?\s)?? lazily allows attributes before href.
    // ["\'] is a plain quote class; the former ["|\'] also accepted "|".
    $pattern = '/<a\s+(?:[^>]*?\s)??href=["\']?([^>"\' ]+)["\']?\s*[^>]*>([^>]+)<\/a>/i';
    preg_match_all($pattern, $code, $arr);
    return array('name'=>$arr[2],'url'=>$arr[1]);
}
180.
/**
 * Returns the text found between a start marker and an end marker.
 *
 * The text after the first occurrence of $start is taken, up to the next
 * occurrence of $start if there is one, and cut at the first $end inside it.
 * When $end does not occur there, that whole segment is returned.
 *
 * @param string $str   Text to search.
 * @param string $start Opening marker.
 * @param string $end   Closing marker.
 * @return string|null The enclosed text, '' when $start is not found, or
 *                     null when either marker is empty.
 */
function get_tag_data($str, $start, $end){
    if ( $start == '' || $end == '' ){
        return null;
    }

    $afterStart = explode($start, $str);
    // Without this guard a missing $start read an undefined offset.
    if (!isset($afterStart[1])) {
        return '';
    }

    $beforeEnd = explode($end, $afterStart[1]);
    return $beforeEnd[0];
}
189.
/**
 * Converts each row of an HTML table into one CSV-style string.
 *
 * Every cell is wrapped in double quotes and cells are joined with commas,
 * e.g. '"a","b"'. All other tags, line breaks, "&nbsp;" entities and ASCII
 * spaces are removed from the content. Quotes and commas inside a cell are
 * not escaped.
 *
 * @param string $table HTML of a table built from <tr>/<td> elements.
 * @return array One string per row that is terminated by </tr>.
 */
function get_tr_array($table) {
    // Opening <td ...> becomes the opening quote of a cell.
    $table = preg_replace('/<td[^>]*?>/si', '"', $table);
    // Closing tags become the cell terminator and the row marker. They are
    // matched case-insensitively, like the opening tags above.
    $table = str_ireplace(array("</td>", "</tr>"), array('",', "{tr}"), $table);

    // Drop every remaining HTML tag.
    $table = preg_replace('/<[\/!]*?[^<>]*?>/si', "", $table);

    // Drop line breaks together with any whitespace that follows them.
    $table = preg_replace('/[\r\n]+\s*/', "", $table);
    // NOTE(review): the listing shows two identical space replaces; one is
    // assumed to have been "&nbsp;" before entity decoding -- confirm.
    $table = str_replace(array("&nbsp;", " "), "", $table);

    $rows = explode(",{tr}", $table);
    // Whatever follows the last row marker is not a row.
    array_pop($rows);
    return $rows;
}
204.
/**
 * Converts an HTML table into a two-dimensional array of cell texts.
 *
 * All tags, line breaks, "&nbsp;" entities and ASCII spaces are removed
 * from the cell content.
 *
 * @param string $table HTML of a table built from <tr>/<td> elements.
 * @return array List of rows, each a list of cell strings; an empty array
 *               when no row terminated by </tr> is found.
 */
function get_td_array($table) {
    // Remove the opening structural tags; only the closing ones carry layout.
    $table = preg_replace('/<table[^>]*?>/si', "", $table);
    $table = preg_replace('/<tr[^>]*?>/si', "", $table);
    $table = preg_replace('/<td[^>]*?>/si', "", $table);
    // Closing tags become row and cell markers. They are matched
    // case-insensitively, like the opening tags above.
    $table = str_ireplace(array("</tr>", "</td>"), array("{tr}", "{td}"), $table);

    // Drop every remaining HTML tag.
    $table = preg_replace('/<[\/!]*?[^<>]*?>/si', "", $table);

    // Drop line breaks together with any whitespace that follows them.
    $table = preg_replace('/[\r\n]+\s*/', "", $table);
    // NOTE(review): the listing shows two identical space replaces; one is
    // assumed to have been "&nbsp;" before entity decoding -- confirm.
    $table = str_replace(array("&nbsp;", " "), "", $table);

    $rows = explode('{tr}', $table);
    // Whatever follows the last row marker is not a row.
    array_pop($rows);

    // Initialised up front so an empty table returns an array.
    $td_array = array();
    foreach ($rows as $tr) {
        $cells = explode('{td}', $tr);
        // Same trailing-fragment rule, per row.
        array_pop($cells);
        $td_array[] = $cells;
    }
    return $td_array;
}
227.
/**
 * Returns every run of ASCII letters found in a string, sorted ascending.
 *
 * @param string $str      Text to scan.
 * @param bool   $distinct When truthy (the default) duplicates are removed;
 *                         the comparison is case-sensitive.
 * @return array Sorted, zero-indexed list of words.
 */
function split_en_str($str,$distinct=true) {
    preg_match_all('/([a-zA-Z]+)/', $str, $match);
    $words = $match[1];
    if ($distinct) {
        $words = array_unique($words);
    }
    // sort() also re-indexes, closing the gaps array_unique() leaves behind.
    sort($words);
    return $words;
}
236.
237.
</td[^></tr[^></table[^></td[^></a\s+href=["|\']?([^></td[^></tr[^></table[^></td[^></a\s+href=["|\']?([^>
除非特别声明,PHP100新闻均为原创或投稿报道,转载请注明作者及原文链接
原文地址: http://www.php100.com/html/php/hanshu/2013/0903/1039.html

被折叠的 条评论
为什么被折叠?



