<?php
/**
 * URL de-duplication maintenance page.
 *
 * Dispatches on the "action" query-string parameter:
 *   - addDB        : prints a confirmation only (the import body is disabled).
 *   - addURLRepeat : prints a confirmation only (the import call is disabled).
 *   - count        : runs countTotal() and prints its report.
 *
 * The HTML forms at the bottom of this file post back to deal.php with the
 * action in the query string.
 */
header('Content-Type:text/html;charset=utf-8');

require 'includes/mysql.func.php';

// The forms submit to "deal.php?action=...", so the action is read from the
// query string explicitly instead of relying on register_globals.
$action = isset($_GET['action']) ? $_GET['action'] : '';

if ($action === 'addDB') {
    // addDB() has no active body, so it is not invoked here.
    echo 'addDB done';
}

if ($action === 'addURLRepeat') {
    // The import itself is currently disabled; call addURLRepeat() here to
    // re-enable it.
    echo 'addURLRepeat done';
}

if ($action === 'count') {
    countTotal();
}

/**
 * Opens the connection to the "url" database and switches it to UTF-8.
 *
 * Prints 'Mysql Connect Error' and terminates the script when the connection
 * cannot be established.
 *
 * @return mysqli
 */
function openUrlDb()
{
    try {
        $db = new mysqli('localhost', 'root', 'ustc', 'url');
    } catch (mysqli_sql_exception $e) {
        // Newer PHP versions throw instead of setting the connect errno.
        echo 'Mysql Connect Error';
        exit(0);
    }

    if (mysqli_connect_errno()) {
        echo 'Mysql Connect Error';
        exit(0);
    }

    $db->query('SET NAMES UTF8');

    return $db;
}

/**
 * Prepares a statement, terminating the script with a message on failure.
 *
 * @param mysqli $db  Open connection.
 * @param string $sql SQL text with "?" placeholders.
 *
 * @return mysqli_stmt
 */
function prepareStatement($db, $sql)
{
    $stmt = $db->prepare($sql);
    if ($stmt === false) {
        echo 'Mysql Prepare Error';
        exit(0);
    }

    return $stmt;
}

/**
 * Executes a single-parameter, single-column SELECT and returns its first row.
 *
 * @param mysqli_stmt $stmt  Prepared SELECT with exactly one string placeholder
 *                           and one result column.
 * @param string      $param Value bound to the placeholder.
 *
 * @return array|null One-element array holding the first row's column value,
 *                    or null when the query returned no rows.
 */
function fetchFirstColumn($stmt, $param)
{
    $value = null;

    $stmt->bind_param('s', $param);
    if (!$stmt->execute()) {
        echo 'Mysql Query Error';
        exit(0);
    }

    // Buffer the result so the connection is free for the next statement.
    $stmt->store_result();
    $stmt->bind_result($value);
    $found = ($stmt->fetch() === true);
    $stmt->free_result();

    return $found ? array($value) : null;
}

/**
 * Returns the part of a URL after the first "//", trimmed.
 *
 * When the URL contains no "//" the trimmed URL itself is returned, so a
 * scheme-less entry is still looked up instead of becoming an empty string.
 *
 * @param string $url
 *
 * @return string
 */
function stripUrlScheme($url)
{
    $parts = explode('//', $url);

    return trim(isset($parts[1]) ? $parts[1] : $parts[0]);
}

/**
 * Walks every row of urlcase and checks its URLs against url_db.
 *
 * Column 2 of each urlcase row holds one or more URLs separated by "|".
 * The first URL (scheme stripped) is looked up in url_db:
 *   - if it is not found, that URL is printed followed by "<br>";
 *   - otherwise every remaining URL is looked up as well, and column 0 of the
 *     row is printed once for each URL whose id differs from the first one.
 * A row counts as resolved when the first URL is found and no other URL
 * disagrees. The number of resolved rows is printed at the end.
 *
 * @return void
 */
function countTotal()
{
    $db = openUrlDb();

    set_time_limit(0);

    $lookup = prepareStatement($db, 'select id from url_db where url = ?');

    $cases = $db->query('select * from urlcase');
    if ($cases === false) {
        echo 'Mysql Query Error';
        exit(0);
    }

    $yes = 0;

    // Iterate over all rows rather than a fixed row count.
    while ($case = $cases->fetch_row()) {
        // A single case may carry several URLs.
        $urls = explode('|', (string) $case[2]);
        $urlCount = count($urls);

        $firstUrl = stripUrlScheme($urls[0]);
        $firstRow = fetchFirstColumn($lookup, $firstUrl);

        if ($firstRow === null || empty($firstRow[0])) {
            // No match in url_db for the first URL of this case.
            echo htmlspecialchars($firstUrl, ENT_QUOTES, 'UTF-8') . '<br>';
            continue;
        }

        $firstId = $firstRow[0];
        $allMatch = true;

        for ($j = 1; $j < $urlCount; $j++) {
            // NOTE(review): only the first URL has its scheme stripped before
            // the lookup; the remaining URLs are looked up as stored. Confirm
            // which form url_db.url actually holds.
            $otherRow = fetchFirstColumn($lookup, trim($urls[$j]));

            if ($otherRow === null || $otherRow[0] !== $firstId) {
                $allMatch = false;
                echo htmlspecialchars((string) $case[0], ENT_QUOTES, 'UTF-8');
            }
        }

        if ($allMatch) {
            $yes++;
        }
    }

    echo '解决' . $yes . '个';

    $cases->free();
    $lookup->close();
    $db->close();
}

/**
 * Prints every line of e:/URLRepeat_2.txt, each followed by "<br>".
 *
 * @return void
 */
function show()
{
    $path = "e:/URLRepeat_2.txt";

    $fp = fopen($path, 'r');
    if ($fp === false) {
        echo 'File Open Error';
        return;
    }

    // fgets() returns false at end of file, which ends the loop cleanly.
    while (($line = fgets($fp)) !== false) {
        echo htmlspecialchars($line, ENT_QUOTES, 'UTF-8') . '<br>';
    }

    fclose($fp);
}

/**
 * Imports e:/URLRepeat_2.txt into the urlcase table.
 *
 * Each line is split on single spaces into url_id, url and anchor (fields 0,
 * 1 and 2). The case id is field 0 concatenated with field 2. When no urlcase
 * row exists for that case id, one is inserted and '0000000<br>' is printed;
 * otherwise the URL is appended to the stored url column with a "|" separator
 * and the merged value is printed. Lines with fewer than three fields are
 * skipped.
 *
 * @return void
 */
function addURLRepeat()
{
    $path = "e:/URLRepeat_2.txt";

    $fp = fopen($path, 'r');
    if ($fp === false) {
        echo 'File Open Error';
        return;
    }

    $db = openUrlDb();

    $selectUrl = prepareStatement($db, 'select url from urlcase where case_id = ?');
    $insertCase = prepareStatement(
        $db,
        'insert into urlcase (case_id,url,url_id,anchor) values(?,?,?,?)'
    );
    $updateUrl = prepareStatement($db, 'update urlcase set url = ? where case_id = ?');

    while (($line = fgets($fp)) !== false) {
        $fields = explode(' ', $line);
        if (count($fields) < 3) {
            // Blank or malformed line: nothing to import.
            continue;
        }

        $urlId = trim($fields[0]);
        $url = trim($fields[1]);
        $anchor = trim($fields[2]);
        $caseId = trim($fields[0] . $fields[2]);

        $existing = fetchFirstColumn($selectUrl, $caseId);

        if ($existing === null) {
            // Unknown case id: create the row.
            echo '0000000' . '<br>';
            $insertCase->bind_param('ssss', $caseId, $url, $urlId, $anchor);
            $insertCase->execute();
        } else {
            // Known case id: append this URL to the stored list.
            $merged = $existing[0] . '|' . $url;
            echo htmlspecialchars($merged, ENT_QUOTES, 'UTF-8') . '<br>';

            $mergedTrimmed = trim($merged);
            $updateUrl->bind_param('ss', $mergedTrimmed, $caseId);
            $updateUrl->execute();
        }
    }

    $selectUrl->close();
    $insertCase->close();
    $updateUrl->close();
    $db->close();
    fclose($fp);
}

/**
 * Placeholder for importing the normalised URL list into the database.
 *
 * The function currently has no active statements and does nothing.
 * NOTE(review): the disabled draft read a space-separated text file line by
 * line and inserted the first two fields into a table named "url" -- confirm
 * the intended source file and schema before re-implementing.
 *
 * @return void
 */
function addDB()
{
}
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
</head>
<body>
<form action="deal.php?action=addDB" method="post" name="addDB"><input type="submit" id="addDB" value='将url归一化库插入到数据库' /></form>
<form action="deal.php?action=addURLRepeat" method="post" name="addURLRepeat"><input type="submit" id="addURLRepeat" value="将需要处理的URL 插入数据库" /></form>
<form action="deal.php?action=count" method="post" name="count"><input type="submit" id="count" value="开始统计" /></form>
</body>
</html>