js 抓取Google词库内容(仅供学习)

本文介绍了一个使用Microsoft XMLHTTP对象获取网页内容的示例。该示例通过JavaScript实现了一个简单的网页抓取功能,用于从Google搜索页面抓取定义查询结果,并将其显示在本地页面上。
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" >
<HTML>
    
<HEAD>
        
<title>xmlhttp获取网页内容</title>
<script language="javascript">
        
// Page-wide buffer of extracted HTML: getData() appends the matched fragments
// to it and resultwrite() emits it with document.write().
var result='';
        
/**
 * Synchronously fetches a URL with GET and returns the response body.
 *
 * Uses the native XMLHttpRequest when the host provides one and falls back to
 * the legacy "Microsoft.XMLHTTP" ActiveX object otherwise.
 *
 * @param {string} URL - Address to request.
 * @returns {string|boolean} The response text when the HTTP status is 200;
 *     false when sending throws or the status is anything else.
 */
function getXML(URL)
{
    var xmlhttp = (typeof XMLHttpRequest !== "undefined")
        ? new XMLHttpRequest()
        : new ActiveXObject("Microsoft.XMLHTTP");

    // Third argument `false` makes the request synchronous, so send() blocks
    // until the response (or a failure) is available.
    xmlhttp.open("GET", URL, false);

    try {
        xmlhttp.send();
    } catch (e) {
        // Network / cross-origin failures surface here; report them as false.
        return false;
    }

    if (xmlhttp.status === 200) {
        return xmlhttp.responseText;
    }

    // Non-200 answers are reported the same way as a failed send.
    return false;
}

         
/**
 * Fetch data: reads the "key" query parameter, shows it in the #txtKey input,
 * requests the Google "define:" search page for it, and puts the first
 * <ul>...</ul> fragment of the response into #divContent.
 *
 * Does nothing when the page URL carries no "key" parameter. Any exception is
 * reported to the user through alert().
 */
function getData()
{
    try {
        var keystr = getparastr("key");
        if (keystr != null) {
            // The query value arrives percent-encoded; decode it once for
            // display and re-encode it as a single component for the request
            // so characters such as "&", "+" or "#" cannot break the URL.
            var keyword = decodeURIComponent(keystr);
            document.getElementById("txtKey").value = keyword;

            var url = "http://www.google.com/search?hl=en&q=define%3A" + encodeURIComponent(keyword) + "&aq=f&oq=";

            // getXML() returns false on failure; only search real text.
            var resultStr = getXML(url);
            var cr = (typeof resultStr === "string")
                ? resultStr.match(/<ul[\s|\S]*?<\/ul>/)
                : null;

            // If the match produced an array, append every entry to the
            // page-wide `result` buffer (the pattern has no `g` flag and no
            // capture groups, so this is the first matching fragment).
            if (cr != null && typeof(cr) == 'object' && cr.length != null) {
                for (var i = 0; i < cr.length; i++) {
                    result += cr[i];
                }
            }

            // NOTE(review): this injects remote markup unsanitized; acceptable
            // only because the page is a learning demo.
            document.getElementById('divContent').innerHTML = result;
        }
    } catch (e) {
        alert(e);
    }
}

        
/**
 * Get parameter: looks up a query-string parameter in the current page URL.
 *
 * @param {string} strname - Parameter name to look for (compared exactly).
 * @returns {?string} The raw, still percent-encoded value of the first
 *     matching parameter, or null when the URL has no query string or no
 *     such parameter.
 */
function getparastr(strname)
{
    var hrefstr = window.location.href;

    // Drop the "#fragment" so it cannot leak into the last parameter's value.
    var hashPos = hrefstr.indexOf("#");
    if (hashPos !== -1) {
        hrefstr = hrefstr.substring(0, hashPos);
    }

    // Without a "?" there is no query string to search.
    var queryPos = hrefstr.indexOf("?");
    if (queryPos === -1) {
        return null;
    }

    var para = hrefstr.substring(queryPos + 1).split("&");
    for (var i = 0; i < para.length; i++) {
        var tempstr = para[i];
        var pos = tempstr.indexOf("=");
        // Segments without "=" carry no value and are skipped.
        if (pos !== -1 && tempstr.substring(0, pos) === strname) {
            return tempstr.substring(pos + 1);
        }
    }

    return null;
}

        
/**
 * Navigate: reloads the lookup page with the text of the given input as the
 * "key" query parameter.
 *
 * @param {string} obj - id of the input element whose value is the search term.
 */
function ongo(obj)
{
    var objtxt = document.getElementById(obj).value;
    var url = "Google.html"; // page to navigate to
    // encodeURIComponent (not encodeURI) so "&", "#", "+" and "=" inside the
    // search term cannot split or truncate the query string.
    window.location.href = url + "?key=" + encodeURIComponent(objtxt);
}

        
/**
 * Writes the page-wide `result` buffer into the document with
 * document.write().
 */
function resultwrite()
{
    document.write(result);
}

             
        
</script>
    
</HEAD>
    
<body onLoad="getData();">
             
<div style="text-align:center; font-size:12px">
               
<input type="text" name="txtKey" id="txtKey">
               
<a href="#" onClick="ongo('txtKey');">查询</a>
             
</div>
            
<div id="divContent"></div>
            
<br/>
            
<br/>
            
<br/>
              
<div id="tmpContent">
            
</div>
      
    
</body>
</HTML>
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值