Google新推出的sitemap,是对原来robots.txt的扩展,sitemap使用xml格式来记录整个网站的信息并供google读取,使搜索引擎能更快更全面的收录网站的内容。
sitemap的作用就好像为网站提供了整站的rss,而google就是这些rss的订阅者,只要网站有更新就会自动通知google。这样一来,搜索引擎的收录由被动的pull变成了主动的push,辛苦的google爬虫们终于可以松一口气了。
快来尝试下: https://www.google.com/webmasters/sitemaps/login
有Gmail的可以用Gmail直接登录。登录后把生成的xml文件地址提交就可以了。
下面提供生成XML的google新推出的sitemap,是对原来robots.txt的扩展,sitemap使用xml格式来记录整个网站的信息并供google读取,使搜索引擎能更快更全面的收录网站的内容。
sitemap的作用就好像为网站提供了整站的rss,而google就是这些rss的订阅者,只要网站有更新就会自动通知google。这样一来,搜索引擎的收录由被动的pull变成了主动的push,辛苦的google爬虫们终于可以松一口气了。
下面提供生成XML的Google SiteMap代码[ASP版本]。
<
%
Server.ScriptTimeout
=
50000
dim
seoDir
session(
"
server
"
)
=
"
http://www.seo165.com
"
'
网址
seoDir
=
"
/
"

set
objfso
=
CreateObject
(
"
Scripting.FileSystemObject
"
)
root
=
Server.MapPath(seoDir)

'
response.ContentType = "text/xml"
'
response.write "<?xml version='1.0' encoding='UTF-8'?>"
'
response.write "<urlset xmlns='http://www.google.com/schemas/sitemap/0.84'>"
str
=
"
<?xml version='1.0' encoding='UTF-8'?>
"
&
vbcrlf
str
=
str
&
"
<urlset xmlns='http://www.google.com/schemas/sitemap/0.84'>
"
&
vbcrlf

Set
objFolder
=
objFSO.GetFolder(root)
'
response.write getfilelink(objFolder.Path,objFolder.dateLastModified)
Set
colFiles
=
objFolder.Files
For
Each
objFile In colFiles
str
=
str
&
getfilelink(objFile.Path,objfile.dateLastModified)
&
vbcrlf
Next
ShowSubFolders(objFolder)


str
=
str
&
"
</urlset>
"
&
vbcrlf
set
fso
=
nothing

Set
objStream
=
Server.CreateObject(
"
ADODB.Stream
"
)
With
objStream
.Open
.Charset
=
"
utf-8
"
.Position
=
objStream.Size
.WriteText
=
str
.SaveToFile server.mappath(
"
/sitemap.xml
"
),
2
'
生成的XML文件名
.Close
End
With

Set
objStream
=
Nothing
If
Not
Err
Then
Response.Write(
"
<script>alert('成功生成站点地图!');history.back();</script>
"
)
Response.End
End
If

Sub
ShowSubFolders(objFolder)
Set
colFolders
=
objFolder.SubFolders
For
Each
objSubFolder In colFolders
if
folderpermission(objSubFolder.Path)
then
str
=
str
&
getfilelink(objSubFolder.Path,objSubFolder.dateLastModified)
&
vbcrlf
Set
colFiles
=
objSubFolder.Files
For
Each
objFile In colFiles
str
=
str
&
getfilelink(objFile.Path,objFile.dateLastModified)
&
vbcrlf
Next
ShowSubFolders(objSubFolder)
end
if
Next
End Sub


Function
getfilelink(file,datafile)
file
=
replace
(file,root,
""
)
file
=
replace
(file,
"
"
,
"
/
"
)
If
FileExtensionIsBad(file)
then
Exit
Function
if
month
(datafile)
<
10
then
filedatem
=
"
0
"
if
day
(datafile)
<
10
then
filedated
=
"
0
"
filedate
=
year
(datafile)
&
"
-
"
&
filedatem
&
month
(datafile)
&
"
-
"
&
filedated
&
day
(datafile)
getfilelink
=
"
<url><loc>
"
&
server.htmlencode(session(
"
server
"
)
&
seoDir
&
file)
&
"
</loc><lastmod>
"
&
filedate
&
"
</lastmod><changefreq>daily</changefreq><priority>1.0</priority></url>
"
Response.Flush
End Function


Function
Folderpermission(pathName)
PathExclusion
=
Array
(
"
emp
"
,
"
_vti_cnf
"
,
"
_vti_pvt
"
,
"
_vti_log
"
,
"
cgi-bin
"
,
"
admin
"
,
"
edu
"
)
Folderpermission
=
True
for
each
PathExcluded in PathExclusion
if
instr
(
ucase
(pathName),
ucase
(PathExcluded))
>
0
then
Folderpermission
=
False
exit
for
end
if
next
End Function


Function
FileExtensionIsBad(sFileName)
Dim
sFileExtension, bFileExtensionIsValid, sFileExt
Extensions
=
Array
(
"
png
"
,
"
gif
"
,
"
jpg
"
,
"
jpeg
"
,
"
zip
"
,
"
pdf
"
,
"
ps
"
,
"
html
"
,
"
htm
"
,
"
php
"
,
"
wk1
"
,
"
wk2
"
,
"
wk3
"
,
"
wk4
"
,
"
wk5
"
,
"
wki
"
,
"
wks
"
,
"
wku
"
,
"
lwp
"
,
"
mw
"
,
"
xls
"
,
"
ppt
"
,
"
doc
"
,
"
swf
"
,
"
wks
"
,
"
wps
"
,
"
wdb
"
,
"
wri
"
,
"
rtf
"
,
"
ans
"
,
"
txt
"
,
"
asp
"
)
'
设置列表的文件名,扩展名不在其中的话SiteMap则不会收录该扩展名的文件
if
len
(
trim
(sFileName))
=
0
then
FileExtensionIsBad
=
true
Exit
Function
end
if

sFileExtension
=
right
(sFileName,
len
(sFileName)
-
instrrev
(sFileName,
"
.
"
))
bFileExtensionIsValid
=
false
for
each
sFileExt in extensions
if
ucase
(sFileExt)
=
ucase
(sFileExtension)
then
bFileExtensionIsValid
=
True
exit
for
end
if
next
FileExtensionIsBad
=
not
bFileExtensionIsValid
End Function
%
>
sitemap的作用就好像为网站提供了整站的rss,而google就是这些rss的订阅者,只要网站有更新就会自动通知google。这样一来,搜索引擎的收录由被动的pull变成了主动的push,辛苦的google爬虫们终于可以松一口气了。
快来尝试下: https://www.google.com/webmasters/sitemaps/login
有Gmail的可以用Gmail直接登录。登录后把生成的xml文件地址提交就可以了。
下面提供生成XML的google新推出的sitemap,是对原来robots.txt的扩展,sitemap使用xml格式来记录整个网站的信息并供google读取,使搜索引擎能更快更全面的收录网站的内容。
sitemap的作用就好像为网站提供了整站的rss,而google就是这些rss的订阅者,只要网站有更新就会自动通知google。这样一来,搜索引擎的收录由被动的pull变成了主动的push,辛苦的google爬虫们终于可以松一口气了。
下面提供生成XML的Google SiteMap代码[ASP版本]。








































































































