// Crawls the ASCI member directory.
// For every year from 1969 through 2011 it fetches that year's list page,
// follows each profile link found on it, extracts the member's name, address
// lines and specialties from the profile HTML, and saves them as a Physician.
def base = 'http://www.the-asci.org/core/asci'
(1969..2011).each { year ->
    // Group 1 of every match is the pid taken from a profile link on the list page.
    def profileLinks = "${base}/list.php?type=year&key=$year".toURL().text =~ /<a href=profile\.php\?pid=(\d*)>/
    profileLinks.each { link ->
        def pid = link[1]
        def text = "${base}/profile.php?pid=${pid}".toURL().text
        def data = [:]

        def nameMatch = text =~ /<div class=member_name>(.*?)<\/div>/
        // Indexing a matcher that found nothing throws IndexOutOfBoundsException,
        // which would abort the entire crawl; skip such profiles instead.
        if (!nameMatch) {
            System.err.println "Skipping pid=${pid}: no member_name element found"
            return // exits this closure call only; iteration continues with the next link
        }
        data.name = nameMatch[0][1]

        // A profile may carry several address divs; strip any nested tags from
        // each one and join them as separate lines.
        def addressMatch = text =~ /<div class=member_address>(.*?)<\/div>/
        def address = addressMatch.collect { it[1].replaceAll(/<\/?.*?>/, '') }
        data.contactInfo = address.join('\n')

        // The specialties section is optional: when it is absent the list stays
        // empty and an empty string is stored.
        def section = text =~ /<div width='100%'><div class=rh>Specialties<\/div>(.*?)<\/div>(<div width='100%'>|\s{3})/
        def specialties = []
        if (section) {
            // Group 1 holds the inner HTML of the section; each entry is a <div class=rN>.
            (section[0][1] =~ /<div class=r\d>(\w*)<\/div>/).each { specialties << it[1] }
        }
        data.specialties = specialties.join(', ')

        new Physician(data).save()
    }
}
asci
最新推荐文章于 2025-11-25 23:42:46 发布
本文介绍了一种使用脚本爬取ASCI网站上特定年份成员名单的方法,并详细解析了如何从每个成员的个人页面抓取姓名、联系方式及专长等信息。
16万+

被折叠的 条评论
为什么被折叠?



