import urllib.request, urllib.error
import re
def get_references(title: str, timeout: float = 10.0):
    """Return the "Cited by" count Google Scholar shows for *title*.

    The title is sent as the search query, and the first ``Cited by N``
    occurrence in the returned HTML is taken as the answer (presumably the
    top search hit -- the page is not parsed beyond this regex match).

    Args:
        title: Paper title to search for. It is fully URL-encoded, so
            characters such as ``&``, ``+``, ``#`` and non-ASCII text are safe.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        The citation count as an ``int``; ``0`` when the page contains no
        ``Cited by N`` text; ``None`` when the request fails (HTTP error
        status, unreachable host, or timeout). Failures are reported on
        stdout rather than raised.
    """
    from urllib.parse import quote_plus

    # Encode the whole title, not just spaces: a raw '&' or '#' would
    # otherwise terminate the q= parameter, and non-ASCII characters would
    # make the request itself fail.
    query = quote_plus(title)
    search_url = (
        'https://scholar.google.com/scholar'
        f'?hl=en&q={query}&btnG=&as_sdt=1%2C5&as_sdtp='
    )
    # Browser-like headers, kept exactly as in the original request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
        'Referer': 'https://www.google.com/',
    }
    req = urllib.request.Request(search_url, headers=headers)

    try:
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(req, timeout=timeout) as response:
            html_content = response.read().decode('utf-8', errors='replace')
    except urllib.error.HTTPError as e:
        # The server answered with an error status (e.g. 429 rate limiting).
        print(f'Error: {e.code} {e.reason}')
        return None
    except (urllib.error.URLError, TimeoutError) as e:
        # No usable answer at all: DNS failure, refused connection, timeout.
        reason = getattr(e, 'reason', e)
        print(f'Error: {reason}')
        return None

    # hl=en in the URL requests the English UI, which labels the count
    # "Cited by N".
    m = re.search(r'Cited by\s(\d+)', html_content)
    return int(m.group(1)) if m else 0
# Demo run: look up each sample paper title and print its citation count.
titles = [
    'Experimental Study on the Autogenic Acid Fluid System of a High-Temperature Carbonate Reservoir by Acid Fracturing',
    'Experimental study on a new type of self-propping fracturing technology',
]
# map() is lazy, so each lookup happens right before its own line is printed,
# in the same order as a plain fetch-then-print loop.
for title, num_citations in zip(titles, map(get_references, titles)):
    print(f'{title}: {num_citations} 次被引用')
使用爬虫在谷歌学术上查询文献的被引次数
最新推荐文章于 2025-07-13 00:41:24 发布
部署运行你感兴趣的模型镜像
您可能感兴趣的与本文相关的镜像
Python3.9
Conda
Python
Python 是一种高级、解释型、通用的编程语言,以其简洁易读的语法而闻名,适用于广泛的应用,包括Web开发、数据分析、人工智能和自动化脚本
625

被折叠的 条评论
为什么被折叠?



