import urllib.request, urllib.error
import re
def get_references(title):
    """Return the first "Cited by N" count on the Google Scholar results page.

    Despite its name, this function does not return a reference list: it
    searches Google Scholar for *title* and extracts the number from the
    first "Cited by <N>" occurrence in the returned HTML.

    Args:
        title: Paper title to search for. It is URL-encoded before being
            placed in the query string, so characters such as ``&``, ``#``
            or non-ASCII text cannot corrupt the request URL.

    Returns:
        The citation count as an ``int``, ``0`` when the page contains no
        "Cited by" marker, or ``None`` when the request failed (the error
        is printed to stdout).
    """
    # Imported locally so this function does not depend on the file-level
    # import list being changed.
    from urllib.parse import quote_plus

    # quote_plus turns spaces into '+' (as before) and percent-encodes every
    # other character that is unsafe inside a query-string value.
    query = quote_plus(title)
    search_url = f'https://scholar.google.com/scholar?hl=en&q={query}&btnG=&as_sdt=1%2C5&as_sdtp='
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
        'Referer': 'https://www.google.com/'
    }
    req = urllib.request.Request(search_url, headers=headers)

    try:
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(req) as response:
            raw_page = response.read()
    except urllib.error.HTTPError as e:
        print(f'Error: {e.code} {e.reason}')
        return None
    except urllib.error.URLError as e:
        # Non-HTTP failures (DNS, refused connection, ...) carry no status
        # code; report them the same way instead of crashing the caller.
        print(f'Error: {e.reason}')
        return None

    # The pattern only needs ASCII text, so a stray undecodable byte should
    # not abort the whole lookup.
    html_content = raw_page.decode('utf-8', errors='replace')

    # Raw string: '\s' and '\d' are regex escapes, not Python string escapes.
    m = re.search(r'Cited by\s(\d+)', html_content)
    if m is None:
        return 0
    return int(m.group(1))
# Paper titles whose citation counts are looked up, one request per title.
titles = [
    'Experimental Study on the Autogenic Acid Fluid System of a High-Temperature Carbonate Reservoir by Acid Fracturing',
    'Experimental study on a new type of self-propping fracturing technology',
]

# Query each title in order and print the value returned by get_references
# as-is after the title.
for title in titles:
    num_citations = get_references(title)
    print(f'{title}: {num_citations} 次被引用')