Searching for literature by hand was getting tedious, so I tried using scholarly to crawl publication records from Google Scholar. Unfortunately, Google's anti-scraping system detected the crawler before it had fetched even 900 papers. In the end, Publish or Perish is the better option.
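Before giving up entirely, one mitigation worth trying is scholarly's built-in proxy support, which can delay (though not prevent) the blocking. A minimal sketch using the library's documented ProxyGenerator; FreeProxies() rotates through public proxies and tends to be slow and flaky:

from scholarly import ProxyGenerator, scholarly

# Route scholarly's traffic through rotating free proxies so that
# Google sees requests coming from changing IP addresses.
pg = ProxyGenerator()
if pg.FreeProxies():
    scholarly.use_proxy(pg)
else:
    print('No working free proxy found; requests will go out directly.')

Even with a proxy, keep the request rate low: Google Scholar has no official API and blocks aggressively.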
import os
import time
import csv
import random

from scholarly import scholarly
def search_scholar(query, filename):
    """Search Google Scholar for `query` and write each result to a CSV file."""
    count = 0
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['title', 'author', 'year', 'cites', 'url']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        # Write the CSV header row
        writer.writeheader()
        search_query = scholarly.search_pubs(query)
        while True:
            try:
                pub = next(search_query)
                result = {
                    'title': pub['bib']['title'],
                    'author': pub['bib']['author'],
                    'year': pub['bib']['pub_year'],
                    'cites': pub['num_citations'],
                    'url': pub['url_scholarbib'],
                }
                writer.writerow(result)
                count += 1
                if count % 100 == 0:
                    print('fetched: ', count)
                # Random delay between results to look less like a bot
                sleep_time = random.uniform(15, 45)
                time.sleep(sleep_time)
            except StopIteration:
                # No more results
                break
            except Exception as e:
                # Log the error and keep going
                print(f"Error occurred: {e}")
                continue
    return count
if __name__ == '__main__':
    # The ./results/ directory must already exist
    os.chdir('./results/')
    filename = 'googleresults.csv'
    query = "your search terms"
    publications = search_scholar(query, filename)
    print(publications)
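To sanity-check the export afterwards, the CSV can be loaded back for a quick look; a small sketch assuming pandas is installed and the path matches the script above:

import pandas as pd

# Load the exported results and inspect the first few rows
df = pd.read_csv('./results/googleresults.csv')
print(len(df), 'records')
print(df.head())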