import time
import requests
from bs4 import BeautifulSoup
import lxml
import json
import copy
import csv
# --- scraper configuration (module-level constants) ---

# Base URL for tianyancha company search; the query keyword is appended.
company_base_url = 'https://www.tianyancha.com/search?key='
search_list = []  # search space: keywords to look up
result_dir = 'D:/5-Python/爬虫/data/'  # output directory for scraped data

# Request headers captured from a Chrome session (fill in before running).
headers = {
}
# Session cookies captured from a Chrome session (fill in before running).
cookies = {
}

# Template record for one scraped company; presumably duplicated per
# result via the imported `copy` module -- confirm against later code.
target_data_frame = {
    'company_name' : '',
    'human_name' : '',
    'regist_time' : '',
    'telephone' : '',
    'email': '',
    'main_members' : ''
}
search_result = []  # accumulated result records

# BUG FIX: the original dict listed the "http" key twice, so the first
# proxy ('182.34.21.124:41873') was silently discarded -- a dict literal
# keeps only the last value for a duplicated key. `requests` maps one
# proxy per URL scheme, so a dict cannot hold a proxy *pool*; keep the
# single effective entry (same value the original code actually used).
# NOTE(review): the target URLs are https, which an "http" mapping does
# not cover -- add an "https" entry if proxying is actually required.
proxies = {
    "http" : '218.73.135.14:27182'
}
def get_member(main_member_index, id_str, company_detail_url, main_members):
    """Collect main-member names from one member-list section of a page.

    Finds the ``div`` whose id is *id_str* inside the parsed page
    *main_member_index* and appends the text of every ``a.link-click``
    element in it to *main_members* (mutated in place, nothing returned).
    Sections containing a nested ``div.company_pager`` are skipped, as
    the original code deliberately did.

    Parameters:
        main_member_index: parsed page (BeautifulSoup object) to search.
        id_str: id attribute of the member-list ``div`` to read.
        company_detail_url: unused; kept for caller compatibility.
        main_members: output list of member-name strings (mutated).
    """
    main_member_page = main_member_index.find(name='div', id=id_str)
    # BUG FIX: find() returns None when the section is absent; the
    # original dereferenced it unconditionally and raised AttributeError.
    if main_member_page is None:
        return
    # A nested pager means this section is paginated -- skip it, matching
    # the original `if ...: pass / else:` behavior without the inversion.
    if main_member_page.find(name='div', class_='company_pager'):
        return
    for link in main_member_page.find_all(name='a', class_='link-click'):
        main_members.append(link.text)
def get_main_member(company_detail_url):
print (company_detail_url)
company_detail_index = requests.get(company_detail_url, headers=headers, cookies=cookies, proxies=proxies)
if (company_detail_index):