Python爬取的数据存为json文件,并读取

本文示例:使用 requests 和 lxml 抓取图书搜索结果页,把书名、作者、出版社、出版时间和价格保存为 JSON 文件,然后再读取该文件并打印。

import requests
import time
from lxml import etree
import json


def _pick(nodes, index, default=''):
    """Return ``nodes[index]``, or *default* when that position does not exist."""
    try:
        return nodes[index]
    except IndexError:
        return default


def json_data_save(url):
    """Scrape one search-result page and save the extracted fields to ``a.json``.

    The page at *url* is fetched, decoded as GBK and parsed with lxml. For
    every ``<li>`` under ``<ul class="bigimg">`` the following are collected:

    * ``name``         - ``title`` attribute of the item's direct ``<a>`` child
    * ``athor``        - first link title in the ``search_book_author`` paragraph
    * ``publish``      - last link title in that same paragraph
    * ``publish_time`` - second-to-last text node of that paragraph's spans,
      keeping only the part after the final ``/``
    * ``price``        - ``search_now_price`` text, keeping the part after ``¥``

    A field that is missing for an item is stored as an empty string so the
    five parallel lists always stay the same length.

    Args:
        url: Address of the result page to download.

    Returns:
        None. The result is written to ``a.json`` in the working directory
        as one JSON object mapping each field name to a list of values.

    Raises:
        requests.RequestException: If the download fails or times out.
        UnicodeDecodeError: If the response body is not valid GBK.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }

    # A timeout keeps an unresponsive server from blocking the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    html = response.content.decode('gbk')
    data = etree.HTML(html)
    items = data.xpath('//ul[@class="bigimg"]/li')

    names = []
    authors = []
    publishs = []
    publish_times = []
    prices = []
    for item in items:
        # Author and publisher come from the same node list, so query it once.
        link_titles = item.xpath('./p[@class="search_book_author"]/span/a/@title')
        span_texts = item.xpath('./p[@class="search_book_author"]/span/text()')
        price_texts = item.xpath('./p[@class="price"]/span[@class="search_now_price"]/text()')

        names.append(_pick(item.xpath('./a/@title'), 0))
        authors.append(_pick(link_titles, 0))
        publishs.append(_pick(link_titles, -1))
        publish_times.append(_pick(span_texts, -2).split('/')[-1])
        prices.append(_pick(price_texts, 0).split('¥')[-1])

    # The key 'athor' is kept as-is because it is part of the saved file format.
    dicts = {
        'name': names,
        'athor': authors,
        'publish': publishs,
        'publish_time': publish_times,
        'price': prices,
    }

    # Save the data as JSON. Mode 'w' (not 'a'): appending a second object to
    # an existing file would produce a document json.load() cannot parse.
    try:
        with open('a.json', 'w', encoding="utf-8") as f:
            # ensure_ascii=False keeps non-ASCII characters readable in the file.
            f.write(json.dumps(dicts, ensure_ascii=False))
    except OSError as e:
        # The with-statement already closed the file; no explicit close needed.
        print(str(e))
        
def open_json(path):
    """Load the JSON file at *path*, print its content and type, and return it.

    Args:
        path: Location of a UTF-8 encoded JSON file.

    Returns:
        The parsed JSON value, or ``None`` when the file cannot be opened,
        decoded or parsed. In that case the error message is printed
        instead of being raised.
    """
    try:
        # The with-statement closes the file on every path, so no explicit
        # close() is needed (and none is possible if open() itself fails).
        with open(path, 'r', encoding='utf-8') as f:
            js_data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(str(e))
        return None

    print('这是json数据:', js_data)
    print('这是数据类型:', type(js_data))
    return js_data
        
if __name__ == '__main__':
    # Script entry point: scrape one result page, then read the saved file back.
    search_url = 'http://search.dangdang.com/?key=%D0%A1%CD%F5%D7%D3&act=input&page_index=1'
    json_path = 'a.json'

    json_data_save(search_url)
    # Pause for one second between writing the file and reading it back.
    time.sleep(1)
    open_json(json_path)
  

 

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值