跟着b站爬取漫客栈漫画

视频地址:《用python实现在漫画网站免费下载喜欢的漫画》(哔哩哔哩 bilibili)

视频里老师没有把代码敲完,只讲了个大概思路。我把它补全了,不过也只是演示性地抓了几章,没有把《灵剑尊》整本爬完。

from random import randint

import parsel

import requests

import re

import json

from pprint import pprint

import time

import os

import random

# Chapter-content API endpoint for one fixed chapter. The query string is
# assembled from adjacent literals so every parameter sits on its own line;
# the resulting value is a single URL string.
url = (
    'https://comic.mkzcdn.com/chapter/content/v1/'
    '?chapter_id=1004107'
    '&comic_id=209412'
    '&format=1'
    '&quality=1'
    '&sign=12e1c69d4004a630429dffb02abb83ea'
    '&type=1'
    '&uid=65412952'
)

# Headers passed to the HTTP requests in this script; only a desktop Chrome
# User-Agent is set.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/114.0.0.0 Safari/537.36'
    ),
}

""" resp=requests.get(url=url,headers=headers).text

# pprint(type(resp.text))

print('^^^'*20)

# print(resp.text['data'])

json1=json.loads(resp)

pprint(json1['data']['page'][0]['image'])

print('oh ggbond') """

# Fetch the page list of the single chapter addressed by `url`.
# A timeout is passed because requests waits indefinitely without one, and the
# status is checked so an HTTP error is reported as such instead of surfacing
# later as a JSON decoding error or a KeyError.
resp = requests.get(url=url, headers=headers, timeout=10)
resp.raise_for_status()

# The payload is indexed as data -> page, a list with one entry per page.
# NOTE(review): in the code visible here, image1_list is only consumed by the
# disabled single-chapter snippet that follows.
image1_list = resp.json()['data']['page']

""" for image1 in image1_list:

  img_url=image1['image']

  img_id=image1['page_id']

  img_content=requests.get(url=img_url,headers=headers).content

  with open('img/'+str(img_id)+'.jpg',mode='wb') as f:

    f.write(img_content)

  # print(img_url) """


 

# Scrape the comic's home page and build jlz_library, a mapping of
# chapter id -> chapter title, from the first six chapter entries.
resp_home = requests.get('https://www.mkzhan.com/209412/', headers=headers, timeout=10)
print(resp_home)
# Fail here with a clear HTTP error rather than parsing an error page.
resp_home.raise_for_status()

selector = parsel.Selector(resp_home.text)
lis = selector.css('.j-chapter-item')

count1 = 0
jlz_library = {}
# No delay is needed in this loop: it only parses HTML that has already been
# downloaded and issues no further requests.
for count1, i in enumerate(lis, start=1):
    chapter_id = i.css('a::attr(data-chapterid)').get()
    title_parts = i.css('a::text').getall()
    # Skip entries without an id or without any text node; otherwise a None
    # key or an IndexError on [-1] would break the download step.
    if chapter_id and title_parts:
        # The last text node of the anchor is taken as the title.
        jlz_library[chapter_id] = title_parts[-1].strip()
    # Demo limit: stop once six entries have been examined.
    if count1 > 5:
        break

# Download every page image of each collected chapter into
# ./img_colle/<chapter title>/<page_id>.jpg.
for i in jlz_library:
    # Brief random pause so chapter requests are not fired back-to-back.
    time.sleep(random.random())

    url_comic = f'https://comic.mkzcdn.com/chapter/content/v1/?chapter_id={i}&comic_id=209412&format=1&quality=1&sign=12e1c69d4004a630429dffb02abb83ea&type=1&uid=65412952'
    resp2 = requests.get(url_comic, headers=headers, timeout=10)
    # Surface HTTP failures directly instead of as a JSON/KeyError below.
    resp2.raise_for_status()
    image2_list = resp2.json()['data']['page']

    # The title comes from scraped HTML; replace characters that are invalid
    # in Windows file names (and the path separators) before using it as a
    # directory name.
    safe_title = re.sub(r'[\\/:*?"<>|]', '_', jlz_library[i])
    chapter_dir = os.path.join('./img_colle', safe_title)
    # Create the directory once per chapter; exist_ok makes reruns harmless.
    os.makedirs(chapter_dir, exist_ok=True)

    for image2 in image2_list:
        img_url2 = image2['image']
        img_id2 = image2['page_id']
        img_resp2 = requests.get(url=img_url2, headers=headers, timeout=10)
        if not img_resp2.ok:
            # Do not save an error body under a .jpg name; report and move on.
            print(f'skip {img_url2}: HTTP {img_resp2.status_code}')
            continue
        with open(os.path.join(chapter_dir, f'{img_id2}.jpg'), mode='wb') as f:
            f.write(img_resp2.content)

    print(image2_list, '爬完了')


 

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值