跟着b站爬取漫客栈漫画

最新推荐文章于 2025-04-10 19:16:05 发布

最强巫女

最新推荐文章于 2025-04-10 19:16:05 发布

阅读量361

点赞数

文章标签： python 爬虫

本文链接：https://blog.youkuaiyun.com/weixin_43179869/article/details/132078382

版权

视频地址：用python实现在漫画网站免费下载喜欢的漫画_哔哩哔哩_bilibili

视频里老师没有把代码敲完，只讲了个大概。我把它补全了，不过我也只是意思了一下，没有把这本《灵剑尊》全部爬完。

from random import randint

import parsel

import requests

import re

import json

from pprint import pprint

import time

import os

import random

# Request headers shared by every HTTP call in this script: a desktop
# Chrome User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
}

# Chapter-content API URL for one fixed chapter (chapter_id=1004107) of
# comic 209412.
# NOTE(review): the `sign` and `uid` query values are hard-coded; presumably
# they were captured from a browser session - confirm they are still valid.
url = 'https://comic.mkzcdn.com/chapter/content/v1/?chapter_id=1004107&comic_id=209412&format=1&quality=1&sign=12e1c69d4004a630429dffb02abb83ea&type=1&uid=65412952'

# Disabled early experiment, kept as a bare string literal: the string is
# evaluated and discarded, so nothing inside it runs. It fetched `url` as
# text, parsed it with json.loads and pretty-printed the 'image' value of the
# first entry under data -> page.
""" resp=requests.get(url=url,headers=headers).text

# pprint(type(resp.text))

print('^^^'*20)

# print(resp.text['data'])

json1=json.loads(resp)

pprint(json1['data']['page'][0]['image'])

print('oh ggbond') """

# Fetch the page list of the single hard-coded chapter addressed by `url`.
# A timeout is required: without one, requests will wait forever on a
# stalled connection.
resp = requests.get(url=url, headers=headers, timeout=10)

# Fail loudly on an HTTP error instead of trying to JSON-decode an error page.
resp.raise_for_status()

# Entries found under data -> page in the API response.
# NOTE(review): each entry presumably carries 'image' and 'page_id' keys, as
# read elsewhere in this script for the same endpoint - confirm against a
# live response.
image1_list = resp.json()['data']['page']

# Disabled single-chapter download loop, kept as a bare string literal so it
# never runs. For every entry of image1_list it requested the entry's 'image'
# URL and wrote the response bytes to img/<page_id>.jpg.
""" for image1 in image1_list:

img_url=image1['image']

img_id=image1['page_id']

img_content=requests.get(url=img_url,headers=headers).content

with open('img/'+str(img_id)+'.jpg',mode='wb') as f:

f.write(img_content)

# print(img_url) """

# Fetch the comic's landing page and build the table of chapters to download.
# A timeout keeps the script from hanging forever on a stalled connection.
resp_home = requests.get('https://www.mkzhan.com/209412/', headers=headers, timeout=10)
print(resp_home)

# Stop here on an HTTP error rather than silently parsing an error page and
# ending up with an empty chapter table.
resp_home.raise_for_status()

selector = parsel.Selector(resp_home.text)
lis = selector.css('.j-chapter-item')

# Maps chapter id (the anchor's data-chapterid attribute) -> chapter title.
jlz_library = {}

# Only the first six chapter items are taken; this is a demo, not a full
# crawl. No sleep is needed in this loop: it only parses HTML that has
# already been downloaded and issues no network requests.
for item in lis[:6]:
    chapter_id = item.css('a::attr(data-chapterid)').get()
    title_parts = item.css('a::text').getall()

    # Skip items without an id or without any text node; indexing [-1] on an
    # empty list would raise IndexError, and a None id would produce an
    # unusable chapter URL later on.
    if chapter_id is None or not title_parts:
        continue

    # The title is taken from the last text node of the anchor.
    jlz_library[chapter_id] = title_parts[-1].strip()

# Download every page image of each collected chapter into
# ./img_colle/<chapter title>/<page_id>.jpg
for chapter_id, chapter_title in jlz_library.items():
    # Small random pause (0-1 s) between chapter requests so the server is
    # not hit in a tight loop.
    time.sleep(random.random())

    url_comic = f'https://comic.mkzcdn.com/chapter/content/v1/?chapter_id={chapter_id}&comic_id=209412&format=1&quality=1&sign=12e1c69d4004a630429dffb02abb83ea&type=1&uid=65412952'
    # Timeout + status check: never hang forever, never parse an error page.
    resp2 = requests.get(url_comic, headers=headers, timeout=10)
    resp2.raise_for_status()
    image2_list = resp2.json()['data']['page']

    # Replace characters that are illegal in directory names on common file
    # systems; fall back to the chapter id if nothing usable is left.
    safe_title = re.sub(r'[\\/:*?"<>|]', '_', chapter_title).strip() or str(chapter_id)
    chapter_dir = f'./img_colle/{safe_title}'
    # Create the chapter directory once per chapter, not once per image;
    # exist_ok avoids the separate check-then-create step.
    os.makedirs(chapter_dir, exist_ok=True)

    for image2 in image2_list:
        img_url2 = image2['image']
        img_id2 = image2['page_id']

        img_resp = requests.get(url=img_url2, headers=headers, timeout=10)
        # Do not write an HTTP error body to disk as if it were an image.
        img_resp.raise_for_status()

        with open(f'{chapter_dir}/{img_id2}.jpg', mode='wb') as f:
            f.write(img_resp.content)

    # NOTE(review): the original indentation was lost; this progress line is
    # assumed to run once per chapter - confirm.
    print(image2_list, '爬完了')