# My first web crawler. The code is simple and borrows heavily from earlier examples by others.
import re
import os
import requests
from bs4 import BeautifulSoup
from selenium import webdriver

# Path to the PhantomJS binary — the headless browser used to render
# the JavaScript-generated image links on each page.
driver = webdriver.PhantomJS(executable_path='D:/phantomjs-2.1.1-windows/bin/phantomjs')


def jiandanSpider(Url, basePath):
    """Download the full-size images from each jandan.net page in *Url*.

    Url: iterable of page URLs to visit (may be a generator).
    basePath: directory path (with trailing slash) where images are saved.

    Animated GIFs are skipped; every other image is fetched with
    ``requests`` and written as ``妹子图 <page>-<n><ext>``.
    """
    # The original crashed with FileNotFoundError unless the target
    # directory already existed — create it up front.
    os.makedirs(basePath, exist_ok=True)
    page = 1
    for page_url in Url:
        driver.get(page_url)
        driver.implicitly_wait(10)  # allow up to 10s for dynamic content to load
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        # Collect the full-size image links, skipping animated GIFs.
        # Links in the page are protocol-relative ("//..."), hence the 'http:' prefix.
        img_urls = [
            'http:' + a.get('href')
            for a in soup.select('a.view_img_link')
            if 'gif' not in a.get('href')
        ]
        for n, img_url in enumerate(img_urls, start=1):
            print("第 %s 张" % n)
            # NOTE: img_url[-4:] assumes a 4-character extension such as
            # ".jpg" — kept from the original; ".jpeg" would be truncated.
            with open(basePath + '妹子图 %s-%s' % (page, n) + img_url[-4:], 'wb') as f:
                # Timeout so one stalled download cannot hang the whole run.
                f.write(requests.get(img_url, timeout=30).content)
            print('下载完成')
        page += 1


if __name__ == '__main__':
    # Pages 1-4 of the gallery.
    urlPool = ('http://jandan.net/ooxx/page-{}#comments'.format(i)
               for i in range(1, 5))
    basePath = 'D:/jiandanImage/'
    try:
        jiandanSpider(urlPool, basePath)
    finally:
        driver.quit()  # release the PhantomJS browser process