python入门
前几天自己看了一下python的语法,根据网上的教程自己写了一个小小的爬虫,可以爬出淘宝mm的展示图片,然后自动保存到目录。
import random
import sys
reload(sys)
import requests
import urllib
from bs4 import BeautifulSoup
import time
def gDownloadWithFilename(url,savePath,file):
    """Download *url* and save it as savePath + file.

    Uses the module-level ``requests`` with the script's browser
    User-Agent, so the server returns the real image instead of an
    error page (a likely cause of the fixed-size 5M downloads the
    author observed with the header-less ``urllib.URLopener``).

    Errors are printed and swallowed so that one failed image does
    not abort the whole crawl.
    """
    # NOTE: parameter name ``file`` shadows a builtin; kept unchanged
    # for backward compatibility with existing callers.
    # 参数检查, 现忽略 (parameter validation is ignored for now)
    try:
        resp = requests.get(url, headers=headerss)
        # ``with`` guarantees the handle is closed even when write()
        # fails (the original leaked the handle on error).
        with open(savePath + file, 'w+b') as fp:
            fp.write(resp.content)
    except IOError as error:
        # requests' network exceptions derive from IOError, so this
        # still catches connection problems as the original intended.
        print("DOWNLOAD %s ERROR!==>>%s" % (url, error))
    except Exception as e:
        # original did '"..." + e' which itself raised TypeError;
        # str(e) makes the report actually work.
        print("Exception==>>" + str(e))
    return
# Entry page: Taobao model showcase listing, page 3.
url='https://mm.taobao.com/json/request_top_list.htm?page=3'
# Browser-like User-Agent so the server serves the normal HTML page
# instead of blocking the crawler.
headerss = {
'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36 Core/1.47.933.400 QQBrowser/9.4.8699.400',
}
data = requests.get(url, headers=headerss)
soup = BeautifulSoup(data.text, 'lxml')
# Running number used to name the saved image files (0.jpg, 1.jpg, ...).
count=0
# (redundant second "import time" removed — already imported at the top)
# Walk every model-avatar link on the index page; each href points to a
# personal page whose <img> tags are downloaded one by one.
# Hoisted: the original re-ran soup.find_all(...) and list.index(img)
# inside the loop (O(n^2), and .index is wrong for duplicate tags);
# enumerate() gives the true position in a single pass.
avatars = soup.find_all(class_="lady-avatar")
for idx, img in enumerate(avatars):
    print(img.get('href'))
    src_1 = "http:" + img.get('href')
    data2 = requests.get(src_1, headers=headerss)
    soup2 = BeautifulSoup(data2.text, 'lxml')
    # ``with`` closes the log file immediately; the original reopened
    # it on every inner iteration and leaked the handles.
    with open("list_img_taobao.txt", "a") as fo:
        fo.write("=====" + str(idx) + "============")
    for imgg in soup2.find_all("img"):
        src = imgg.get('src')
        print(src)
        if src is None:
            # some <img> tags carry no src attribute; the original
            # crashed here on "http:" + None
            continue
        time.sleep(1)  # be polite to the server between downloads
        jpg_url = "http:" + src
        # TODO (per author's note below): only download URLs that
        # really end in .jpg, and split saves into per-model folders.
        with open("list_img_taobao.txt", "a") as fo:
            fo.write("第" + str(count) + " " + jpg_url + "\n")
        gDownloadWithFilename(jpg_url, "./img/", str(count) + ".jpg")
        count += 1
有这样几个问题
一个是python的冒号我经常忘掉
另外下载之前似乎应该检查一下下载的后缀,为jpg才应该下载;然后保存可以再优化一下,分为几个文件夹保存;最后,下载下来的图片有时候大小莫名其妙地固定是5M,有可能是因为我网络不好的问题,再另存为一下就好了。