- 写入图片
def saveImg(self, imageURL, fileName):
    """Download the resource at ``imageURL`` and write it to ``fileName``.

    Args:
        imageURL: URL of the image to fetch (passed to ``urllib.urlopen``).
        fileName: Path of the local file to create; it is opened in binary
            write mode, so an existing file is overwritten.
    """
    u = urllib.urlopen(imageURL)
    try:
        data = u.read()
    finally:
        # Release the connection even if the read fails.
        u.close()
    # The context manager closes the file even if the write raises.
    with open(fileName, 'wb') as f:
        f.write(data)
- 写入文件
def saveBrief(self,content,name):
fileName = name + "/" + name + ".txt"
f = open(fileName,"w+")
print u"保存信息",fileName
f.write(content.encode('utf-8'))
- 创建新文件夹
创建新目录
def mkdir(self, path):
    """Create the directory ``path`` unless something already exists there.

    Leading and trailing whitespace is stripped from ``path`` before use.

    Returns:
        True if the directory (including any missing parents) was created,
        False if the path already existed.
    """
    target = path.strip()
    if os.path.exists(target):
        return False
    os.makedirs(target)
    return True
- 看完《Python数据采集》第一章,将 demo 网页中的图片和图片信息存至本地作为练手,保存备忘。
import urllib
from bs4 import BeautifulSoup
def saveImg(imageURL, fileName, baseDir="/home/zhangbo/PycharmProjects/reptile/"):
    """Download the resource at ``imageURL`` and store it under ``baseDir``.

    Args:
        imageURL: URL of the image to fetch (passed to ``urllib.urlopen``).
        fileName: Name of the file to create; appended to ``baseDir``.
        baseDir: Directory prefix for the output file. It is concatenated
            with ``fileName`` as-is, so it must end with a path separator.
            Defaults to the original hard-coded project directory.
    """
    u = urllib.urlopen(imageURL)
    try:
        data = u.read()
    finally:
        # Release the connection even if the read fails.
        u.close()
    # The context manager closes the file even if the write raises.
    with open(baseDir + fileName, 'wb') as f:
        f.write(data)
# Fetch the demo page and save every gift's picture, using the text of the
# row's first cell as the local file name.
main_url = "http://www.pythonscraping.com/pages/"
response = urllib.urlopen(main_url+"page3.html")
try:
    # Name the parser explicitly so the result does not depend on which
    # parsers happen to be installed.
    bsObj = BeautifulSoup(response.read(), "html.parser")
finally:
    response.close()
giftList = bsObj.find_all("tr",{"class":"gift"})
for gift in giftList:
    cells = gift.find_all("td")
    if len(cells) < 4:
        # Row does not have the expected four cells; nothing to download.
        continue
    img = cells[3].find("img")
    if img is None or not img.get("src"):
        # No picture in this row; skip it instead of requesting a bogus URL.
        continue
    fileName = cells[0].get_text().strip()
    # Read the src attribute directly rather than slicing the tag's string
    # form at fixed offsets, which breaks when the file name length or the
    # attribute order changes.
    imgURL = main_url + img["src"]
    saveImg(imgURL, fileName)