# Pipeline steps, in call order: gethtmllines() -> extractimageurls() -> showresults() -> saveresults()
def main():
    """Run the whole pipeline: read the page, extract, display, save.

    The lines returned by gethtmllines() are passed to extractimageurls();
    the resulting list is printed by showresults() and then written out by
    saveresults().
    """
    # NOTE(review): the input name is spelled "geograhic" while the output
    # name is spelled "geographic" -- confirm the file on disk really has
    # this spelling before changing either one.
    source_page = 'nationalgeograhic.html'
    target_file = 'nationalgeographic.txt'
    found_urls = extractimageurls(gethtmllines(source_page))
    showresults(found_urls)
    saveresults(found_urls, target_file)
def gethtmllines(filename):
    """Read a UTF-8 text file and return its lines.

    Args:
        filename: Path of the file to read.

    Returns:
        A list with one string per line; line endings are kept.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    # The context manager closes the file even when reading fails, which
    # the previous explicit open()/close() pair did not guarantee.
    with open(filename, 'r', encoding='utf-8') as f:
        return f.readlines()
def extractimageurls(htmllines):
    """Collect image URLs from lines of HTML.

    For every line that mentions ``img``, the value of the last ``src=``
    attribute on that line is isolated (surrounding quotes, trailing
    attributes and the line ending are dropped) and kept if it contains
    ``http``. Lines without any ``src=`` are ignored.

    Args:
        htmllines: Iterable of HTML source lines.

    Returns:
        A list of URL strings in the order they were encountered.
    """
    urls = []
    for line in htmllines:
        if 'img' not in line:
            continue
        # rpartition keeps the "last src= on the line" rule while making
        # it possible to detect lines that have no src attribute at all.
        _, marker, tail = line.rpartition('src=')
        if not marker:
            continue
        tail = tail.lstrip()
        if tail[:1] in ('"', "'"):
            # Quoted value: everything up to the matching closing quote.
            url = tail[1:].split(tail[0], 1)[0]
        else:
            # Unquoted value: ends at whitespace or at the end of the tag.
            pieces = tail.split('>', 1)[0].split()
            url = pieces[0] if pieces else ''
        if 'http' in url:
            urls.append(url)
    return urls
def showresults(urls):
    """Print every URL on its own line, numbered starting from 1.

    Args:
        urls: Iterable of URL strings to display.
    """
    # enumerate replaces the hand-maintained counter; the printed text is
    # unchanged.
    for count, url in enumerate(urls, 1):
        print('第{0}个URL:{1}'.format(count, url))
def saveresults(urls, outputfile):
    """Write the URLs to a text file, one per line.

    Any existing file at ``outputfile`` is overwritten.

    Args:
        urls: Iterable of URL strings to write.
        outputfile: Path of the file to create.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # UTF-8 matches the encoding used when the HTML is read, so non-ASCII
    # characters cannot fail on a narrower locale encoding. The context
    # manager closes the file even if a write fails part-way through.
    with open(outputfile, 'w', encoding='utf-8') as f:
        f.writelines(url + '\n' for url in urls)
# Script entry point: runs the pipeline as soon as this file is executed.
# NOTE(review): there is no ``if __name__ == "__main__":`` guard, so
# importing this module also triggers the run.
main()