# Casual notes (闲来随笔)
from urllib import request
from bs4 import BeautifulSoup
def replace(content):
    """Return *content* with every plain space character (``' '``) removed.

    Only the ASCII space is dropped; tabs, newlines and any other
    whitespace characters are kept unchanged.

    Args:
        content: A string, or any iterable of strings. Items equal to
            ``' '`` are skipped and the remaining items are concatenated
            in order.

    Returns:
        A new ``str`` made of the kept items.
    """
    # ``str.join`` builds the result in one pass and avoids the previous
    # local accumulator that was named ``str`` and shadowed the builtin.
    return "".join(ch for ch in content if ch != " ")
def _fetch_html(url, headers):
    """Download *url* with the given request headers and return the body decoded as UTF-8."""
    req = request.Request(url, headers=headers)
    # The response is used as a context manager so the underlying
    # connection is closed even if reading or decoding fails.
    with request.urlopen(req) as res:
        return res.read().decode("utf-8")


def _print_movie(tag):
    """Print the fields scraped from one ``div.item`` element.

    Args:
        tag: A BeautifulSoup element that contains a ``div.pic`` and a
            ``div.info`` (with ``div.hd`` and ``div.bd`` inside it).
    """
    pic = tag.find("div", class_="pic")
    link = pic.find("a")
    print(link.get("href"))
    print(link.find("img")["src"])
    # Text of the <em> inside div.pic (presumably the ranking number -- confirm).
    print(pic.find("em").get_text())

    info = tag.find("div", class_="info")
    spans = info.find("div", class_="hd").find_all("span")
    print(spans[0].contents[0])
    # Second and third spans are printed joined on one line.  Slicing keeps
    # the output identical when both exist, but no longer raises IndexError
    # for an item that has fewer than three spans.
    print("".join(span.contents[0] for span in spans[1:3]))

    body = info.find("div", class_="bd")
    print(replace(body.find_all("p")[0].contents[0]))
    print(replace(body.find("div", class_="star").find_all("span")[1].contents[0]))

    # ``find`` returns None when there is no <p class="quote">; print an
    # empty line in that case instead of failing with AttributeError.
    quote = body.find("p", class_="quote")
    print(replace(quote.get_text()) if quote is not None else "")


def main():
    """Fetch the Douban Top 250 page and print the scraped fields of every ``div.item``."""
    head = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0"
    }
    url = "https://movie.douban.com/top250?format=text"
    soup = BeautifulSoup(_fetch_html(url, head), "html.parser")
    for tag in soup.find_all("div", class_="item"):
        _print_movie(tag)


if __name__ == "__main__":
    main()