父节点
import bs4
from bs4 import BeautifulSoup
# Sample markup fed to the parser below: a standalone <head> section
# containing <meta>, <title> and <link> tags.
# NOTE(review): the '/' right after the second <link> tag is literal text in
# the sample; it presumably influences how the parser builds the tree, so
# confirm against the recorded output before cleaning it up.
html_doc = """
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<title>
齐鲁晚报网-山东新闻门户 传播品质资讯
</title>
<meta content="齐鲁晚报,生活日报,鲁南商报,山东新闻,济南新闻,滚动新闻,齐鲁晚报论坛,网上报料,拍客,微博齐鲁" name="keywords"/>
<meta content="”no-transform”" http-equiv="”Cache-Control”"/>
<meta content="YSz04dmCHI" name="baidu-site-verification"/>
<link href="https://img.qlwb.com.cn/templates/qlwb/css/2017shou/qlwb.css" rel="stylesheet" type="text/css"/>
<link href="https://img.qlwb.com.cn/templates/qlwb/css/2017shou/ad.css" rel="stylesheet" type="text/css"/>/
<link href="https://img.qlwb.com.cn/templates/qlwb/css/2017shou/index-szb.css" rel="stylesheet" type="text/css"/>
</head>
"""
# Parse the sample markup with the lxml parser and grab the <title> tag.
bs = bs4.BeautifulSoup(html_doc, 'lxml')
tag = bs.title

# Direct parent of the <title> tag.
print(tag.parent.name)
# The text inside <title> is a node of its own; its parent is the tag itself.
print(tag.string.parent.name)
# Type of the object sitting above the top-level <html> tag.
print(type(bs.html.parent))
# Parent of the parsed document object itself.
print(bs.parent)

# Walk upwards from <title> through every ancestor.
for ancestor in tag.parents:
    # Guard against a None entry before reading .name.
    if ancestor is None:
        print(ancestor)
    else:
        print(ancestor.name)
输出
head
title
<class 'bs4.BeautifulSoup'>
None
head
html
[document]
兄弟节点
import bs4
from bs4 import BeautifulSoup
# Sample markup fed to the parser below: a standalone <head> section
# containing <meta>, <title> and <link> tags.
# NOTE(review): the '/' right after the second <link> tag is literal text in
# the sample; it presumably influences which nodes end up as siblings of
# <title>, so confirm against the recorded output before cleaning it up.
html_doc = """
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<title>
齐鲁晚报网-山东新闻门户 传播品质资讯
</title>
<meta content="齐鲁晚报,生活日报,鲁南商报,山东新闻,济南新闻,滚动新闻,齐鲁晚报论坛,网上报料,拍客,微博齐鲁" name="keywords"/>
<meta content="”no-transform”" http-equiv="”Cache-Control”"/>
<meta content="YSz04dmCHI" name="baidu-site-verification"/>
<link href="https://img.qlwb.com.cn/templates/qlwb/css/2017shou/qlwb.css" rel="stylesheet" type="text/css"/>
<link href="https://img.qlwb.com.cn/templates/qlwb/css/2017shou/ad.css" rel="stylesheet" type="text/css"/>/
<link href="https://img.qlwb.com.cn/templates/qlwb/css/2017shou/index-szb.css" rel="stylesheet" type="text/css"/>
</head>
"""
# Parse the sample markup with the lxml parser and grab the <title> tag.
bs = bs4.BeautifulSoup(html_doc, 'lxml')
tag = bs.title

# Immediate neighbours of <title> under the same parent.
print(tag.next_sibling)
print(tag.previous_sibling)
print("-----------------------")

# Every sibling that comes after <title>.
for sibling in tag.next_siblings:
    print(sibling)
print('----------------')

# Every sibling that comes before <title>.
for sibling in tag.previous_siblings:
    print(sibling)
print('-------------------')

# Neighbouring elements of <title>, as opposed to its siblings.
print(tag.next_element)
print(tag.previous_element)
输出
-----------------------
<meta content="齐鲁晚报,生活日报,鲁南商报,山东新闻,济南新闻,滚动新闻,齐鲁晚报论坛,网上报料,拍客,微博齐鲁" name="keywords"/>
<meta content="”no-transform”" http-equiv="”Cache-Control”"/>
<meta content="YSz04dmCHI" name="baidu-site-verification"/>
<link href="https://img.qlwb.com.cn/templates/qlwb/css/2017shou/qlwb.css" rel="stylesheet" type="text/css"/>
<link href="https://img.qlwb.com.cn/templates/qlwb/css/2017shou/ad.css" rel="stylesheet" type="text/css"/>
----------------
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
-------------------
齐鲁晚报网-山东新闻门户 传播品质资讯
参考
Beautiful Soup 4.2.0 文档