import re
from logging import basicConfig,DEBUG,debug,CRITICAL,disable
# Send records of level DEBUG and above to debug.log; filemode 'w' truncates
# the file each time the script starts.
basicConfig(
    filename='debug.log',
    filemode='w',
    level=DEBUG,
    format='%(levelname)s: %(message)s. [%(lineno)d]%(filename)s <%(asctime)s>',
)

# String-literal prefixes. Each line is a bare expression with no effect.
u'str'  # text string (the u prefix is kept for Python 2 compatibility)
r'str'  # raw string: backslashes are not processed as escapes
b'str'  # bytes literal

import webbrowser

# Ask the default web browser to display the given address.
webbrowser.open('url')
import requests
# One request per HTTP verb. 'url' stands in for a real address; every call
# returns a Response object.
rget = requests.get('url')
rpost = requests.post('url', data={'key': 'value'})  # form-encoded body
rput = requests.put('url', data={'key': 'value'})
rdelete = requests.delete('url')
rhead = requests.head('url')
roptions = requests.options('url')
# Query-string parameters for the GET below. A list value repeats the key
# in the URL (key2=value2&key2=value3).
paradict = {'key1': 'value1', 'key2': ['value2', 'value3']}
rget = requests.get('https://httpbin.org/get', params=paradict)

# Response attributes (bare expressions; useful in an interactive session).
rget.url                # final URL including the encoded query string
rget.content            # body as bytes
rget.encoding           # encoding used to decode .text
rget.apparent_encoding  # encoding guessed from the body itself
rget.text               # body decoded to str
rget.status_code        # numeric HTTP status

# raise_for_status() raises HTTPError for 4xx/5xx responses; catch only that
# so unrelated failures are not silently printed and ignored.
try:
    rget.raise_for_status()
except requests.exceptions.HTTPError as err:
    print(err)
# Stream the response body to disk in chunks of up to 100000 bytes.
# Binary mode is required because iter_content() yields bytes.
with open('web_text.txt', 'wb') as a:
    for n in rget.iter_content(100000):
        # Each chunk must be passed to write(); calling it with no argument
        # raises TypeError and nothing is saved.
        a.write(n)
import bs4
# Parse the response text with the lxml parser.
html = bs4.BeautifulSoup(rget.text, 'lxml')
html.prettify()  # returns the indented markup as a string (discarded here)

# Dotted access returns the first tag with that name, or None if absent.
html.title
html.head

# Going up the tree.
url_tag = html.a
url_tag.parent
parents_gener = url_tag.parents  # iterates over every ancestor
url_tag.parent.name

# Going sideways. The bs4 attribute names are spelled out in full
# ("previous_", not "prev_"); an unknown attribute is treated as a child-tag
# lookup and quietly yields None.
html.p
html.p.next_sibling
html.p.previous_sibling
next_siblings_gener = html.p.next_siblings
prev_siblings_gener = html.p.previous_siblings

# Tag names; the BeautifulSoup object itself has a name too.
html.name
html.title.name

# Going forwards and backwards in parse order.
html.a.next_element
html.a.previous_element
next_elements_gener = html.p.next_elements
prev_elements_gener = html.p.previous_elements
# Attribute dictionaries.
html.a.attrs
html.attrs

# Text content of a tag.
html.a.string
html.a.text
html.body.string

# Print the string of the first a tag only when it is an HTML comment.
if isinstance(html.a.string, bs4.element.Comment):
    print(html.a.string)

# Children and descendants. Note that tags_gener is rebound on the second
# assignment, so only the descendants iterator remains.
tags_list = html.head.contents
tags_gener = html.head.children
tags_gener = html.descendants

# All strings in the document; the second assignment replaces the first
# with the whitespace-stripped variant.
text_strings_gener = html.strings
text_strings_gener = html.stripped_strings
# find_all() with different kinds of name filter. b_list is rebound each
# time, so only the last result is kept.
b_list = html.find_all('b')                    # exact tag name
# 'pattern' is a placeholder regular expression matched against tag names.
b_list = html.find_all(re.compile('pattern'))
# A list matches any of the listed tag names ('a' and 'b' are examples);
# recursive=False restricts the search to direct children.
b_list = html.find_all(['a', 'b'], recursive=False)
b_list = html.find_all(True, limit=5)          # any tag, stop after five
def cust_func(tag):
    """Tell whether *tag* has a ``class`` attribute but no ``id`` attribute.

    Args:
        tag: An object exposing ``has_attr(name)``.

    Returns:
        A truthy value when ``class`` is present and ``id`` is absent,
        otherwise a falsy value.
    """
    return tag.has_attr('class') and not tag.has_attr('id')
# find_all() with a callable filter and with attribute filters. cust_list is
# rebound each time, so only the last result is kept.
# NOTE(review): newer bs4 releases name the `text` argument `string`;
# confirm against the installed version before renaming.
cust_list = html.find_all(cust_func, text='str')
cust_list = html.find_all(id='name')
# 'pattern', 'pattern1' and 'pattern2' are placeholder regular expressions.
cust_list = html.find_all(href=re.compile('pattern'))
# class_ (with the underscore) is used because `class` is a Python keyword.
cust_list = html.find_all(
    href=re.compile('pattern1'),
    class_=re.compile('pattern2'),
)
# The find* family, called here without any filter arguments. Results are
# discarded; the lines list the available methods.
html.find()

# Searching upwards.
html.find_parents()
html.find_parent()

# Searching sideways.
html.find_next_siblings()
html.find_next_sibling()
html.find_previous_siblings()
html.find_previous_sibling()

# Searching in parse order.
html.find_all_next()
html.find_next()
html.find_all_previous()
html.find_previous()

# CSS selectors; select() returns a list of matching tags.
html.select('div')                    # by tag name
html.select('.classname')             # by class
html.select('#idname')                # by id
html.select('p #idname')              # descendant of a p tag
html.select('head > title')           # direct child
html.select('a[class="classname"]')   # attribute equals value
url_tags_list = html.select('a[href="url"]')

# Per-tag accessors: the text of each match, then its href attribute.
for url_tag in url_tags_list:
    url_tag.get_text()
for url_tag in url_tags_list:
    url_tag.get('href')