# start

from bs4 import BeautifulSoup
import requests

# Download the slickcharts.com/sp500 page and parse the returned HTML.
# A timeout is passed because requests otherwise waits indefinitely for a
# server that never answers.
req = requests.get('https://www.slickcharts.com/sp500', timeout=30)
# Stop on a 4xx/5xx reply instead of silently parsing an error page.
req.raise_for_status()
soup = BeautifulSoup(req.text, 'html.parser')
from bs4 import BeautifulSoup

# Beautiful Soup closes the tags that the input leaves open.  No parser is
# named here, so the library picks one of the installed parsers; the exact
# tree (for example the <html>/<body> wrapper shown below) depends on which
# parser that turns out to be.
soup = BeautifulSoup("<p>Some<b>bad<i>HTML")
print(soup.prettify())
# <html>
#  <body>
#   <p>
#    Some
#    <b>
#     bad
#     <i>
#      HTML
#     </i>
#    </b>
#   </p>
#  </body>
# </html>

soup.find(text="bad")
# 'bad'

soup.i
# <i>HTML</i>

# The same clean-up applied to XML.  The "xml" feature is provided by lxml,
# which therefore has to be installed for this call to work.
soup = BeautifulSoup("<tag1>Some<tag2/>bad<tag3>XML", "xml")
print(soup.prettify())
# <?xml version="1.0" encoding="utf-8"?>
# <tag1>
#  Some
#  <tag2 />
#  bad
#  <tag3>
#   XML
#  </tag3>
# </tag1>
# Print the <td> cells of every row of the table whose class is
# "lineItemsTable".
# NOTE(review): `plateRequest` is not defined in this snippet; presumably it
# is a requests response created earlier -- confirm.
# The parser is named explicitly so the parse tree does not depend on which
# optional parsers happen to be installed.
soup = BeautifulSoup(plateRequest.text, 'html.parser')

table = soup.find("table", {"class": "lineItemsTable"})
if table is None:
    # find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise ValueError('no <table class="lineItemsTable"> found in the response')

for row in table.find_all("tr"):
    cells = row.find_all("td")
    print(cells)
from bs4 import BeautifulSoup

# Parse `html_doc` with the standard-library parser and print the tree with
# one tag or string per line, indented by nesting depth.
# NOTE(review): `html_doc` must already be bound when this runs; in the
# visible part of this file it is only assigned further down -- reorder the
# snippets before executing the file top to bottom.
soup = BeautifulSoup(html_doc, 'html.parser')

print(soup.prettify())
# <html>
#  <head>
#   <title>
#    The Dormouse's story
#   </title>
#  </head>
#  <body>
#   <p class="title">
#    <b>
#     The Dormouse's story
#    </b>
#   </p>
#   <p class="story">
#    Once upon a time there were three little sisters; and their names were
#    <a class="sister" href="http://example.com/elsie" id="link1">
#     Elsie
#    </a>
#    ,
#    <a class="sister" href="http://example.com/lacie" id="link2">
#     Lacie
#    </a>
#    and
#    <a class="sister" href="http://example.com/tillie" id="link3">
#     Tillie
#    </a>
#    ; and they lived at the bottom of a well.
#   </p>
#   <p class="story">
#    ...
#   </p>
#  </body>
# </html>
# Ways to navigate the parsed tree held in `soup`.  Each expression is
# followed by a comment showing the value it evaluates to.  Run as a script
# these bare expressions produce no output; wrap one in print() to see it.

soup.title
# <title>The Dormouse's story</title>

soup.title.name
# 'title'

soup.title.string
# "The Dormouse's story"

soup.title.parent.name
# 'head'

soup.p
# <p class="title"><b>The Dormouse's story</b></p>

# `class` is a multi-valued attribute, so Beautiful Soup returns a list.
soup.p['class']
# ['title']

soup.a
# <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>

soup.find_all('a')
# [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
#  <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
#  <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

soup.find(id="link3")
# <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>
# Sample HTML document used as parser input by the examples in this file.
# The closing </body> and </html> tags are absent from the literal; the
# markup is kept exactly as written.
html_doc = """<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""