"""Convert the first <table> of an HTML file into a CSV file.

The script reads ``html.html`` from the current working directory, takes the
first ``<tr>`` of the first ``<table>`` as the column headers and every
following ``<tr>`` as a data row, and writes the result to ``MyTable.csv``.
"""

# Importing the required modules
import os
import sys
import pandas as pd
from bs4 import BeautifulSoup

# Input HTML file; only its first <table> is converted.
path = 'html.html'

# Parse inside a context manager so the file handle is closed as soon as
# BeautifulSoup has consumed it.
with open(path) as html_file:
    soup = BeautifulSoup(html_file, 'html.parser')

# Look the table up once and fail with a readable message instead of an
# IndexError / TypeError further down when the document has no usable table.
table = soup.find("table")
if table is None:
    sys.exit("No <table> element found in " + repr(path))

rows = table.find_all("tr")
if not rows:
    sys.exit("The first <table> in " + repr(path) + " contains no <tr> rows")


def _cell_texts(row):
    """Return the text of each <th>/<td> that is a direct child of *row*.

    Selecting the cell tags explicitly skips the whitespace text nodes that
    sit between cells in the parse tree, so no try/except is needed to filter
    them out and they can never end up as bogus columns.
    ``recursive=False`` keeps the selection to the row's own cells.
    """
    return [cell.get_text() for cell in row.find_all(["th", "td"], recursive=False)]


# The first row supplies the column headers ...
header = rows[0]
list_header = _cell_texts(header)

# ... and every remaining row supplies one record.
HTML_data = rows[1:]
data = [_cell_texts(element) for element in HTML_data]

# Storing the data into a Pandas DataFrame.
# NOTE: pandas raises if a row's cell count does not match the header's.
dataFrame = pd.DataFrame(data=data, columns=list_header)

# Converting the Pandas DataFrame into a CSV file (the default index column
# is written as well, as before).
dataFrame.to_csv('MyTable.csv')