"""Rebuild a Jupyter notebook (.ipynb) from a rendered nbviewer HTML page.

The script downloads the HTML at ``url``, collects every ``<div>`` whose CSS
class marks it as a code input area or as a rendered text cell, and writes
the collected cells to ``notebook.ipynb`` as nbformat-4 JSON.
"""
import json
import urllib.request

from bs4 import BeautifulSoup

url = 'http://nbviewer.jupyter.org/url/jakevdp.github.com/downloads/notebooks/XKCD_plots.ipynb'

# Use a context manager so the HTTP response is closed as soon as the body
# has been read, instead of lingering until garbage collection.
with urllib.request.urlopen(url) as response:
    text = response.read()
# for local html file
# with open("/Users/note/jupyter/notebook.html", "rb") as response:
#     text = response.read()

soup = BeautifulSoup(text, 'lxml')
# see some of the html
print(soup.div)

# Skeleton of the notebook; cells are appended in document order.
dictionary = {'nbformat': 4, 'nbformat_minor': 1, 'cells': [], 'metadata': {}}

for d in soup.find_all("div"):
    # With an HTML parser, Beautiful Soup exposes "class" as a list of names;
    # divs without a class attribute fall back to an empty list and are skipped.
    classes = d.get("class", [])

    if "input_area" in classes:
        # code cell: keep only the text, dropping the syntax-highlighting markup
        cell = {
            'metadata': {},
            'outputs': [],
            'source': [d.get_text()],
            'execution_count': None,
            'cell_type': 'code',
        }
    elif "text_cell_render" in classes:
        # text cell: keep the inner HTML as the cell source
        # NOTE(review): this stores rendered HTML, not the original Markdown
        # text -- presumably relied on to display because Markdown cells
        # accept inline HTML; confirm this is the intended result.
        cell = {
            'metadata': {},
            'source': [d.decode_contents()],
            'cell_type': 'markdown',
        }
    else:
        continue

    # Exactly one cell per matching div, even if its class list repeats a
    # name or carries both markers (code takes priority in that case).
    dictionary['cells'].append(cell)

# The with-block guarantees the file is flushed and closed before exit.
with open('notebook.ipynb', 'w', encoding='utf-8') as f:
    json.dump(dictionary, f)
36
# Rebuild a Jupyter notebook (.ipynb) from a rendered nbviewer HTML page.
#
# The script downloads the HTML at `url`, collects every <div> whose CSS
# class marks it as a code input area or as a rendered text cell, and writes
# the collected cells to `notebook.ipynb` as nbformat-4 JSON.
import json
import urllib.request

from bs4 import BeautifulSoup

url = 'http://nbviewer.jupyter.org/url/jakevdp.github.com/downloads/notebooks/XKCD_plots.ipynb'

# Use a context manager so the HTTP response is closed as soon as the body
# has been read, instead of lingering until garbage collection.
with urllib.request.urlopen(url) as response:
    text = response.read()
# for local html file
# with open("/Users/note/jupyter/notebook.html", "rb") as response:
#     text = response.read()

soup = BeautifulSoup(text, 'lxml')
# see some of the html
print(soup.div)

# Skeleton of the notebook; cells are appended in document order.
dictionary = {'nbformat': 4, 'nbformat_minor': 1, 'cells': [], 'metadata': {}}

for d in soup.find_all("div"):
    # With an HTML parser, Beautiful Soup exposes "class" as a list of names;
    # divs without a class attribute fall back to an empty list and are skipped.
    classes = d.get("class", [])

    if "input_area" in classes:
        # code cell: keep only the text, dropping the syntax-highlighting markup
        cell = {
            'metadata': {},
            'outputs': [],
            'source': [d.get_text()],
            'execution_count': None,
            'cell_type': 'code',
        }
    elif "text_cell_render" in classes:
        # text cell: keep the inner HTML as the cell source
        # NOTE(review): this stores rendered HTML, not the original Markdown
        # text -- presumably relied on to display because Markdown cells
        # accept inline HTML; confirm this is the intended result.
        cell = {
            'metadata': {},
            'source': [d.decode_contents()],
            'cell_type': 'markdown',
        }
    else:
        continue

    # Exactly one cell per matching div, even if its class list repeats a
    # name or carries both markers (code takes priority in that case).
    dictionary['cells'].append(cell)

# The with-block guarantees the file is flushed and closed before exit.
with open('notebook.ipynb', 'w', encoding='utf-8') as f:
    json.dump(dictionary, f)