-
-
Notifications
You must be signed in to change notification settings - Fork 48
/
generate_notebooks_from_toc.py
149 lines (128 loc) · 5.11 KB
/
generate_notebooks_from_toc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import argparse
from collections import OrderedDict, defaultdict
import re
from pathlib import Path
import nbformat as nbf
def parse_toc(toc_markdown):
    """
    Parse a markdown table of contents into a nested, ordered mapping.

    Parameters
    ----------
    toc_markdown : str
        Path to markdown file that contains only the table of contents. See
        Notes for a description of the format.

    Returns
    -------
    collections.defaultdict
        Mapping of each top-level title to an ``OrderedDict`` of its
        sections; each section maps in turn to an ``OrderedDict`` of its
        subsections, and so on. Entries are stored in file order and an
        entry without children maps to an empty ``OrderedDict``.

    Raises
    ------
    ValueError
        If a non-blank line is not a "hashtag" header, if the first header
        is not a top-level (``#``) header, or if a header is more than one
        level deeper than the header before it.

    Notes
    -----
    The markdown file should contain a table of contents with entries
    indicated as headers in the "hashtag" format. For example,

        # First TOC entry
        # Second entry
        ## Section of the second entry
        ## Another section of the second entry
        # Third entry
        ## Section of third entry
        ## Second section
        ### Subsection of the second section

    Blank lines are ignored. Each header is echoed to standard output as
    ``<level> <title>`` while the file is parsed. A title that is repeated
    under the same parent replaces the earlier entry (and its children).
    """
    with open(toc_markdown) as f:
        # Drop blank lines and surrounding whitespace up front.
        lines = [line.strip() for line in f if line.strip()]

    header = re.compile(r'^(#+) +(.*)')
    toc = defaultdict(OrderedDict)

    # branch[k] is the mapping that receives headers of level k + 1, so
    # len(branch) is always the level of the most recently added header.
    branch = [toc]
    latest_title = None

    for line in lines:
        match = header.match(line)
        if match is None:
            raise ValueError(
                f"Improperly formatted TOC: {line!r} is not a header"
            )
        hashes, title = match.group(1, 2)
        level_n = len(hashes)
        open_level = len(branch)

        if level_n > open_level:
            # Going deeper is only meaningful directly below an existing
            # entry, i.e. exactly one level down from the previous header.
            if latest_title is None or level_n > open_level + 1:
                raise ValueError(
                    f"Improperly formatted TOC: {line!r} is nested too "
                    "deeply for the header before it"
                )
            branch.append(branch[-1][latest_title])
        elif level_n < open_level:
            # Close every mapping deeper than the new header's level.
            del branch[level_n:]

        branch[-1][title] = OrderedDict()
        latest_title = title
        print(level_n, title)

    return toc
def generate_notebooks(toc, directory, parent_string='', start=0, depth=0):
    """
    Generate notebooks/cells/anchors from a table of contents.

    The top two levels each get their own notebooks; third-level TOC
    entries become h2 markdown cells in the enclosing second-level
    notebook, and links to those sections are added to that notebook
    below its title.

    Parameters
    ----------
    toc : dict
        Nested mapping of entry titles to mappings of their child entries,
        such as the one returned by ``parse_toc``.
    directory : str or pathlib.Path
        Directory in which the notebooks are written. It is created,
        together with any missing parent directories, when a notebook is
        written.
    parent_string : str, optional
        Dotted number of the parent entry; when non-empty it is prepended
        to the number of every entry in ``toc``.
    start : int, optional
        Number assigned to the first entry in ``toc``.
    depth : int, optional
        Nesting level of ``toc``, with 0 being the top level.

    Returns
    -------
    list
        When ``depth`` is 2, one markdown cell per entry in ``toc``
        (holding the bare entry title); otherwise an empty list.
    """
    results = []
    out_dir = Path(directory)

    for num, (entry, children) in enumerate(toc.items(), start=start):
        num_str = f'{num:02d}'
        if parent_string:
            num_str = '.'.join([parent_string, num_str])

        # Recurse before num_str gets its '.00' suffix so children inherit
        # the bare parent number. Children are numbered from 1.
        kids = []
        if children:
            kids = generate_notebooks(children, directory,
                                      parent_string=num_str, start=1,
                                      depth=depth + 1)

        if depth == 2:
            # Third-level entries are returned as cells for the caller to
            # place in its notebook rather than written to disk.
            results.append(nbf.v4.new_markdown_cell(entry))
        else:
            toc_kids = ''
            if depth == 1:
                # Kids are cells: link to each one from the title cell,
                # then turn the cell itself into an h2 heading.
                toc_entries = []
                for cell in kids:
                    cell_text = cell['source']
                    toc_link = cell_text.replace(' ', '-')
                    toc_entries.append(f'+ [{cell_text}](#{toc_link})')
                    cell['source'] = '## ' + cell_text
                toc_kids = '\n'.join(toc_entries)
            else:
                # Any other depth (in practice the top level): add a '00'
                # to the number used in the file name.
                num_str += '.00'

            notebook = nbf.v4.new_notebook()
            title_cell = nbf.v4.new_markdown_cell(
                '\n'.join([f'# {entry}', toc_kids])
            )
            notebook.cells = [title_cell, *kids]
            notebook_title = num_str + '-' + entry.replace(' ', '-') + '.ipynb'
            # parents=True so a nested destination such as "out/notebooks"
            # works even when "out" does not exist yet.
            out_dir.mkdir(parents=True, exist_ok=True)
            nbf.write(notebook, str(out_dir / notebook_title))

        print(' ' * depth, num_str)

    return results
if __name__ == "__main__":
    # Command-line entry point: parse the TOC file named on the command
    # line, then write the notebooks into the destination directory.
    cli = argparse.ArgumentParser(
        description='Generate notebooks from markdown table of contents.'
    )
    cli.add_argument(
        'toc',
        help=('Table of contents from which to generate the notebooks. '
              'Heading level (with #s) used to determine '
              'chapters/subsections.'),
    )
    cli.add_argument(
        '--destination-dir',
        default='.',
        help='Directory to which results notebooks should be written.',
    )
    options = cli.parse_args()
    generate_notebooks(parse_toc(options.toc), options.destination_dir)