-
Notifications
You must be signed in to change notification settings - Fork 1
/
md-to-toc.py
122 lines (88 loc) · 3.13 KB
/
md-to-toc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
'''
输入:md格式文件名
输出:TOC格式的字符串
处理逻辑:
用正则匹配出标题以及确定标题的级别并拼接成相应的字符串
'''
import sys
import re
# Bullet marker written in front of every TOC entry ("*" is an equally
# valid Markdown bullet if a different look is preferred).
TOC_LIST_PREFIX = "-"

# ATX-style header: leading '#'s (group 1, their count is the level), the
# title (group 2), and an optional run of closing '#'s.
# Raw string so that "\s" reaches the regex engine instead of being treated
# as an (invalid) string escape. No IGNORECASE: the pattern has no letters.
HEADER_LINE_RE = re.compile(r"^(#+)\s*(.*?)\s*(#+$|$)")

# Setext-style underlines. Markdown defines a row of '=' as a level-1 header
# and a row of '-' as a level-2 header; the header title is the line above.
HEADER1_UNDERLINE_RE = re.compile(r"^=+$")
HEADER2_UNDERLINE_RE = re.compile(r"^-+$")

# Dictionary of anchor name to number of instances found so far
anchors = {}
def print_usage():
    """Write the command-line usage hint to standard output.

    The message is preceded by an empty line.
    """
    usage_text = "\nUsage: md-to-toc <markdown_file>"
    print(usage_text)
def to_github_anchor(title):
    """Convert a markdown header title (without #s) to a GitHub-style anchor.

    This attempts to recreate GitHub's anchor-naming logic: the title is
    trimmed, lower-cased, spaces become dashes and punctuation is removed.
    Repeated titles are disambiguated with a numeric suffix ("-1", "-2", ...)
    tracked in the module-level ``anchors`` dictionary, which this function
    updates on every call.

    Args:
        title: Header text with the leading/trailing '#' markers removed.

    Returns:
        The anchor, including the leading '#', e.g. "#my-section-1".
    """
    # Convert to lower case and replace spaces with dashes
    anchor_name = title.strip().lower().replace(' ', '-')

    # Strip all invalid characters. Backticks are removed here, *before* the
    # duplicate check, so that "`foo`" and "foo" are recognised as the same
    # anchor and the second one receives a "-1" suffix instead of colliding.
    anchor_name = re.sub(
        r"[\[\]\"!#$%&'()*+,./:;<=>?@\^`{|}~]", "", anchor_name)

    # If we've encountered this anchor name before, append next instance count
    count = anchors.get(anchor_name)
    if count is None:
        anchors[anchor_name] = 0
    else:
        count += 1
        anchors[anchor_name] = count
        anchor_name = anchor_name + '-' + str(count)

    return '#' + anchor_name
def toggles_block_quote(line):
    """Tell whether *line* switches the fenced-block state on or off.

    The state flips when the line holds an odd number of ``` markers;
    a line with none, or with an even number of them, leaves it unchanged.
    """
    fence_count = line.count("```")
    return fence_count % 2 == 1
def _collect_headers(lines):
    """Return a list of (header level, title) tuples found in *lines*.

    Lines inside a fenced block (as tracked by ``toggles_block_quote``) are
    ignored. A header is either a line matched by ``HEADER_LINE_RE`` or a
    non-blank line followed by an underline matched by
    ``HEADER1_UNDERLINE_RE`` (level 1) or ``HEADER2_UNDERLINE_RE`` (level 2).
    """
    headers = []
    in_block_quote = False
    last_line = ""

    for line in lines:
        if toggles_block_quote(line):
            in_block_quote = not in_block_quote
        if in_block_quote:
            continue

        m = HEADER_LINE_RE.match(line)
        if m is not None:
            headers.append((len(m.group(1)), m.group(2)))
        else:
            title = last_line.rstrip()
            # An underline only makes a header when there is text above it;
            # after a blank line it is a horizontal rule, not an empty title.
            if title.strip():
                if HEADER1_UNDERLINE_RE.match(line) is not None:
                    headers.append((1, title))
                elif HEADER2_UNDERLINE_RE.match(line) is not None:
                    headers.append((2, title))

        last_line = line

    return headers


def main(argv=None):
    """Print a Markdown table of contents for the file named in *argv*.

    Args:
        argv: Argument list in ``sys.argv`` layout (program name first,
            markdown file path second). Defaults to ``sys.argv``.

    Returns:
        0 in all handled cases: after printing the usage text when no file
        was given, and after printing the TOC (which is empty when the file
        contains no headers).
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print_usage()
        return 0

    filespec = argv[1]

    # Context manager guarantees the file is closed even if parsing fails.
    with open(filespec) as md_file:
        headers = _collect_headers(md_file)

    # Nothing to print; also avoids min() raising ValueError on an empty list.
    if not headers:
        return 0

    # Compute min header level so we can offset output to be flush with
    # left edge
    min_header_level = min(level for level, _ in headers)

    for header_level, title in headers:
        # Four spaces per level: a single space is not enough for Markdown
        # to treat the entry as a nested list item.
        spaces = "    " * (header_level - min_header_level)
        print("{}{} [{}]({})".format(
            spaces, TOC_LIST_PREFIX, title, to_github_anchor(title)))

    return 0
if __name__ == "__main__":
sys.exit(main())