-
Notifications
You must be signed in to change notification settings - Fork 0
/
format_tex.py
243 lines (208 loc) · 8.48 KB
/
format_tex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
import os
import sys
import argparse
import pprint
import re
# ---------------------------------------------------------------------------
# User settings
# ---------------------------------------------------------------------------
# When True, '@{}' is added at both ends of every generated column
# specification, which removes some whitespace at the table edges.
TABLES_LR_TRIMMED = False
# When True, the first row of each table is styled through \MyTabHeadings
# (coloured background) unless a table overrides it with a COLOR_NONE option.
TABLES_FIRST_ROW_COLORED = True
# [R, G, B] components substituted into \definecolor{Tab}{RGB}{...} below.
TABLES_FIRST_ROW_COLOR = [134, 168, 209]
# ---------------------------------------------------------------------------
# End of user settings
# ---------------------------------------------------------------------------

# ---------------------------------------------------------------------------
# Derived parameters
# ---------------------------------------------------------------------------
# LaTeX snippet inserted after the '%HOOK PREAMBLE%' marker. It is a raw
# string so that every backslash is literal LaTeX and no Python escape
# sequence (valid or invalid) is involved. The TABLES_FIRST_ROW_COLOR_*
# placeholders are substituted right below.
TABLES_COLOR_PREAMBLE = r"""\usepackage{colortbl}
\usepackage{array}
\definecolor{Tab}{RGB}{TABLES_FIRST_ROW_COLOR_R, TABLES_FIRST_ROW_COLOR_G, TABLES_FIRST_ROW_COLOR_B}
\newcolumntype{$}{>{\global\let\currentrowstyle\relax}}
\newcolumntype{^}{>{\currentrowstyle}}
\newcommand{\rowstyle}[1]{\gdef\currentrowstyle{#1}
#1\ignorespaces
}
\newcommand\MyTabHeadings{
\rowcolor{Tab}\rowstyle{\bfseries\color{white}}}
"""
TABLES_COLOR_PREAMBLE = (
    TABLES_COLOR_PREAMBLE
    .replace('TABLES_FIRST_ROW_COLOR_R', str(TABLES_FIRST_ROW_COLOR[0]))
    .replace('TABLES_FIRST_ROW_COLOR_G', str(TABLES_FIRST_ROW_COLOR[1]))
    .replace('TABLES_FIRST_ROW_COLOR_B', str(TABLES_FIRST_ROW_COLOR[2]))
)

# Text placed at the left and right end of each generated column specification.
table_lr_trimmed_char = '@{}' if TABLES_LR_TRIMMED else ''

# Matches the 'hack-<columns>-<OPTIONS>' pseudo column specification; group 1
# is the whole 'hack-...' token. The commas inside the character classes are
# matched literally as well (kept as-is for backward compatibility).
re_table_format = re.compile(r'.*(hack-[C,J,R,L,\|]+-[a-z,A-Z,_,\-]+).*')

# Matches \multicolumn{<n>}{<alignment>}; group 1 is the '{<alignment>}'
# argument. '[^}]*' is used instead of a greedy '.*' so that the capture cannot
# slide onto the cell text when that text is also wrapped in braces on the
# same line (e.g. '\multicolumn{2}{|l|}{hello}').
re_table_multicolumn = re.compile(r'.*multicolumn\{[^}]*\}(\{[^}]*\}).*')
def format_preamble(infile):
    """Insert the table colour preamble after every '%HOOK PREAMBLE%' marker.

    The file is read completely, rewritten in memory and then written back
    in place.

    Args:
        infile: Path of the LaTeX file to modify in place.
    """
    with open(infile, 'r') as in_tex_file:
        contents = in_tex_file.readlines()

    # Build the output incrementally instead of inserting by original index:
    # index-based insertion drifts by one line for each marker already handled.
    contents_out = []
    for line in contents:
        contents_out.append(line)
        if '%HOOK PREAMBLE%' in line:
            if not line.endswith('\n'):
                # Marker on an unterminated last line: keep the preamble on
                # its own line instead of gluing it to the marker.
                contents_out.append('\n')
            contents_out.append(TABLES_COLOR_PREAMBLE)

    with open(infile, 'w') as out_tex_file:
        out_tex_file.writelines(contents_out)
def _table_settings(line):
    r"""Decode the per-table settings carried by a \begin{tabulary} line.

    If the line contains 'hack', it must match ``re_table_format``
    ("hack-|C|C|C|-OPTIONS", typically produced with ``.. tabularcolumns::``
    in the Sphinx source); otherwise the default 'L' formatting is used and
    the column count is derived from the number of '|' characters.

    Args:
        line: The original \begin{tabulary} source line.

    Returns:
        Tuple ``(columns, width, colored, hline, centering)``:
        columns is the custom column string or 'DEFAULT', width is the
        column count (only meaningful for 'DEFAULT'), colored tells whether
        the first row is coloured, hline is the rule style name and
        centering tells whether \centering is requested.

    Raises:
        SystemExit: If the 'hack' format or one of its options is malformed.
    """
    colored = TABLES_FIRST_ROW_COLORED
    columns = 'DEFAULT'
    hline = 'TOPRULE'
    centering = False
    width = 0

    if 'hack' not in line:
        width = line.count('|') - 1
        return columns, width, colored, hline, centering

    match = re_table_format.match(line)
    if not match:
        sys.exit('Error: table format regex mismatch, the table format in "%s" is probably wrong' % line)

    parts = match.group(1).split('-')
    columns = parts[1]
    for option in parts[2:]:
        # Options have the form KEY_VALUE; partition() avoids the unpacking
        # ValueError a bare split('_') raises on a malformed option.
        key, separator, value = option.partition('_')
        if not separator:
            sys.exit('Error: table option "%s" in "%s" is not of the form KEY_VALUE' % (option, line))
        if key == 'COLOR':
            colored = value != 'NONE'
        elif key == 'HLINE':
            hline = value
        elif key == 'CENTER':
            centering = value == 'TRUE'
    return columns, width, colored, hline, centering


def _column_spec(columns, width, colored):
    """Build the '{...}' column specification of the rewritten tabulary.

    When the first row is coloured, '$' is put in front of the first column
    and '^' in front of every following C/J/R/L column, e.g. '{$L^L^L}'
    instead of '{LLL}'.
    """
    if columns == 'DEFAULT':
        if colored:
            body = '$' + 'L^' * (width - 1) + 'L'
        else:
            body = 'L' * width
    elif colored:
        # An optional leading '|' stays in front of the '$' marker.
        lead = '|' if columns[:1] == '|' else ''
        rest = columns[len(lead):]
        body = lead + '$' + rest[:1]
        body += ''.join('^' + elem if elem in 'CJRL' else elem for elem in rest[1:])
    else:
        body = columns
    return '{' + table_lr_trimmed_char + body + table_lr_trimmed_char + '}'


def format_tables(infile):
    r"""Rewrite every tabulary table of a Sphinx-generated LaTeX file in place.

    A small state machine follows each table:
    'NEW' --> 'TABLE_START' --> 'TOPLINE_DONE' --> 'MIDLINE_DONE' --> 'NEW'

    For each table:
      * the \begin{tabulary} line is rebuilt with the decoded column
        specification and wrapped in \begin{table}[H] (followed by a
        '\ra{1.2}' line, plus \centering when requested; \ra is presumably
        defined in the document preamble -- it is not defined here);
      * the first two \hline lines are rewritten according to the rule
        style, and \MyTabHeadings is added after the first one when the
        first row is coloured;
      * in every \multicolumn the alignment argument captured by
        ``re_table_multicolumn`` gets 'l' replaced by 'c' (only one
        \multicolumn per line is looked at);
      * the last \hline before \end{tabulary} becomes the bottom rule and
        \end{table} is added after \end{tabulary}.

    The output list is built incrementally, so no offset between input and
    output line numbers has to be maintained.

    Args:
        infile: Path of the LaTeX file to modify in place.

    Raises:
        SystemExit: If a table format or a \multicolumn cannot be decoded.
    """
    with open(infile, 'r') as in_tex_file:
        contents = in_tex_file.readlines()

    contents_out = []
    state = 'NEW'
    table_columns = 'DEFAULT'
    table_hline = 'TOPRULE'
    color_first_row = False
    table_start = 0  # index in contents_out of the current \begin{tabulary}

    for line in contents:
        emitted = line   # what replaces the current source line
        before = []      # lines to add in front of it
        after = []       # lines to add behind it

        # --- \begin{tabulary}: rebuild the line and open the table wrapper.
        if '\\begin{tabulary}' in line and state == 'NEW':
            state = 'TABLE_START'
            (table_columns, table_width, color_first_row,
             table_hline, table_centering) = _table_settings(line)
            # With vertical lines or a coloured first row, \toprule would
            # leave a gap, so fall back to plain \hline for the whole table.
            if ('|' in table_columns or color_first_row) and table_hline == 'TOPRULE':
                table_hline = 'HLINE'
            centering_txt = '\\centering' if table_centering else ''
            before = ['\\begin{table}[H]\n', '\\ra{1.2}' + centering_txt + ' \n']
            emitted = ('\\begin{tabulary}{\\linewidth}'
                       + _column_spec(table_columns, table_width, color_first_row)
                       + '\n')
            table_start = len(contents_out) + len(before)

        # Only close a table that was actually opened; this also keeps the
        # per-table settings from being used before they exist.
        closes_table = '\\end{tabulary}' in line and state != 'NEW'

        # --- First two \hline lines of the table (top rule, then the rule
        # below the first row). A \hline sharing its line with
        # \end{tabulary} is the bottom rule and is handled further down.
        if '\\hline' in line and state in ('TABLE_START', 'TOPLINE_DONE') and not closes_table:
            # NOTE(review): with HLINE_TOPRULE the second rule also becomes
            # \toprule (not \midrule); kept as in the original.
            emitted = emitted.replace('\\hline', '\\' + table_hline.lower())
            if state == 'TABLE_START':
                state = 'TOPLINE_DONE'
                if color_first_row:
                    after.append('\\MyTabHeadings\n\n')
            else:
                state = 'MIDLINE_DONE'

        # --- \multicolumn: centre the text spanning several columns.
        if '\\multicolumn' in line:
            match_multicolumn = re_table_multicolumn.match(line)
            if not match_multicolumn:
                sys.exit("Error: invalid regex for multicolumn in line [%s]" % line)
            multicolumn_arg = match_multicolumn.group(1)
            emitted = emitted.replace(multicolumn_arg, multicolumn_arg.replace('l', 'c'))

        # --- \end{tabulary}: bottom rule and closing of the table wrapper.
        if closes_table:
            bottom_rule = '\\' + table_hline.lower().replace('top', 'bottom')
            if '\\hline' in emitted:
                emitted = emitted.replace('\\hline', bottom_rule)
            else:
                # The last rule sits on an earlier line: walk back through
                # the lines already emitted for this table only.
                for pos in range(len(contents_out) - 1, table_start, -1):
                    if '\\hline' in contents_out[pos]:
                        contents_out[pos] = contents_out[pos].replace('\\hline', bottom_rule)
                        break
            if not emitted.endswith('\n'):
                emitted += '\n'
            # Newline-terminated so the following source line is not glued
            # onto \end{table}.
            after.append('\\end{table}\n')
            state = 'NEW'

        contents_out.extend(before)
        contents_out.append(emitted)
        contents_out.extend(after)

    # Write contents_out back to the same file
    with open(infile, 'w') as out_tex_file:
        out_tex_file.writelines(contents_out)
if __name__ == '__main__':
    # Command-line entry point: process every '.tex' file found directly in
    # the given folder (sub-folders are not visited).
    parser = argparse.ArgumentParser()
    parser.add_argument('infolder', help='Folder containing sphinx-generated LaTeX file(s)')
    args = parser.parse_args()
    if args.infolder:
        if not os.path.isdir(args.infolder):
            # os.walk() yields nothing for a missing folder, which would make
            # next() fail with a bare StopIteration traceback.
            parser.error('"%s" is not an existing folder' % args.infolder)
        # Third item of the first os.walk() entry: the files of the top-level
        # folder. The default covers a folder that cannot be listed.
        filenames = next(os.walk(args.infolder), (args.infolder, [], []))[2]
        for filename in filenames:
            tex_path = os.path.join(args.infolder, filename)
            if tex_path.endswith('.tex'):
                format_preamble(tex_path)
                format_tables(tex_path)