# latex_input_resolver.py
import glob
import os
from TexSoup import TexSoup
# Folder that is searched for the extracted LaTeX sources of each paper.
EXTRACT_FOLDER = "./paper_folders/"
# Folder that receives the flattened .tex files.
FINAL_FOLDER = "./papers/"

# exist_ok=True avoids the race between a separate existence check and the
# directory creation.
os.makedirs(FINAL_FOLDER, exist_ok=True)

# The context manager closes the file even if reading it raises.
with open("selected_papers.txt", "rt") as papers:
    lines = papers.read().split("\n")

# The paper count is half the number of lines, rounded down.
# NOTE(review): presumably the list uses two lines per paper -- confirm
# against the format of selected_papers.txt.
nb_papers = len(lines) // 2
print("number papers:", nb_papers)
def _inline_referenced_file(node, relative_target, base_dir):
    """Replace ``node`` with the resolved contents of the file it refers to.

    Args:
        node: Parse-tree node of the include-like command to replace.
        relative_target: File name taken from the command's argument(s).
        base_dir: Directory the file name is resolved against.

    Raises:
        OSError: If the referenced file cannot be opened, with or without
            an appended ".tex" extension.
    """
    target = os.path.join(base_dir, relative_target)
    if not os.path.exists(target):
        # Fall back to the name with an explicit ".tex" extension.
        target = target + ".tex"
    # Read through a context manager so the handle is always closed.
    with open(target) as handle:
        referenced_tex = handle.read()
    # os.path.join(base_dir, "") yields base_dir with a trailing separator
    # (or "" when base_dir is empty), so the dirname() taken by
    # import_resolve gives back the same base directory for nested commands.
    resolved = import_resolve(referenced_tex, os.path.join(base_dir, ""))
    node.replace(*resolved.contents)


def import_resolve(tex, path):
    """Resolve all imports and update the parse tree.

    Parses ``tex`` with TexSoup and recursively replaces every input,
    subimport, import and include command with the contents of the file it
    names. Referenced files are looked up relative to the directory of
    ``path``; nested commands use that same directory.

    Args:
        tex: LaTeX source to parse.
        path: Path of the file ``tex`` was read from (or a directory path
            ending in a separator); only its directory part is used.

    Returns:
        The TexSoup parse tree with the commands replaced in place.

    Raises:
        OSError: If a referenced file cannot be opened.
    """
    soup = TexSoup(tex)
    # No "/" is appended here: for a bare file name dirname() is "" and
    # appending "/" would wrongly point at the filesystem root.
    base_dir = os.path.dirname(path)

    for node in soup.find_all('input'):
        _inline_referenced_file(node, node.args[0], base_dir)

    # resolve subimports: the two arguments are concatenated into one path
    for node in soup.find_all('subimport'):
        _inline_referenced_file(node, node.args[0] + node.args[1], base_dir)

    # resolve imports
    # NOTE(review): only the first argument is used, as before; the LaTeX
    # "import" package documents \import as taking {dir}{file} -- confirm.
    for node in soup.find_all('import'):
        _inline_referenced_file(node, node.args[0], base_dir)

    # resolve includes
    for node in soup.find_all('include'):
        _inline_referenced_file(node, node.args[0], base_dir)

    return soup
# Flatten each paper: find its root .tex file(s), inline everything they
# pull in, and write the result to FINAL_FOLDER.
for i in range(nb_papers):
    # "**" together with recursive=True matches .tex files at any depth
    # below the paper's folder.
    paper_folder_dir = EXTRACT_FOLDER + str(i) + "/**/"
    tex_files = glob.glob(paper_folder_dir + "*.tex", recursive=True)
    root_files = []
    try:
        for f_path in tex_files:
            with open(f_path) as f:
                tex = f.read()
            # A file that declares a document class is treated as a root.
            if TexSoup(tex).documentclass is not None:
                root_files.append(import_resolve(tex, f_path))

        if not root_files:
            print("no root file?")
        elif len(root_files) > 1:
            print("writing multiple root files for paper", i)
            # Several roots: disambiguate the outputs with a "-<j>" suffix.
            for j, root in enumerate(root_files):
                with open(FINAL_FOLDER + str(i) + "-" + str(j) + ".tex", "wt") as f:
                    f.write(str(root))
        else:
            print("writing single root file for paper", i)
            with open(FINAL_FOLDER + str(i) + ".tex", "wt") as f:
                f.write(str(root_files[0]))
    except Exception as err:
        # Best-effort batch run: a failing paper is reported and skipped so
        # the remaining papers are still processed. The cause is included
        # so failures can be diagnosed afterwards.
        print("error at paper %d: %s: %s" % (i, type(err).__name__, err))
    # %d rather than %g: %g switches to exponent notation for large integers.
    print("progress: %d / %d" % (i, nb_papers))