-
Notifications
You must be signed in to change notification settings - Fork 3
/
merge-pgn.py
234 lines (189 loc) · 8 KB
/
merge-pgn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# Description: A simple tool to merge several pgn games into a single game with
# variations.
import chess.pgn
import sys
import re
from collections import OrderedDict
from itertools import filterfalse
def extract_annotations(text):
    """Split a comment into its plain text and its ``[%cmd ...]`` annotations.

    A placeholder looks like ``[%cal Ge2e4,Rd2d4]``: ``cmd`` is for example
    ``%cal`` or ``%csl``, and each comma-separated value is a one-character
    color (``G`` for green, ``B`` for blue, ...) followed by the rest of the
    value, for example the UCI string ``e2e4``.

    Args:
        text: The raw comment string.

    Returns:
        A ``(normal_text, annotations)`` tuple. ``normal_text`` is the comment
        with every placeholder removed and runs of spaces collapsed to one
        space. ``annotations`` is an ``OrderedDict`` mapping each command to an
        ``OrderedDict`` of ``{value_without_first_char: first_char}``.
        Placeholders without any value (``[%csl]``, ``[%csl ]``) and empty
        values (trailing commas) are ignored instead of raising.
    """
    annotations = OrderedDict()
    text_parts = []
    # Splitting on either bracket yields plain text and placeholder bodies
    # interleaved; placeholder bodies are recognised by their leading "%".
    for part in re.split(r"\[|\]", text):
        if not part.startswith("%"):
            text_parts.append(part)
            continue
        # partition() never fails, unlike unpacking split(" ", 1) on a
        # placeholder that contains no space.
        cmd, _, raw_values = part.partition(" ")
        for value in raw_values.split(","):
            value = value.strip()
            if not value:
                continue
            # First character is the color, the remainder is the key.
            annotations.setdefault(cmd, OrderedDict())[value[1:]] = value[0]
    # Removing a placeholder from the middle of a sentence leaves two adjacent
    # spaces; collapse such runs (newlines are deliberately left alone).
    normal_text = re.sub(r" {2,}", " ", "".join(text_parts))
    return normal_text, annotations
def merge_text_strings(text1, text2):
    """Combine two plain-text comments into one, dropping duplicates.

    Args:
        text1: The first comment text (may be empty).
        text2: The second comment text (may be empty).

    Returns:
        * the non-empty text when only one of them is non-empty,
        * ``text1`` when ``text2`` is contained in it (case-insensitively,
          which also covers identical texts),
        * ``text2`` when ``text1`` is contained in it,
        * otherwise both texts joined by a blank line,
        * ``""`` when both are empty.

    Raises:
        SystemExit: When two different, non-empty comments would be joined and
            either contains ``"Transposition: "``. The diagnostic is passed to
            ``SystemExit`` so it goes to stderr with a non-zero exit status
            rather than to stdout with status 0.
    """
    if not text1 or not text2:
        # At most one side carries text; keep whichever exists.
        return text1 or text2 or ""

    folded1 = text1.casefold()
    folded2 = text2.casefold()
    # If one is included in the other then it's a duplicate comment.
    if folded2 in folded1:
        return text1
    if folded1 in folded2:
        return text2

    transposition = "Transposition: "
    if transposition in text1 or transposition in text2:
        raise SystemExit(
            f"Found \"{transposition}\" in one of these two comments about to be merged:\n"
            f"Text1: \"{text1}\"\n"
            f"Text2: \"{text2}\""
        )
    return text1 + "\n\n" + text2
def pick_color(color1, color2):
    """Pick one of two conflicting colors according to a priority list.

    Args:
        color1: One-character color code of the existing annotation.
        color2: One-character color code of the incoming annotation.

    Returns:
        ``color1`` when it has strictly higher priority than ``color2``,
        otherwise ``color2``. Codes that are not in the priority list rank
        below every known color instead of raising ``ValueError``; when both
        codes are unknown ``color2`` is returned.
    """
    color_prio = ['R', 'G', 'B', 'Y']  # highest prio first

    def rank(color):
        # Unknown codes get the lowest possible priority.
        return color_prio.index(color) if color in color_prio else len(color_prio)

    return color1 if rank(color1) < rank(color2) else color2
def merge_annotations(annotations1, annotations2):
    """Fold ``annotations2`` into ``annotations1`` and render the result.

    Both arguments map a command (such as ``%cal``) to an ordered mapping of
    ``{key: color}``. ``annotations1`` is updated in place: keys only present
    in ``annotations2`` are appended, and for keys present in both the color
    is chosen by ``pick_color``.

    Returns:
        The merged annotations formatted the way placeholders appear in a
        comment, e.g. ``[%cal Ge2e4,Rd2d4][%csl Ge4]`` (empty string when
        there are none).
    """
    for command, incoming in annotations2.items():
        existing = annotations1.setdefault(command, OrderedDict())
        for target, new_color in incoming.items():
            if target in existing:
                # Same arrow/circle target on both sides: resolve the color.
                existing[target] = pick_color(existing[target], new_color)
            else:
                existing[target] = new_color

    # Render every command as "[cmd <color><key>,<color><key>,...]".
    rendered = []
    for command, targets in annotations1.items():
        joined = ",".join(color + target for target, color in targets.items())
        rendered.append(f"[{command} {joined}]")
    return "".join(rendered)
def merge_comments(text1, text2):
    """Merge two raw comments, handling plain text and annotations separately.

    Each comment is split with ``extract_annotations``; the plain texts are
    combined by ``merge_text_strings`` and the annotations by
    ``merge_annotations``.

    Returns:
        A ``(combined_text, combined_annotations)`` tuple of two strings.
    """
    plain1, marks1 = extract_annotations(text1)
    plain2, marks2 = extract_annotations(text2)
    return merge_text_strings(plain1, plain2), merge_annotations(marks1, marks2)
def insert_braces(text):
    """Move annotations into their own pair of curly brackets.

    Every ``{...}`` comment whose body has non-blank text in front of its
    first ``[%`` gets ``"} { "`` inserted right before that ``[%``, turning
    ``{ A comment [%cal Ge2e4] }`` into ``{ A comment } { [%cal Ge2e4] }``.
    Comments without ``[%``, or with only whitespace before it, are returned
    unchanged.
    """
    def split_comment(brace_match):
        body = brace_match.group(1)
        marker_at = body.find("[%")
        # Leave the comment alone when there is no annotation, or when the
        # annotation is not preceded by any real text.
        if marker_at == -1 or not body[:marker_at].strip():
            return brace_match.group(0)
        return "{" + body[:marker_at] + "} { " + body[marker_at:] + "}"

    return re.sub(r'\{(.*?)\}', split_comment, text, flags=re.DOTALL)
def main():
    """Merge the games of several PGN files into one game with variations.

    Command line: ``<PGN FILES>... <OUTPUT FILE> [--no-comments]`` where the
    output file may be ``-`` for STDOUT. With ``--no-comments`` the plain text
    of every comment is dropped unless it contains ``"Transposition:"``;
    ``[%...]`` annotations are always kept.

    Raises:
        SystemExit: With the usage text when fewer than one input file and one
            output file remain after the options are removed.
    """
    usage = f"Usage: {sys.argv[0]} <PGN FILES>... <OUTPUT FILE> [--no-comments]\nWhere OUTPUT_FILE can be - to indicate STDOUT."

    # Anything starting with "--" is an option; a lone "-" stays positional
    # because it names STDOUT as the output file.
    arguments = sys.argv[1:]
    options = [arg for arg in arguments if arg.startswith("--")]
    positional = [arg for arg in arguments if not arg.startswith("--")]
    # Validate after the options are removed so that an option cannot stand
    # in for a missing input file.
    if len(positional) < 2:
        raise SystemExit(usage)
    *infiles, outfile = positional
    no_comments = "--no-comments" in options

    def visible_text(text):
        # Applies --no-comments: drop plain text unless it is a transposition note.
        if no_comments and "Transposition:" not in text:
            return ""
        return text

    master_node = chess.pgn.Game()
    games = []
    for name in infiles:
        with open(name, encoding="utf-8") as pgn_file:
            game = chess.pgn.read_game(pgn_file)
            while game is not None:
                # Fold each game's own comment into the merged game's comment.
                text, annotations = merge_comments(master_node.comment, game.comment)
                master_node.comment = f"{visible_text(text)}{annotations}"
                games.append(game)
                game = chess.pgn.read_game(pgn_file)

    mlist = []
    headers = {}
    for game in games:
        mlist.extend(game.variations)
        # Save all headers from all games
        for header, value in game.headers.items():
            headers.setdefault(header, set()).add(value)

    # Only headers for which a single distinct value was seen are copied over.
    for header, values in headers.items():
        if len(values) == 1:
            master_node.headers[header] = next(iter(values))

    # Merge level by level: each entry pairs a node of the merged tree with
    # the source nodes that are candidates for its children.
    variations = [(master_node, mlist)]
    done = False
    while not done:
        newvars = []
        done = True
        for vnode, nodes in variations:
            newmoves = {}  # Maps move to its index in newvars.
            for node in nodes:
                if node.move is None:
                    continue
                if node.move not in newmoves:
                    # First time this move is seen below vnode: create it.
                    nvnode = vnode.add_variation(node.move, nags=node.nags)
                    text, annotations = merge_comments(node.comment, "")
                    nvnode.comment = f"{visible_text(text)}{annotations}"
                    if len(node.variations) > 0:
                        done = False
                    # Copy the list so later extend() calls do not modify the
                    # source game's own variations.
                    newvars.append((nvnode, list(node.variations)))
                    newmoves[node.move] = len(newvars) - 1
                else:
                    # Move already exists below vnode: merge comment, NAGs and
                    # continuations into the existing node.
                    nvnode, nlist = newvars[newmoves[node.move]]
                    text, annotations = merge_comments(nvnode.comment, node.comment)
                    # --no-comments must apply here as well, otherwise text
                    # from the second and later games would be kept.
                    nvnode.comment = f"{visible_text(text)}{annotations}"
                    nvnode.nags.update(node.nags)
                    if len(node.variations) > 0:
                        done = False
                    nlist.extend(node.variations)
        variations = newvars

    # Workaround to make sure the annotations end up in their own curly brackets,
    # ie 1.e4 { A comment } { [%cal Ge2e4] } instead of 1.e4 { A comment [%cal Ge2e4] }
    merged_pgn = insert_braces(f"{master_node}")

    if outfile == "-":
        print(merged_pgn)
    else:
        with open(outfile, "w", encoding="utf-8") as out:
            out.write(merged_pgn)
# Run the tool only when executed as a script, not when the file is imported.
if __name__ == "__main__":
    main()