Skip to content

Commit 4ae676f

Browse files
committed
Fix normalize_book_sets.py to properly exclude premium problems from All and All-TODO sets
1 parent f7d88d7 commit 4ae676f

File tree

2 files changed

+107
-33
lines changed

2 files changed

+107
-33
lines changed

data/book-sets.json

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,6 @@
388388
23,
389389
25,
390390
26,
391-
27,
392391
28,
393392
30,
394393
31,
@@ -477,17 +476,10 @@
477476
152,
478477
153,
479478
155,
480-
156,
481-
157,
482-
158,
483-
159,
484479
160,
485-
161,
486480
162,
487-
163,
488481
167,
489482
169,
490-
170,
491483
172,
492484
173,
493485
188,
@@ -602,11 +594,9 @@
602594
1029,
603595
1046,
604596
1071,
605-
1127,
606597
1137,
607598
1143,
608599
1161,
609-
1167,
610600
1207,
611601
1249,
612602
1268,

scripts/normalize_book_sets.py

Lines changed: 107 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,92 @@
1313
import json
1414
import os
1515
import argparse
16+
import re
1617
from pathlib import Path
1718

1819

20+
def compact_json_arrays(json_str: str, max_line_length: int = 150) -> str:
    """
    Compact number arrays in JSON string to have multiple numbers per line.

    An array is rewritten only when it opens with a line of the form
    ``"key": [`` and every following line up to the closing ``]`` / ``],``
    is a single non-negative integer (with an optional trailing comma).
    Any other array -- mixed content, negative numbers, floats, nested
    objects, or one that is never closed -- is emitted exactly as it was
    in the input, so no element is ever dropped.

    Args:
        json_str: JSON string with arrays formatted one number per line
        max_line_length: Maximum characters per line (default: 150)

    Returns:
        Compacted JSON string
    """
    lines = json_str.split("\n")
    result = []
    i = 0

    while i < len(lines):
        line = lines[i]
        i += 1
        # Every line visited by the outer loop is kept verbatim; only the
        # integer lines consumed by the inner loop below may be re-flowed.
        result.append(line)

        # Check if this line starts an array of numbers
        # Pattern: some indentation, "key": [
        array_start_match = re.match(r'^(\s*)"[^"]+": \[$', line)
        if not array_start_match:
            continue

        indent = array_start_match.group(1)
        numbers = []
        # Raw source lines of the integers collected so far. They are
        # written back untouched if the array turns out not to be a pure
        # integer array, which prevents silently losing elements.
        pending = []

        while i < len(lines):
            raw_line = lines[i]
            num_line = raw_line.strip()

            # Closing bracket: the whole array was integers, so compact it.
            if num_line == "]" or num_line == "],":
                result.extend(
                    _pack_numbers(numbers, indent + " ", max_line_length)
                )
                result.append(indent + num_line)
                i += 1
                break

            # Check if it's a number (with optional comma)
            num_match = re.match(r"^(\d+),?$", num_line)
            if not num_match:
                # Not a pure number array: restore what was buffered and
                # hand the current line back unchanged.
                result.extend(pending)
                result.append(raw_line)
                i += 1
                break

            numbers.append(int(num_match.group(1)))
            pending.append(raw_line)
            i += 1
        else:
            # Input ended before the array was closed; keep the buffered
            # lines rather than discarding them.
            result.extend(pending)

    return "\n".join(result)


def _pack_numbers(numbers: list, number_indent: str, max_line_length: int) -> list:
    """Join numbers with ", " and wrap them into lines starting with number_indent."""
    packed = []
    current_line = number_indent
    last_index = len(numbers) - 1

    for idx, num in enumerate(numbers):
        # Every number except the last carries its separator.
        num_str = str(num) if idx == last_index else f"{num}, "

        # Start a new line when this number would overflow, unless the
        # current line is still empty (a lone oversized number must still
        # be placed somewhere).
        if (
            len(current_line) + len(num_str) > max_line_length
            and current_line != number_indent
        ):
            packed.append(current_line.rstrip())
            current_line = number_indent + num_str
        else:
            current_line += num_str

    # Flush the final, partially filled line (nothing to flush when empty).
    if current_line.strip():
        packed.append(current_line.rstrip())

    return packed
100+
101+
19102
def check_solution_exists(problem_number: int, solutions_dir: Path) -> bool:
20103
"""Check if solution exists for a problem number."""
21104
solution_path = solutions_dir / str(problem_number) / "01.py"
@@ -118,7 +201,7 @@ def normalize_book_sets(
118201
print("Error: The root of the JSON file must be an array.")
119202
return
120203

121-
# Find the "All-TODO", "All" objects, and get premium list
204+
# Find the "All-TODO" and "All" objects, and get premium list
122205
all_todo_obj = None
123206
all_obj = None
124207
premium_set = set()
@@ -134,37 +217,27 @@ def normalize_book_sets(
134217

135218
changes_made = False
136219

137-
# Process "All-TODO": Remove problems that have both solution and explanation or are premium
220+
# Process "All-TODO": Remove problems that have both solution and explanation
138221
if all_todo_obj:
139222
original_count = len(all_todo_obj.get("problems", []))
140223
problems = all_todo_obj.get("problems", [])
141224
removed = []
142-
removed_premium = []
143225

144226
new_problems = []
145227
for problem_num in problems:
146-
# Remove if premium
147-
if problem_num in premium_set:
148-
removed_premium.append(problem_num)
149-
# Remove if has both solution and explanation
150-
elif has_both_solution_and_explanation(
228+
if has_both_solution_and_explanation(
151229
problem_num, solutions_path, explanations_path
152230
):
153231
removed.append(problem_num)
154232
else:
155233
new_problems.append(problem_num)
156234

157-
if removed or removed_premium:
235+
if removed:
158236
changes_made = True
159-
print(f"\n[All-TODO] Removing problems:")
160-
if removed_premium:
161-
print(
162-
f" Removed {len(removed_premium)} premium problems: {removed_premium[:10]}{'...' if len(removed_premium) > 10 else ''}"
163-
)
164-
if removed:
165-
print(
166-
f" Removed {len(removed)} problems with both solution and explanation: {removed[:10]}{'...' if len(removed) > 10 else ''}"
167-
)
237+
print(
238+
f"\n[All-TODO] Removing {len(removed)} problems with both solution and explanation:"
239+
)
240+
print(f" Removed: {removed[:10]}{'...' if len(removed) > 10 else ''}")
168241
all_todo_obj["problems"] = sorted(new_problems)
169242
print(f" Updated count: {original_count} -> {len(new_problems)}")
170243
else:
@@ -184,7 +257,7 @@ def normalize_book_sets(
184257
if d.is_dir() and d.name.isdigit() and not d.name.startswith("todo-")
185258
}
186259

187-
# Find problems that have both (excluding premium problems)
260+
# Find problems that have both (excluding premium)
188261
problems_with_both = sorted(
189262
[
190263
p
@@ -199,13 +272,23 @@ def normalize_book_sets(
199272

200273
original_count = len(all_obj.get("problems", []))
201274
original_problems = set(all_obj.get("problems", []))
202-
203-
if set(problems_with_both) != original_problems:
275+
problems_with_both_set = set(problems_with_both)
276+
277+
# Remove premium problems from original_problems for comparison
278+
original_non_premium = original_problems - premium_set
279+
removed_premium = sorted(original_problems & premium_set)
280+
281+
# Always update if there are premium problems to remove, or if sets differ
282+
if removed_premium or problems_with_both_set != original_non_premium:
204283
changes_made = True
205-
added = sorted(set(problems_with_both) - original_problems)
206-
removed = sorted(original_problems - set(problems_with_both))
284+
added = sorted(problems_with_both_set - original_non_premium)
285+
removed = sorted(original_non_premium - problems_with_both_set)
207286

208287
print(f"\n[All] Updating problem list:")
288+
if removed_premium:
289+
print(
290+
f" Removed {len(removed_premium)} premium problems: {removed_premium[:10]}{'...' if len(removed_premium) > 10 else ''}"
291+
)
209292
if added:
210293
print(
211294
f" Added {len(added)} problems: {added[:10]}{'...' if len(added) > 10 else ''}"
@@ -301,6 +384,7 @@ def normalize_book_sets(
301384
try:
302385
with open(book_sets_file, "w", encoding="utf-8") as f:
303386
json.dump(data, f, indent=2, ensure_ascii=False)
387+
304388
print("\n" + "=" * 70)
305389
print(f"✓ Successfully updated '{book_sets_file}'")
306390
except Exception as e:

0 commit comments

Comments
 (0)