-
Notifications
You must be signed in to change notification settings - Fork 98
/
Copy pathverify.py
111 lines (80 loc) · 3.08 KB
/
verify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import json
import sys
from pathlib import Path
from typing import Dict, List
def text_color(color, text):
    """Return *text* wrapped in the ANSI escape sequence for *color*.

    The result starts with the foreground-color escape code and ends with
    the reset code, so anything printed afterwards is not colored.

    Args:
        color: One of "red", "green", "yellow", "blue", "magenta", "cyan"
            or "white".
        text: The value to colorize; it is formatted into the result string.

    Returns:
        The colorized string.

    Raises:
        KeyError: If *color* is not one of the supported names.
    """
    names = ("red", "green", "yellow", "blue", "magenta", "cyan", "white")
    # The foreground codes for these colors are consecutive: 31 (red)
    # through 37 (white), in the order listed above.
    palette = {name: f"\033[{code}m" for code, name in enumerate(names, start=31)}
    reset = "\x1B[0m"
    return f"{palette[color]}{text}{reset}"
# Pre-colored status labels used as prefixes for the messages this script prints.
ERROR, WARNING, SUCCESS = (
    text_color("red", "Error:"),
    text_color("yellow", "Warning:"),
    text_color("green", "Success:"),
)
def generate_correct_related_posts(
    source_path="./related_posts_python.json",
    output_path="./correct_related.json",
) -> None:
    """Write the reference file of expected shared-tag counts.

    Loads a JSON list of posts from *source_path*. For every post, the
    number of tags it has in common with each entry of its ``related`` list
    is summed, and the resulting ``{post["_id"]: total}`` mapping is written
    as JSON to *output_path*.

    Args:
        source_path: Path of the JSON file to read. Each post must provide
            the keys ``_id``, ``tags`` and ``related``, and each related
            entry must provide ``tags``. Defaults to the original
            hard-coded location.
        output_path: Path of the JSON file to (over)write. Defaults to the
            original hard-coded location.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If *source_path* does not contain valid JSON.
        KeyError: If a post or related entry lacks a required key.
    """
    # Read as UTF-8 explicitly instead of relying on the platform's
    # locale-dependent default.
    with open(source_path, encoding="utf-8") as f:
        python_related: List[Dict] = json.load(f)

    correct_payload: Dict[str, int] = {}
    for correct_post in python_related:
        tag_set = set(correct_post["tags"])
        # Duplicate tags inside a related entry count once (set semantics).
        correct_payload[correct_post["_id"]] = sum(
            len(tag_set.intersection(related_post["tags"]))
            for related_post in correct_post["related"]
        )

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(correct_payload, f)
def _run_cli():
    """Validate a related-posts JSON file given on the command line.

    Expects exactly one argument. If it does not end in ".json", the
    reference file is generated and the process exits with status 0.
    Otherwise the argument is loaded as the file under test and compared,
    post by post, against "correct_related.json" located next to it.

    Exits with status 1 on wrong usage or on the first invalid post, and
    with status 0 when every post passes. A post-count mismatch is only
    reported as a warning.
    """
    if len(sys.argv) != 2:
        print("Usage: python verify.py <path_to_posts.json>")
        sys.exit(1)

    target_path = sys.argv[1]

    # Any argument that is not a .json path switches to generation mode.
    if not target_path.endswith(".json"):
        print("Generating correct file...")
        generate_correct_related_posts()
        sys.exit(0)

    with open(target_path) as fh:
        candidate_posts: List[Dict] = json.load(fh)

    # The reference counts are looked up in the same directory as the target.
    reference_path = Path(target_path).with_name("correct_related.json")
    with open(reference_path) as fh:
        expected_counts: Dict[str, int] = json.load(fh)

    if len(candidate_posts) != len(expected_counts):
        print(
            f"{WARNING} Invalid post count! \nexpected: {len(expected_counts)} \nactual: {len(candidate_posts)}\n"
        )

    seen_ids = set()
    for post in candidate_posts:
        # Checked in this order so the first missing key is the one reported.
        for required_key in ("_id", "tags", "related"):
            if required_key not in post:
                print(f"{ERROR} Invalid post! {required_key} not found: \n{post}")
                sys.exit(1)

        post_id = post["_id"]
        if post_id in seen_ids:
            print(f"{ERROR} Post {post_id} is duplicated!")
            sys.exit(1)
        seen_ids.add(post_id)

        own_tags = set(post["tags"])
        actual_count = sum(
            len(set(neighbour["tags"]) & own_tags) for neighbour in post["related"]
        )

        if post_id not in expected_counts:
            print(f"{ERROR} Post {post_id} not found in correct file!\n{post}")
            sys.exit(1)

        expected = expected_counts[post_id]
        if actual_count != expected:
            print(
                f"{ERROR} Post {post_id} is invalid!\nexpected: {expected} shared tag count\nactual: {actual_count}"
            )
            sys.exit(1)

    print(f"{SUCCESS} {target_path} is valid!")
    sys.exit(0)


if __name__ == "__main__":
    _run_cli()