# neurips2021_analysis.py (forked from hamanhbui/neurips2021_analysis)
# -*- coding: utf-8 -*-
"""neurips2021_analysis.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1gEuudZsEXE7ps2Hi-ZsYfagtuwj8XJ3c
"""
import numpy as np
import json
from collections import Counter
import pickle
import matplotlib.pyplot as plt
# Load the scraped author lists and count, per normalized affiliation, how many
# papers have at least one author from it.

# NOTE: pickle.load can execute arbitrary code while deserializing; only run
# this against a pp_authors.txt you produced yourself or otherwise trust.
with open("data/pp_authors.txt", "rb") as fp:  # Unpickling
    pp_authors = pickle.load(fp)

# Use a context manager so the handle is closed deterministically, and read the
# JSON as UTF-8 instead of relying on the platform's default encoding.
with open("data/alias_map.json", encoding="utf-8") as alias_file:
    # Maps a normalized affiliation name to its known spellings
    # (presumably a list of alias strings -- confirm against the data file).
    unis = json.load(alias_file)

affi_count = Counter()
for pp in pp_authors:
    # Normalized affiliations already credited for this paper; a list keeps
    # first-seen order, which fixes how ties are ordered by most_common().
    list_aff = []
    for author in pp:
        # Take the text between the first "(" and the first "')" and lowercase
        # it. Presumably each author entry looks like "name (affiliation')" --
        # confirm against the scraper. If either marker is missing, str.find
        # returns -1 and the slice silently covers a different span.
        aff_full = author[author.find("(") + 1 : author.find("')")].lower()
        for aff_nml, aliases in unis.items():
            if aff_full in aliases and aff_nml not in list_aff:
                list_aff.append(aff_nml)
    # Each affiliation counts at most once per paper, however many of the
    # paper's authors share it.
    for aff in list_aff:
        affi_count[aff] += 1

# Hard-coded manual adjustment: one extra paper credited to this affiliation
# (the reason is not recorded in this file).
affi_count["vinai research"] += 1
# Markers that identify an affiliation name as academic. Despite the name,
# is_academic() matches these as substrings anywhere in the (lowercased)
# affiliation, not only at the start.
# NOTE(review): because matching is by substring, short markers such as "mit",
# "ens", "uc " or "national" can also match inside unrelated longer names --
# verify against the affiliations present in the alias map.
academic_prefix = [
    "university",
    "univerisity",  # presumably a deliberate misspelling seen in the data -- confirm
    "institute",
    "uc ",
    "mit",
    "college",
    "telecom paris",
    "chinese academy of sciences",
    "school",
    "lawrence livermore national laboratory",
    "kaist",
    "kaust",
    "ist austria",
    "cnrs",
    "universit\u00e9",
    "irit",
    "inria",
    "mila",
    "eth zurich",
    "oxford",
    "cornell",
    "harvard",
    "csiro",
    "tu darmstadt",
    "kth",
    "virginia tech",
    "\u00e9cole polytechnique f\u00e9d\u00e9rale de lausanne",
    "ens",
    "academy",
    "unist",
    "national",
    "yale",
    "universite",
    "universidad",
    "univ.",
    "lmu munich",
    "cuny",
    "tu dresden",
    "technion",
    "postech",
    "telecom sudparis",
]
def is_academic(affi, prefixes=None):
    """Return True if the affiliation name contains any academic marker.

    Matching is a plain substring test, so a marker may occur anywhere in
    ``affi``, not only at its start.

    Args:
        affi: Affiliation name to classify. The test is case-sensitive, so
            the name must already be in the same case as the markers.
        prefixes: Optional iterable of marker strings to test against.
            Defaults to the module-level ``academic_prefix`` list.

    Returns:
        True if at least one marker occurs in ``affi``, otherwise False.
    """
    if prefixes is None:
        prefixes = academic_prefix
    return any(prefix in affi for prefix in prefixes)
# Partition the per-affiliation paper counts into academia and industry.
# Every affiliation lands in exactly one of the two counters, keeping both its
# count and its relative order from affi_count.
academic_affi_count = Counter()
industry_affi_count = Counter()
for _affi_name, _paper_total in affi_count.items():
    if is_academic(_affi_name):
        academic_affi_count[_affi_name] = _paper_total
    else:
        industry_affi_count[_affi_name] = _paper_total
def _plot_top_affiliations(counts, top_n, title, out_path, highlight=None):
    """Plot, save and show a horizontal bar chart of the most common entries.

    Args:
        counts: ``collections.Counter`` mapping affiliation name to paper count.
        top_n: Maximum number of entries to show (fewer bars are drawn if the
            counter holds fewer entries).
        title: Title placed above the chart.
        out_path: File path the figure is saved to.
        highlight: Optional affiliation name. When given, that bar is drawn in
            red and all other bars in steelblue; when omitted, matplotlib's
            default bar colour is used.
    """
    ranked = counts.most_common(top_n)
    names = [name for name, _ in ranked]
    totals = [total for _, total in ranked]
    positions = np.arange(len(names))

    # Reset rc settings to matplotlib's defaults before building each figure.
    plt.rcdefaults()
    fig, ax = plt.subplots(figsize=(8, 12))

    # Only pass an explicit colour list when highlighting is requested, so the
    # un-highlighted charts keep matplotlib's default bar colour.
    bar_kwargs = {}
    if highlight is not None:
        bar_kwargs["color"] = [
            "red" if name == highlight else "steelblue" for name in names
        ]
    ax.barh(positions, totals, align="center", ecolor="black", log=True, **bar_kwargs)

    ax.set_yticks(positions)
    ax.set_yticklabels(names, rotation=0, fontsize=10)
    # most_common() is sorted descending; inverting the axis puts rank 1 on top.
    ax.invert_yaxis()

    # Print each count just past the end of its bar.
    for row, total in enumerate(totals):
        ax.text(total + 0.25, row + 0.25, str(total), color="black", fontsize=10)

    ax.set_xlabel("Number of papers @ NeurIPS 2021")
    ax.set_title(title)
    fig.savefig(out_path, bbox_inches="tight")
    plt.show()


# Number of affiliations shown in each chart.
TOP_N = 50

# All affiliations.
_plot_top_affiliations(
    affi_count,
    TOP_N,
    f"Top {TOP_N} institutes @ NeurIPS 2021",
    "neurips_stats_top50.png",
)

# Academic affiliations only.
_plot_top_affiliations(
    academic_affi_count,
    TOP_N,
    f"Top {TOP_N} academic institutes @ NeurIPS 2021 (Academia only)",
    "neurips_stats_top50_academic.png",
)

# Industry affiliations only, with one affiliation's bar highlighted in red.
_plot_top_affiliations(
    industry_affi_count,
    TOP_N,
    f"Top {TOP_N} industrial institutes @ NeurIPS 2021 (Industry only)",
    "neurips_stats_top50_industry.png",
    highlight="vinai research",
)