Skip to content

Commit 5059651

Browse files
authored
Merge pull request #120 from asl3/papers-over-time
Papers over time script
2 parents 6ee0b7b + 395e5bc commit 5059651

File tree

1 file changed

+124
-0
lines changed

1 file changed

+124
-0
lines changed

scripts/papers_over_time.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
import matplotlib.pyplot as plt
2+
import matplotlib.dates as mdates
3+
import pandas as pd
4+
5+
# Location of the papers CSV, relative to the current working directory.
file_path = "./master_papers.csv"

arxiv_papers_df = pd.read_csv(file_path)

# Parse the submission dates as YYYY-MM-DD. errors="coerce" turns any value
# that does not match the format into NaT instead of raising.
parsed_dates = pd.to_datetime(
    arxiv_papers_df["dateSubmitted"],
    format="%Y-%m-%d",
    errors="coerce",
)
arxiv_papers_df["dateSubmitted"] = parsed_dates

# Overall date range of the data set; used below as the x-axis limits.
earliest_date = parsed_dates.min()
latest_date = parsed_dates.max()

# Number of papers per distinct submission date, in chronological order.
per_date_counts = arxiv_papers_df["dateSubmitted"].value_counts()
submission_dates_counts = per_date_counts.sort_index()
20+
21+
# Plot 1: number of papers submitted per submission date, over the full
# date range of the data set
fig, ax = plt.subplots(figsize=(15, 8))
ax.plot(
    submission_dates_counts.index,
    submission_dates_counts.values,
    marker="o",
    linestyle="-",
    color="blue",
)
ax.set_title("Number of Papers Submitted Over Time")
ax.set_xlabel("Submission Date")
ax.set_ylabel("Number of Papers Submitted")
ax.grid(True)

# AutoDateFormatter chooses its label format from the tick spacing selected by
# the locator it is constructed with, so it must receive the very same locator
# instance that is installed on the axis. Handing it a separate, never-used
# AutoDateLocator means the labels cannot follow the spacing actually in use.
date_locator = mdates.AutoDateLocator()
ax.xaxis.set_major_locator(date_locator)
ax.xaxis.set_major_formatter(mdates.AutoDateFormatter(date_locator))

# Clamp the x-axis to the observed date range and tilt the tick labels.
ax.set_xlim(earliest_date, latest_date)
plt.xticks(rotation=45, ha="right")

plt.show()
42+
43+
44+
# Plot 2: same per-date counts as above, restricted to submissions dated
# 2021 through 2023 (where the majority of the papers fall)

# Keep only rows whose submission year lies in [2021, 2023], both ends
# inclusive. Rows with an unparseable (NaT) date have no year and are dropped.
submission_year = arxiv_papers_df["dateSubmitted"].dt.year
filtered_df = arxiv_papers_df[submission_year.between(2021, 2023)]

# Date range and per-date counts for the filtered subset. These rebind the
# module-level names, so any later code sees the 2021-2023 values.
filtered_dates = filtered_df["dateSubmitted"]
earliest_date = filtered_dates.min()
latest_date = filtered_dates.max()
submission_dates_counts = filtered_dates.value_counts().sort_index()

line_style = {"marker": "o", "linestyle": "-", "color": "blue"}

fig, ax = plt.subplots(figsize=(15, 8))
ax.plot(
    submission_dates_counts.index,
    submission_dates_counts.values,
    **line_style,
)
ax.set(
    title="Number of Papers Submitted Between 2021 and 2023",
    xlabel="Submission Date",
    ylabel="Number of Papers Submitted",
)
ax.grid(True)

# One major tick on the first day of every month, labelled as MM-DD-YYYY.
monthly_ticks = mdates.MonthLocator(bymonthday=1)
tick_labels = mdates.DateFormatter("%m-%d-%Y")
ax.xaxis.set_major_locator(monthly_ticks)
ax.xaxis.set_major_formatter(tick_labels)

ax.set_xlim(earliest_date, latest_date)
plt.xticks(rotation=45, ha="right")
plt.show()
75+
76+
77+
# Plot 3: the 2021-2023 per-date counts again, overlaid with dashed vertical
# lines marking the release dates of several LLMs. This section reuses
# submission_dates_counts, earliest_date and latest_date as they were last
# assigned earlier in the script.

chatgpt_release_date = pd.to_datetime("2022-11-30")
copilot_release_date = pd.to_datetime("2023-02-07")
llama_release_date = pd.to_datetime("2023-02-24")
bard_release_date = pd.to_datetime("2023-03-21")
dolly_release_date = pd.to_datetime("2023-04-12")

# (date, line colour, legend label) for each marker. The order here is the
# order the lines are drawn in, and therefore the order of the legend entries.
release_markers = (
    (chatgpt_release_date, "red", "ChatGPT Release"),
    (dolly_release_date, "green", "Dolly Release"),
    (llama_release_date, "darkorange", "LLaMA Release"),
    (copilot_release_date, "purple", "Microsoft Copilot Release"),
    (bard_release_date, "brown", "Google Bard Release"),
)

fig, ax = plt.subplots(figsize=(15, 8))
ax.plot(
    submission_dates_counts.index,
    submission_dates_counts.values,
    marker="o",
    linestyle="-",
    color="blue",
)
ax.set(
    title="Number of Papers Submitted Between 2021 and 2023",
    xlabel="Submission Date",
    ylabel="Number of Papers Submitted",
)
ax.grid(True)

# One major tick on the first day of every month, labelled as MM-DD-YYYY.
ax.xaxis.set_major_locator(mdates.MonthLocator(bymonthday=1))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d-%Y"))

ax.set_xlim(earliest_date, latest_date)

plt.xticks(rotation=45, ha="right")

# Add vertical lines to indicate release dates of different LLMs
for release_date, line_color, legend_label in release_markers:
    ax.axvline(
        x=release_date,
        color=line_color,
        linestyle="--",
        label=legend_label,
    )

plt.legend()

plt.show()

0 commit comments

Comments
 (0)