forked from spencerkimball/stargazers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompetition_scraping.py
63 lines (48 loc) · 1.64 KB
/
competition_scraping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
import subprocess
from pathlib import Path
import pandas as pd
from dotenv import load_dotenv
def main():
    """Fetch stargazer data for every repository listed in the repos CSV.

    Steps:
      1. Load ``GITHUB_TOKEN`` (``.env`` is read via ``load_dotenv``).
      2. Read ``output/repo_to_scrap.csv``.
      3. Ensure the ``stargazer_cache`` and ``email_reachout`` directories
         exist.
      4. Build the Go binary with ``go build`` in the current directory.
      5. Run ``./stargazers fetch`` once per value of the ``Repository``
         column. A failure for one repository is reported and the loop
         moves on to the next one.

    Raises:
        ValueError: If ``GITHUB_TOKEN`` is not set.
        subprocess.CalledProcessError: If ``go build`` exits non-zero.
    """
    # Load environment variables
    load_dotenv()
    github_token = os.getenv("GITHUB_TOKEN")
    if not github_token:
        raise ValueError("GITHUB_TOKEN not found in .env file")

    # Read the repos CSV
    df = pd.read_csv("output/repo_to_scrap.csv")

    # Create working directories if they don't exist
    Path("stargazer_cache").mkdir(exist_ok=True)
    Path("email_reachout").mkdir(exist_ok=True)

    # First build the stargazers binary; abort if the build fails because
    # every later step depends on it.
    subprocess.run(["go", "build"], check=True)

    token_arg = f"--token={github_token}"

    # Process each repository
    for _, row in df.iterrows():
        repo = row["Repository"]

        # A blank cell is read as NaN; running the tool with "--repo=nan"
        # would only produce a confusing failure.
        if pd.isna(repo):
            print("\nSkipping row with no repository name")
            continue

        print(f"\nProcessing {repo}...")

        cmd = [
            "./stargazers",
            "fetch",
            f"--repo={repo}",
            token_arg,
            "--cache=./stargazer_cache",
            "--mode=basic",
        ]

        # Never echo the real token: console output often ends up in shared
        # logs. Only the displayed command is masked; `cmd` is unchanged.
        printable = " ".join(
            "--token=***" if arg == token_arg else arg for arg in cmd
        )
        print(f"Running command: {printable}")

        try:
            result = subprocess.run(
                cmd,
                text=True,
                check=False,  # Don't raise exception on non-zero exit
            )
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            # Launch failures (missing binary, permissions, bad argument)
            # should not stop the remaining repositories.
            print(f"Failed to process {repo}: {str(e)}")
            continue

        if result.returncode != 0:
            print(f"Error processing {repo} (exit code {result.returncode})")
        else:
            print(f"Successfully processed {repo}")
# Entry-point guard: run the scraper only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()