forked from minimaxir/get-profile-data-of-repo-stargazers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrepo_stargazers.py
90 lines (63 loc) · 2.69 KB
/
repo_stargazers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
### Script to get GitHub profile data of all Stargazers of a given GitHub repository
###
### by Max Woolf (@minimaxir)
import json
import csv
import urllib2
import datetime
import time
access_token = <FILL IN>
repo = "minimaxir/big-list-of-naughty-strings"
fields = ["user_id", "username", "num_followers", "num_following", "num_repos","created_at","star_time"]
page_number = 0
users_processed = 0
stars_remaining = True
list_stars = []
print "Gathering Stargazers for %s..." % repo
###
### This block of code creates a list of tuples in the form of (username, star_time)
### for the Stargazers, which will later be used to extract full GitHub profile data
###
# Walk the paginated stargazers endpoint, appending one (username, star_time)
# tuple per Stargazer to list_stars, until a short page marks the end.
per_page = 100  # request the largest page size to keep the request count low
while stars_remaining:
    query_url = "https://api.github.com/repos/%s/stargazers?page=%s&per_page=%s" % (repo, page_number, per_page)
    req = urllib2.Request(query_url)
    # This media type makes each entry carry 'starred_at' next to the 'user'.
    req.add_header('Accept', 'application/vnd.github.v3.star+json')
    # Authenticate via header: GitHub no longer accepts the token as an
    # ?access_token= query parameter, and a header keeps it out of the URL.
    req.add_header('Authorization', 'token %s' % access_token)
    response = urllib2.urlopen(req)
    try:
        data = json.loads(response.read())
    finally:
        response.close()  # release the connection even if parsing fails

    for user in data:
        username = user['user']['login']
        # The API reports UTC; shift by a fixed five hours (EST). Daylight
        # saving time is not accounted for.
        star_time = datetime.datetime.strptime(user['starred_at'], '%Y-%m-%dT%H:%M:%SZ')
        star_time = star_time + datetime.timedelta(hours=-5)  # EST
        star_time = star_time.strftime('%Y-%m-%d %H:%M:%S')
        list_stars.append((username, star_time))

    # A page shorter than the requested size (an empty one included) is the
    # last page. Comparing against per_page keeps this test tied to the size
    # actually requested instead of a separate magic number.
    if len(data) < per_page:
        stars_remaining = False
    page_number += 1

print("Done Gathering Stargazers for %s!" % repo)

# Remove dupes (the same page can be seen twice, e.g. when pages overlap) and
# sort by star time, then username, so the output order is deterministic.
# The '%Y-%m-%d %H:%M:%S' strings sort chronologically as plain text.
list_stars = sorted(set(list_stars), key=lambda star: (star[1], star[0]))

print("Now Gathering Stargazers' GitHub Profiles...")
###
### This block of code extracts the full profile data of the given Stargazer
### and writes to CSV
###
# Fetch the public profile of every collected Stargazer and write one CSV row
# per profile. The file is named after the repository part of `repo` and is
# opened in binary mode, as the Python 2 csv module expects.
with open('%s-stargazers.csv' % repo.split('/')[1], 'wb') as stars:
    stars_writer = csv.writer(stars)
    stars_writer.writerow(fields)

    for user in list_stars:
        username = user[0]  # user is a (username, star_time) tuple

        query_url = "https://api.github.com/users/%s" % username
        req = urllib2.Request(query_url)
        # Authenticate via header: GitHub no longer accepts the token as an
        # ?access_token= query parameter, and a header keeps it out of the URL.
        req.add_header('Authorization', 'token %s' % access_token)

        try:
            response = urllib2.urlopen(req)
        except urllib2.HTTPError as err:
            # A profile can disappear (deleted or renamed account) between
            # listing and lookup; skip it instead of aborting the whole run.
            # Every other HTTP error (bad token, rate limit, ...) still raises.
            if err.code != 404:
                raise
            print("Skipping %s: profile not found" % username)
        else:
            try:
                data = json.loads(response.read())
            finally:
                response.close()  # release the connection even if parsing fails

            user_id = data['id']
            num_followers = data['followers']
            num_following = data['following']
            num_repos = data['public_repos']

            # The API reports UTC; shift by a fixed five hours (EST). Daylight
            # saving time is not accounted for.
            created_at = datetime.datetime.strptime(data['created_at'], '%Y-%m-%dT%H:%M:%SZ')
            created_at = created_at + datetime.timedelta(hours=-5)  # EST
            created_at = created_at.strftime('%Y-%m-%d %H:%M:%S')

            # Column order matches the `fields` header; user[1] is star_time.
            stars_writer.writerow([user_id, username, num_followers, num_following, num_repos, created_at, user[1]])

        users_processed += 1

        if users_processed % 100 == 0:
            print("%s Users Processed: %s" % (users_processed, datetime.datetime.now()))

        # One request per second, whether or not the lookup succeeded.
        time.sleep(1)  # stay within API rate limit of 5000 requests / hour + buffer