-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathscrape.py
107 lines (93 loc) · 3.19 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
from igramscraper.instagram import Instagram
from time import sleep
import pickle
# Module-level igramscraper client shared by the scraping helpers in this file.
# The ``__main__`` block supplies credentials and logs in before any helper runs.
instagram = Instagram()
def main():
    """Placeholder entry point; intentionally does nothing and returns None."""
    return None
def get_following(username, limit=500):
    """Return the accounts ``username`` follows, using an on-disk pickle cache.

    The result is cached in ``<username>_following.pkl`` (a relative path).
    When that file already exists it is loaded and returned without
    contacting Instagram.

    Args:
        username: Instagram user name; also the prefix of the cache file.
        limit: Forwarded to ``instagram.get_following`` as its count
            argument (presumably the maximum number of accounts -- confirm
            against the igramscraper documentation).

    Returns:
        The object returned by ``instagram.get_following`` (the caller in
        this file indexes it with ``['accounts']``), or ``None`` when the
        fetch or the cache write failed.
    """
    filename = username + '_following.pkl'
    if os.path.exists(filename):
        # NOTE: unpickling can execute arbitrary code; only load cache files
        # that this script wrote itself.
        with open(filename, 'rb') as f:
            return pickle.load(f)

    account = instagram.get_account(username)
    sleep(1)  # pause between consecutive requests
    try:
        # 100 is presumably the page size -- confirm against igramscraper.
        following = instagram.get_following(account.identifier, limit, 100, delayed=True)
        with open(filename, 'wb') as f:
            pickle.dump(following, f)
    except Exception as exc:
        # Best effort: report the failure and let the caller carry on.
        # ``Exception`` (not a bare ``except``) keeps Ctrl-C working.
        print("Failed to scrape following for", username, "-", exc)
        return None
    sleep(5)  # pause before the caller issues the next request
    return following
def get_followers(username, limit=500):
    """Return the followers of ``username``, using an on-disk pickle cache.

    The result is cached in ``<username>_followers.pkl`` (a relative path).
    When that file already exists it is loaded and returned without
    contacting Instagram.

    Args:
        username: Instagram user name; also the prefix of the cache file.
        limit: Forwarded to ``instagram.get_followers`` as its count
            argument (presumably the maximum number of accounts -- confirm
            against the igramscraper documentation).

    Returns:
        The object returned by ``instagram.get_followers``, or ``None`` when
        the fetch failed. (Previously a failed fetch left the result unbound
        and the final ``return`` raised ``UnboundLocalError``.)
    """
    filename = username + '_followers.pkl'
    if os.path.exists(filename):
        # NOTE: unpickling can execute arbitrary code; only load cache files
        # that this script wrote itself.
        with open(filename, 'rb') as f:
            return pickle.load(f)

    account = instagram.get_account(username)
    sleep(1)  # pause between consecutive requests
    try:
        # 100 is presumably the page size -- confirm against igramscraper.
        followers = instagram.get_followers(account.identifier, limit, 100, delayed=True)
    except Exception as exc:
        print("Failed to scrape followers for", username, "-", exc)
        return None

    # Caching is best effort: a failed write must not discard fetched data.
    try:
        with open(filename, 'wb') as f:
            pickle.dump(followers, f)
    except Exception as exc:
        print("Failed to cache followers for", username, "-", exc)
    return followers
def get_media(username, limit=200):
    """Return the media posted by ``username``, using an on-disk pickle cache.

    The result is cached in ``<username>_media.pkl`` (a relative path). When
    that file already exists it is loaded and returned without contacting
    Instagram.

    Args:
        username: Instagram user name; also the prefix of the cache file.
        limit: Forwarded to ``instagram.get_medias`` as its count argument.

    Returns:
        The object returned by ``instagram.get_medias`` (iterated by
        ``get_comments`` in this file), either fresh or from the cache.
    """
    filename = username + '_media.pkl'
    if os.path.exists(filename):
        # NOTE: unpickling can execute arbitrary code; only load cache files
        # that this script wrote itself.
        with open(filename, 'rb') as f:
            return pickle.load(f)

    media = instagram.get_medias(username, limit)
    with open(filename, 'wb') as f:
        pickle.dump(media, f)
    return media
def get_comments(media, comment_limit=100, username=None):
    """Collect the comments on every item in ``media``, cached on disk.

    The combined list is cached in ``<username>_comments.pkl``. When that
    file already exists it is loaded and returned without contacting
    Instagram. While scraping, a checkpoint of the comments gathered so far
    is written to ``<username>_comments_ckpt<m>.pkl`` every 100 media items.

    Args:
        media: Iterable of media objects exposing ``.identifier``.
        comment_limit: Forwarded to ``instagram.get_media_comments_by_id``
            as its count argument.
        username: Prefix for the cache and checkpoint file names. When
            omitted, the module-level ``username`` is used, which is what
            this function always read before the parameter existed.

    Returns:
        A list of comment objects; each one scraped here gets a
        ``medium_identifier`` attribute naming the medium it belongs to.

    Raises:
        ValueError: If ``username`` is not given and no module-level
            ``username`` is set.
    """
    if username is None:
        # Backward compatibility with callers that rely on the global set
        # by the ``__main__`` block.
        username = globals().get('username')
        if username is None:
            raise ValueError(
                "get_comments() needs a username: pass username=... or set "
                "the module-level 'username' first"
            )

    filename = username + '_comments.pkl'
    if os.path.exists(filename):
        # NOTE: unpickling can execute arbitrary code; only load cache files
        # that this script wrote itself.
        with open(filename, 'rb') as f:
            return pickle.load(f)

    comments = []
    for m, medium in enumerate(media):
        print("Medium number:", m)
        if m > 0 and m % 100 == 0:
            # Periodic checkpoint so a crash does not lose everything.
            ckpt_filename = username + '_comments_ckpt' + str(m) + '.pkl'
            with open(ckpt_filename, 'wb') as f:
                pickle.dump(comments, f)
            print("Saved", ckpt_filename)
        # NOTE(review): the source lost its indentation, so it is unclear
        # whether this pause ran once per medium or only after a checkpoint.
        # Once per medium is the safer choice against rate limiting -- confirm.
        sleep(2)
        medium_comments = instagram.get_media_comments_by_id(medium.identifier, comment_limit)
        for comment in medium_comments['comments']:
            # Tag each comment so it can be traced back to its medium.
            comment.medium_identifier = medium.identifier
            comments.append(comment)

    with open(filename, 'wb') as f:
        pickle.dump(comments, f)
    return comments
if __name__ == "__main__":
    # Log in with credentials taken from the environment so they never live
    # in the source file.
    login_username = os.environ.get('IG_USERNAME')
    login_password = os.environ.get('IG_PASSWORD')
    if not login_username or not login_password:
        # Fail early with a clear message instead of handing None to the client.
        raise SystemExit("Set IG_USERNAME and IG_PASSWORD before running this script.")
    instagram.with_credentials(login_username, login_password)
    instagram.login()

    # Account to scrape. This must stay a module-level name called
    # ``username``: get_comments() resolves its cache file name from it.
    username = 'iubloomington'

    print("Scraping following.")
    following = get_following(username, 356)
    if following is None:
        # get_following() returns None when the scrape fails; indexing it
        # would raise TypeError.
        print("Scraping following failed for", username)
    else:
        for u, user in enumerate(following['accounts']):
            print(u, user.username)
            # Called only to populate each followed account's cache file.
            _ = get_following(user.username, 100)
        print("Scraping following complete.")

    print("Scraping media.")
    media = get_media(username, 2311)
    print("Scraping media complete.")

    print("Scraping comments.")
    comments = get_comments(media, 1000)
    print("Scraping comments complete.")

    print("Scraping complete.")