-
Notifications
You must be signed in to change notification settings - Fork 0
/
23. Web Scraping Project.py
41 lines (29 loc) · 1011 Bytes
/
23. Web Scraping Project.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
import pandas as pd
import time
# Start a Selenium-controlled Chrome session shared by the rest of the script.
# NOTE(review): "chromedriver" is passed positionally; presumably it is the
# driver executable name resolved via PATH -- confirm against the installed
# Selenium version.
driver = webdriver.Chrome("chromedriver")
# Open the Hacker News front page once at start-up.
driver.get("https://news.ycombinator.com/")
def scrap():
    """Scrape Hacker News pages 1-2 and save story titles and links to CSV.

    For each page the module-level Selenium ``driver`` is navigated to the
    page URL, the same URL is downloaded with ``requests``, and every
    ``<a class="titlelink">`` element contributes one row: its text goes to
    the ``title`` column and its ``href`` to the ``info`` column.  The result
    overwrites ``data_final.csv`` at the hard-coded path below.

    Returns:
        None.  The only output is the CSV file.
    """
    titles = []
    links = []
    for page in range(1, 3):
        url = 'https://news.ycombinator.com/news?p={}'.format(page)
        driver.get(url)
        # Download the paged URL itself.  Requesting the front page on every
        # iteration would yield the same rows for each page number.
        response = requests.get(url).text
        soup = BeautifulSoup(response, 'lxml')
        # Query the anchors once and derive both columns from the same list so
        # titles and links always stay aligned.
        # NOTE(review): an empty CSV means nothing matched this selector --
        # confirm 'titlelink' against the site's current markup.
        anchors = soup.find_all('a', class_='titlelink')
        titles += [anchor.text for anchor in anchors]
        links += [anchor['href'] for anchor in anchors]
    data_final = pd.DataFrame({'title': titles, 'info': links})
    data_final.to_csv(r'D:/materi/ngajar/bahan/web_scrap/data_final.csv', index=False)
# Polling loop: re-run the scraper forever, pausing between runs.
index = 0  # number of completed scrape runs
time_wait = 1  # pause between runs, in minutes (converted to seconds below)
try:
    while True:
        scrap()
        index += 1
        print(f'run ke {index}')
        print(f'time wait {time_wait} menit')
        time.sleep(time_wait * 60)
finally:
    # The loop only ends through an exception (e.g. Ctrl-C).  Quit the
    # Selenium session so the browser and driver process are not left running.
    driver.quit()