-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
81 lines (68 loc) · 3.96 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from bs4 import BeautifulSoup
import requests
import time
from selenium import webdriver
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
# Request headers that present the script as a desktop Chrome browser.
header = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36",
    "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8",
}
# Fetch the Zillow search page to scrape. The searchQueryState query parameter
# is a URL-encoded JSON object holding the search filters and map bounds.
response = requests.get(
    "https://www.zillow.com/homes/San-Francisco,-CA_rb/?searchQueryState=%7B%22pagination%22%3A%7B%7D%2C%22usersSearchTerm%22%3A%22San%20Francisco%2C%20CA%22%2C%22mapBounds%22%3A%7B%22west%22%3A-122.55177535009766%2C%22east%22%3A-122.31488264990234%2C%22south%22%3A37.69926912019228%2C%22north%22%3A37.851235694487485%7D%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A20330%2C%22regionType%22%3A6%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22fr%22%3A%7B%22value%22%3Atrue%7D%2C%22fsba%22%3A%7B%22value%22%3Afalse%7D%2C%22fsbo%22%3A%7B%22value%22%3Afalse%7D%2C%22nc%22%3A%7B%22value%22%3Afalse%7D%2C%22cmsn%22%3A%7B%22value%22%3Afalse%7D%2C%22auc%22%3A%7B%22value%22%3Afalse%7D%2C%22fore%22%3A%7B%22value%22%3Afalse%7D%2C%22pmf%22%3A%7B%22value%22%3Afalse%7D%2C%22pf%22%3A%7B%22value%22%3Afalse%7D%2C%22mp%22%3A%7B%22max%22%3A3000%7D%2C%22price%22%3A%7B%22max%22%3A872627%7D%2C%22beds%22%3A%7B%22min%22%3A1%7D%7D%2C%22isListVisible%22%3Atrue%2C%22mapZoom%22%3A12%7D",
    headers=header,
    # Without a timeout, requests waits indefinitely on a stalled connection.
    timeout=30,
)
# Fail loudly on an HTTP error status instead of parsing an error page into
# empty result lists.
response.raise_for_status()
data = response.text
# Parse the returned HTML with the standard-library parser backend.
soup = BeautifulSoup(data, "html.parser")
# Select every anchor tag nested inside a `.list-card-top` element.
all_link_elements = soup.select(".list-card-top a")
# Build the list of listing URLs. An href that is not already an absolute
# http(s) URL gets the Zillow root prepended.
all_links = []
for link in all_link_elements:
    href = link["href"]
    print(href)
    # Test the scheme prefix rather than a substring, so a relative path that
    # merely contains "http" somewhere is still prefixed with the site root.
    if href.startswith(("http://", "https://")):
        all_links.append(href)
    else:
        all_links.append(f"https://www.zillow.com{href}")
# Read the text of each <address> tag under `.list-card-info`. When the text
# contains " | ", only the part after the last separator is kept.
all_address_elements = soup.select(".list-card-info address")
all_addresses = [
    tag.get_text().rpartition(" | ")[2]
    for tag in all_address_elements
]
# Collect one price per `.list-card-heading` element, in page order.
all_price_elements = soup.select(".list-card-heading")
all_prices = []
for element in all_price_elements:
    # Single and multiple listings use different tag and class structures.
    try:
        # Card with only one listing: first child of `.list-card-price`.
        price = element.select(".list-card-price")[0].contents[0]
    except IndexError:
        print('Multiple listings for the card')
        # Card with multiple listings: first child of the first details item.
        # If this lookup fails too, its IndexError propagates unchanged.
        price = element.select(".list-card-details li")[0].contents[0]
    # Append only after a price was actually found. A `finally` clause here
    # would also run while the fallback's exception is propagating, appending
    # the previous card's price or raising NameError on the first card.
    all_prices.append(price)
# After scraping the listing data with BeautifulSoup, Selenium automates
# filling in a Google Form, submitting one response per listing.
options = Options()
# webdriver_manager downloads a matching chromedriver and returns its path,
# which is handed to the Service.
service = Service(ChromeDriverManager().install())
# Browser options belong to webdriver.Chrome, not to Service, and must be an
# Options instance rather than the Options class.
driver = webdriver.Chrome(service=service, options=options)
try:
    # Iterate the three lists in lockstep. zip stops at the shortest list, so
    # a mismatch in scraped counts cannot raise IndexError halfway through.
    for address_text, price_text, link_text in zip(all_addresses, all_prices, all_links):
        # Substitute your own Google Form URL here 👇
        driver.get("PLEASE INSERT YOUR GOOGLE LINKS FORM HERE")
        # Fixed pause after navigation before looking up the form fields.
        time.sleep(2)
        address = driver.find_element(
            By.XPATH,
            '//*[@id="mG61Hd"]/div[2]/div/div[2]/div[1]/div/div/div[2]/div/div[1]/div/div[1]/input')
        price = driver.find_element(
            By.XPATH,
            '//*[@id="mG61Hd"]/div[2]/div/div[2]/div[2]/div/div/div[2]/div/div[1]/div/div[1]/input')
        link = driver.find_element(
            By.XPATH,
            '//*[@id="mG61Hd"]/div[2]/div/div[2]/div[3]/div/div/div[2]/div/div[1]/div/div[1]/input')
        submit_button = driver.find_element(
            By.XPATH, '//*[@id="mG61Hd"]/div[2]/div/div[3]/div[1]/div/div')
        address.send_keys(address_text)
        price.send_keys(price_text)
        link.send_keys(link_text)
        submit_button.click()
finally:
    # Always shut down the browser and the chromedriver process, even when a
    # form element lookup fails.
    driver.quit()