Skip to content

Commit

Permalink
Add Further Filtering / Refactorization
Browse files (browse the repository at this point in the history)
  • Loading branch information
ggriffey committed Mar 1, 2024
1 parent 26a10bd commit 36cdefb
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 22 deletions.
7 changes: 1 addition & 6 deletions public/script.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,6 @@ fetch("./data/sites.json")
i++;
});
});
})

// $().add(`<p> ${property[0]} </p>`);
// console.log($());`

// bed / bath / price LISTING name name name
});

// NOTE: " \u00A0 " IS A NON-BREAKING SPACE IN A TEMPLATE LITERAL
37 changes: 21 additions & 16 deletions scraper/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,26 +64,31 @@ def fill_properties(old_listings: Dict, new_listings: Dict, ygl_url_base: str):
listing_addr = listing_element.get_text()
listing_url = listing_element['href']

if listing_addr not in new_listings:
if listing_addr in old_listings:
new_listings[listing_addr] = old_listings[listing_addr]
else:
if args.notify:
bot.notify(listing_url)
listing_props_elements = listing.find_all('div', class_='column')
listing_props = list(map(lambda tag: tag.text.strip(), listing_props_elements))

# the listing properties are well-ordered, so we parse them directly
listing_price = int(''.join(filter(lambda char: char.isdigit(), listing_props[0])))
listing_beds = float(listing_props[1].split(' ')[0])
listing_baths = float(listing_props[2].split(' ')[0])
listing_date = listing_props[3].split(' ')[1]

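# Keep only listings with at least 1.5 baths and a price of at most $1,150 per bedroom (e.g. 4 beds at $4,600 or less).
# NOTE(review): if listing_beds can be 0 (e.g. a studio), the division below raises ZeroDivisionError - confirm against the scraped data.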
if listing_baths >= 1.5 and listing_price/listing_beds <= 1150:
if listing_addr not in new_listings:
if listing_addr in old_listings:
new_listings[listing_addr] = old_listings[listing_addr]
else:
if args.notify:
bot.notify(listing_url)

# initialize a new entry for this listing
new_listing = {
'refs': [],
'price': -1,
}

listing_props_elements = listing.find_all('div', class_='column')
listing_props = list(map(lambda tag: tag.text.strip(), listing_props_elements))
# the listing properties are well-ordered, so we parse them directly
listing_price = int(''.join(filter(lambda char: char.isdigit(), listing_props[0])))
listing_beds = float(listing_props[1].split(' ')[0])
listing_baths = float(listing_props[2].split(' ')[0])
listing_date = listing_props[3].split(' ')[1]
# NOTE: the listing-property parsing that used to live here now runs earlier, before the bath/price filter

new_listing['price'] = listing_price
new_listing['beds'] = listing_beds
Expand All @@ -96,9 +101,9 @@ def fill_properties(old_listings: Dict, new_listings: Dict, ygl_url_base: str):

new_listings[listing_addr] = new_listing

# always check if this is a new copy of the listing
if listing_url not in new_listings[listing_addr]['refs']:
new_listings[listing_addr]['refs'].append(listing_url)
# always check if this is a new copy of the listing
if listing_url not in new_listings[listing_addr]['refs']:
new_listings[listing_addr]['refs'].append(listing_url)


if __name__ == "__main__":
Expand Down

0 comments on commit 36cdefb

Please sign in to comment.