Skip to content

Commit

Permalink
scrape multiple pages and save all data to CSV after scraping completes
Browse files Browse the repository at this point in the history
  • Loading branch information
charakamihiranga committed Sep 19, 2024
1 parent 289dcd2 commit 80b0cca
Show file tree
Hide file tree
Showing 2 changed files with 4,245 additions and 9 deletions.
27 changes: 20 additions & 7 deletions Python-Web-Scraping/Scrape-Multiple-Pages/App.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ def ScrapeLandDetails(content):
land_cards = soup.find_all('div', class_='result-item')

if not land_cards:
# If no land cards are found, return False to stop scraping
return False
# If no land cards are found, return an empty list to indicate no more data
return []

for card in land_cards:
# Extract title
Expand Down Expand Up @@ -59,14 +59,16 @@ def ScrapeLandDetails(content):
'total_price': total_price
})

# Write the extracted data to the CSV file
writeDataIntoCSV(land_items)
return True
return land_items


# URL template for the listing pages; the {} placeholder is for the page number
base_url = 'https://www.patpat.lk/property?page={}'


# List to hold all scraped land items
all_land_items = []

# Start scraping from page 1 and continue until a page has no listings or a request fails
page_no = 1
while True:
Expand All @@ -75,12 +77,23 @@ def ScrapeLandDetails(content):

if response.status_code == 200:
print(f'Page {page_no} fetched successfully')
if not ScrapeLandDetails(response.content):
land_items = ScrapeLandDetails(response.content)

if not land_items:
print(f"No more listings found on page {page_no}. Stopping the scraper.")
break

# Append the current page's land items to the master list
all_land_items.extend(land_items)

page_no += 1
else:
print(f"Failed to fetch page {page_no}")
break

print("Scraping complete.")
# Once all pages are scraped, write the data to CSV
if all_land_items:
writeDataIntoCSV(all_land_items)
print(f"Scraping complete. {len(all_land_items)} land items written to CSV.")
else:
print("No data to write to CSV.")
Loading

0 comments on commit 80b0cca

Please sign in to comment.