forked from EverAnh/covid-basic-needs-scraping
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
completed and merging from my personal repo
- Loading branch information
Showing
11 changed files
with
1,179 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# How to Use | ||
1. npm install | ||
2. node index.js | ||
|
||
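Note: a `GOOGLE_API_KEY` must be defined in a local `.env` file (for example, `GOOGLE_API_KEY=<your key>`); the script loads it at startup with `dotenv` and uses it for the Google Geocoding API requests. |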
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
let daysOfWeek = ['M','T','W','TH','F','SA','SU'] | ||
|
||
module.exports = function(string) { | ||
if(string === undefined) return false; | ||
if( string.match(/([A-Z]|[a-z]){1,2}-([A-Z]|[a-z]){1,2}/gm) !== null ) { | ||
let startDay = string.match(/([A-Z]|[a-z]){1,2}-([A-Z]|[a-z]){1,2}/gm)[0].match(/([A-Z]|[a-z]){1,2}-/gm)[0]; | ||
startDay = startDay.slice(0,startDay.length-1); | ||
let endDay = string.match(/([A-Z]|[a-z]){1,2}-([A-Z]|[a-z]){1,2}/gm)[0].match(/-([A-Z]|[a-z]){1,2}/gm)[0].slice(1).toUpperCase(); | ||
let days = []; | ||
for( let i = daysOfWeek.indexOf(startDay); i % daysOfWeek.length != daysOfWeek.indexOf(endDay); i++ ) days.push(daysOfWeek[i % daysOfWeek.length]); | ||
return days.toString(); | ||
} | ||
return 'M,T,W,TH,F'; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
module.exports = function(string) { | ||
let noDays = string.replace(/([A-Z]|[a-z]){1,2}-([A-Z]|[a-z]){1,2}/gm, "").trim(); | ||
return noDays; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
module.exports = function(address) { | ||
if(address !== undefined) { | ||
let zipSearch = address.match(/WA [0-9][0-9][0-9][0-9][0-9]/gm); | ||
if( zipSearch !== null) { | ||
zip = zipSearch[0].match(/[0-9]/gm).join(''); | ||
return zip; | ||
} | ||
} | ||
return '' | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
const got = require('got'); | ||
const delay = require('delay'); | ||
const parser = require('parse-address'); // US Address parser | ||
|
||
module.exports = async function(rawAddressString, cityName) { | ||
let parsedAddress = parser.parseLocation(rawAddressString) | ||
if( parsedAddress !== null ) { | ||
if( parsedAddress.street !== undefined && parsedAddress.number !== undefined) { | ||
let filteredStreet = parsedAddress.street.replace(' ','%20'); | ||
let filteredCity = parsedAddress.city ? parsedAddress.city.replace(' ', '%20') : cityName.replace(' ', '%20'); | ||
await delay(20); // Delay needed to adhere to Google API Rate Limit of 50 RPS | ||
let url = `https://maps.googleapis.com/maps/api/geocode/json?address=${filteredStreet},${filteredCity},WA&key=${process.env.GOOGLE_API_KEY}` | ||
let body = await got(url).json(); | ||
|
||
if( body.results.length === 1 ) { | ||
let LatLng = {}; | ||
LatLng.lat = body.results[0].geometry.location.lat; | ||
LatLng.lng = body.results[0].geometry.location.lng; | ||
return LatLng; | ||
} | ||
} | ||
} | ||
return {}; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
const got = require('got'); | ||
const delay = require('delay'); | ||
|
||
module.exports = async function(street, city) { | ||
let filteredStreet = street.replace(' ','%20'); | ||
let filteredCity = city.replace(' ', '%20'); | ||
await delay(20); // Delay needed to adhere to Google API Rate Limit of 50 RPS | ||
let url = `https://maps.googleapis.com/maps/api/geocode/json?address=${filteredStreet},${filteredCity},WA&key=${process.env.GOOGLE_API_KEY}` | ||
let body = await got(url).json(); | ||
let zip = ''; | ||
|
||
if( body.results.length === 1 ) { | ||
let zipSearch = body.results[0].formatted_address.match(/WA [0-9][0-9][0-9][0-9][0-9]/gm); | ||
if( zipSearch !== null) { | ||
zip = zipSearch[0].match(/[0-9]/gm).join(''); | ||
} | ||
} | ||
return zip; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
require('dotenv').config(); | ||
const cheerio = require('cheerio'); | ||
const request = require('request'); | ||
const url = 'https://www.uwkc.org/free-meals-during-school-closures/'; | ||
const fs = require('fs'); | ||
|
||
//import custom functions | ||
const extractDays = require('./extractDays.js'); | ||
const extractTime = require('./extractTime.js'); | ||
const parseAddress = require('./parseAddress.js'); | ||
const extractZip = require('./extractZip.js'); | ||
const fetchLatLng = require('./fetchLatLng.js'); | ||
|
||
|
||
// Scrape html from url | ||
request(url, async function(error, response, html) { | ||
// Using cheerio to manipulate html using jquery like methods | ||
let $ = cheerio.load(html); | ||
|
||
let structuredResults = []; | ||
let unstructuredResults = []; | ||
|
||
// Select all locations in html | ||
let cities = $('.accordion_item'); | ||
|
||
// Loop through cities | ||
for( let i = 0; i < cities.length; i++) { | ||
// Store city name | ||
let cityName = $(cities[i]).find('.accordion_item-heading').text(); | ||
|
||
// Get multiple locations per city | ||
let locations = $(cities[i]).find('p') | ||
|
||
// Loop through each location | ||
for( let i = 0; i < locations.length; i++) { | ||
let locationData = {}; | ||
let locationText = $(locations[i]).text().split('\n'); | ||
if( locationText.includes('TBD') !== true ) { | ||
if( locationText.length === 3 ) { | ||
locationData.siteName = locationText[0]; | ||
locationData.siteStatus = 'Open'; | ||
locationData.siteState = 'WA'; | ||
locationData.siteAddress = await parseAddress(locationText[1], cityName); | ||
locationData.siteZip = extractZip(locationData.siteAddress); | ||
locationData.daysofOperation = extractDays(locationText[2]); | ||
locationData.lunchTime = extractTime(locationText[2]); | ||
locationData._geoloc = await fetchLatLng(locationText[1], cityName); | ||
|
||
// Print Result to show progress while running script | ||
console.log(locationData); | ||
|
||
// Push location data to results array | ||
structuredResults.push(locationData); | ||
} else { | ||
|
||
// Print Result to show progress while running script | ||
console.log(locationText.toString()); | ||
|
||
unstructuredResults.push(locationText.toString()); | ||
} | ||
} | ||
} | ||
} | ||
// Save structured Results to file | ||
fs.writeFile('structuredResults.json', JSON.stringify(structuredResults), function(err) { | ||
if(err) throw err; | ||
console.log("Structured Results Saved") | ||
}) | ||
|
||
// Save unstructured Results to file | ||
fs.writeFile('unstructuredResults.json', JSON.stringify(unstructuredResults), function(err) { | ||
if(err) throw err; | ||
console.log("Unstructured Results Saved") | ||
}) | ||
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
const fetchZip = require('./fetchZip.js'); | ||
const parser = require('parse-address'); // US Address parser | ||
|
||
module.exports = async function(rawAddressString, cityName) { | ||
let parsedAddress = parser.parseLocation(rawAddressString) | ||
let finalAddressString; | ||
if( parsedAddress !== null ) { | ||
if( parsedAddress.zip === undefined && parsedAddress.street !== undefined && parsedAddress.number !== undefined) { | ||
parsedAddress.zip = await fetchZip( parsedAddress.street, parsedAddress.city || cityName ) | ||
} | ||
finalAddressString = `${parsedAddress.number} ${parsedAddress.street} ${parsedAddress.type}, ${parsedAddress.city || cityName}, WA ${parsedAddress.zip}` | ||
} else { | ||
return rawAddressString; | ||
} | ||
|
||
return finalAddressString; | ||
} |