Skip to content

Commit

Permalink
completed and merging from my personal repo
Browse files Browse the repository at this point in the history
  • Loading branch information
udugam committed Apr 15, 2020
1 parent c13edae commit c387a13
Show file tree
Hide file tree
Showing 11 changed files with 1,179 additions and 0 deletions.
1,009 changes: 1,009 additions & 0 deletions data/WashingtonUnitedWay.csv

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions data/WashingtonUnitedWay.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions data/WashingtonUnitedWayUnstructured.json

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions node/washington_unitedWay/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# How to Use
1. npm install
2. node index.js

Please note that a Google Geocoding API key is required: define `GOOGLE_API_KEY` in a local `.env` file in this directory before running the script.
14 changes: 14 additions & 0 deletions node/washington_unitedWay/extractDays.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
let daysOfWeek = ['M','T','W','TH','F','SA','SU']

module.exports = function(string) {
if(string === undefined) return false;
if( string.match(/([A-Z]|[a-z]){1,2}-([A-Z]|[a-z]){1,2}/gm) !== null ) {
let startDay = string.match(/([A-Z]|[a-z]){1,2}-([A-Z]|[a-z]){1,2}/gm)[0].match(/([A-Z]|[a-z]){1,2}-/gm)[0];
startDay = startDay.slice(0,startDay.length-1);
let endDay = string.match(/([A-Z]|[a-z]){1,2}-([A-Z]|[a-z]){1,2}/gm)[0].match(/-([A-Z]|[a-z]){1,2}/gm)[0].slice(1).toUpperCase();
let days = [];
for( let i = daysOfWeek.indexOf(startDay); i % daysOfWeek.length != daysOfWeek.indexOf(endDay); i++ ) days.push(daysOfWeek[i % daysOfWeek.length]);
return days.toString();
}
return 'M,T,W,TH,F';
}
4 changes: 4 additions & 0 deletions node/washington_unitedWay/extractTime.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
module.exports = function(string) {
let noDays = string.replace(/([A-Z]|[a-z]){1,2}-([A-Z]|[a-z]){1,2}/gm, "").trim();
return noDays;
}
10 changes: 10 additions & 0 deletions node/washington_unitedWay/extractZip.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module.exports = function(address) {
if(address !== undefined) {
let zipSearch = address.match(/WA [0-9][0-9][0-9][0-9][0-9]/gm);
if( zipSearch !== null) {
zip = zipSearch[0].match(/[0-9]/gm).join('');
return zip;
}
}
return ''
}
24 changes: 24 additions & 0 deletions node/washington_unitedWay/fetchLatLng.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
const got = require('got');
const delay = require('delay');
const parser = require('parse-address'); // US Address parser

module.exports = async function(rawAddressString, cityName) {
let parsedAddress = parser.parseLocation(rawAddressString)
if( parsedAddress !== null ) {
if( parsedAddress.street !== undefined && parsedAddress.number !== undefined) {
let filteredStreet = parsedAddress.street.replace(' ','%20');
let filteredCity = parsedAddress.city ? parsedAddress.city.replace(' ', '%20') : cityName.replace(' ', '%20');
await delay(20); // Delay needed to adhere to Google API Rate Limit of 50 RPS
let url = `https://maps.googleapis.com/maps/api/geocode/json?address=${filteredStreet},${filteredCity},WA&key=${process.env.GOOGLE_API_KEY}`
let body = await got(url).json();

if( body.results.length === 1 ) {
let LatLng = {};
LatLng.lat = body.results[0].geometry.location.lat;
LatLng.lng = body.results[0].geometry.location.lng;
return LatLng;
}
}
}
return {};
}
19 changes: 19 additions & 0 deletions node/washington_unitedWay/fetchZip.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
const got = require('got');
const delay = require('delay');

module.exports = async function(street, city) {
let filteredStreet = street.replace(' ','%20');
let filteredCity = city.replace(' ', '%20');
await delay(20); // Delay needed to adhere to Google API Rate Limit of 50 RPS
let url = `https://maps.googleapis.com/maps/api/geocode/json?address=${filteredStreet},${filteredCity},WA&key=${process.env.GOOGLE_API_KEY}`
let body = await got(url).json();
let zip = '';

if( body.results.length === 1 ) {
let zipSearch = body.results[0].formatted_address.match(/WA [0-9][0-9][0-9][0-9][0-9]/gm);
if( zipSearch !== null) {
zip = zipSearch[0].match(/[0-9]/gm).join('');
}
}
return zip;
}
75 changes: 75 additions & 0 deletions node/washington_unitedWay/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
require('dotenv').config();
const cheerio = require('cheerio');
const request = require('request');
const url = 'https://www.uwkc.org/free-meals-during-school-closures/';
const fs = require('fs');

//import custom functions
const extractDays = require('./extractDays.js');
const extractTime = require('./extractTime.js');
const parseAddress = require('./parseAddress.js');
const extractZip = require('./extractZip.js');
const fetchLatLng = require('./fetchLatLng.js');


// Scrape html from url, turn every listed meal site into a record, and write
// the parsed and the unparseable entries to two JSON files.
request(url, async function(error, response, html) {
  // Without this check a failed request would hand `undefined` to cheerio.
  if (error) {
    console.error(`Failed to fetch ${url}`, error);
    process.exitCode = 1;
    return;
  }

  // Using cheerio to manipulate html using jquery like methods
  const $ = cheerio.load(html);

  const structuredResults = [];
  const unstructuredResults = [];

  // Select all locations in html
  const cities = $('.accordion_item');

  // Loop through cities
  for (let cityIndex = 0; cityIndex < cities.length; cityIndex++) {
    // Store city name
    const cityName = $(cities[cityIndex]).find('.accordion_item-heading').text();

    // Get multiple locations per city
    const locations = $(cities[cityIndex]).find('p');

    // Loop through each location (own index name; the original shadowed `i`)
    for (let locationIndex = 0; locationIndex < locations.length; locationIndex++) {
      const locationText = $(locations[locationIndex]).text().split('\n');

      // Entries with a line that is exactly 'TBD' are skipped entirely
      if (locationText.includes('TBD')) continue;

      if (locationText.length === 3) {
        // Expected shape: [site name, address, schedule]
        const locationData = {};
        locationData.siteName = locationText[0];
        locationData.siteStatus = 'Open';
        locationData.siteState = 'WA';
        locationData.siteAddress = await parseAddress(locationText[1], cityName);
        locationData.siteZip = extractZip(locationData.siteAddress);
        locationData.daysofOperation = extractDays(locationText[2]);
        locationData.lunchTime = extractTime(locationText[2]);
        locationData._geoloc = await fetchLatLng(locationText[1], cityName);

        // Print Result to show progress while running script
        console.log(locationData);

        // Push location data to results array
        structuredResults.push(locationData);
      } else {
        // Anything that is not exactly three lines is kept verbatim for manual review
        const rawEntry = locationText.toString();

        // Print Result to show progress while running script
        console.log(rawEntry);

        unstructuredResults.push(rawEntry);
      }
    }
  }

  // Save structured Results to file
  fs.writeFile('structuredResults.json', JSON.stringify(structuredResults), function(err) {
    if (err) throw err;
    console.log("Structured Results Saved");
  });

  // Save unstructured Results to file
  fs.writeFile('unstructuredResults.json', JSON.stringify(unstructuredResults), function(err) {
    if (err) throw err;
    console.log("Unstructured Results Saved");
  });
});
17 changes: 17 additions & 0 deletions node/washington_unitedWay/parseAddress.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
const fetchZip = require('./fetchZip.js');
const parser = require('parse-address'); // US Address parser

module.exports = async function(rawAddressString, cityName) {
let parsedAddress = parser.parseLocation(rawAddressString)
let finalAddressString;
if( parsedAddress !== null ) {
if( parsedAddress.zip === undefined && parsedAddress.street !== undefined && parsedAddress.number !== undefined) {
parsedAddress.zip = await fetchZip( parsedAddress.street, parsedAddress.city || cityName )
}
finalAddressString = `${parsedAddress.number} ${parsedAddress.street} ${parsedAddress.type}, ${parsedAddress.city || cityName}, WA ${parsedAddress.zip}`
} else {
return rawAddressString;
}

return finalAddressString;
}

0 comments on commit c387a13

Please sign in to comment.