-
Notifications
You must be signed in to change notification settings - Fork 0
Cleaning Data
In order to properly clean up the datasets from Information Design using functional programming, I've decided to split up my most used 'cleanup functions' into a separate file called utils.js.
Here's how the data comes into the application:
{
"amountcumulative": "0",
"amountfarepart": "0.02",
"areamanagerid": "512",
"enddatefarepart": "29991231",
"enddurationfarepart": "999999",
"farecalculationcode": "TAR04",
"startdatefarepart": "20150101",
"startdurationfarepart": "0",
"stepsizefarepart": "1"
}
And here's how that same data looks after it has been processed.
{
area: "Weert",
areaId: "988_BEEKP",
areaManagerId: 988,
chargingPointCapacity: 0,
description: "Parkeergarage Poort van Limburg (Weert)",
hourlyCost: 0,
location: {
latitude: 51.256752992,
longitude: 5.702710045,
humanReadableAdress: {…}
},
parkingCapacity: 76,
wheelchairAccessible: false,
}
Replace all occurrences of a substring (replace) within a string (string) with another value (replaceBy)
/**
 * Replaces every occurrence of `replace` within `string` by `replaceBy`.
 *
 * A string `replace` is matched literally: regular-expression metacharacters
 * such as `.`, `+` or `(` are escaped before the pattern is built, so they can
 * neither match unrelated text nor make the RegExp constructor throw.
 * A RegExp `replace` is used as the pattern, with the global flag applied.
 *
 * @param {string} string - The text to search in.
 * @param {string|RegExp} replace - The substring (or pattern) to look for.
 * @param {string} replaceBy - The value every match is replaced with.
 * @returns {string} A new string with all matches replaced.
 */
const replaceAll = (string, replace, replaceBy) => {
  const pattern =
    replace instanceof RegExp
      ? replace
      : String(replace).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  return string.replace(new RegExp(pattern, 'g'), replaceBy);
};
// Replace all occurrences of multiple values within an array (replaceArray) within a string (string) with another value (replaceBy)
/**
 * Replaces every occurrence of each value in `replaceArray` within `string`
 * by `replaceBy`.
 *
 * The values are handled in array order; each replacement runs on the result
 * of the previous one, and the final string is returned.
 *
 * @param {string} string - The text to search in.
 * @param {Array} replaceArray - The values to look for; each one is handed to `replaceAll`.
 * @param {string} replaceBy - The value every match is replaced with.
 * @returns {string} The string after all replacements; `string` itself when the array is empty.
 */
const replaceByArray = (string, replaceArray, replaceBy) =>
  replaceArray.reduce(
    (newString, item) => replaceAll(newString, item, replaceBy),
    string
  );
// Capitalizes the first letter of a string
/**
 * Capitalizes the first letter of a string.
 *
 * The first character is taken as a whole code point, so a letter stored as a
 * surrogate pair is upper-cased as well instead of being split in half.
 *
 * @param {string} string - The text to capitalize.
 * @returns {string} The text with its first character upper-cased; an empty
 *   string stays empty.
 */
const capitalizeFirst = (string) => {
  // Iterating a string yields code points; the default covers the empty string.
  const [first = ''] = string;
  return first.toUpperCase() + string.slice(first.length);
};
// Removes leading and trailing whitespace from object properties.
/**
 * Removes leading and trailing whitespace from one property of every object
 * in `data`.
 *
 * Each result is a shallow copy of the input object, so all other properties
 * are kept and the input objects are not modified. Objects whose `property`
 * is not a string are copied unchanged.
 *
 * @param {Object[]} data - The objects to clean.
 * @param {string} property - Name of the property to trim.
 * @returns {Object[]} A new array of copied objects with `property` trimmed.
 */
export function removeWhitespace(data, property) {
  return data.map((item) => {
    const value = item[property];
    if (typeof value !== 'string') {
      return { ...item };
    }
    return { ...item, [property]: value.trim() };
  });
}
// Sorts an array of objects alphabetically or by property
/**
 * Sorts an array alphabetically, or sorts an array of objects by one property.
 *
 * The array is sorted in place and the same array is returned.
 *
 * @param {Array} array - The array to sort.
 * @param {string} [property] - Property to sort the objects by. When omitted,
 *   the default sort order of `Array.prototype.sort` is used.
 * @returns {Array} The sorted input array.
 */
export function sortBy(array, property) {
  if (!property) {
    return array.sort();
  }
  return array.sort((a, z) => {
    // Numeric values (and numeric strings) are ordered by their difference.
    const difference = a[property] - z[property];
    if (!Number.isNaN(difference)) {
      return difference;
    }
    // Subtraction yields NaN for text, so compare those values as strings.
    return String(a[property]).localeCompare(String(z[property]));
  });
}
// Here's an example of how I cleaned my actual data.
import { getData } from '/src/modules/utils';
// Constants
import {
TARIEFDEEL,
SPECIFICATIES_PARKEERGEBIED,
GEO_PARKEERGARAGES,
} from './constants';
/**
 * Loads the three datasets through `getData`, merges them into one record per
 * geo entry and adds cleaned, typed fields to every record.
 *
 * Merging happens in two steps: specs are matched on `areaid`, then the fare
 * record is matched on `areamanagerid`. When several records share a key, the
 * first one in the dataset is used. The raw keys of the merged record are
 * carried over alongside the cleaned ones.
 *
 * @returns {Promise<Object[]>} One cleaned record per entry of the geo dataset.
 */
export default async function cleanData() {
  // Destructure data after all three promises have been resolved
  const [tariefData, geoData, specsData] = await getData([
    TARIEFDEEL,
    GEO_PARKEERGARAGES,
    SPECIFICATIES_PARKEERGEBIED,
  ]);

  // Index a dataset by key once, instead of scanning it for every geo entry.
  // The first record per key wins, which matches what Array.prototype.find returns.
  const indexBy = (records, key) => {
    const index = new Map();
    for (const record of records) {
      if (!index.has(record[key])) {
        index.set(record[key], record);
      }
    }
    return index;
  };
  const specsByAreaId = indexBy(specsData, 'areaid');
  const tariefByAreaManagerId = indexBy(tariefData, 'areamanagerid');

  // human_address can be JSON text or an already parsed object; only text is parsed.
  const parseHumanAddress = (address) => {
    if (typeof address === 'string') {
      return JSON.parse(address || '{}');
    }
    return address || {};
  };

  const mergedData = geoData
    // Merge specs and geo based on AreaId
    .map((location) => ({
      ...location,
      ...specsByAreaId.get(location.areaid),
    }))
    // Merge tarief, specs and geo based on AreaManagerId
    .map((entry) => ({
      ...entry,
      ...tariefByAreaManagerId.get(entry.areamanagerid),
    }))
    .map((entry) => ({
      ...entry,
      wheelchairAccessible: Boolean(+entry.disabledaccess),
      parkingCapacity: +entry.capacity,
      chargingPointCapacity: +entry.chargingpointcapacity,
      // Cost of parking for one hour.
      // NOTE(review): presumably stepsizefarepart is in minutes — confirm.
      // The result is NaN when no fare record matched the entry.
      hourlyCost: (entry.amountfarepart / entry.stepsizefarepart) * 60,
      // The area ID of the parking zone.
      areaManagerId: +entry.areamanagerid,
      areaId: entry.areaid,
      description: entry.areadesc,
      location: {
        latitude: +entry.location.latitude,
        longitude: +entry.location.longitude,
        humanReadableAdress: parseHumanAddress(entry.location.human_address),
      },
    }));

  // Return ✨utterly pristine✨ data
  return mergedData;
}
// The above code merges and cleans up data from three different datasets. Say the returned data from the first dataset looks like this:
{
areadesc: "Parkeergarage Stationsplein (Weert)",
areaid: "988_STAT",
areamanagerid: "988",
enddataarea: "29991231",
location: {
latitude: "51.249263663",
longitude: "5.705462804",
human_address: {
address: "",
city: "",
state: "",
zip: ""
}
},
startdataarea: "20141027",
usageid: "GARAGEP",
}
The first .map() will merge this data with the data from the next dataset, resulting in this large object:
{
areadesc: "Parkeergarage Stationsplein (Weert)",
areaid: "988_STAT",
areamanagerid: "988",
capacity: "97",
chargingpointcapacity: "0",
disabledaccess: "0",
enddataarea: "29991231",
limitedaccess: "N",
location: {
latitude: "51.249263663",
longitude: "5.705462804",
human_address: '{"address": "", "city": "", "state": "", "zip": ""}'
},
maximumvehicleheight: "210",
startdataarea: "20141027",
startdatespecifications: "20191018143131",
usageid: "GARAGEP"
}
As you can see, all numeric values are represented in quotes — meaning they are strings. This is dirty behaviour, and is something I want to clean up before I get to using the data.
Additionally, I want to remove keys in the data I won't be using, such as startdataarea.
After the third .map() on the dataset, the data should look like this.
{
areaId: "988_STAT",
areaManagerId: 988,
chargingPointCapacity: 0,
description: "Parkeergarage Stationsplein (Weert)",
hourlyCost: 1.30,
location: {
latitude: 51.249263663,
longitude: 5.705462804,
},
parkingCapacity: 97,
wheelchairAccessible: false
}
Et voilà, ✨utterly pristine✨ data!
Content in this wiki is subject to change. External sources and imagery used throughout this wiki adhere to their own copyright rules. Do contact me if something doesn't seem right.