-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcollectIATA.js
134 lines (110 loc) · 4.12 KB
/
collectIATA.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
/*
Collect IATA codes from Wikipedia
- Adapter design pattern
- DOMParser tip came from Stackoverflow: https://stackoverflow.com/questions/36631762/returning-html-with-fetch
*/
/*
Imports
- node-fetch = so node can process the fetching
- node 'fs' = so the files can be written to disk
- jsdom = DOM parser for node
*/
const nFetch = require('node-fetch');
const fs = require('fs');
const jsdom = require('jsdom');
const { JSDOM } = jsdom;
/*
Constants: these will never change
*/
// The 26 uppercase letters; each one is appended to `url` to address one Wikipedia list page.
const alphabet = [...'ABCDEFGHIJKLMNOPQRSTUVWXYZ'];
// Base address of the per-letter Wikipedia pages; the letter is concatenated directly after the trailing "_".
const url = 'https://en.wikipedia.org/wiki/List_of_airports_by_IATA_code:_';
/*
Functions:
- Fetching
- Parsing
*/
// collectHTML & parseHTML - LOOP START
/**
 * Download the raw HTML of the Wikipedia IATA list page for one letter.
 *
 * @param {string} letter - Suffix appended to the base `url` to form the page address.
 * @returns {Promise<string>} The response body as text.
 * @throws {Error} When the server answers with a non-OK HTTP status, so that an
 *   error page is never handed on to the parser as if it were airport data.
 */
const collectHTML = async (letter) => {
  const target = url + letter;
  const response = await nFetch(target);
  // fetch only rejects on network failure; HTTP errors must be checked explicitly.
  if (!response.ok) {
    throw new Error(`Request for ${target} failed with HTTP status ${response.status}`);
  }
  return response.text();
};
/**
 * Extract airport records from the HTML of one IATA list page.
 *
 * Each `<tr>` is examined on its own: a record is produced only when the row's
 * 1st, 3rd and 4th child elements are all `<td>` cells. Reading the three cells
 * from the same row keeps code, airport and location aligned, and rows that
 * lack one of the cells are skipped instead of causing a TypeError or shifting
 * values onto the wrong IATA code.
 *
 * @param {string|Promise<string>} incoming - HTML markup, or a promise resolving to it.
 * @param {string} letter - Not used by the parser; kept so existing callers keep working.
 * @returns {Promise<Array<{key: string, airport: string, location: string}>>}
 *   One entry per complete table row, in document order. Values are the cells'
 *   unmodified `textContent`.
 */
const parseHTML = async (incoming, letter) => {
  // Prepare data for use
  const html = await incoming;
  const { document } = new JSDOM(html).window;

  // True when the element exists and is a data cell (header cells are ignored).
  const isDataCell = (element) => element !== undefined && element.tagName === 'TD';

  const records = [];
  for (const row of document.querySelectorAll('tr')) {
    const cells = row.children;
    const iataCell = cells[0];
    const airportCell = cells[2];
    const locationCell = cells[3];

    // Skip header rows and any row that does not carry all three cells.
    if (!isDataCell(iataCell) || !isDataCell(airportCell) || !isDataCell(locationCell)) {
      continue;
    }

    records.push({
      key: iataCell.textContent,
      airport: airportCell.textContent,
      location: locationCell.textContent,
    });
  }
  return records;
};
// collectHTML & parseHTML - LOOP END
// Make the data actually usable
/**
 * Serialise a value (or the value a promise resolves to) as a JSON string.
 *
 * @param {*} incoming - The data to serialise, or a promise of it.
 * @returns {Promise<string|undefined>} The result of `JSON.stringify` on the resolved value.
 */
const genJSON = async (incoming) => {
  return JSON.stringify(await incoming);
};
/*
Processes:
- storeIATA = not a process or function, just a storage for combined json
- collectIATA = collects and writes list of airports to disk, grouped alphabetically based on IATA
- adaptIATA = filters/regroups the collected data, writing list of airports to disk, grouped alphabetically based on Location
*/
// Accumulates the records of every processed letter; read later when regrouping by location.
let storeIATA = [];
/**
 * Fetch, parse and store the airport list for every letter, one letter at a time.
 *
 * For each letter the parsed records are added to `storeIATA` and written to
 * `db_IATA/<letter>_airports.json`. The file is overwritten rather than
 * appended to, so running the script again cannot leave two JSON arrays
 * back-to-back in one file. Each write is awaited, so the returned promise
 * settles only after all files are on disk and a write failure rejects it
 * instead of being thrown from a detached callback.
 *
 * @param {string[]} alphabet - Letters to process, in order.
 * @returns {Promise<void>}
 * @throws Rejects with the underlying error if fetching, parsing or writing fails.
 */
const collectIATA = async (alphabet) => {
  for (const letter of alphabet) {
    console.log('collectIATA status: ', `Collection started of ${letter}`);
    // Collect & parse IATA based on alphabet letter
    const html = await collectHTML(letter);
    const json = await parseHTML(html, letter);
    // Expand storeIATA array for use by adaptIATA
    storeIATA = [...storeIATA, ...json];
    // Create usable IATA-grouped JSON
    const iataGrouped = await genJSON(json);
    // File-writing: IATA-grouped
    const filePath = `db_IATA/${letter}_airports.json`;
    await fs.promises.writeFile(filePath, iataGrouped);
    console.log('File saved = ', filePath);
  }
  console.log('collectIATA -> storage length is :', storeIATA.length);
};
/**
 * Regroup the collected records by the first letter of their location and
 * write one file per letter to `db_LOCATION/<letter>_airports.json`.
 *
 * Files are overwritten rather than appended to, so a repeated run cannot
 * produce a file holding two concatenated JSON arrays. Each write is awaited,
 * so the returned promise settles only once every file has been written and a
 * write failure rejects it instead of being thrown from a detached callback.
 *
 * @param {string[]} alphabet - Letters to group by, in order.
 * @param {Array<{location: string}>} storage - Records to regroup; a record is
 *   placed in a letter's file when its `location` starts with exactly that letter.
 * @returns {Promise<void>}
 * @throws Rejects with the underlying error if serialising or writing fails.
 */
const adaptIATA = async (alphabet, storage) => {
  console.log('adaptIATA status: ', 'IATA collection reached end, now split into location groups...');
  for (const letter of alphabet) {
    // Filter combined list and group alphabetically by location name
    const jsonFiltered = storage.filter((obj) => obj.location.startsWith(letter));
    const locationGrouped = await genJSON(jsonFiltered);
    // File-writing: Location-grouped
    const filePath = `db_LOCATION/${letter}_airports.json`;
    await fs.promises.writeFile(filePath, locationGrouped);
    console.log('File saved = ', filePath);
  }
};
/*
INIT
- First collect all IATA codes, then sort them into separate files
- Total of 9012 IATA codes
*/
/**
 * Run the whole pipeline: collect every letter first, then regroup by location.
 *
 * Both stages are awaited so the returned promise reflects the outcome of the
 * entire run rather than resolving before the second stage has been started.
 *
 * @returns {Promise<void>}
 */
const startProcess = async () => {
  await collectIATA(alphabet);
  await adaptIATA(alphabet, storeIATA);
};
// Report a failure once and signal it through the exit code instead of leaving an unhandled rejection.
startProcess().catch((err) => {
  console.error('IATA collection failed: ', err);
  process.exitCode = 1;
});