forked from linhmtran168/vietnam-gis-crawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.js
110 lines (88 loc) · 2.47 KB
/
main.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
// Script dependencies plus the CasperJS instance that drives the crawl.
var util = require('utils'),
fs = require('fs'),
_ = require('lodash'),
casper = require('casper').create({
// Verbose output at 'debug' level so the crawl can be followed on the console.
verbose: true,
logLevel: 'debug',
// Very large timeout for the waitFor* steps; presumably milliseconds per the
// CasperJS options convention -- TODO confirm against the CasperJS docs.
waitTimeout: 10000000
});
// Scraped province records. Starts empty and is replaced with the result of
// evaluating getData in the page once the tree has been expanded.
var data = [];
// Get data from the dom
/**
 * Read the province/district tree out of the page DOM.
 *
 * This function is handed to casper.evaluate, so it executes inside the page
 * and depends on the page-level `$` (jQuery) and `_` (lodash) globals. Only
 * ES5 syntax is used because of that.
 *
 * Ids are taken from the `id` attribute of each node's fourth child element,
 * with the first five characters dropped.
 *
 * @returns {Array} One record per '#dtree1>.dTreeNode' element, shaped as
 *   { id: Number, name: String, districts: [{ id: String, name: String }] }.
 */
var getData = function() {
  // Build the { id, name } record for a single district node.
  function readDistrict(districtNode) {
    var districtId = $(districtNode).children()[3].id.slice(5);
    var label = $(districtNode).text();

    console.log('------------- ' + label);
    return { id: districtId, name: label };
  }

  // Build a province record together with its nested district records.
  function readProvince(provinceNode) {
    var label = $(provinceNode).text();
    var subtreeId = $(provinceNode).children()[3].id.slice(5);
    // The subtraction also coerces the id from a string to a number.
    var province = { id: subtreeId - 1, name: label };
    // The districts live in a sibling container named after the subtree id.
    var districtNodes = $('#dtree' + subtreeId).children();

    console.log('Province: ' + label);
    province.districts = _.map(districtNodes, readDistrict);
    return province;
  }

  return _.map($('#dtree1>.dTreeNode'), readProvince);
};
// Remove unnecessary TTTP text
/**
 * Drop the first district entry from each of the first five province records
 * in the module-level `data` array (the extra 'TTTP' entry those cities carry).
 *
 * Mutates `data` in place and returns nothing. The loop is clamped to the
 * number of records actually present, and records without a districts array
 * are skipped, so an incomplete or empty scrape no longer raises a TypeError.
 */
var removeTTTP = function() {
  var CITY_COUNT = 5;
  var limit, i, districts;

  // Nothing to trim if the scrape produced no usable result.
  if (!data) {
    return;
  }

  limit = Math.min(CITY_COUNT, data.length);
  for (i = 0; i < limit; i++) {
    districts = data[i] && data[i].districts;
    if (Array.isArray(districts)) {
      districts.splice(0, 1);
    }
  }
};
// Create the hash object structure
/**
 * Convert the module-level `data` array into a nested lookup object.
 *
 * @returns {Object} Map keyed by province id; each value is
 *   { name: <province name>, districts: { <district id>: <district name> } }.
 */
var createHash = function() {
  var provincesById = {};

  _.each(data, function(province) {
    var districtNames = {};

    // Collect this province's districts as an id -> name map first.
    _.each(province.districts, function(district) {
      districtNames[district.id] = district.name;
    });

    provincesById[province.id] = {
      name: province.name,
      districts: districtNames
    };
  });

  return provincesById;
};
// Crawl steps, registered in the order they should run.
// Open the site whose tree is scraped.
casper.start('http://gis.chinhphu.vn/');
// Relay messages coming from the page (such as the console.log calls made by
// getData) to the CasperJS output.
casper.on('remote.message', function(msg) {
this.echo('remote message caught: ' + msg);
});
// Wait until tree nodes are present in the page, then inject jQuery and
// lodash so that getData can use `$` and `_` inside the page.
casper.waitForSelector('.dTreeNode', function() {
this.page.injectJs('node_modules/jquery/dist/jquery.min.js');
this.page.injectJs('node_modules/lodash/lodash.js');
});
casper.then(function() {
var i;
// After each click the tree creates a new node, so there are 126 nodes in total.
// The original nodes are the odd-numbered ones, hence the step of 2.
for (i = 1; i < 126; i += 2) {
// Click the second link of the node; presumably this expands it -- confirm against the site.
this.click('#dtree1 .dTreeNode:nth-child(' + i + ') a:nth-child(2)');
}
});
// Wait for the last node to load, then read the data from those nodes
casper.waitForSelector('#dtree64', function() {
// getData runs inside the page; its return value replaces the module-level `data`.
data = this.evaluate(getData);
// Remove the additional 'TTTP' districts for 5 cities
removeTTTP();
});
// Write to file
casper.then(function() {
// Dump the data to JSON: the hash built by createHash goes into data.json
var dataStr = JSON.stringify(createHash());
fs.write('data.json', dataStr);
});
// Start the queued steps; once they finish, report the province count and quit.
casper.run(function() {
this.echo(data.length + ' provinces found:');
this.exit();
});