This repository has been archived by the owner on Sep 20, 2024. It is now read-only.
forked from GovTechSG/purple-a11y
-
Notifications
You must be signed in to change notification settings - Fork 5
/
combine.js
117 lines (103 loc) · 3.43 KB
/
combine.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
const csv = require('csv-parser');
const {
crawlSitemap
} = require('./crawlers/crawlSitemap');
const {
crawlDomain
} = require('./crawlers/crawlDomain');
const {
mergeFiles,
storagePath
} = require('./mergeAxeResults');
const {
getHostnameFromRegex,
createAndUpdateFolders,
} = require('./utils');
const {
a11yStorage
} = require('./constants/constants');
// Environment configuration applied at module load time.
// NOTE(review): the APIFY_* names suggest these are read by the Apify SDK
// used by the crawlers — confirm against the SDK version in use.
process.env.APIFY_LOCAL_STORAGE_DIR = a11yStorage;
// process.env only stores strings; assign '1' explicitly instead of relying
// on the implicit (deprecated) coercion of the number 1.
process.env.APIFY_HEADLESS = '1';
exports.combineRun = async (details, storagePath) => {
let envDetails = {
...details
};
if (typeof details === 'undefined') {
envDetails = {
type: process.env.TYPE,
url: process.env.URL,
randomToken: process.env.RANDOMTOKEN,
wappalyzer: process.env.WAPPALYZER,
email: process.env.EMAIL,
excludeExt: process.env.EXCLUDEEXT,
excludeMore: process.env.EXCLUDEMORE,
excludeQuery: process.env.EXCLUDEQUERY,
number: process.env.NUMBER,
};
}
const {
type,
url,
randomToken,
wappalyzer,
email,
excludeExt,
excludeMore,
excludeQuery,
number
} = envDetails;
const host = getHostnameFromRegex(url);
const scanDetails = {
startTime: new Date().getTime(),
crawlType: type,
requestUrl: url,
};
var domainURL = scanDetails.requestUrl;
var startTime = scanDetails.startTime;
var wappalyzer_json = wappalyzer;
// var email = email; // Placeholder for when you can send via email.
var maxRequestsPerCrawl = number; // Needs to be passed along.
// Highlight if strings or extensions are being excluded
const excludeExtArr = excludeExt.substring(1).split('.');
const excludeMoreArr = excludeMore.split(',');
if ((excludeExtArr[0] !== '') || (excludeMoreArr[0] !== '') || (excludeQuery == 1)) console.log("Exclude: ");
if (excludeExtArr[0] !== '') console.log(excludeExtArr);
if (excludeMoreArr[0] !== '') console.log(excludeMoreArr);
if (excludeQuery == 1) console.log("Exclude queries: ");
/* I couldn't override the constant.js and avoid a "ApifyClientError: Parameter "options.maxRequestsPerCrawl" of type Maybe Number" error
var maxRequestsPerCrawl = 0;
if (number > absoluteMaxRequestsPerCrawl ) {
maxRequestsPerCrawl=absoluteMaxRequestsPerCrawl;
} else {
maxRequestsPerCrawl=number;
}
exports.maxRequestsPerCrawl;
*/
let urlsCrawled;
switch (type) {
case 'crawlSitemap':
urlsCrawled = await crawlSitemap(url, randomToken, host);
break;
case 'crawlDomain':
urlsCrawled = await crawlDomain(url, randomToken, host, excludeExtArr, excludeMoreArr, excludeQuery, storagePath);
break;
default:
break;
}
scanDetails.urlsCrawled = urlsCrawled;
scanDetails.endTime = new Date().getTime();
var endTime = scanDetails.endTime;
var totalTimeSeconds = Math.round((endTime - startTime) / 1000);
var hours = (totalTimeSeconds / 3600);
var rhours = Math.floor(hours);
var minutes = (hours - rhours) * 60;
var rminutes = Math.round(minutes);
var seconds = (minutes - rminutes) * 60;
var rseconds = Math.abs(Math.round(seconds));
var speedExact = (urlsCrawled.scanned.length / totalTimeSeconds);
var speed = speedExact.toPrecision(2);
var totalTime = `${rhours} h ${rminutes} m ${rseconds} s `;
var countURLsCrawled = urlsCrawled['scanned'].length;
await createAndUpdateFolders(scanDetails, randomToken);
await mergeFiles(randomToken, domainURL, wappalyzer_json, startTime, endTime, speed, totalTime, countURLsCrawled);
};