-
Notifications
You must be signed in to change notification settings - Fork 1
/
indexer.js
64 lines (52 loc) · 1.67 KB
/
indexer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
const { readdir, readFile, writeFile } = require('fs').promises;
const { relative, resolve } = require('path');
const { argv, exit } = require('process');
const MiniSearch = require('minisearch');
/**
 * Walk a directory tree and lazily yield every `.html` file found in it.
 *
 * Sub-directories are descended into recursively; entries whose name does
 * not end in `.html` are ignored.
 *
 * @param {string} dir - Directory to scan.
 * @yields {string} Absolute path of each `.html` file, as built by `resolve`.
 */
async function* getHtmlFiles(dir) {
  const entries = await readdir(dir, { withFileTypes: true });
  for (const entry of entries) {
    const fullPath = resolve(dir, entry.name);

    if (entry.isDirectory()) {
      // Delegate to a nested generator so nested results stream through.
      yield* getHtmlFiles(fullPath);
      continue;
    }

    if (!entry.name.endsWith('.html')) {
      continue;
    }

    yield fullPath;
  }
}
// Validate the command line. `argv` holds the node executable and the script
// path first, so exactly two user-supplied arguments give a length of 4.
if (argv.length !== 4) {
  // Usage errors go to stderr so they are never mistaken for regular output.
  console.error("Required parameters: directory to index, and index file");
  exit(1);
}

// Directory that is scanned for .html files.
const input_path = argv[2];
// File the serialized search index is written to.
const index_file = argv[3];
// One record per indexed page; populated by the indexing routine below.
const documents = [];
/**
 * Build the search index and write it to `index_file`.
 *
 * For every .html file under `input_path` the text inside the
 * `<article class="markdown-section" id="main">` element is extracted, its
 * tags are stripped, and a record is appended to `documents`. The collected
 * records are then indexed with MiniSearch (`text` is the indexed field,
 * `url` and `snip` are the stored fields) and the index is serialized as JSON.
 *
 * @returns {Promise<void>} Resolves once the index file has been written.
 */
async function buildIndex() {
  const articleOpen = '<article class="markdown-section" id="main">';
  const articleClose = '</article>';
  const indexPage = 'index.html';

  for await (const filename of getHtmlFiles(input_path)) {
    const content = await readFile(filename, 'utf8');

    // We just want the chunk of content that lives inside the main article.
    const [, afterOpen] = content.split(articleOpen);
    if (afterOpen === undefined) {
      // Pages without the article marker have nothing to index; skip them
      // instead of crashing on an undefined slice.
      console.warn(`Skipping ${filename}: no main article section found`);
      continue;
    }
    const [articleHtml] = afterOpen.split(articleClose);
    const body = articleHtml.replace(/<[^>]*>?/gm, '').trim();

    // Figure out the URL: directory index pages are addressed by their
    // directory. Matching on "/index.html" (rather than just "index.html")
    // keeps names such as "myindex.html" intact.
    const relativePath = relative(input_path, filename);
    let url = `/${relativePath}`;
    if (url.endsWith(`/${indexPage}`)) {
      url = url.slice(0, -indexPage.length);
    }

    documents.push({
      id: relativePath,
      text: body,
      url,
      snip: body,
    });
  }

  const miniSearch = new MiniSearch({
    fields: ['text'],
    storeFields: ['url', 'snip'],
  });
  await miniSearch.addAllAsync(documents);
  await writeFile(index_file, JSON.stringify(miniSearch.toJSON()));
}

// Never leave the promise floating: report any failure and signal it through
// the exit status.
buildIndex().catch((err) => {
  console.error(err);
  exit(1);
});