-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild.js
61 lines (61 loc) · 3.05 KB
/
build.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import fs from 'node:fs';
import { textToSearchTerms, OPTIONS_ALL } from '@cityssm/text-to-search-terms';
/**
 * Base URLs of the yearly council agenda sites, one per calendar year,
 * ordered from the newest year (2023) down to the oldest (1999).
 */
const repositoryURLs = Array.from(
  { length: 2023 - 1999 + 1 },
  (_unused, yearsBack) => `https://cityssm.github.io/council-agendas-${2023 - yearsBack}`
);
/**
 * Downloads `metadata.json` from every site listed in `repositoryURLs` and
 * flattens the entries into one list of agenda records.
 *
 * Sites are fetched one at a time, in `repositoryURLs` order, and each site's
 * entries are reversed before being appended to the result. For every entry:
 * - `author` and `title` are removed,
 * - `fullContent` is replaced by its search terms joined with single spaces,
 * - `url`, `agendaDate` and `agendaTitle` are derived from `fileName`.
 *
 * @returns {Promise<object[]>} The combined agenda records.
 * @throws {Error} When a metadata request answers with a non-2xx HTTP status.
 */
async function buildAgendaMetadata() {
  const allAgendaMetadata = [];
  for (const repositoryURL of repositoryURLs) {
    const metadataURL = `${repositoryURL}/metadata.json`;
    console.log(`Fetching ${metadataURL} ...`);
    const metadataResponse = await fetch(metadataURL);
    // fetch() only rejects on network failure; an HTTP error status still
    // resolves, so fail loudly here instead of parsing an error page as JSON.
    if (!metadataResponse.ok) {
      throw new Error(
        `Failed to fetch ${metadataURL}: HTTP ${metadataResponse.status} ${metadataResponse.statusText}`
      );
    }
    const allPdfMetadata = await metadataResponse.json();
    console.log(`- Processing ${allPdfMetadata.length} agendas.`);
    allPdfMetadata.reverse();
    for (const [pdfIndex, pdfMetadata] of allPdfMetadata.entries()) {
      console.log(` - ${pdfIndex + 1} / ${allPdfMetadata.length} - ${pdfMetadata.fileName}`);
      delete pdfMetadata.author;
      delete pdfMetadata.title;
      // Drop the last four characters (presumably the ".pdf" extension), then
      // split on runs of spaces, underscores and hyphens: the first three
      // parts form the date, everything after them is the title.
      const [year, month, day, ...titleWords] = pdfMetadata.fileName
        .slice(0, -4)
        .split(/[ _-]+/);
      const agendaDate = `${year}-${month}-${day}`;
      // An empty string (rather than undefined) when the name has no title
      // part, so the key is always present in the JSON output.
      const agendaTitle = titleWords.join(' ');
      pdfMetadata.fullContent = textToSearchTerms(pdfMetadata.fullContent ?? '', OPTIONS_ALL).join(' ');
      // Spread last so that keys already present in the fetched metadata
      // take precedence over the derived ones, as before.
      allAgendaMetadata.push({
        url: `${repositoryURL}/${pdfMetadata.fileName}`,
        agendaDate,
        agendaTitle,
        ...pdfMetadata
      });
    }
  }
  return allAgendaMetadata;
}
// Build the combined agenda list, then save it as pretty-printed JSON
// (two-space indent) to metadata.json, resolved against the working directory.
const agendaMetadata = await buildAgendaMetadata();
const agendaMetadataJSON = JSON.stringify(agendaMetadata, null, 2);
fs.writeFileSync('metadata.json', agendaMetadataJSON);