Skip to content

Commit 94385ad

Browse files
committed
wip: import source
1 parent 102fc42 commit 94385ad

File tree

22 files changed

+1330
-0
lines changed

22 files changed

+1330
-0
lines changed

packages/lambda-analytic-cloudfront/CHANGELOG.md

Whitespace-only changes.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# @basemaps/lambda-analytic-cloudfront
2+
3+
Generate analytics from CloudFront distribution statistics
4+
5+
Every hour this lambda function runs and generates a rolled up summary of usage by API Key
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
{
2+
"name": "@basemaps/lambda-analytic-cloudfront",
3+
"version": "7.11.0",
4+
"private": true,
5+
"repository": {
6+
"type": "git",
7+
"url": "https://github.com/linz/basemaps.git",
8+
"directory": "packages/lambda-analytic-cloudfront"
9+
},
10+
"author": {
11+
"name": "Land Information New Zealand",
12+
"url": "https://linz.govt.nz",
13+
"organization": true
14+
},
15+
"type": "module",
16+
"engines": {
17+
"node": ">=16.0.0"
18+
},
19+
"license": "MIT",
20+
"dependencies": {
21+
"@basemaps/config": "^7.11.0",
22+
"@basemaps/geo": "^7.11.0",
23+
"@basemaps/shared": "^7.11.0",
24+
"ua-parser-js": "^1.0.39"
25+
},
26+
"scripts": {
27+
"test": "node --test",
28+
"bundle": "../../scripts/bundle.mjs package.json"
29+
},
30+
"devDependencies": {
31+
"@elastic/elasticsearch": "^8.16.2",
32+
"@types/ua-parser-js": "^0.7.36"
33+
},
34+
"bundle": {
35+
"entry": "src/index.ts",
36+
"outdir": "dist/",
37+
"external": [
38+
"pino-pretty"
39+
]
40+
}
41+
}
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
import assert from 'node:assert';
2+
import { beforeEach, describe, it, TestContext } from 'node:test';
3+
import { gzipSync } from 'node:zlib';
4+
5+
import { Env, fsa, FsMemory } from '@basemaps/shared';
6+
import { Client } from '@elastic/elasticsearch';
7+
8+
import { getYesterday } from '../date.js';
9+
import { Elastic } from '../elastic.js';
10+
import { handler } from '../handler.js';
11+
import { LogStats } from '../log.stats.js';
12+
import { LogData } from './log.data.js';
13+
14+
interface IndexOperation {
15+
index: { _index: string };
16+
}
17+
type BulkOperation = (IndexOperation | LogStats)[];
18+
19+
describe('analytic lambda', () => {
20+
const memory = new FsMemory();
21+
beforeEach(() => {
22+
fsa.register('mem://', memory);
23+
memory.files.clear();
24+
25+
Elastic.indexDelay = 1;
26+
Elastic._client = undefined;
27+
});
28+
29+
function setupEnv(t: TestContext): void {
30+
t.mock.method(Env, 'get', (key: string): string => {
31+
switch (key) {
32+
case Env.Analytics.CacheBucket:
33+
return 'mem://cache/';
34+
case Env.Analytics.CloudFrontSourceBucket:
35+
return 'mem://source/';
36+
case Env.Analytics.CloudFrontId:
37+
return 'cfid';
38+
case Env.Analytics.MaxRecords:
39+
return '30';
40+
}
41+
throw new Error(`Invalid test process.env access ${key}`);
42+
});
43+
}
44+
45+
it('should process some log data', async (t) => {
46+
setupEnv(t);
47+
48+
const operations: BulkOperation[] = [];
49+
Elastic._client = {
50+
bulk(op: { operations: BulkOperation }) {
51+
operations.push(op.operations);
52+
return Promise.resolve({});
53+
},
54+
} as unknown as Client;
55+
56+
const YesterDay = getYesterday();
57+
const shortDate = YesterDay.toISOString().slice(0, 13).replace('T', '-');
58+
59+
await fsa.write(new URL(`mem://source/cfid.${shortDate}/data.txt.gz`), gzipSync(LogData));
60+
61+
await handler();
62+
63+
// One call to insert
64+
assert.equal(operations.length, 1);
65+
66+
const op = operations[0];
67+
68+
const indexOpt = op[0] as IndexOperation;
69+
const logOpt = op[1] as LogStats;
70+
71+
// First Log line: /v1/tiles/aerial/EPSG:3857/19/516588/320039.webp
72+
assert.equal(indexOpt.index._index, 'basemaps-history-2020');
73+
assert.equal(logOpt.apiType, 'd');
74+
assert.equal(logOpt.tileMatrix, 'EPSG:3857');
75+
assert.equal(logOpt.tileMatrixId, 'WebMercatorQuad');
76+
assert.equal(logOpt.tileSet, 'aerial');
77+
assert.equal(logOpt.z, 19);
78+
assert.equal(logOpt.cacheHit, 1);
79+
assert.equal(logOpt.cacheMiss, 0);
80+
assert.equal(logOpt.total, 1);
81+
82+
assert.deepEqual(logOpt.ua, { os: 'linux', name: 'chrome', version: '85', variant: 'unknown' });
83+
84+
const files = [...memory.files.keys()];
85+
assert.equal(files.length, 2); // two files one input one output
86+
87+
assert.equal(
88+
files[1],
89+
`mem://cache/RollUpV3/${shortDate.slice(0, 4)}/${shortDate.slice(5, 7)}/${shortDate}.ndjson.gz`,
90+
);
91+
});
92+
93+
it('should write errors to storage', async (t) => {
94+
setupEnv(t);
95+
96+
Elastic._client = {
97+
bulk() {
98+
return Promise.resolve({ errors: ['Hello'] });
99+
},
100+
} as unknown as Client;
101+
102+
const YesterDay = getYesterday();
103+
const shortDate = YesterDay.toISOString().slice(0, 13).replace('T', '-');
104+
105+
await fsa.write(new URL(`mem://source/cfid.${shortDate}/data.txt.gz`), gzipSync(LogData));
106+
107+
const ret = await handler().catch((e: Error) => e);
108+
109+
assert.equal(String(ret), 'Error: Failed to index');
110+
111+
const files = [...memory.files.keys()];
112+
assert.equal(files.length, 2); // two files one input one output
113+
114+
assert.ok(
115+
files[1].startsWith(`mem://cache/errors-${new Date().toISOString().slice(0, 12)}`),
116+
// ${shortDate.slice(0, 4)}/${shortDate.slice(5, 7)}/${shortDate}.ndjson.gz`,
117+
);
118+
});
119+
});
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import { ulid } from 'ulid';
2+
3+
export const DevApiKey = 'd' + ulid().toLowerCase();
4+
export const ClientApiKey = 'c' + ulid().toLowerCase();
5+
6+
export const LogData = `#Version: 1.0
7+
#Fields: date time x-edge-location sc-bytes c-ip cs-method cs(Host) cs-uri-stem sc-status cs(Referer) cs(User-Agent) cs-uri-query cs(Cookie) x-edge-result-type x-edge-request-id x-host-header cs-protocol cs-bytes time-taken x-forwarded-for ssl-protocol ssl-cipher x-edge-response-result-type cs-protocol-version fle-status fle-encrypted-fields c-port time-to-first-byte x-edge-detailed-result-type sc-content-type sc-content-len sc-range-start sc-range-end
8+
2020-07-28 01:11:25 AKL50-C1 20753 255.255.255.141 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/aerial/EPSG:3857/19/516588/320039.webp 200 https://bar.com/ Mozilla/5.0%20(X11;%20Linux%20x86_64)%20AppleWebKit/537.36%20(KHTML,%20like%20Gecko)%20Chrome/85.0.4183.101%20Safari/537.36 api=${DevApiKey} - Hit sBUoz03SwR_hVZkdj0LVC1s_bKakd9ONcKTYRrQLuIR3VPBQUx5xog== basemaps.linz.govt.nz https 82 0.049 - TLSv1.3 TLS_AES_128_GCM_SHA256 Hit HTTP/2.0 -- 21780 0.049 Hit image/webp 20320 - -
9+
2020-07-28 01:16:13 SYD1-C2 156474 255.255.255.4 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/aerial/NZTM2000Quad/19/516542/319785.png 200 https://www.bar.com/ Mozilla/5.0%20(Macintosh;%20Intel%20Mac%20OS%20X%2010_15_4)%20AppleWebKit/605.1.15%20(KHTML,%20like%20Gecko)%20Version/13.1.2%20Safari/605.1.15 api=${DevApiKey}&foo=bar - Hit 9KNnEESjZA-yVs62ffwtRYNaa0gpYKLeEEHH490dmO7AAu3ZxnPc8Q== basemaps.linz.govt.nz https 77 1.791 - TLSv1.3 TLS_AES_128_GCM_SHA256 Hit HTTP/2.0 - - 19468 0.028 Hit image/png 155886 - -
10+
2020-07-28 01:16:21 SYD1-C2 21223 255.255.255.73 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/topo50/3857/18/257866/162011.jpeg 200 https://bar.com/map/ Mozilla/5.0%20(Windows%20NT%2010.0;%20Win64;%20x64)%20AppleWebKit/537.36%20(KHTML,%20like%20Gecko)%20Chrome/85.0.4183.102%20Safari/537.36 api=${DevApiKey} - Miss a5nrTCsdsP5EDQ9EXkUQQJMCJTlbRUz5JIxowZ-1kRriRDUmLPxvVQ== basemaps.linz.govt.nz https 76 0.222 - TLSv1.3 TLS_AES_128_GCM_SHA256 Miss HTTP/2.0 - - 57799 0.222 Miss image/jpeg 20797 - -
11+
2020-07-28 01:13:33 SYD4-C2 2588 255.255.255.128 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/topo50/EPSG:3857/WMTSCapabilities.xml 200 - Mozilla/5.0%20QGIS/31006 api=${ClientApiKey} - RefreshHit oflBr-vO5caoVpi2S23hGh9YWMUca-McU_Fl5oN9fqW_H9ea_iS-Kg== basemaps.linz.govt.nz https 243 0.051 - TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 RefreshHit HTTP/1.1 - - 55515 0.050 RefreshHit text/xml -
12+
2020-07-28 01:13:33 SYD4-C2 2588 255.255.255.128 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/topo50/EPSG:2193/18/257866/162011.pbf 200 - Mozilla/5.0%20QGIS/31006 api=${ClientApiKey} - RefreshHit oflBr-vO5caoVpi2S23hGh9YWMUca-McU_Fl5oN9fqW_H9ea_iS-Kg== basemaps.linz.govt.nz https 243 0.051 - TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 RefreshHit HTTP/1.1 - - 55515 0.050 RefreshHit text/xml -
13+
2020-07-28 01:13:33 SYD4-C2 2588 255.255.255.128 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/antipodes-islands-satellite-2019-2020-0.5m/NZTM2000Quad/18/257866/162011.webp 200 - Mozilla/5.0%20QGIS/31006 api=${ClientApiKey} - RefreshHit oflBr-vO5caoVpi2S23hGh9YWMUca-McU_Fl5oN9fqW_H9ea_iS-Kg== basemaps.linz.govt.nz https 243 0.051 - TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 RefreshHit HTTP/1.1 - - 55515 0.050 RefreshHit text/xml -
14+
2020-07-28 01:13:33 SYD4-C2 2588 255.255.255.128 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/elevation/WebMercatorQuad/18/257866/162011.png 200 - Mozilla/5.0%20QGIS/31006 api=${ClientApiKey}&pipeline=terrain-rgb - RefreshHit oflBr-vO5caoVpi2S23hGh9YWMUca-McU_Fl5oN9fqW_H9ea_iS-Kg== basemaps.linz.govt.nz https 243 0.051 - TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 RefreshHit HTTP/1.1 - - 55515 0.050 RefreshHit text/xml -
15+
`.trim();
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
export function getYesterday(): Date {
2+
// Process up to about a day ago
3+
const maxDate = new Date();
4+
maxDate.setUTCMinutes(0);
5+
maxDate.setUTCSeconds(0);
6+
maxDate.setUTCMilliseconds(0);
7+
maxDate.setUTCDate(maxDate.getUTCDate() - 1);
8+
return maxDate;
9+
}
10+
11+
export function* byDay(startDate: Date, endDate: Date): Generator<string> {
12+
const currentDate = new Date(startDate);
13+
currentDate.setUTCMinutes(0);
14+
currentDate.setUTCSeconds(0);
15+
currentDate.setUTCMilliseconds(0);
16+
while (true) {
17+
yield currentDate.toISOString().slice(0, 10);
18+
currentDate.setUTCDate(currentDate.getUTCDate() - 1);
19+
if (currentDate.getTime() < endDate.getTime()) break;
20+
}
21+
}
22+
23+
export function* byMonth(startDate: Date, endDate: Date): Generator<string> {
24+
const currentDate = new Date(startDate);
25+
currentDate.setUTCMinutes(0);
26+
currentDate.setUTCSeconds(0);
27+
currentDate.setUTCMilliseconds(0);
28+
while (true) {
29+
yield currentDate.toISOString().slice(0, 7);
30+
currentDate.setUTCMonth(currentDate.getUTCMonth() - 1);
31+
if (currentDate.getTime() < endDate.getTime()) break;
32+
}
33+
}
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import { Env, LogType } from '@basemaps/shared';
2+
import { Client } from '@elastic/elasticsearch';
3+
4+
import { LogStats } from './log.stats.js';
5+
6+
export class ElasticClient {
7+
_client: Client | undefined;
8+
/** Between index requests delay this amount */
9+
indexDelay: number = 200;
10+
11+
get client(): Client {
12+
if (this._client != null) return this._client;
13+
14+
const id = Env.get(Env.Analytics.ElasticId);
15+
const apiKey = Env.get(Env.Analytics.ElasticApiKey);
16+
if (id == null) throw new Error(`$${Env.Analytics.ElasticId} is unset`);
17+
if (apiKey == null) throw new Error(`$${Env.Analytics.ElasticApiKey} is unset`);
18+
this._client = new Client({ cloud: { id }, auth: { apiKey } });
19+
return this._client;
20+
}
21+
22+
errors: unknown[] = [];
23+
insertQueue: Promise<void> = Promise.resolve();
24+
25+
async insert(prefix: string, combined: Iterable<LogStats>, log: LogType): Promise<void> {
26+
this.insertQueue = this.insertQueue.then(() => this._doInsert(prefix, combined, log));
27+
return this.insertQueue;
28+
}
29+
30+
async _doInsert(prefix: string, combined: Iterable<LogStats>, log: LogType): Promise<void> {
31+
const client = this.client;
32+
let inserts = 0;
33+
let skipHits = 0;
34+
let operations: unknown[] = [];
35+
36+
const startTime = performance.now();
37+
38+
const errors = this.errors;
39+
const indexDelay = this.indexDelay;
40+
41+
async function doInsert(): Promise<void> {
42+
inserts += operations.length / 2;
43+
log.trace({ prefix, records: operations.length / 2, skipHits, total: inserts }, 'log:ingest');
44+
const ret = await client.bulk({ operations });
45+
46+
if (ret.errors) {
47+
errors.push(ret);
48+
throw new Error('Failed to index: ' + prefix);
49+
}
50+
// Give it a little bit of time to index
51+
await new Promise((r) => setTimeout(r, indexDelay));
52+
operations = [];
53+
}
54+
55+
for (const rec of combined) {
56+
if (rec.total < 1) {
57+
skipHits++;
58+
continue;
59+
}
60+
operations.push({ index: { _index: 'basemaps-history-' + rec['@timestamp'].slice(0, 4), _id: rec.id } }, rec);
61+
if (operations.length > 50_000) await doInsert();
62+
}
63+
64+
if (operations.length > 0) await doInsert();
65+
66+
if (inserts > 0) {
67+
log.info({ prefix, skipHits, total: inserts, duration: performance.now() - startTime }, 'log:ingest');
68+
} else {
69+
log.trace({ prefix }, 'log:ingest:skip');
70+
}
71+
}
72+
}
73+
74+
export const Elastic = new ElasticClient();

0 commit comments

Comments
 (0)