
Commit a415911

[Tech Debt] Use knex pooling correctly and provide DB connection to PgBoss
- Initialize the knex pool instance at the top level of the publisher and consumer, and pass the knex instance to the classes that make DB calls.
- Update knex initialization to use the full knexfile config instead of just the connection field.
- Add a PgBossKnexAdapter class to convert PgBoss SQL statements into knex raw commands.
- Pass the PgBossKnexAdapter to queue client setup; this removes the separate database for queue messages.
- Remove knex pool destroy statements from the message queue consumer and from the postgres publisher.
- Keep knex pool destroy statements in the message queue publisher so the pool closes after each interval script.
- Update CI to remove the message queue database parameter from deploy jobs.
- Remove the map statement from the CSV formatter so formatting no longer copies the report dataset, reducing consumer memory usage.
- Offset timed publisher script runs so they never run in parallel, reducing publisher memory usage.
- Disable unused hourly publisher runs.
1 parent: 480499c

19 files changed: +184 −113 lines

.github/workflows/ci.yml

Lines changed: 0 additions & 3 deletions
@@ -81,7 +81,6 @@ jobs:
       CF_ORGANIZATION_NAME: ${{ vars.CF_ORGANIZATION_NAME }}
       CF_SPACE_NAME: ${{ vars.CF_SPACE_NAME_DEV }}
       DB_SERVICE_NAME: ${{ vars.DB_SERVICE_NAME_DEV }}
-      MESSAGE_QUEUE_DATABASE_NAME: ${{ vars.MESSAGE_QUEUE_DATABASE_NAME }}
       MESSAGE_QUEUE_NAME: ${{ vars.MESSAGE_QUEUE_NAME }}
       NEW_RELIC_APP_NAME: ${{ vars.NEW_RELIC_APP_NAME_DEV }}
       PROXY_FQDN: ${{ vars.PROXY_FQDN_DEV }}
@@ -111,7 +110,6 @@ jobs:
       CF_ORGANIZATION_NAME: ${{ vars.CF_ORGANIZATION_NAME }}
       CF_SPACE_NAME: ${{ vars.CF_SPACE_NAME_STG }}
       DB_SERVICE_NAME: ${{ vars.DB_SERVICE_NAME_STG }}
-      MESSAGE_QUEUE_DATABASE_NAME: ${{ vars.MESSAGE_QUEUE_DATABASE_NAME }}
       MESSAGE_QUEUE_NAME: ${{ vars.MESSAGE_QUEUE_NAME }}
       NEW_RELIC_APP_NAME: ${{ vars.NEW_RELIC_APP_NAME_STG }}
       PROXY_FQDN: ${{ vars.PROXY_FQDN_STG }}
@@ -141,7 +139,6 @@ jobs:
       CF_ORGANIZATION_NAME: ${{ vars.CF_ORGANIZATION_NAME }}
       CF_SPACE_NAME: ${{ vars.CF_SPACE_NAME_PRD }}
       DB_SERVICE_NAME: ${{ vars.DB_SERVICE_NAME_PRD }}
-      MESSAGE_QUEUE_DATABASE_NAME: ${{ vars.MESSAGE_QUEUE_DATABASE_NAME }}
       MESSAGE_QUEUE_NAME: ${{ vars.MESSAGE_QUEUE_NAME }}
       NEW_RELIC_APP_NAME: ${{ vars.NEW_RELIC_APP_NAME_PRD }}
       PROXY_FQDN: ${{ vars.PROXY_FQDN_PRD }}

.github/workflows/deploy.yml

Lines changed: 0 additions & 4 deletions
@@ -27,9 +27,6 @@ on:
       DB_SERVICE_NAME:
         required: true
         type: string
-      MESSAGE_QUEUE_DATABASE_NAME:
-        required: true
-        type: string
       MESSAGE_QUEUE_NAME:
         required: true
         type: string
@@ -72,7 +69,6 @@ env:
   CF_SPACE_NAME: ${{ inputs.CF_SPACE_NAME }}
   DB_SERVICE_NAME: ${{ inputs.DB_SERVICE_NAME }}
   GA4_CREDS: ${{ secrets.GA4_CREDS }}
-  MESSAGE_QUEUE_DATABASE_NAME: ${{ inputs.MESSAGE_QUEUE_DATABASE_NAME }}
   MESSAGE_QUEUE_NAME: ${{ inputs.MESSAGE_QUEUE_NAME }}
   NEW_RELIC_APP_NAME: ${{ inputs.NEW_RELIC_APP_NAME }}
   NEW_RELIC_LICENSE_KEY: ${{ secrets.NEW_RELIC_LICENSE_KEY }}

.github/workflows/manual_deploy_to_dev.yml

Lines changed: 0 additions & 1 deletion
@@ -15,7 +15,6 @@ jobs:
       CF_ORGANIZATION_NAME: ${{ vars.CF_ORGANIZATION_NAME }}
       CF_SPACE_NAME: ${{ vars.CF_SPACE_NAME_DEV }}
       DB_SERVICE_NAME: ${{ vars.DB_SERVICE_NAME_DEV }}
-      MESSAGE_QUEUE_DATABASE_NAME: ${{ vars.MESSAGE_QUEUE_DATABASE_NAME }}
       MESSAGE_QUEUE_NAME: ${{ vars.MESSAGE_QUEUE_NAME }}
       NEW_RELIC_APP_NAME: ${{ vars.NEW_RELIC_APP_NAME_DEV }}
       PROXY_FQDN: ${{ vars.PROXY_FQDN_DEV }}

deploy/api.sh

Lines changed: 1 addition & 1 deletion
@@ -3,4 +3,4 @@
 export ANALYTICS_REPORTS_PATH=reports/api.json
 export ANALYTICS_SCRIPT_NAME=api.sh
 
-$ANALYTICS_ROOT_PATH/bin/analytics-publisher --debug --write-to-database --output /tmp --agenciesFile=$ANALYTICS_ROOT_PATH/deploy/agencies.json
+$ANALYTICS_ROOT_PATH/bin/analytics-publisher --debug --write-to-database --agenciesFile=$ANALYTICS_ROOT_PATH/deploy/agencies.json

deploy/cron.js

Lines changed: 29 additions & 19 deletions
@@ -61,18 +61,18 @@ const daily_run = () => {
   runScriptWithLogName(`${scriptRootPath}/daily.sh`, "daily.sh");
 };
 
-const hourly_run = () => {
+/*const hourly_run = () => {
   runScriptWithLogName(`${scriptRootPath}/hourly.sh`, "hourly.sh");
-};
+};*/
 
 const realtime_run = () => {
   runScriptWithLogName(`${scriptRootPath}/realtime.sh`, "realtime.sh");
 };
 
 /**
-Daily reports run every morning at 10 AM UTC.
-This calculates the offset between now and then for the next scheduled run.
-*/
+ * Daily and API reports run every morning at 10 AM UTC.
+ * This calculates the offset between now and then for the next scheduled run.
+ */
 const calculateNextDailyRunTimeOffset = () => {
   const currentTime = new Date();
   const nextRunTime = new Date(
@@ -85,26 +85,36 @@ const calculateNextDailyRunTimeOffset = () => {
 };
 
 /**
- * All scripts run immediately upon application start (with a 10 second delay
+ * All scripts run immediately upon application start (with a 60 second delay
  * between each so that they don't all run at once), then run again at intervals
  * going forward.
  */
 setTimeout(realtime_run, 1000 * 10);
-setTimeout(hourly_run, 1000 * 20);
-setTimeout(daily_run, 1000 * 30);
-setTimeout(api_run, 1000 * 40);
+// setTimeout(hourly_run, 1000 * 70); No hourly reports exist at this time.
+setTimeout(daily_run, 1000 * 70);
+setTimeout(api_run, 1000 * 130);
 
-// daily
+// Daily and API recurring script run setup.
 // Runs at 10 AM UTC, then every 24 hours afterwards
 setTimeout(() => {
-  daily_run();
-  setInterval(daily_run, 1000 * 60 * 60 * 24);
-  // API
-  api_run();
-  setInterval(api_run, 1000 * 60 * 60 * 24);
+  // Offset the daily script run by 30 seconds so that it never runs in parallel
+  // with the realtime script in order to save memory/CPU.
+  setTimeout(() => {
+    daily_run();
+    setInterval(daily_run, 1000 * 60 * 60 * 24);
+  }, 1000 * 30);
+
+  // setTimeout(hourly_run, 1000 * 60);
+
+  // Offset the API script run by 90 seconds so that it never runs in parallel
+  // with the daily or realtime scripts in order to save memory/CPU.
+  setTimeout(() => {
+    api_run();
+    setInterval(api_run, 1000 * 60 * 60 * 24);
+  }, 1000 * 90);
 }, calculateNextDailyRunTimeOffset());
-// hourly
-setInterval(hourly_run, 1000 * 60 * 60);
-// realtime. Runs every 15 minutes.
-// Google updates realtime reports every 30 minutes, so there is some overlap.
+// hourly (no hourly reports exist at this time).
+// setInterval(hourly_run, 1000 * 60 * 60);
+
+// Realtime recurring script run setup. Runs every 15 minutes.
 setInterval(realtime_run, 1000 * 60 * 15);
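For reference, the scheduling above relies on calculateNextDailyRunTimeOffset, whose body is only partially visible in the hunk. A minimal sketch of an implementation returning the milliseconds until the next 10 AM UTC (the exact body is an assumption):

// Sketch only: the hunk shows just the opening lines of this function,
// so the body below is assumed, not taken from the commit.
const calculateNextDailyRunTimeOffset = () => {
  const currentTime = new Date();
  // Next 10 AM UTC: later today if 10 AM hasn't passed yet, otherwise tomorrow.
  const nextRunTime = new Date(
    Date.UTC(
      currentTime.getUTCFullYear(),
      currentTime.getUTCMonth(),
      currentTime.getUTCDate() + (currentTime.getUTCHours() >= 10 ? 1 : 0),
      10, // 10 AM UTC
    ),
  );
  return nextRunTime.getTime() - currentTime.getTime();
};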

index.js

Lines changed: 26 additions & 13 deletions
@@ -1,10 +1,12 @@
 const { AsyncLocalStorage } = require("node:async_hooks");
+const knex = require("knex");
 const PgBoss = require("pg-boss");
 const util = require("util");
 const AppConfig = require("./src/app_config");
 const ReportProcessingContext = require("./src/report_processing_context");
 const Logger = require("./src/logger");
 const Processor = require("./src/processor");
+const PgBossKnexAdapter = require("./src/pg_boss_knex_adapter");
 
 /**
  * Gets an array of JSON report objects from the application confing, then runs
@@ -80,7 +82,7 @@ async function _processReport(appConfig, context, reportConfig, processor) {
     await processor.processChain(context);
     logger.info("Processing complete");
   } catch (e) {
-    logger.error("Encountered an error");
+    logger.error("Encountered an error during report processing");
     logger.error(util.inspect(e));
   }
 });
@@ -121,8 +123,8 @@ async function runQueuePublish(options = {}) {
     agencyName: appConfig.agencyLogName,
     scriptName: appConfig.scriptName,
   });
-  const queueClient = await _initQueueClient(appConfig, appLogger);
-  const queue = "analytics-reporter-job-queue";
+  const knexInstance = await knex(appConfig.knexConfig);
+  const queueClient = await _initQueueClient(knexInstance, appLogger);
 
   for (const agency of agencies) {
     for (const reportConfig of reportConfigs) {
@@ -134,7 +136,7 @@ async function runQueuePublish(options = {}) {
       });
       try {
         let jobId = await queueClient.send(
-          queue,
+          appConfig.messageQueueName,
           _createQueueMessage(
             options,
             agency,
@@ -151,13 +153,17 @@
         );
         if (jobId) {
           reportLogger.info(
-            `Created job in queue: ${queue} with job ID: ${jobId}`,
+            `Created job in queue: ${appConfig.messageQueueName} with job ID: ${jobId}`,
           );
         } else {
-          reportLogger.info(`Found a duplicate job in queue: ${queue}`);
+          reportLogger.info(
+            `Found a duplicate job in queue: ${appConfig.messageQueueName}`,
+          );
         }
       } catch (e) {
-        reportLogger.error(`Error sending to queue: ${queue}`);
+        reportLogger.error(
+          `Error sending to queue: ${appConfig.messageQueueName}`,
+        );
         reportLogger.error(util.inspect(e));
       }
     }
@@ -169,6 +175,9 @@ async function runQueuePublish(options = {}) {
   } catch (e) {
     appLogger.error("Error stopping queue client");
     appLogger.error(util.inspect(e));
+  } finally {
+    appLogger.debug(`Destroying database connection pool`);
+    knexInstance.destroy();
   }
 }
 
@@ -189,10 +198,10 @@ function _initAgencies(agencies_file) {
   return Array.isArray(agencies) ? agencies : legacyAgencies;
 }
 
-async function _initQueueClient(appConfig, logger) {
+async function _initQueueClient(knexInstance, logger) {
   let queueClient;
   try {
-    queueClient = new PgBoss(appConfig.messageQueueDatabaseConnection);
+    queueClient = new PgBoss({ db: new PgBossKnexAdapter(knexInstance) });
     await queueClient.start();
     logger.debug("Starting queue client");
   } catch (e) {
@@ -230,15 +239,19 @@ function _messagePriority(reportConfig) {
 async function runQueueConsume() {
   const appConfig = new AppConfig();
   const appLogger = Logger.initialize();
-  const queueClient = await _initQueueClient(appConfig, appLogger);
-  const queue = "analytics-reporter-job-queue";
+  const knexInstance = await knex(appConfig.knexConfig);
+  const queueClient = await _initQueueClient(knexInstance, appLogger);
 
   try {
     const context = new ReportProcessingContext(new AsyncLocalStorage());
-    const processor = Processor.buildAnalyticsProcessor(appConfig, appLogger);
+    const processor = Processor.buildAnalyticsProcessor(
+      appConfig,
+      appLogger,
+      knexInstance,
+    );
 
     await queueClient.work(
-      queue,
+      appConfig.messageQueueName,
       { newJobCheckIntervalSeconds: 1 },
       async (message) => {
         appLogger.info("Queue message received");
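Condensed, the publisher-side wiring now looks roughly like the sketch below (assembled from the hunks above, with job-sending and error handling elided). The key point is that a single knex pool backs both PgBoss and the report queries, and only the short-lived publisher tears the pool down:

const knex = require("knex");
const PgBoss = require("pg-boss");
const PgBossKnexAdapter = require("./src/pg_boss_knex_adapter");

async function runQueuePublish(appConfig, appLogger) {
  // One knex pool per process; PgBoss and the report queries share it.
  const knexInstance = knex(appConfig.knexConfig);
  const queueClient = new PgBoss({ db: new PgBossKnexAdapter(knexInstance) });
  await queueClient.start();
  try {
    // ... queueClient.send(appConfig.messageQueueName, message, options) ...
  } finally {
    await queueClient.stop();
    // The publisher is a short-lived interval script, so it closes the pool;
    // the long-running consumer keeps its pool open instead.
    await knexInstance.destroy();
  }
}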

knexfile.js

Lines changed: 12 additions & 0 deletions
@@ -8,6 +8,10 @@ module.exports = {
       password: process.env.POSTGRES_PASSWORD || "123abc",
       port: 5432,
     },
+    pool: {
+      min: 2,
+      max: 10,
+    },
   },
   test: {
     client: "postgresql",
@@ -18,6 +22,10 @@ module.exports = {
       password: process.env.POSTGRES_PASSWORD || "123abc",
       port: 5432,
     },
+    pool: {
+      min: 2,
+      max: 10,
+    },
     migrations: {
       tableName: "knex_migrations",
     },
@@ -31,5 +39,9 @@ module.exports = {
       password: process.env.POSTGRES_PASSWORD,
       ssl: true,
     },
+    pool: {
+      min: 2,
+      max: 10,
+    },
   },
 };
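With the pool block in place, the knexConfig getter added in src/app_config.js hands the whole per-environment object to knex, so these settings actually take effect. A minimal sketch of that initialization:

const knex = require("knex");
const knexfile = require("./knexfile");

// Passing the full environment config (not just its `connection` field) lets
// knex see the pool settings; previously only the connection field was used,
// which left pooling at knex's defaults.
const knexInstance = knex(knexfile[process.env.NODE_ENV || "development"]);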

manifest.consumer.yml

Lines changed: 0 additions & 1 deletion
@@ -21,7 +21,6 @@ applications:
       ANALYTICS_REPORT_EMAIL: ${ANALYTICS_REPORT_EMAIL}
       AWS_CACHE_TIME: '0'
       GOOGLE_APPLICATION_CREDENTIALS: /home/vcap/app/${ANALYTICS_KEY_FILE_NAME}
-      MESSAGE_QUEUE_DATABASE_NAME: ${MESSAGE_QUEUE_DATABASE_NAME}
       MESSAGE_QUEUE_NAME: ${MESSAGE_QUEUE_NAME}
       NEW_RELIC_APP_NAME: ${NEW_RELIC_APP_NAME}
       NEW_RELIC_LICENSE_KEY: ${NEW_RELIC_LICENSE_KEY}

manifest.publisher.yml

Lines changed: 0 additions & 1 deletion
@@ -21,7 +21,6 @@ applications:
       # The default path for reports (used for gov-wide reports)
       AWS_BUCKET_PATH: data/live
       AWS_CACHE_TIME: '0'
-      MESSAGE_QUEUE_DATABASE_NAME: ${MESSAGE_QUEUE_DATABASE_NAME}
       MESSAGE_QUEUE_NAME: ${MESSAGE_QUEUE_NAME}
       NEW_RELIC_APP_NAME: ${NEW_RELIC_APP_NAME}
       NEW_RELIC_LICENSE_KEY: ${NEW_RELIC_LICENSE_KEY}

src/actions/format_processed_analytics_data.js

Lines changed: 2 additions & 1 deletion
@@ -17,7 +17,7 @@ class FormatProcessedAnalyticsData extends Action {
    */
   async executeStrategy(context) {
     context.logger.debug("Formatting analytics data");
-    const formattedAnalyticsData = {};
+    let formattedAnalyticsData = {};
     for (const format of context.appConfig.formats) {
       formattedAnalyticsData[format] = await ResultFormatter.formatResult(
         context.processedAnalyticsData,
@@ -29,6 +29,7 @@ class FormatProcessedAnalyticsData extends Action {
     }
     context.processedAnalyticsData = undefined;
     context.formattedAnalyticsData = formattedAnalyticsData;
+    formattedAnalyticsData = undefined;
   }
 }

src/app_config.js

Lines changed: 13 additions & 12 deletions
@@ -35,14 +35,21 @@ class AppConfig {
     return this.#options.csv ? "csv" : "json";
   }
 
+  /**
+   * Array order here is important because the CSV formatter maps headers in
+   * place on the analytics report object and we don't want that mapping done on
+   * the JSON version.
+   *
+   * @returns {string[]} the formats to use for report formatting.
+   */
   get formats() {
     const formats = [];
-    if (this.#options.csv) {
-      formats.push("csv");
-    }
     if (this.#options.json) {
       formats.push("json");
     }
+    if (this.#options.csv) {
+      formats.push("csv");
+    }
     return formats;
   }
 
@@ -194,18 +201,12 @@ class AppConfig {
     };
   }
 
-  get messageQueueDatabaseConnection() {
-    const connection =
-      knexfile[process.env.NODE_ENV || "development"].connection;
-    return `postgres://${connection.user}:${connection.password}@${connection.host}/${process.env.MESSAGE_QUEUE_DATABASE_NAME}${process.env.NODE_ENV == "production" ? "?ssl=true" : ""}`;
-  }
-
   get messageQueueName() {
-    return process.env.MESSAGE_QUEUE_NAME;
+    return process.env.MESSAGE_QUEUE_NAME || "analytics_reporter_job_queue";
   }
 
-  get postgres() {
-    return knexfile[process.env.NODE_ENV || "development"].connection;
+  get knexConfig() {
+    return knexfile[process.env.NODE_ENV || "development"];
   }
 
   get static() {
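The in-place header mapping that makes this ordering matter lives in the CSV formatter, which is not part of this section of the diff. A hypothetical before/after sketch of the idea (mapHeader is an invented stand-in for the formatter's renaming step):

// Before (hypothetical): .map() builds a full copy of the report dataset.
// const rows = report.data.map((row) => mapHeader(row));

// After (hypothetical): rename keys on the existing row objects so no second
// copy is held in memory while the CSV is produced. Because this mutates the
// rows, "json" must be formatted before "csv".
for (const row of report.data) {
  mapHeader(row); // mutates the row in place
}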

src/pg_boss_knex_adapter.js

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+/**
+ * Handles providing a database client for the Pg-Boss library using knex.
+ */
+class PgBossKnexAdapter {
+  #knex;
+
+  /**
+   * @param {import('knex')} knexInstance an initialized instance of the knex
+   * library which provides a database connection.
+   */
+  constructor(knexInstance) {
+    this.#knex = knexInstance;
+  }
+
+  /**
+   * Execute PgBoss SQL using the knex library interface
+   *
+   * @param {string} sql the SQL string to execute.
+   * @param {string[]} parameters the parameters to insert into the SQL string.
+   * @returns {Promise} which resolves with the result of the SQL query.
+   */
+  executeSql(sql, parameters = []) {
+    // This is needed to replace pg-boss' $1, $2 arguments
+    // into knex's :val, :val2 style.
+    const replacedSql = sql.replace(
+      /\$(\d+)\b/g,
+      (_, number) => `:param_${number}`,
+    );
+
+    const parametersObject = {};
+    parameters.forEach(
+      (value, index) => (parametersObject[`param_${index + 1}`] = value),
+    );
+
+    return this.#knex.raw(replacedSql, parametersObject);
+  }
+}
+
+module.exports = PgBossKnexAdapter;
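To illustrate the translation the adapter performs: PgBoss emits pg-style positional parameters, which knex.raw does not accept, so executeSql rewrites them to named bindings before running the query on the shared pool. The query below is illustrative only (pgboss.job is PgBoss's job table, but the exact SQL PgBoss issues varies by operation):

const adapter = new PgBossKnexAdapter(knexInstance);

// PgBoss hands the adapter SQL like:
//   SELECT * FROM pgboss.job WHERE name = $1 AND priority > $2
// executeSql rewrites it to knex's named-binding form:
//   SELECT * FROM pgboss.job WHERE name = :param_1 AND priority > :param_2
// with bindings { param_1: "analytics_reporter_job_queue", param_2: 0 },
// then executes it via knex.raw.
await adapter.executeSql(
  "SELECT * FROM pgboss.job WHERE name = $1 AND priority > $2",
  ["analytics_reporter_job_queue", 0],
);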
