diff --git a/.github/workflows/generate-ai-training-ecs.yml b/.github/workflows/generate-ai-training-ecs.yml index 06ee1143f..46da2db7e 100644 --- a/.github/workflows/generate-ai-training-ecs.yml +++ b/.github/workflows/generate-ai-training-ecs.yml @@ -49,12 +49,11 @@ jobs: - name: Run New ECS task id: run-task run: | - CLUSTER="${{ inputs.environment }}-ecs-cluster" + CLUSTER="development-ecs-cluster" FAMILY="${{ inputs.environment }}-hrm-scheduled-task" SUBNETS=$(if [ ${{ inputs.environment }} == "development" ]; then echo "subnet-034e5c652dbad09dd"; else echo "subnet-00b4e88c0ea178f3d"; fi) SECURITY_GROUPS=($(if [ ${{ inputs.environment}} == "development" ]; then echo "sg-09194f9a648baf082 sg-047498d4c7b2cedd8 sg-06d6458accc0ec5ed sg-0ace4338c75e5d3de"; else echo "sg-02fd053fe7b4660ed sg-0e7119c4423f2c7b0 sg-066d0055caa7cbe90 sg-0c7fb74e11cb64ff8"; fi)) - COMMAND="npm run start:generate-ai-training-set ${{ inputs.environment }} ${{ inputs.helpline-shortcodes }} tl-aselo-ai-${{ inputs.environment }}-us-east-1" TASK_ROLE="arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ inputs.environment }}-ecsTaskRole" EXEC_ROLE="arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ inputs.environment }}-ecsTaskExecutionRole" @@ -66,7 +65,15 @@ jobs: --overrides '{ "containerOverrides": [{ "name": "'"$FAMILY"'", - "command": ["sh", "-c", "'"$COMMAND"'"], + "command": [ + "npm", + "run", + "start:generate-ai-training-set", + "${{ inputs.environment }}", + "${{ inputs.helpline-shortcodes }}", + "tl-aselo-ai-${{ inputs.environment }}-us-east-1", + "tl-aselo-ai-${{ inputs.environment }}-us-east-1" + ], "environment": [ {"name": "SSM_REGION", "value": "us-east-1"}, {"name": "AWS_REGION", "value": "us-east-1"} diff --git a/hrm-domain/scheduled-tasks/generate-ai-training-set/hrmdbAccess.ts b/hrm-domain/scheduled-tasks/generate-ai-training-set/hrmdbAccess.ts index 1d98c26db..808a23469 100644 --- a/hrm-domain/scheduled-tasks/generate-ai-training-set/hrmdbAccess.ts +++ b/hrm-domain/scheduled-tasks/generate-ai-training-set/hrmdbAccess.ts @@ -52,15 +52,18 @@ export type TrainingSetContact = { export const streamTrainingSetContacts = async ( accountSid: HrmAccountId, ): Promise => { - const qs = new QueryStream( - pgp.as.format(SELECT_CATEGORIES_SUMMARY_AND_TRANSCRIPTS_SQL, { accountSid }), - [], - { highWaterMark: HIGH_WATER_MARK }, - ); + const formattedQuery = pgp.as.format(SELECT_CATEGORIES_SUMMARY_AND_TRANSCRIPTS_SQL, { + accountSid, + }); + + const qs = new QueryStream(formattedQuery, [], { highWaterMark: HIGH_WATER_MARK }); // Expose the readable stream to the caller as a promise for further pipelining - return new Promise(resolve => { + return new Promise((resolve, reject) => { db.stream(qs, resultStream => { resolve(resultStream); + }).catch(error => { + console.error('Error streaming contacts:', error); + reject(error); }); }); }; diff --git a/hrm-domain/scheduled-tasks/generate-ai-training-set/index.ts b/hrm-domain/scheduled-tasks/generate-ai-training-set/index.ts index f5755673f..c0a1dd0fc 100644 --- a/hrm-domain/scheduled-tasks/generate-ai-training-set/index.ts +++ b/hrm-domain/scheduled-tasks/generate-ai-training-set/index.ts @@ -60,10 +60,9 @@ export const generate = async ( environment: 'development' | 'staging' | 'production', hlShortCodes: string[], targetBucket: string, - sourceBucket?: string, + sourceBucket: string, ) => { const accountSidMappings = await lookupAccountSids(environment, hlShortCodes); - console.log('Account SIDs found:'); accountSidMappings.forEach(({ accountSid, shortCode }) => { console.log(`Account SID for ${shortCode}: ${accountSid}`); }); @@ -90,8 +89,8 @@ export const generate = async ( sourceBucket, ); } catch (error) { - console.info( - `No transcript found for contact ${trainingSetContact.contactId} in ${shortCode} bucket. Skipping...`, + console.debug( + `No transcript found for contact ${trainingSetContact.contactId} in ${trainingSetContact.transcriptBucket}/${trainingSetContact.transcriptKey} . Skipping...`, ); callback(); return;