From f84d8629ae8f5a5886da48470f15806fa746a7a0 Mon Sep 17 00:00:00 2001 From: Andres Bolocco Date: Fri, 26 Jun 2020 14:20:33 -0300 Subject: [PATCH] fixes city of phoenix water firstname, lastname & bill_date --- lib/documentTypes/cityOfPhoenixWaterBill.js | 28 +++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/lib/documentTypes/cityOfPhoenixWaterBill.js b/lib/documentTypes/cityOfPhoenixWaterBill.js index 980663f..3424b50 100644 --- a/lib/documentTypes/cityOfPhoenixWaterBill.js +++ b/lib/documentTypes/cityOfPhoenixWaterBill.js @@ -32,13 +32,22 @@ module.exports = { extractor: (keyValues, rawText) => { const keys = Object.keys(keyValues); - const [, fullNameLine] = rawText; + const first5Lines = rawText.slice(0, 5); + let fullName; let first_name; let last_name; - if (fullNameLine) { + + fullName = first5Lines.find((line) => ( + !line.match(/City of Phoenix/gi) + && !line.match(/City Services Bill?/gi) + && !line.match(/\d+/g) + && line.toUpperCase() === line + )); + + if (fullName) { // first name & last name (dropping anything between first and last words) - first_name = fullNameLine.split(' ').slice(0, 1).join(' '); - last_name = fullNameLine.split(' ').slice(-1).join(' '); + first_name = fullName.split(' ').slice(0, 1).join(' '); + last_name = fullName.split(' ').slice(-1).join(' '); } // street address line 1 & 2 @@ -75,11 +84,16 @@ module.exports = { } // bill date - let bill_date = keyValues[keys[stringSimilarity.findBestMatch( + const billDateIndex = stringSimilarity.findBestMatch( 'Due Date', keys, - ).bestMatchIndex]]; - if (!bill_date || !bill_date.length || !bill_date.match(/^(\d{1,2})\/(\d{1,2})\/(\d{4})$/g)) { + ).bestMatchIndex; + let bill_date = keyValues[keys[billDateIndex]]; + + if (!bill_date + || !bill_date.length + || keys[billDateIndex].match(/Bill Date/gi) + || !bill_date.match(/^(\d{1,2})\/(\d{1,2})\/(\d{4})$/g)) { const lineWithDueDate = rawText.findIndex((line) => line.match(/^Due Date$/g)); bill_date = rawText.slice(lineWithDueDate - 5, lineWithDueDate + 5).filter((line) => ( line.match(/^(\d{1,2})\/(\d{1,2})\/(\d{4})$/g)