Skip to content

Commit

Permalink
BHBC-2140: Add functions for safely lowercasing/trimming unknown values (#939)
Browse files Browse the repository at this point in the history
  • Loading branch information
NickPhura authored Feb 8, 2023
1 parent fee0b44 commit f37b559
Show file tree
Hide file tree
Showing 9 changed files with 235 additions and 60 deletions.
5 changes: 3 additions & 2 deletions api/src/utils/media/csv/csv-file.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import xlsx from 'xlsx';
import { SUBMISSION_MESSAGE_TYPE } from '../../../constants/status';
import { safeToLowerCase, safeTrim } from '../../string-utils';
import { IMediaState, MediaValidation } from '../media-file';
import { getCellValue, getWorksheetRange, replaceCellDates, trimCellWhitespace } from '../xlsx/xlsx-utils';

Expand Down Expand Up @@ -101,7 +102,7 @@ export class CSVWorksheet {

if (aoaHeaders.length > 0) {
// Parse the headers array from the array of arrays produced by calling `xlsx.utils.sheet_to_json`
this._headers = aoaHeaders[0].map((item) => item?.trim());
this._headers = aoaHeaders[0].map(safeTrim);
}
}

Expand All @@ -110,7 +111,7 @@ export class CSVWorksheet {

getHeadersLowerCase(): string[] {
if (!this._headersLowerCase.length) {
this._headersLowerCase = this.getHeaders().map((item) => item?.toLowerCase());
this._headersLowerCase = this.getHeaders().map(safeToLowerCase);
}

return this._headersLowerCase;
Expand Down
9 changes: 5 additions & 4 deletions api/src/utils/media/csv/validation/csv-header-validator.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { SUBMISSION_MESSAGE_TYPE } from '../../../../constants/status';
import { safeToLowerCase, safeTrim } from '../../../string-utils';
import { CSVValidator } from '../csv-file';

/**
Expand Down Expand Up @@ -62,7 +63,7 @@ export const hasRequiredHeadersValidator = (config?: FileRequiredHeaderValidator
const headersLowerCase = csvWorksheet.getHeadersLowerCase();

for (const requiredHeader of config.file_required_columns_validator.required_columns) {
if (!headersLowerCase.includes(requiredHeader.toLowerCase())) {
if (!headersLowerCase.includes(safeToLowerCase(requiredHeader))) {
csvWorksheet.csvValidation.addHeaderErrors([
{
errorCode: SUBMISSION_MESSAGE_TYPE.MISSING_REQUIRED_HEADER,
Expand Down Expand Up @@ -118,7 +119,7 @@ export const hasRecommendedHeadersValidator = (config?: FileRecommendedHeaderVal
}

for (const recommendedHeader of config.file_recommended_columns_validator.recommended_columns) {
if (!headersLowerCase.includes(recommendedHeader.toLowerCase())) {
if (!headersLowerCase.includes(safeToLowerCase(recommendedHeader))) {
csvWorksheet.csvValidation.addHeaderWarnings([
{
errorCode: SUBMISSION_MESSAGE_TYPE.MISSING_RECOMMENDED_HEADER,
Expand Down Expand Up @@ -162,8 +163,8 @@ export const getValidHeadersValidator = (config?: FileValidHeadersValidatorConfi
for (const header of headers) {
if (
!config.file_valid_columns_validator.valid_columns
.map((item) => item.toLowerCase())
.includes(header.trim().toLowerCase())
.map(safeToLowerCase)
.includes(safeToLowerCase(safeTrim(header)))
) {
csvWorksheet.csvValidation.addHeaderWarnings([
{
Expand Down
21 changes: 11 additions & 10 deletions api/src/utils/media/csv/validation/csv-row-validator.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { SUBMISSION_MESSAGE_TYPE } from '../../../../constants/status';
import { safeToLowerCase } from '../../../string-utils';
import { CSVValidator } from '../csv-file';

export type RequiredFieldsValidatorConfig = {
Expand All @@ -21,7 +22,7 @@ export const getRequiredFieldsValidator = (config?: RequiredFieldsValidatorConfi
const headersLowerCase = csvWorksheet.getHeadersLowerCase();

rows.forEach((row, rowIndex) => {
const columnIndex = headersLowerCase.indexOf(config.columnName.toLowerCase());
const columnIndex = headersLowerCase.indexOf(safeToLowerCase(config.columnName));

// if column does not exist, return
if (columnIndex < 0) {
Expand Down Expand Up @@ -80,7 +81,7 @@ export const getCodeValueFieldsValidator = (config?: ColumnCodeValidatorConfig):
const headersLowerCase = csvWorksheet.getHeadersLowerCase();

rows.forEach((row, rowIndex) => {
const columnIndex = headersLowerCase.indexOf(config.columnName.toLowerCase());
const columnIndex = headersLowerCase.indexOf(safeToLowerCase(config.columnName));

// if column does not exist, return
if (columnIndex < 0) {
Expand All @@ -95,14 +96,14 @@ export const getCodeValueFieldsValidator = (config?: ColumnCodeValidatorConfig):
}

// compare allowed code values as lowercase strings
const allowedCodeValuesLowerCase: string[] = [];
const allowedCodeValuesLowerCase: (string | number)[] = [];
const allowedCodeValues = config.column_code_validator.allowed_code_values.map((allowedCode) => {
allowedCodeValuesLowerCase.push(allowedCode.name?.toString().toLowerCase());
allowedCodeValuesLowerCase.push(safeToLowerCase(allowedCode.name));
return allowedCode.name;
});

// Add an error if the cell value is not one of the elements in the codeValues array
if (!allowedCodeValuesLowerCase.includes(rowValueForColumn?.toLowerCase())) {
if (!allowedCodeValuesLowerCase.includes(safeToLowerCase(rowValueForColumn))) {
csvWorksheet.csvValidation.addRowErrors([
{
errorCode: SUBMISSION_MESSAGE_TYPE.INVALID_VALUE,
Expand Down Expand Up @@ -147,7 +148,7 @@ export const getValidRangeFieldsValidator = (config?: ColumnRangeValidatorConfig
const headersLowerCase = csvWorksheet.getHeadersLowerCase();

rows.forEach((row, rowIndex) => {
const columnIndex = headersLowerCase.indexOf(config.columnName.toLowerCase());
const columnIndex = headersLowerCase.indexOf(safeToLowerCase(config.columnName));

// if column does not exist, return
if (columnIndex < 0) {
Expand Down Expand Up @@ -248,7 +249,7 @@ export const getNumericFieldsValidator = (config?: ColumnNumericValidatorConfig)
const headersLowerCase = csvWorksheet.getHeadersLowerCase();

rows.forEach((row, rowIndex) => {
const columnIndex = headersLowerCase.indexOf(config.columnName.toLowerCase());
const columnIndex = headersLowerCase.indexOf(safeToLowerCase(config.columnName));

// if column does not exist, return
if (columnIndex < 0) {
Expand Down Expand Up @@ -311,7 +312,7 @@ export const getValidFormatFieldsValidator = (config?: ColumnFormatValidatorConf
const headersLowerCase = csvWorksheet.getHeadersLowerCase();

rows.forEach((row, rowIndex) => {
const columnIndex = headersLowerCase.indexOf(config.columnName.toLowerCase());
const columnIndex = headersLowerCase.indexOf(safeToLowerCase(config.columnName));

// if column does not exist, return
if (columnIndex < 0) {
Expand Down Expand Up @@ -367,7 +368,7 @@ export const getUniqueColumnsValidator = (config?: FileColumnUniqueValidatorConf

// find the indices of all provided column names in the worksheet
const columnIndices = config.file_column_unique_validator.column_names.map((column) =>
lowercaseHeaders.indexOf(column.toLocaleLowerCase())
lowercaseHeaders.indexOf(safeToLowerCase(column))
);

// checks list of column indices if any are missing (-1) and returns early
Expand All @@ -377,7 +378,7 @@ export const getUniqueColumnsValidator = (config?: FileColumnUniqueValidatorConf

rows.forEach((row, rowIndex) => {
const key = config.file_column_unique_validator.column_names
.map((columnIndex) => `${row[columnIndex] || ''}`.trim().toLocaleLowerCase())
.map((columnIndex) => `${row[columnIndex] || ''}`.trim().toLowerCase())
.join(', ');
// check if key exists already
if (!keySet.has(key)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { safeToLowerCase } from '../../string-utils';
import { DWCArchive, DWCArchiveValidator } from '../dwc/dwc-archive-file';
import { MediaValidator } from '../media-file';
import { XLSXCSV, XLSXCSVValidator } from '../xlsx/xlsx-file';
Expand Down Expand Up @@ -93,7 +94,7 @@ const checkRequiredFieldsInDWCArchive = (dwcArchive: DWCArchive, config: Submiss
const fileNames = dwcArchive.rawFile.mediaFiles.map((mediaFile) => mediaFile.name);

config.submission_required_files_validator.required_files.forEach((requiredFile) => {
if (!fileNames.includes(requiredFile.toLowerCase())) {
if (!fileNames.includes(safeToLowerCase(requiredFile))) {
dwcArchive.mediaValidation.addFileErrors([`Missing required file: ${requiredFile}`]);
}
});
Expand All @@ -112,10 +113,10 @@ const checkRequiredFieldsInXLSXCSV = (xlsxCsv: XLSXCSV, config: SubmissionRequir
return xlsxCsv;
}

const worksheetNames = Object.keys(xlsxCsv.workbook.worksheets).map((item) => item.toLowerCase());
const worksheetNames = Object.keys(xlsxCsv.workbook.worksheets).map(safeToLowerCase);

config.submission_required_files_validator.required_files.forEach((requiredFile) => {
if (!worksheetNames.includes(requiredFile.toLowerCase())) {
if (!worksheetNames.includes(safeToLowerCase(requiredFile))) {
xlsxCsv.mediaValidation.addFileErrors([`Missing required sheet: ${requiredFile}`]);
}
});
Expand Down
5 changes: 3 additions & 2 deletions api/src/utils/media/xlsx/validation/xlsx-validation.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { SUBMISSION_MESSAGE_TYPE } from '../../../../constants/status';
import { safeTrim } from '../../../string-utils';
import { CSVWorkBook, WorkBookValidator } from '../../csv/csv-file';

export type ParentChildKeyMatchValidatorConfig = {
Expand Down Expand Up @@ -48,7 +49,7 @@ export const getParentChildKeyMatchValidator = (config?: ParentChildKeyMatchVali
}

// Filter column names to only check key violation on columns included in the child sheet
const filteredColumnNames = column_names.filter((columnName: string) => Boolean(childRowObjects[0][columnName]));
const filteredColumnNames = column_names.filter((columnName) => Boolean(childRowObjects[0][columnName]));

/**
* Encodes the column values for a worksheet at a given row into a string, which is used for comparison with another worksheet
Expand All @@ -65,7 +66,7 @@ export const getParentChildKeyMatchValidator = (config?: ParentChildKeyMatchVali
.filter(Boolean)

// Trim whitespace
.map((columnValue: string) => columnValue.trim())
.map(safeTrim)

// Deliminate column values
.join('|')
Expand Down
5 changes: 3 additions & 2 deletions api/src/utils/media/xlsx/xlsx-utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import xlsx, { CellObject } from 'xlsx';
import { safeTrim } from '../../string-utils';

/**
* Get a worksheet by name.
Expand Down Expand Up @@ -63,12 +64,12 @@ export function prepareWorksheetCells(worksheet: xlsx.WorkSheet) {
/**
 * Trims leading/trailing whitespace from a worksheet cell, mutating the cell in place.
 *
 * Both the raw value (`cell.v`, only when the cell type `cell.t` is `'s'`) and the formatted text
 * (`cell.w`, when present) are cleaned. Trimming goes through `safeTrim`, so a value that is not
 * actually a string is left untouched instead of throwing.
 *
 * @param cell The cell to clean.
 * @returns The same cell object that was passed in.
 */
export function trimCellWhitespace(cell: CellObject) {
  // check and clean raw strings
  if (cell.t === 's') {
    // `safeTrim` replaces the unguarded `(cell.v as string).trim()`, which throws when `v` is missing
    cell.v = safeTrim(cell.v);
  }

  // check and clean formatted strings
  if (cell.w) {
    cell.w = safeTrim(cell.w);
  }

  return cell;
}
Expand Down
120 changes: 120 additions & 0 deletions api/src/utils/string-utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import { expect } from 'chai';
import { safeToLowerCase, safeTrim } from './string-utils';

/**
 * Unit tests for `safeToLowerCase`.
 *
 * Strings must come back lowercased; every non-string input (including `null` and `undefined`)
 * must come back unaltered.
 */
describe('safeToLowerCase', () => {
  describe('returns value lowercase', () => {
    it('when value is a lowercase string', () => {
      expect(safeToLowerCase('string')).to.equal('string');
    });

    it('when value is an uppercase string', () => {
      expect(safeToLowerCase('STRING')).to.equal('string');
    });

    it('when value is a mixed case string', () => {
      expect(safeToLowerCase('sTRiNG')).to.equal('string');
    });

    it('when value is an empty string', () => {
      expect(safeToLowerCase('')).to.equal('');
    });
  });

  describe('returns value unaltered', () => {
    it('when value is `null`', () => {
      expect(safeToLowerCase(null)).to.equal(null);
    });

    it('when value is `undefined`', () => {
      expect(safeToLowerCase(undefined)).to.equal(undefined);
    });

    it('when value is a negative number', () => {
      expect(safeToLowerCase(-123)).to.equal(-123);
    });

    it('when value is a zero', () => {
      expect(safeToLowerCase(0)).to.equal(0);
    });

    it('when value is a positive number', () => {
      expect(safeToLowerCase(123)).to.equal(123);
    });

    it('when value is `false`', () => {
      expect(safeToLowerCase(false)).to.equal(false);
    });

    it('when value is `true`', () => {
      expect(safeToLowerCase(true)).to.equal(true);
    });

    it('when value is an empty object', () => {
      expect(safeToLowerCase({})).to.eql({});
    });

    it('when value is a non-empty object (same reference is returned)', () => {
      // Strict equality proves the original object is handed back, not a copy
      const obj = { KEY: 'VALUE' };
      expect(safeToLowerCase(obj)).to.equal(obj);
    });

    it('when value is an empty array', () => {
      expect(safeToLowerCase([])).to.eql([]);
    });

    it('when value is a non-empty array of numbers', () => {
      expect(safeToLowerCase([1, 2, 3])).to.eql([1, 2, 3]);
    });

    it('when value is a non-empty array of strings', () => {
      expect(safeToLowerCase(['1', 'string', 'false'])).to.eql(['1', 'string', 'false']);
    });

    it('when value is a function', () => {
      const fn = (a: number, b: number) => a * b;
      expect(safeToLowerCase(fn)).to.equal(fn);
    });
  });
});

/**
 * Unit tests for `safeTrim`.
 *
 * Strings must come back with leading/trailing whitespace removed (case untouched); every
 * non-string input (including `null` and `undefined`) must come back unaltered.
 */
describe('safeTrim', () => {
  describe('returns value trimmed', () => {
    it('when value is a lowercase string', () => {
      expect(safeTrim(' string ')).to.equal('string');
    });

    it('when value is an uppercase string', () => {
      expect(safeTrim(' STRING ')).to.equal('STRING');
    });

    it('when value is a mixed case string', () => {
      expect(safeTrim(' sTRiNG ')).to.equal('sTRiNG');
    });

    it('when value is an empty string', () => {
      expect(safeTrim('')).to.equal('');
    });

    it('when value is a whitespace-only string', () => {
      expect(safeTrim('   ')).to.equal('');
    });

    it('when value is padded with tabs and newlines', () => {
      expect(safeTrim('\t string\n')).to.equal('string');
    });
  });

  describe('returns value unaltered', () => {
    it('when value is `null`', () => {
      expect(safeTrim(null)).to.equal(null);
    });

    it('when value is `undefined`', () => {
      expect(safeTrim(undefined)).to.equal(undefined);
    });

    it('when value is a negative number', () => {
      expect(safeTrim(-123)).to.equal(-123);
    });

    it('when value is a zero', () => {
      expect(safeTrim(0)).to.equal(0);
    });

    it('when value is a positive number', () => {
      expect(safeTrim(123)).to.equal(123);
    });

    it('when value is `false`', () => {
      expect(safeTrim(false)).to.equal(false);
    });

    it('when value is `true`', () => {
      expect(safeTrim(true)).to.equal(true);
    });

    it('when value is an empty object', () => {
      expect(safeTrim({})).to.eql({});
    });

    it('when value is a non-empty object (same reference is returned)', () => {
      // Strict equality proves the original object is handed back, not a copy
      const obj = { key: ' value ' };
      expect(safeTrim(obj)).to.equal(obj);
    });

    it('when value is an empty array', () => {
      expect(safeTrim([])).to.eql([]);
    });

    it('when value is a non-empty array of numbers', () => {
      expect(safeTrim([1, 2, 3])).to.eql([1, 2, 3]);
    });

    it('when value is a non-empty array of strings', () => {
      expect(safeTrim([' 1 ', ' string ', ' false '])).to.eql([' 1 ', ' string ', ' false ']);
    });

    it('when value is a function', () => {
      const fn = (a: number, b: number) => a * b;
      expect(safeTrim(fn)).to.equal(fn);
    });
  });
});
37 changes: 37 additions & 0 deletions api/src/utils/string-utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { isString } from 'lodash';

/**
 * Lowercases a value only when it is a string.
 *
 * Values that lodash's `isString` does not recognise as strings are handed back exactly as
 * received, so the function can be applied to data of unknown type without throwing.
 *
 * @export
 * @template T
 * @param {T} value the value to lowercase, of any type
 * @return {*} {T} the lowercased string, or the original value when it is not a string
 */
export function safeToLowerCase<T>(value: T): T {
  // Guard clause: anything that is not a string passes straight through
  if (!isString(value)) {
    return value;
  }

  const lowered: unknown = value.toLowerCase();

  return lowered as T;
}

/**
 * Trims a value only when it is a string.
 *
 * Values that lodash's `isString` does not recognise as strings are handed back exactly as
 * received, so the function can be applied to data of unknown type without throwing.
 *
 * @export
 * @template T
 * @param {T} value the value to trim, of any type
 * @return {*} {T} the trimmed string, or the original value when it is not a string
 */
export function safeTrim<T>(value: T): T {
  // Guard clause: anything that is not a string passes straight through
  if (!isString(value)) {
    return value;
  }

  const trimmed: unknown = value.trim();

  return trimmed as T;
}
Loading

0 comments on commit f37b559

Please sign in to comment.