Skip to content

Commit

Permalink
Fixes an issue where fields spanning multiple metadata fields weren't…
Browse files Browse the repository at this point in the history
… searching correctly
  • Loading branch information
paulpilone committed Dec 29, 2023
1 parent bfae3b5 commit 8830007
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 10 deletions.
2 changes: 1 addition & 1 deletion api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Search indexed documents by one or more keywords.
| Component | Description | Default |
| ---------------- | ----------- | ------- |
| LOGICAL_OPERATOR | Possible values are `+`, `-`, or blank. The operators map to `AND`, `NOT`, and `OR` respectively. | (blank) |
| FIELD | The index field to target with this keyword. * maps to all fields. Possible values include (but are not limited to) `title`, `primaryAuthor`, etc. | * |
| FIELD | The index field to target with this keyword. * maps to all fields. Possible values include (but are not limited to) `dc_title`, `dc_contributor`, etc. | * |
| KEYWORD | The keyword used to match documents. | * |

**Optional Parameters:**
Expand Down
31 changes: 28 additions & 3 deletions api/lambdas/search-by-keywords.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,15 @@ describe('search-by-keywords.handler', () => {

// Fixture carrying both a title and several contributor authors. The targeted
// wildcard-field test (dc_contributor\* AND dc_title\*) expects this document
// to be its only hit.
const doc4 = {
uuid: uuid4,
dc_title: 'JERICO-S3 Deliverable 5.2. Electronic Handbook for Mature Platforms: Mooring - HF Radar - FerryBox – Glider. Version 1.1.',
dc_identifier_doi: [
'http://dx.doi.org/10.25607/OBP-765',
],
dc_contributor_author: [
'Mantovani, Carlo',
'Pearlman, Jay',
'Simpson, Pauline',
],
};

await osClient.addDocument(esUrl, documentsIndexName, doc1);
Expand Down Expand Up @@ -125,7 +131,7 @@ describe('search-by-keywords.handler', () => {
(h) => h._source.uuid
);

expect(uuids).toEqual([uuid1, uuid2, uuid3]);
expect(uuids).toEqual([uuid1, uuid3, uuid2]);
},
done
);
Expand Down Expand Up @@ -235,7 +241,7 @@ describe('search-by-keywords.handler', () => {
const uuids = results.hits.hits.map(
(h) => h._source.uuid
);
expect(uuids).toEqual([uuid1, uuid2, uuid3]);
expect(uuids).toEqual([uuid1, uuid3, uuid2]);
},
done
);
Expand All @@ -260,6 +266,25 @@ describe('search-by-keywords.handler', () => {
);
});

// Two keyword clauses, each aimed at a wildcard field: an (implicit OR) match on
// dc_contributor\* and a required (+) match on dc_title\*. The test expects doc4
// to be the single hit.
test('should find matching documents using the AND boolean operator and targeted wildcard fields', (done) => {
  const keywords = ':dc_contributor\\*:pearlman,+:dc_title\\*:jerico';

  searchHandler(
    { queryStringParameters: { keywords } },
    (response) => {
      const { total, hits } = response.hits;

      expect(total.value).toEqual(1);
      // Optional chaining keeps the failure an assertion mismatch, not a TypeError,
      // when no hit comes back.
      expect(hits[0]?._source.uuid).toEqual(uuid4);
    },
    done
  );
});

test('should find matching documents using the NOT boolean operator', (done) => {
const proxyEvent = {
queryStringParameters: {
Expand All @@ -275,7 +300,7 @@ describe('search-by-keywords.handler', () => {
const uuids = results.hits.hits.map(
(h: { _source: { uuid: string } }) => h._source.uuid
);
expect(uuids).toEqual([uuid2, uuid3]);
expect(uuids).toEqual([uuid3, uuid2]);
},
done
);
Expand Down
28 changes: 27 additions & 1 deletion api/lib/search-query-builder.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,35 @@ describe('search-document-builder', () => {
expect(result).toEqual('*:(or term) NOT title:(not term) AND title:(and term)');
});

// A keyword aimed at `dc_title` must be emitted against the escaped wildcard
// field `dc_title\*`, so related title fields are searched as well.
test('should build a targeted wildcard field for dc_title', () => {
  const queryString = formatQueryString([
    {
      operator: '',
      field: 'dc_title',
      term: 'this is a title and an alternative title',
    },
  ]);

  expect(queryString).toEqual('dc_title\\*:(this is a title and an alternative title)');
});

// A keyword aimed at `dc_contributor` must be emitted against the escaped wildcard
// field `dc_contributor\*`, so every contributor sub-field is searched.
// Registered with plain `test` (not `test.only`) so the rest of this suite keeps running.
test('should build a targeted wildcard field for dc_contributor', () => {
  const keywordComps = [
    {
      operator: '',
      field: 'dc_contributor',
      term: 'search all authors',
    },
  ];
  const result = formatQueryString(keywordComps);

  expect(result).toEqual('dc_contributor\\*:(search all authors)');
});

test.todo('should boost the dc_title keyword field if searching all fields');

test.todo('shoudl boost the dc_description_abstract field if searching all fields');
test.todo('should boost the dc_description_abstract field if searching all fields');

describe('when escaping query string special characters', () => {
// + - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ /
Expand Down
17 changes: 16 additions & 1 deletion api/lib/search-query-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@ export const nestedQuery = (termPhrase: unknown) => ({
},
});

/**
 * Maps a public search field name to the field expression used in the query string.
 *
 * A small set of fields is widened to an escaped wildcard (`name\*`) so that one
 * public field can cover several related metadata fields without exposing every
 * metadata field name to users, e.g. `dc_title` also searches `dc_title_alternative`.
 *
 * @param field - Field name taken from a keyword component.
 * @returns The escaped wildcard form for `dc_title` and `dc_contributor`;
 *          any other field name is returned unchanged.
 */
const encodeQueryStringField = (field: string): string => {
  switch (field) {
    case 'dc_title':
      return 'dc_title\\*';
    case 'dc_contributor':
      return 'dc_contributor\\*';
    default:
      return field;
  }
};

// Elasticsearch's query_string query has a list of special characters. We don't
// want to necessarily escape them all (e.g. the user can use a wildcard if they want)
// but there are a few obvious ones we need to escape.
Expand Down Expand Up @@ -57,11 +70,13 @@ const formatKeywordComp = (keywordComp: SearchKeywordComps) => {
openSearchOperator = 'OR';
}

const encodedKeywordCompField = encodeQueryStringField(keywordComp.field);

const encodedKeywordCompTerm = encodeQueryStringTerm(
keywordComp.term,
keywordComp.field
);
return `${openSearchOperator} ${keywordComp.field}:(${encodedKeywordCompTerm})`;
return `${openSearchOperator} ${encodedKeywordCompField}:(${encodedKeywordCompTerm})`;
};

/**
Expand Down
5 changes: 1 addition & 4 deletions website/src/js/reducers/fields.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ const initialState = [
title: 'Author',
id: 'author',
value: [
'dc_contributor_author',
'dc_contributor_corpauthor',
'dc_contributor_editor',
'dc_contributor',
],
active_search: false,
help_text: 'Enter the <b>author name</b>. You can enter just the surname if it is not a common name otherwise eg. Johannes Karstensen or if you know more than one author Karstensen Pearlman',
Expand All @@ -32,7 +30,6 @@ const initialState = [
id: 'title',
value: [
'dc_title',
'dc_title_alternative',
],
active_search: false,
autocomplete: true,
Expand Down

0 comments on commit 8830007

Please sign in to comment.