From f98e2f94bd5f155aaaa8b11ba3668cc0f2eaeec5 Mon Sep 17 00:00:00 2001 From: jheer Date: Wed, 25 Sep 2024 10:14:41 -0700 Subject: [PATCH] feat: Add collate helper for custom sort orders. --- docs/api/index.md | 22 +++++++++- docs/api/verbs.md | 14 +++++- src/api.js | 1 + src/expression/compare.js | 17 +++++--- src/helpers/collate.js | 25 +++++++++++ src/util/is-function.js | 3 ++ test/verbs/orderby-test.js | 88 +++++++++++++++++++++++++++++++++++++- 7 files changed, 159 insertions(+), 11 deletions(-) create mode 100644 src/helpers/collate.js diff --git a/docs/api/index.md b/docs/api/index.md index f1b001f..adf807c 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -11,7 +11,7 @@ title: Arquero API Reference * [load](#load), [loadArrow](#loadArrow), [loadCSV](#loadCSV), [loadFixed](#loadFixed), [loadJSON](#loadJSON) * [Expression Helpers](#expression-helpers) * [op](#op), [agg](#agg), [escape](#escape) - * [bin](#bin), [desc](#desc), [frac](#frac), [rolling](#rolling), [seed](#seed) + * [bin](#bin), [collate](#collate), [desc](#desc), [frac](#frac), [rolling](#rolling), [seed](#seed) * [Selection Helpers](#selection-helpers) * [all](#all), [not](#not), [range](#range) * [matches](#matches), [startswith](#startswith), [endswith](#endswith) @@ -491,6 +491,26 @@ Generate a table expression that performs uniform binning of number values. The aq.bin('colA', { maxbins: 20 }) ``` +
# +aq.collate(expr, comparator[, options]) · [Source](https://github.com/uwdata/arquero/blob/master/src/helpers/collate.js) + +Annotate a table expression with collation metadata, indicating how expression values should be compared and sorted. The [orderby](verbs#orderby) verb uses collation metadata to determine sort order. The collate helper is particularly useful for locale-specific string comparisons. The collation information can take the form of either a standard two-argument comparator function or locale and option arguments compatible with [`Intl.Collator`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Collator). + +* *expr*: The table expression to annotate with collation metadata. +* *comparator*: A comparator function or the locale(s) to use. For locales, both strings (e.g., `'de'`, `'tr'`, etc.) and [`Intl.Locale`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale) objects (or an array of either) are supported. +* *options*: Collation options compatible with [`Intl.Collator`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Collator). This argument only applies if locales are provided as the second argument. + +*Examples* + +```js +// order colA using a German locale +aq.collate('colA', 'de') +``` + +```js +// order colA using a provided comparator function +aq.collate('colA', new Intl.Collator('de').compare) +```
# aq.desc(expr) · [Source](https://github.com/uwdata/arquero/blob/master/src/helpers/desc.js) diff --git a/docs/api/verbs.md b/docs/api/verbs.md index fd630b8..ee10cc1 100644 --- a/docs/api/verbs.md +++ b/docs/api/verbs.md @@ -120,7 +120,7 @@ table.ungroup() Order table rows based on a set of column values. Subsequent operations sensitive to ordering (such as window functions) will operate over sorted values. The resulting table provides an view over the original data, without any copying. To create a table with sorted data copied to new data strucures, call [reify](#reify) on the result of this method. To undo ordering, use [unorder](#unorder). -* *keys*: Key values to sort by, in precedence order. By default, sorting is done in ascending order. To sort in descending order, wrap values using [desc](./#desc). If a string, order by the column with that name. If a number, order by the column with that index. If a function, must be a valid table expression; aggregate functions are permitted, but window functions are not. If an object, object values must be valid values parameters with output column names for keys and table expressions for values (the output names will be ignored). If an array, array values must be valid key parameters. +* *keys*: Key values to sort by, in precedence order. By default, sorting is done in ascending order. To sort in descending order, wrap values using [desc](./#desc). To provide a custom sort order for a key (such as for locale-specific string comparison), wrap the key value using [collate](./#collate). If a key is a string, order by the column with that name. If a number, order by the column with that index. If a function, the key must be a valid table expression; aggregate functions are permitted, but window functions are not. If an object, object values must be valid values parameters with output column names for keys and table expressions for values (the output names will be ignored). 
If an array, array values must be valid key parameters. *Examples* @@ -135,9 +135,19 @@ table.orderby('a', aq.desc('b')) table.orderby({ a: 'a', b: aq.desc('b') )}) ``` +```js +// order by column 'a' according to German locale settings +table.orderby(aq.collate('a', 'de')) +``` + ```js // orderby accepts table expressions as well as column names -table.orderby(aq.desc(d => d.a)) +table.orderby(d => d.a) +``` + +```js +// the configurations above can be combined +table.orderby(aq.desc(aq.collate(d => d.a, 'de'))) ```
# diff --git a/src/api.js b/src/api.js index ec25761..a26f7a0 100644 --- a/src/api.js +++ b/src/api.js @@ -20,6 +20,7 @@ export { default as toJSON } from './format/to-json.js'; export { default as toMarkdown } from './format/to-markdown.js'; export { default as bin } from './helpers/bin.js'; export { default as escape } from './helpers/escape.js'; +export { default as collate } from './helpers/collate.js'; export { default as desc } from './helpers/desc.js'; export { default as field } from './helpers/field.js'; export { default as frac } from './helpers/frac.js'; diff --git a/src/expression/compare.js b/src/expression/compare.js index 399bd6a..2c1298c 100644 --- a/src/expression/compare.js +++ b/src/expression/compare.js @@ -3,11 +3,8 @@ import parse from './parse.js'; import { aggregate } from '../verbs/reduce/util.js'; // generate code to compare a single field -const _compare = (u, v, lt, gt) => - `((u = ${u}) < (v = ${v}) || u == null) && v != null ? ${lt} - : (u > v || v == null) && u != null ? ${gt} - : ((v = v instanceof Date ? +v : v), (u = u instanceof Date ? +u : u)) !== u && v === v ? ${lt} - : v !== v && u === u ? ${gt} : `; +const _compare = (u, v, lt, gt) => `((u = ${u}) < (v = ${v}) || u == null) && v != null ? ${lt} : (u > v || v == null) && u != null ? ${gt} : ((v = v instanceof Date ? +v : v), (u = u instanceof Date ? +u : u)) !== u && v === v ? ${lt} : v !== v && u === u ? ${gt} : `; +const _collate = (u, v, lt, gt, f) => `(v = ${v}, (u = ${u}) == null && v == null) ? 0 : v == null ? ${gt} : u == null ? ${lt} : (u = ${f}(u,v)) ? u : `; export default function(table, fields) { // parse expressions, generate code for both a and b values @@ -50,9 +47,15 @@ export default function(table, fields) { + (op && table.isGrouped() ? 'const ka = keys[a], kb = keys[b];' : '') + 'let u, v; return '; for (let i = 0; i < n; ++i) { - const o = fields.get(names[i]).desc ? -1 : 1; + const field = fields.get(names[i]); + const o = field.desc ? 
-1 : 1; const [u, v] = exprs[i]; - code += _compare(u, v, -o, o); + if (field.collate) { + code += _collate(u, v, -o, o, `${o < 0 ? '-' : ''}fn[${fn.length}]`); + fn.push(field.collate); + } else { + code += _compare(u, v, -o, o); + } } code += '0;};'; diff --git a/src/helpers/collate.js b/src/helpers/collate.js new file mode 100644 index 0000000..c922719 --- /dev/null +++ b/src/helpers/collate.js @@ -0,0 +1,25 @@ +import isFunction from '../util/is-function.js'; +import wrap from './wrap.js'; + +/** + * Annotate a table expression with collation metadata, indicating how + * expression values should be compared and sorted. The orderby verb uses + * collation metadata to determine sort order. The collation information can + * either take the form a standard two-argument comparator function, or as + * locale and option arguments compatible with `Intl.Collator`. + * @param {string|Function|object} expr The table expression to annotate + * with collation metadata. + * @param {Intl.LocalesArgument | ((a: any, b: any) => number)} comparator + * A comparator function or the locale(s) to collate by. + * @param {Intl.CollatorOptions} [options] Collation options, applicable + * with locales only. + * @return {object} A wrapper object representing the collated value. + * @example orderby(collate('colA', 'de')) + */ +export default function(expr, comparator, options) { + return wrap(expr, { + collate: isFunction(comparator) + ? 
comparator + : new Intl.Collator(comparator, options).compare + }); +} diff --git a/src/util/is-function.js b/src/util/is-function.js index 5249120..34f6529 100644 --- a/src/util/is-function.js +++ b/src/util/is-function.js @@ -1,3 +1,6 @@ +/** + * @returns {value is Function} + */ export default function(value) { return typeof value === 'function'; } diff --git a/test/verbs/orderby-test.js b/test/verbs/orderby-test.js index ea30f1f..7ea58f3 100644 --- a/test/verbs/orderby-test.js +++ b/test/verbs/orderby-test.js @@ -1,6 +1,6 @@ import assert from 'node:assert'; import tableEqual from '../table-equal.js'; -import { desc, op, table } from '../../src/index.js'; +import { collate, desc, op, table } from '../../src/index.js'; describe('orderby', () => { it('orders a table', () => { @@ -23,6 +23,92 @@ describe('orderby', () => { tableEqual(dt, ordered, 'orderby data'); }); + it('orders a table with collate comparator', () => { + const cmp = new Intl.Collator('tr-TR').compare; + + const data = { + a: ['çilek', 'şeftali', 'erik', 'armut', 'üzüm', 'erik'], + b: [1, 2, 1, 2, 1, 2] + }; + + const dt = table(data).orderby(collate('a', cmp), desc('b')); + + const rows = []; + dt.scan(row => rows.push(row), true); + assert.deepEqual(rows, [3, 0, 5, 2, 1, 4], 'orderby scan'); + + tableEqual( + dt, + { + a: ['armut', 'çilek', 'erik', 'erik', 'şeftali', 'üzüm'], + b: [2, 1, 2, 1, 2, 1] + }, + 'orderby data' + ); + + tableEqual( + table(data).orderby(desc(collate('a', cmp)), desc('b')), + { + a: ['üzüm', 'şeftali', 'erik', 'erik', 'çilek', 'armut'], + b: [1, 2, 2, 1, 1, 2] + }, + 'orderby data' + ); + }); + + it('orders a table with collate locale', () => { + const data = { + a: ['çilek', 'şeftali', 'erik', 'armut', 'üzüm', 'erik'], + b: [1, 2, 1, 2, 1, 2] + }; + + const dt = table(data).orderby(collate('a', 'tr-TR'), desc('b')); + + const rows = []; + dt.scan(row => rows.push(row), true); + assert.deepEqual(rows, [3, 0, 5, 2, 1, 4], 'orderby scan'); + + tableEqual( + dt, + { + a: 
['armut', 'çilek', 'erik', 'erik', 'şeftali', 'üzüm'], + b: [2, 1, 2, 1, 2, 1] + }, + 'orderby data' + ); + + tableEqual( + table(data).orderby(desc(collate('a', 'tr-TR')), desc('b')), + { + a: ['üzüm', 'şeftali', 'erik', 'erik', 'çilek', 'armut'], + b: [1, 2, 2, 1, 1, 2] + }, + 'orderby data' + ); + }); + + it('orders a table with combined annotations', () => { + const data = { + a: ['çilek', 'şeftali', 'erik', 'armut', 'üzüm', 'erik'], + b: [1, 2, 1, 2, 1, 2] + }; + + const dt = table(data).orderby(desc(collate(d => d.a, 'tr-TR')), 'b'); + + const rows = []; + dt.scan(row => rows.push(row), true); + assert.deepEqual(rows, [4, 1, 2, 5, 0, 3], 'orderby scan'); + + tableEqual( + dt, + { + a: ['üzüm', 'şeftali', 'erik', 'erik', 'çilek', 'armut'], + b: [1, 2, 1, 2, 1, 2] + }, + 'orderby data' + ); + }); + it('supports aggregate functions', () => { const data = { a: [1, 2, 2, 3, 4, 5],