From 47cb099ede3734b6e502ab434dbe9f5f7c91badb Mon Sep 17 00:00:00 2001 From: Michael Kay Date: Fri, 31 Jan 2025 09:49:18 +0000 Subject: [PATCH] Combine the "duplicates" and "combine" options --- .../src/function-catalog.xml | 158 ++++++++++-------- .../src/xpath-functions.xml | 8 +- 2 files changed, 88 insertions(+), 78 deletions(-) diff --git a/specifications/xpath-functions-40/src/function-catalog.xml b/specifications/xpath-functions-40/src/function-catalog.xml index a20013e2f..60455bcbb 100644 --- a/specifications/xpath-functions-40/src/function-catalog.xml +++ b/specifications/xpath-functions-40/src/function-catalog.xml @@ -23056,9 +23056,7 @@ map:of-pairs($maps =!> map:pairs(), -

An error is raised if both the combine and duplicates - options are present.

+

An error is raised if the value of @@ -23104,6 +23102,12 @@ map:of-pairs($maps =!> map:pairs(), { 0: "no", 1: "yes" } Returns a map with two entries + + map:merge(({ "red": 0 }, { "green": 1}, { "blue": 2 })) + => map:keys() + "red", "green", "blue" + Note the order of the result. + map:merge( ($week, { 7: "Unbekannt" }) @@ -23153,21 +23157,26 @@ map:of-pairs($maps =!> map:pairs(), entry that appears in the result is the sequence concatenation of the entries in the input maps, retaining order. - - map:merge(({ "red": 0 }, { "green": 1}, { "blue": 2 })) - => map:keys() - "red", "green", "blue" + map:merge( + ({ "oxygen": 0.22, "hydrogen": 0.68, "nitrogen": 0.1 }, + { "oxygen": 0.24, "hydrogen": 0.70, "nitrogen": 0.06 }), + { "duplicates": fn($a, $b){ max(($a, $b)) } }) + + { "oxygen": 0.24, "hydrogen": 0.70, "nitrogen": 0.1 } + The result map holds, for each distinct key, the maximum of the values + for that key in the input. + - +

For consistency with the new functions map:build and map:of-pairs, the handling of duplicates - may now be controlled by the combine option as an alternative - to the existing duplicates option.

+ may now be controlled by supplying a user-defined callback function as an alternative + to the fixed values for the earlier duplicates option.

@@ -23214,54 +23223,49 @@ map:of-pairs($maps =!> map:pairs(), taken if two entries in the input sequence have key values K1 and K2 where K1 and K2 are the same key. This option and the combine - option are mutually exclusive. + def="dt-same-key">same key. - xs:string - combine + (enum( "reject", "use-first", "use-last", "use-any", "combine") | fn(item()*, item()*) as item()*)? + "combine" - - Equivalent to specifying "combine": fn(){error(xs:QName("err:FOJS0003"), ...) - (the remaining arguments to fn:error being - ). + + Equivalent to supplying a function that raises a dynamic error + with error code "FOJS0003". The effect is that duplicate keys + result in an error. - Equivalent to specifying "combine": fn($a, $b){ $a }. + Equivalent to supplying the function fn($a, $b){ $a }. + The effect is that the first of the duplicates is chosen. - Equivalent to specifying "combine": fn($a, $b){ $b }. + Equivalent to supplying the function fn($a, $b){ $b }. + The effect is that the last of the duplicates is chosen. - Equivalent to specifying "combine": fn($a, $b){ one-of($a, $b) } + Equivalent to supplying the function fn($a, $b){ one-of($a, $b) } where one-of chooses either $a or $b in - an way. + an way. The effect is that it is + which of the duplicates is chosen. + + Equivalent to supplying the function fn($a, $b){ $a, $b } + (or equivalently, the function op(",")). + The effect is that the result contains the + of the values having the same key, retaining order. - Equivalent to specifying "combine": fn($a, $b){ $a, $b }. + + A function with signature fn(item()*, item()*) as item()*. + The function is called for any entry in the input sequence that has the + as a previous entry. The first argument + is the existing value associated with the key; the second argument + is the value associated with the key in the duplicate input entry, + and the result is the new value to be associated with the key. 
The effect + is cumulative: for example if there are three values X, Y, + and Z associated with the same key, and the supplied function is + F, then the result is an entry whose value is + X => F(Y) => F(Z). - - - - Supplies a function for handling duplicate keys: specifically, the action to be - taken if entries in the input sequence contain entries with key values - K1 and K2 where K1 and K2 are the - same key. This option and the duplicates - option are mutually exclusive. - - (fn($existing-value as item()*, $new-value as item()*) as item()*)? - fn($a, $b){ $a, $b } - - - A function with signature fn(item()*, item()*) as item()*. - The function is called for any entry in the input sequence that has the - as a previous entry. The first argument - is the existing value associated with the key; the second argument - is the value associated with the key in the duplicate input entry, - and the result is the new value to be associated with the key. - - @@ -23276,15 +23280,19 @@ let $one-of := fn($a, $b) { (: select either $a or $b at implementation option :) if (environment-variable("X")) then $a else $b } +let $duplicates := $options ? duplicates let $combine as function(item()*, item()*) as item()* := - { "reject": fn($a, $b){ error(xs:QName("err:FOJS0003")) }, - "use-first": fn($a, $b){ $a }, - "use-last": fn($a, $b){ $b }, - "use-any": fn($a, $b){ $one-of($a, $b) }, - "combine": fn($a, $b){ $a, $b } - } ? ($options?duplicates) - otherwise $options?combine - otherwise fn($a, $b) { $a, $b } + if ($duplicates instance of xs:string) + then + { "reject": fn($a, $b){ error(xs:QName("err:FOJS0003")) }, + "use-first": fn($a, $b){ $a }, + "use-last": fn($a, $b){ $b }, + "use-any": fn($a, $b){ $one-of($a, $b) }, + "combine": fn($a, $b){ $a, $b } + } ? 
$duplicates + else if ($duplicates instance of function(*)) + then $duplicates + else fn($a, $b) { $a, $b } return fold-left( $input, {}, fn ( $out, $next ) { let $newVal := @@ -23295,17 +23303,11 @@ return fold-left( $input, {}, }) -

An error is raised if both the combine and duplicates - options are present.

An error is raised if the value of $options indicates that duplicates are to be rejected, and a duplicate key is encountered.

-

An error is raised if the value of - $options includes an entry whose key is defined - in this specification, and whose value is not a permitted value for that key.

+
@@ -23393,20 +23395,20 @@ return fold-left( $input, {}, map:of-pairs( (map:pairs($week), { "key": 6, "value": "Sonnabend" }), - { "combine": fn($old, $new) { $new } } + { "duplicates": "use-last" } ) { 0: "Sonntag", 1: "Montag", 2: "Dienstag", 3: "Mittwoch", 4: "Donnerstag", 5: "Freitag", 6: "Sonnabend" } The value of the existing map is unchanged; the returned map contains all the entries from $week, with one entry replaced by a new entry. Both input maps contain an entry with the key 6; the - supplied $combine function ensures that the one used in the result + supplied $duplicates option ensures that the one used in the result is the one that comes last in the input sequence. map:of-pairs( (map:pairs($week), { "key": 6, "value": "Sonnabend" }), - { "combine": concat(?, '|', ?) } + { "duplicates": concat(?, '|', ?) } ) { 0: "Sonntag", 1: "Montag", 2: "Dienstag", 3: "Mittwoch", 4: "Donnerstag", 5: "Freitag", 6: "Samstag|Sonnabend" } @@ -23414,6 +23416,17 @@ return fold-left( $input, {}, from the two input maps, with a separator character. + + map:of-pairs( + ( map:pairs({ "England": 2, "Germany": 1 }), + map:pairs({ "France": 2, "Germany": 2 }), + map:pairs({ "England": 0, "France": 1 }) ), + { "duplicates": op("+") }) + + { "England": 2, "Germany": 3, "France": 3 } + The values for each distinct key are summed. + + map:of-pairs((map:pair("red": 0), map:pair("green": 1), map:pair("blue": 2 )) => map:keys() @@ -24604,8 +24617,8 @@ else map:put($map, $key, $action(()))

If the key is not already present in the target map, the processor adds a new key-value pair to the map, with that key and that value.

If the key is already present, the processor combines the new value for the key - with the existing value as determined by the combine - and duplicates options.

+ with the existing value as determined by the + duplicates option.

By default, when two duplicate entries occur:

A single combined entry will be present in the result.

@@ -24645,9 +24658,6 @@ else map:put($map, $key, $action(())) ) => map:of-pairs($options) -

An error is raised if both the combine and duplicates - options are present.

An error is raised if the value of @@ -24710,7 +24720,7 @@ else map:put($map, $key, $action(())) ("apple", "apricot", "banana", "blueberry", "cherry"), substring(?, 1, 1), string-length#1, - { "combine": op("+") } + { "duplicates": op("+") } ) { "a": 12, "b": 15, "c": 6 } Constructs a map where the key is the first character of an input item, and where the corresponding value @@ -24766,7 +24776,7 @@ return map:build($titles/title, fn($title) { $title/ix })

The following expression creates a map whose keys are employee @location values, and whose corresponding values represent the number of employees at each distinct location. Any employees that lack an @location attribute will be excluded from the result.

- map:build(//employee, fn { @location }, fn { 1 }, { "combine": op("+") }) + map:build(//employee, fn { @location }, fn { 1 }, { "duplicates": op("+") })

The following expression creates a map whose keys are employee @location values, and whose diff --git a/specifications/xpath-functions-40/src/xpath-functions.xml b/specifications/xpath-functions-40/src/xpath-functions.xml index 40e591837..9ddd5ce31 100644 --- a/specifications/xpath-functions-40/src/xpath-functions.xml +++ b/specifications/xpath-functions-40/src/xpath-functions.xml @@ -771,8 +771,7 @@ Michael Sperberg-McQueen (1954–2024).

The type of the options parameter in the function signature is always given as map(*).

Although option names are described above as strings, the actual key may be - any value that compares equal to the required string (using the eq operator - with Unicode codepoint collation; or equivalently, the fn:atomic-equal relation). + any value that is the same key as the required string. For example, instances of xs:untypedAtomic or xs:anyURI are equally acceptable.

This means that the implementation of the function can check for the @@ -805,6 +804,7 @@ Michael Sperberg-McQueen (1954–2024).

A dynamic error occurs if the supplied value after conversion is not one of the permitted values for the option in question: the error codes for this error are defined in the specification of each function.

+

It is the responsibility of each function implementation to invoke this conversion; it does not happen automatically as a consequence of the function-calling rules.

@@ -13278,12 +13278,12 @@ ISBN 0 521 77752 6.

Raised when the digits in the string supplied to fn:parse-integer are not in the range appropriate to the chosen radix.

-

Raised if an inconsistent set of options is supplied in an option map.

-
+ -->

Raised by regular expression functions such as fn:matches and fn:replace if the