From 6019279203373e76c95ba258e70811d861932ced Mon Sep 17 00:00:00 2001 From: Michael Kay Date: Sun, 26 Jan 2025 12:21:47 +0000 Subject: [PATCH 1/2] WIP --- specifications/grammar-40/grammar.dtd | 70 +- specifications/grammar-40/xpath-grammar.xml | 688 ++++++-------------- style/assemble-spec.xsl | 8 +- style/grammar2spec.xsl | 130 +--- 4 files changed, 248 insertions(+), 648 deletions(-) diff --git a/specifications/grammar-40/grammar.dtd b/specifications/grammar-40/grammar.dtd index 788e19239..dbcf20312 100644 --- a/specifications/grammar-40/grammar.dtd +++ b/specifications/grammar-40/grammar.dtd @@ -6,16 +6,16 @@ or via an XSLT stylesheet or other transformation, may generate a parser compiler specification such as for YACC or JavaCC. -Norm and Scott moved this file, and added an explicit prefix, as part of the -transition toward a unified build process for last call and beyond. This involved -moving the location of the CVS repository, For earlier history information, -see /WWW/XML/Group/xpath-query-src/grammar.dtd + In 2025 Michael Kay simplified the DTD to remove parts that were no + longer used or maintained. =========================================================================--> - + @@ -146,44 +146,44 @@ see /WWW/XML/Group/xpath-query-src/grammar.dtd process-value (no | yes) #IMPLIED > - + - + + - + - + - + - + - + - + - + - + - + - +>--> - + @@ -580,13 +580,13 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral - + + + declare @@ -600,7 +600,7 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral - --> + updating @@ -1408,7 +1408,180 @@ ErrorVal ::= "$" VarName - + + + + or + + + + + + + + and + + + + + + + + + + + + + + + + + + + + otherwise + + + + + + + + || + + + + + + + + to + + + + + + + + + + + - + + + + + + + + + + * + × + div + ÷ + idiv + mod + + + + + + + + + + union + | + + + + + + + + + + intersect + except + + + + + + + + + instance + of + + + + + + + + treat + as + + + + + + + + castable + as + + + ? + + + + + + + + cast + as + + + ? + + + + + + + + -> + + + + + + + + + + + + + + + + + - + + + + + + + + + + + + => @@ -2121,22 +2294,21 @@ ErrorVal ::= "$" VarName - + - + /> - + > - - + </ + - + > @@ -2145,11 +2317,11 @@ ErrorVal ::= "$" VarName - + - + = @@ -2217,9 +2389,9 @@ ErrorVal ::= "$" VarName - + <!-- - + --> @@ -2232,13 +2404,13 @@ ErrorVal ::= "$" VarName - + <? - + ?> @@ -2248,9 +2420,9 @@ ErrorVal ::= "$" VarName - + <![CDATA[ - + ]]> @@ -2494,9 +2666,9 @@ ErrorVal ::= "$" VarName - + ``[ - + ]`` @@ -2514,11 +2686,11 @@ ErrorVal ::= "$" VarName - + `{ - + }` @@ -2880,7 +3052,7 @@ ErrorVal ::= "$" VarName - + - + + @@ -3097,13 +3269,13 @@ ErrorVal ::= "$" VarName - +--> - - + + without content - + --> @@ -4073,34 +4245,6 @@ ErrorVal ::= "$" VarName - - - - - - > - - - - /> - - - - </ - - - - - - - - > - - - - = - - " @@ -4125,14 +4269,6 @@ ErrorVal ::= "$" VarName }} - - <!-- - - - - --> - - @@ -4142,38 +4278,7 @@ ErrorVal ::= "$" VarName - - <? - - - - ?> - - - - <![CDATA[ - - - - - ]]> - - - - ``[ - - - - ]`` - - - - `{ - - - - }` - + - - - - - This is not an actual state, but rather a collection of - sub-terminals that are referenced by g:token rules. - In the file that is generated for input to JavaCC, - each becomes a "private regular expression". - (It would be better to make this distinction - in the g:token element.) - - - No state change. - - - - - - - - - - - - - - - - - - - - - - - - XXX - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - The "(:" token marks the beginning of an expression - Comment, and the ":)" token marks the end. This allows no special - interpretation of other characters in this state. - - - - - - - - No state change. - - - - - - - - This state allows attributes in the native XML syntax, - and marks the beginning of an element construction. Element - constructors also push the current state, popping it at the - conclusion of an end tag. In the START_TAG state, the string ">" is - recognized as a token which is associated with the transition to - the original state. - - - - - - - - - - - - - - No state change. - - - - - - - - This state allows content valid for attributes. The - character "{" marks a transition to the OPERAND state, i.e. the - start of an embedded expression, and the "}" character pops back to - the original state. To allow curly braces to be used as character - content, a double left or right curly brace is interpreted as a - single curly brace character. This state is the same as - APOS_ATTRIBUTE_CONTENT, except that apostrophes are allowed without - escaping, and an unescaped quote marks the end of the - state. - - - - - Transition to an Attribute Value - Template. - - - - - - No state change. - - - - - - - - - - - - This state is the same as QUOT_ATTRIBUTE_CONTENT, except - that quotes are allowed, and an unescaped apostrophe marks the end - of the state. - - - - - Transition to an Attribute Value - Template. - - - - - - No state change. - - - - - - - - - - - - This state allows XML-like content, without these - characters being misinterpreted as expressions. The character "{" - marks a transition to the OPERAND state, i.e. the start of an - embedded expression, and the "}" character pops back to the - ELEMENT_CONTENT state. To allow curly braces to be used as - character content, a double left or right curly brace is - interpreted as a single curly brace character. The string "</" - is interpreted as the beginning of an end tag, which is associated - with a transition to the END_TAG state. - - - - - Transition to an Element Value - Template. - - - - - - - - - - - - - - - - - No state change. - - - - - - - - - - - When the end tag is terminated, the state is popped to - the state that was pushed at the start of the corresponding start - tag. - - - - - No state change. - - - - - - - - - The "<--" token marks the beginning of an XML - Comment, and the "-->" token marks the end. This allows no special - interpretation of other characters in this state. - - - - - No state change. - - - - - - - - - - In this state, only patterns that are valid in a - processing instruction name are recognized. - - - - - - - - No state change. - - - - - - - In this state, only characters are that are valid in - processing instruction content are recognized. - - - - - No state change. - - - - - - - - In this state, only lexemes that are valid in a CDATA - section are recognized. - - - - - No state change. - - - - - - - - This state is entered in a a pragma expression, and recognizes a - QName that transits to a PRAGMA_3 state rather than a OPERATOR state. - - - - - - No state change. - - - - - - This state recognizes the space(s) required to preceed pragma contents. If you do not have - this, and try to recognize S in PRAGMA_3, then Char will be recognized first, - and the pragma production will not work properly. - - - - - - - - - - This state recognizes characters in pragma content, and transits out of this - state when a “#)” pattern is recognized. - - - - - - - - - - - - - - - - - - - - - - - + - + diff --git a/style/assemble-spec.xsl b/style/assemble-spec.xsl index c16355770..6ab07ab92 100644 --- a/style/assemble-spec.xsl +++ b/style/assemble-spec.xsl @@ -179,13 +179,7 @@ - - - - - - - + diff --git a/style/grammar2spec.xsl b/style/grammar2spec.xsl index dc77c5654..20cfb60be 100644 --- a/style/grammar2spec.xsl +++ b/style/grammar2spec.xsl @@ -298,135 +298,7 @@ - - - - - - - - - state- - - - - - - - - - - - - - - - - - - - - -

- -

-

- - - - - - - - - - - - - - - - -
PatternTransition To State
- - - - [ - - ] - - - - - - - - , - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - #lexaction-pushstate - - - #lexaction-pushstate-with-param - - - #lexaction-popstate - - - #lexaction-backup - - - - - - () - - -
- - - -
- - (maintain state) - -
-
-

- -
-
-
-
-
+ From b1ac4d9555dc1a9cc6c0d5909513a7082775c1ea Mon Sep 17 00:00:00 2001 From: Michael Kay Date: Sun, 26 Jan 2025 22:05:42 +0000 Subject: [PATCH 2/2] Deletion and simplification of grammar constructs --- specifications/grammar-40/xpath-grammar.xml | 833 +++---------------- specifications/xquery-40/src/expressions.xml | 2 +- style/grammar2spec.xsl | 34 +- 3 files changed, 113 insertions(+), 756 deletions(-) diff --git a/specifications/grammar-40/xpath-grammar.xml b/specifications/grammar-40/xpath-grammar.xml index a9aaf1929..2f2abe7c9 100644 --- a/specifications/grammar-40/xpath-grammar.xml +++ b/specifications/grammar-40/xpath-grammar.xml @@ -19,20 +19,12 @@ apply. - - - + @@ -127,13 +119,13 @@ apply. - + / - + // @@ -147,8 +139,8 @@ apply. - - + / + // @@ -168,13 +160,6 @@ apply. - )
@@ -189,8 +174,8 @@ apply. - - + / + // @@ -269,17 +254,11 @@ apply. - - - @@ -348,6 +327,9 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral + @@ -424,8 +406,8 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral order empty - - + greatest + least @@ -456,7 +438,7 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral decimal-format - + default @@ -482,7 +464,7 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral zero-digit digit pattern-separator - exponent-separator + exponent-separator
@@ -509,11 +491,6 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral -
@@ -549,10 +526,6 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral - @@ -608,7 +581,7 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral % - + ( @@ -656,7 +629,7 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral - + external := @@ -698,7 +671,7 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral - + external := @@ -716,7 +689,7 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral function - + ( @@ -727,7 +700,7 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral - + external @@ -762,16 +735,15 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral - + - - - + { + - + } @@ -814,7 +786,7 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral declare option - + @@ -852,10 +824,9 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral - - + @@ -868,37 +839,18 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral - + - - - - @@ -910,8 +862,6 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral - - @@ -925,7 +875,7 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral - + @@ -1195,11 +1145,9 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral order by - - - , + - + @@ -1210,15 +1158,15 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral - - + ascending + descending empty - - + greatest + least @@ -1370,15 +1318,6 @@ VersionDecl ::= "xquery" (("encoding" StringLiteral) | ("version" StringLiteral - @@ -1581,195 +1520,18 @@ ErrorVal ::= "$" VarName - - + => - + =!> - + @@ -1782,21 +1544,17 @@ ErrorVal ::= "$" VarName - + =?> - + = != - - + < <= > >= @@ -1833,9 +1591,9 @@ ErrorVal ::= "$" VarName - + { - + } @@ -1849,24 +1607,24 @@ ErrorVal ::= "$" VarName - + { - + } - + (# - + - + #) @@ -1890,14 +1648,14 @@ ErrorVal ::= "$" VarName - + / - + // @@ -1908,8 +1666,8 @@ ErrorVal ::= "$" VarName - - + / + // @@ -2023,7 +1781,7 @@ ErrorVal ::= "$" VarName - + @@ -2101,12 +1859,6 @@ ErrorVal ::= "$" VarName - - [ @@ -2154,18 +1906,12 @@ ErrorVal ::= "$" VarName - * - - @@ -2235,29 +1981,18 @@ ErrorVal ::= "$" VarName . - - ordered - - - - + unordered - - - - @@ -2293,8 +2028,8 @@ ErrorVal ::= "$" VarName - - + < + /> @@ -2304,7 +2039,7 @@ ErrorVal ::= "$" VarName </ - + @@ -2317,7 +2052,7 @@ ErrorVal ::= "$" VarName - + @@ -2397,8 +2132,11 @@ ErrorVal ::= "$" VarName - - + + + - + + @@ -2445,10 +2183,7 @@ ErrorVal ::= "$" VarName document - - - - + @@ -2462,9 +2197,9 @@ ErrorVal ::= "$" VarName - + { - + } @@ -2474,9 +2209,9 @@ ErrorVal ::= "$" VarName - + { - + } @@ -2489,20 +2224,6 @@ ErrorVal ::= "$" VarName - - @@ -2519,43 +2240,20 @@ ErrorVal ::= "$" VarName - - text - - - - + comment - - - - + processing-instruction - - - - - - + @@ -2566,12 +2264,10 @@ ErrorVal ::= "$" VarName - - # @@ -2598,11 +2294,11 @@ ErrorVal ::= "$" VarName map - + { - + } @@ -2735,7 +2431,7 @@ ErrorVal ::= "$" VarName ? * - + + @@ -2748,7 +2444,6 @@ ErrorVal ::= "$" VarName - @@ -2869,11 +2564,11 @@ ErrorVal ::= "$" VarName - + - + @@ -2886,7 +2581,7 @@ ErrorVal ::= "$" VarName - + @@ -3621,7 +3316,7 @@ ErrorVal ::= "$" VarName - + - - - - - - - @@ -3727,12 +3395,6 @@ ErrorVal ::= "$" VarName - - - - - - @@ -3742,217 +3404,7 @@ ErrorVal ::= "$" VarName - - - - - array - attribute - comment - document-node - element - empty-sequence - function - fn - if - item - map - namespace-node - node - processing-instruction - schema-attribute - schema-element - switch - text - typeswitch - while - - - - - - - - - - - - NaN - after - all - allowing - ancestor - ancestor-or-self - and - any - as - at - base-uri - before - block - boundary-space - by - case - cast - castable - catch - child - collation - construction - contains - content - context - copy - copy-namespaces - count - decimal-format - decimal-separator - declare - default - delete - descendant - descendant-or-self - diacritics - different - digit - distance - div - doc - document - element-with-id - else - empty - encoding - end - entire - eq - every - exactly - except - exit - exponent-separator - first - following - following-sibling - for - from - ft-option - ftand - ftnot - ftor - ge - group - grouping-separator - gt - id - idiv - import - in - infinity - inherit - insensitive - insert - instance - intersect - into - invoke - is - key - language - last - lax - le - let - levels - lowercase - lt - minus-sign - mod - modify - module - most - namespace - ne - next - no - no-inherit - no-preserve - nodes - not - occurs - of - only - option - or - order - ordered - ordering - paragraph - paragraphs - parent - pattern-separator - per-mille - percent - phrase - preceding - preceding-sibling - preserve - previous - relationship - rename - replace - return - returning - revalidation - same - satisfies - schema - score - self - sensitive - sentence - sentences - skip - sliding - some - stable - start - stemming - stop - strict - strip - then - thesaurus - times - to - transform - treat - try - tumbling - type - union - unordered - updating - uppercase - using - validate - value - variable - version - weight - when - where - wildcards - window - with - without - word - words - xquery - zero-digit - - @@ -4062,38 +3514,7 @@ ErrorVal ::= "$" VarName - - - } - - - - < - - - - & - - - - - - XML - XMl - XmL - Xml - xML - xMl - xmL - xml - - + - + - - / - - - - // - - - - greatest - - - - least - - - - external - - - - { - - - - } - - - - ascending - - - - descending - - - - < - - - - + - - - - - - - - - (# - - - - #) - - - + * - + : * :* @@ -4242,14 +3607,14 @@ ErrorVal ::= "$" VarName * : *: - + - + " - + " @@ -4269,15 +3634,7 @@ ErrorVal ::= "$" VarName }} - - - - - - - - - - + @@ -4404,7 +3761,7 @@ ErrorVal ::= "$" VarName - + @@ -4426,7 +3783,7 @@ ErrorVal ::= "$" VarName } - + & lt @@ -4481,7 +3838,7 @@ ErrorVal ::= "$" VarName --> - + @@ -4496,15 +3853,15 @@ ErrorVal ::= "$" VarName ; - + - + : - + @@ -4651,7 +4008,7 @@ ErrorVal ::= "$" VarName - + diff --git a/specifications/xquery-40/src/expressions.xml b/specifications/xquery-40/src/expressions.xml index d083add97..ccb67a788 100644 --- a/specifications/xquery-40/src/expressions.xml +++ b/specifications/xquery-40/src/expressions.xml @@ -16463,7 +16463,7 @@ element because it is defined by a - +

The semantics of FLWOR expressions are based on a concept called a tuple stream. - + @@ -758,7 +758,7 @@ - + - + - + - + - + - + - + + - + -->