-- docs-src/assets/polygen.lang

--[[*****************************************************************************
    *                                                                           *
    *                     Polygen Grammars Syntax Definition                    *
    *                                                                           *
    *               v1.0.1 (2018/01/18) | Highlight v3.41 | Lua 5.3             *
    *                                                                           *
    *                             by Tristano Ajmone                            *
    *                                                                           *
    *****************************************************************************
    Associated file extensions: ".grm"
    Syntax type: EBNF
    -----------------------------------------------------------------------------
    Polygen is a cross-platform command line tool for generating random sentences
    according to a grammar definition -- i.e., following custom syntactical and
    lexical rules. It takes an ASCII text file ("*.grm") as source program defining
    a grammar by means of EBNF-like probabilistic rules and executes it. At each
    execution, the grammar will be run against a different random seed, therefore
    producing a different text output.

    The main goal of Polygen is to generate cursory nonsense for entertainment;
    or, in the words of its author, "a first effort towards satyre in computer
    science". Polygen was created by Alvise Spanò.

    Polygen Website and GitHub repository:
        http://www.polygen.org/
        https://github.com/alvisespano/Polygen

    Polygen grammars documentation (in Italian):
        http://www.polygen.org/it/manuale

    An outdated English translation (probably from an earlier version of Polygen,
    since it doesn't cover the full syntax) can be found at:
        http://lapo.it/polygen/polygen-1.0.6-20040705-doc.zip
    -----------------------------------------------------------------------------
    Written by Tristano Ajmone:
        <tajmone@gmail.com>
        https://github.com/tajmone
    Released into the public domain according to the Unlicense terms:
        http://unlicense.org/
    -----------------------------------------------------------------------------
--]]
-- Human-readable name of the syntax.
Description       = "Polygen"

-- Polygen grammars are case-sensitive and must never be re-indented.
EnableIndentation = false
IgnoreCase        = false
---------------------------------------------------------------------------------
-- DISABLE/OVERRIDE UNUSED SYNTAX ELEMENTS
---------------------------------------------------------------------------------
-- A RegEx that can never match: anchored at the start of the subject, it asks
-- for a character that simultaneously is not 'x' (lookahead) and is 'x'.
NEVER_MATCH_RE = [=[ \A(?!x)x ]=]

-- Numbers are just text in Polygen, so no token is ever captured as a number.
Digits = NEVER_MATCH_RE

-- Highlight's default Identifiers RegEx prevents capturing the Epsilon operator
-- ('_'). In this syntax every identifier is captured through a RegEx Keyword and
-- no Keywords lists are used, so Identifiers can simply be disabled by defining
-- them as a never-matching RegEx.
-- NOTE: Defining Identifiers as a never-matching RegEx prevents using Keywords
--       lists (the parser will fail to capture them).
Identifiers = NEVER_MATCH_RE

-- ==============================================================================
--                                    COMMENTS
-- ==============================================================================
-- OCaml style comments, no nesting: (* ...COMMENT BLOCK... *)
-- Delimiters of the only comment form defined here: a non-nesting block comment.
local COMMENT_OPEN  = [=[ \(\* ]=] -- Comment start: '(*'
local COMMENT_CLOSE = [=[ \*\) ]=] -- Comment end:   '*)'

Comments = {
  {
    Block     = true,
    Nested    = false,
    Delimiter = { COMMENT_OPEN, COMMENT_CLOSE },
  },
}
-- ==============================================================================
--                                    STRINGS
-- ==============================================================================
Strings = {
  -------------------------------------------------------------------------------
  --                             STRING DELIMITERS
  -------------------------------------------------------------------------------
  -- Polygen recognises only double quotes as string delimiter: "...STRING..."
  Delimiter = [=[ " ]=],
  -------------------------------------------------------------------------------
  --                              ESCAPE SEQUENCES
  -------------------------------------------------------------------------------
  -- Escape sequences can occur only inside strings; this restriction is enforced
  -- by the custom OnStateChange() hook-function defined further on in this file.
  -- Valid escape sequences:
  --
  --    \\    Backslash
  --    \"    Quote
  --    \n    New line
  --    \r    Carriage return
  --    \b    Backspace
  --    \t    Tab
  --    \nnn  ASCII decimal code (must always be three digits)
  Escape = [=[ \\\d{3}|\\[nrbt\\"] ]=],
}
--[[=============================================================================
                                     OPERATORS
    =============================================================================
    ::=   :=    :   ;   ^   .   ,   _   |   +   -   >   <   \
    >>  <<  (   )   [   ]   {   }

--]]
-- Single RegEx alternation capturing every Polygen operator character.
--   * '::?=' covers both '::=' and ':=' and is listed before the lone ':'
--     alternative, so the definition operators are tried before a bare colon.
--   * '>>' and '<<' have no alternative of their own: each '>' / '<' character
--     is matched by the single-character alternatives (presumably yielding two
--     consecutive operator tokens -- verify against Highlight's output).
Operators=[=[ ::?=|\^|\.|:|\+|-|>|<|\(|\)|\[|]|\{|}|\||,|;|_|\\ ]=]
-- ==============================================================================
--                                    KEYWORDS
-- ==============================================================================
Keywords = {
  -- KNOWN ISSUES:
  --   * An unspaced non-terminal symbol definition is parsed as a label because
  --     of the colon (eg: 'S::=' and 'X:=', instead of 'S ::=' and 'X :=').
  --   * A label with spaces before the colon is parsed as a non-terminal symbol
  --     (eg: 'Label :' instead of 'Label:').
  -- Both usages are considered bad (albeit valid) style in Polygen grammars and
  -- are rarely found in actual grammars, so it's not worth implementing complex
  -- RegExs to capture such edge cases.

  -------------------------------------------------------------------------------
  -- Id 1: Non-Terminal Symbol
  -------------------------------------------------------------------------------
  -- An identifier starting with an uppercase letter that is neither preceded by
  -- a dot (that would be a label selector) nor followed by a colon (that would
  -- be a label definition).
  { Id = 1, Group = 1,
    Regex = [=[ (?<!\.)([A-Z][A-Za-z0-9]*)\b(?!:) ]=] },

  -------------------------------------------------------------------------------
  -- Id 2: Label Identifier
  -------------------------------------------------------------------------------
  -- Captures a label identifier at definition time:   LABEL: <..definition...>
  -- Only the identifier (group 1) is selected; the colon is left out of it.
  { Id = 2, Group = 1,
    Regex = [=[ ([A-Za-z0-9]+)(?::) ]=] },

  -------------------------------------------------------------------------------
  -- Id 3: Label Selector
  -------------------------------------------------------------------------------
  -- Either a dot followed by a single label, or a dot followed by a group of
  -- labels within round brackets:
  --     .LABEL    .(LABEL1|LABEL2)    .(++LABEL1|-LABEL2)
  -- The dot selector is excluded from the match; the whole bracketed group is
  -- treated as a single keyword (as in the PolyGUI tool).
  { Id = 3, Group = 1,
    Regex = [=[ (?:\.)(\(.*?\)|[A-Za-z0-9]+) ]=] },
}
-- ******************************************************************************
-- *                                                                            *
-- *                           CUSTOM HOOK-FUNCTIONS                            *
-- *                                                                            *
-- ******************************************************************************

-- ==============================================================================
--                      Escape Sequences Only Inside String
-- ==============================================================================
function OnStateChange(oldState, newState, token, kwgroup)
--  Hook that discards escape sequences found outside of strings.
--  Based on André Simon's reply to Issue #23:
--  https://github.com/andre-simon/highlight/issues/23#issuecomment-332002639
--
--  Returns `newState` unchanged, except when the parser is about to enter the
--  escape-sequence state from any state other than a string: in that case
--  HL_STANDARD is returned instead. `token` and `kwgroup` are not used.
  local enteringEscape = (newState == HL_ESC_SEQ)
  local comingFromString = (oldState == HL_STRING)

  -- Accept every transition that is not an escape sequence, as well as escape
  -- sequences that begin inside a string.
  if comingFromString or not enteringEscape then
    return newState
  end

  -- Escape sequence outside a string: treat the token as plain text.
  return HL_STANDARD
end
--[[==============================================================================
                                        CHANGELOG
==================================================================================
 v1.0.1 (2018/01/18) | Highlight v3.41
    - Changed "PolyGen" to "Polygen" (the author has now officially adopted the
      latter syntax).
 v1.0.0 (2018/01/04) | Highlight v3.41
    - First release.
--]]