cucumber · temyers · Nov 8, 2022 · Nov 25, 2022 · Nov 25, 2022 · Nov 28, 2022
diff --git a/javascript/Makefile b/javascript/Makefile
@@ -5,6 +5,7 @@ GHERKIN_PARSER = src/Parser.ts
 GHERKIN_RAZOR = gherkin-javascript.razor
 SOURCE_FILES = $(shell find . -name "*.js" | grep -v $(GHERKIN_PARSER))
 
+# HELP - is this correct now?
 GHERKIN = npx gherkin-javascript
 
 GOOD_FEATURE_FILES = $(shell find ../testdata/good -name "*.feature" -o -name "*.feature.md")

diff --git a/javascript/src/GherkinInMarkdownTokenMatcher.ts b/javascript/src/GherkinInMarkdownTokenMatcher.ts
@@ -130,7 +130,13 @@ export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher<Toke
     let result = false
     if (token.line.startsWith('|')) {
       const tableCells = token.line.getTableCells()
-      if (this.isGfmTableSeparator(tableCells)) result = true
+      if (this.isGfmTableSeparator(tableCells)) {
+        // Maintain consistency with Python implementation
+        token.matchedType = TokenType.Comment
+        token.matchedText = undefined
+        token.matchedIndent=0
+        result = true
+      }
     }
     return this.setTokenMatched(token, null, result)
   }

diff --git a/javascript/test/GherkinInMarkdownTokenMatcherTest.ts b/javascript/test/GherkinInMarkdownTokenMatcherTest.ts
@@ -34,35 +34,58 @@ describe('GherkinInMarkdownTokenMatcher', function () {
     assert.strictEqual(token.matchedText, 'hello')
   })
 
+  it('matches FeatureLine without the Feature: keyword', () => {
+    const line = new GherkinLine('# hello', location.line)
+    const token = new Token(line, location)
+    assert(tm.match_FeatureLine(token))
+    assert.strictEqual(token.matchedType, TokenType.FeatureLine)
+    assert.strictEqual(token.matchedKeyword, undefined)
+    assert.strictEqual(token.matchedText, '# hello')
+  })
+
   it('matches bullet Step', () => {
     const line = new GherkinLine('  *  Given I have 3 cukes', location.line)
     const token = new Token(line, location)
     assert(tm.match_StepLine(token))
     assert.strictEqual(token.matchedType, TokenType.StepLine)
     assert.strictEqual(token.matchedKeyword, 'Given ')
+    assert.strictEqual(token.matchedKeywordType, 'Context')
     assert.strictEqual(token.matchedText, 'I have 3 cukes')
     assert.strictEqual(token.location.column, 6)
   })
-
+  
   it('matches plus Step', () => {
     const line = new GherkinLine('  +  Given I have 3 cukes', location.line)
     const token = new Token(line, location)
     assert(tm.match_StepLine(token))
     assert.strictEqual(token.matchedType, TokenType.StepLine)
     assert.strictEqual(token.matchedKeyword, 'Given ')
+    assert.strictEqual(token.matchedKeywordType, 'Context')
     assert.strictEqual(token.matchedText, 'I have 3 cukes')
     assert.strictEqual(token.location.column, 6)
   })
-
+  
   it('matches hyphen Step', () => {
     const line = new GherkinLine('  -  Given I have 3 cukes', location.line)
     const token = new Token(line, location)
     assert(tm.match_StepLine(token))
     assert.strictEqual(token.matchedType, TokenType.StepLine)
     assert.strictEqual(token.matchedKeyword, 'Given ')
+    assert.strictEqual(token.matchedKeywordType, 'Context')
     assert.strictEqual(token.matchedText, 'I have 3 cukes')
     assert.strictEqual(token.location.column, 6)
   })
+
+  it('matches a when Step', () => {
+    const line = new GherkinLine('  -  When I do something', location.line)
+    const token = new Token(line, location)
+    assert(tm.match_StepLine(token))
+    assert.strictEqual(token.matchedType, TokenType.StepLine)
+    assert.strictEqual(token.matchedKeyword, 'When ')
+    assert.strictEqual(token.matchedKeywordType, 'Action')
+    assert.strictEqual(token.matchedText, 'I do something')
+    assert.strictEqual(token.location.column, 6)
+  })
 
   it('matches arbitrary text as Other', () => {
     const line = new GherkinLine('Whatever', location.line)
@@ -186,4 +209,21 @@ describe('GherkinInMarkdownTokenMatcher', function () {
     ]
     assert.deepStrictEqual(t.matchedItems, expectedItems)
   })
+
+  it('matches arbitrary text as Empty after the FeatureLine has already been matched', () => {
+    // White Box testing - implementation detail...
+    // Given the FeatureLine has already been matched
+    const tFeatureLine = new Token(new GherkinLine('# something arbitrary', location.line), location);
+    assert(tm.match_FeatureLine(tFeatureLine))
+
+
+    const t = new Token(new GherkinLine('arbitrary text', location.line), location);
+    // (tm as any).matchedFeatureLine = true
+    assert(tm.match_Empty(t))
+    assert.strictEqual(t.matchedType, TokenType.Empty)
+    const expectedItems: Item[] =undefined
+    assert.deepStrictEqual(t.matchedItems, expectedItems)
+    assert.strictEqual(t.matchedKeyword, undefined)
+    assert.strictEqual(t.matchedText, undefined)
+  } )
 })
diff --git a/javascript/test/ParserTest.ts b/javascript/test/ParserTest.ts
@@ -6,16 +6,17 @@ import GherkinClassicTokenMatcher from '../src/GherkinClassicTokenMatcher'
 import AstNode from '../src/AstNode'
 import generateMessages from '../src/generateMessages'
 import GherkinInMarkdownTokenMatcher from '../src/GherkinInMarkdownTokenMatcher'
+import { StepKeywordType } from '@cucumber/messages'
 
 describe('Parser', function () {
   describe('with Gherkin Classic', () => {
     let parser: Parser<AstNode>
     beforeEach(
       () =>
-        (parser = new Parser<AstNode>(
-          new AstBuilder(messages.IdGenerator.incrementing()),
-          new GherkinClassicTokenMatcher()
-        ))
+      (parser = new Parser<AstNode>(
+        new AstBuilder(messages.IdGenerator.incrementing()),
+        new GherkinClassicTokenMatcher()
+      ))
     )
 
     it('parses a simple feature', function () {
@@ -93,20 +94,20 @@ describe('Parser', function () {
       try {
         parser.parse(
           '# a comment\n' +
-            'Feature: Foo\n' +
-            '  Scenario: Bar\n' +
-            '    Given x\n' +
-            '      ```\n' +
-            '      unclosed docstring\n'
+          'Feature: Foo\n' +
+          '  Scenario: Bar\n' +
+          '    Given x\n' +
+          '      ```\n' +
+          '      unclosed docstring\n'
         )
       } catch (expected) {
         ast = parser.parse(
           'Feature: Foo\n' +
-            '  Scenario: Bar\n' +
-            '    Given x\n' +
-            '      """\n' +
-            '      closed docstring\n' +
-            '      """'
+          '  Scenario: Bar\n' +
+          '    Given x\n' +
+          '      """\n' +
+          '      closed docstring\n' +
+          '      """'
         )
       }
 
@@ -155,11 +156,11 @@ describe('Parser', function () {
     it('interpolates data tables', function () {
       const envelopes = generateMessages(
         'Feature: Foo\n' +
-          '  Scenario Outline: Parenthesis\n' +
-          '    Given the thing <is (not) triggered> and has <value>\n' +
-          '  Examples:\n' +
-          '    | is (not) triggered | value |\n' +
-          '    | is triggered       | foo   |\n ',
+        '  Scenario Outline: Parenthesis\n' +
+        '    Given the thing <is (not) triggered> and has <value>\n' +
+        '  Examples:\n' +
+        '    | is (not) triggered | value |\n' +
+        '    | is triggered       | foo   |\n ',
         '',
         messages.SourceMediaType.TEXT_X_CUCUMBER_GHERKIN_PLAIN,
         { includePickles: true, newId: messages.IdGenerator.incrementing() }
@@ -194,10 +195,10 @@ describe('Parser', function () {
     let parser: Parser<AstNode>
     beforeEach(
       () =>
-        (parser = new Parser<AstNode>(
-          new AstBuilder(messages.IdGenerator.incrementing()),
-          new GherkinInMarkdownTokenMatcher()
-        ))
+      (parser = new Parser<AstNode>(
+        new AstBuilder(messages.IdGenerator.incrementing()),
+        new GherkinInMarkdownTokenMatcher()
+      ))
     )
 
     it('does not parse a feature description', function () {
@@ -308,5 +309,128 @@ description
 
       assert.strictEqual(pickle.steps[0].argument.docString.content, '```what')
     })
+
+    it("parses Markdown data tables with headers", () => {
+      const markdown = `## Feature: DataTables
+
+### Scenario: minimalistic
+
+* Given a simple data table 
+  | foo | bar |
+  | --- | --- |
+  | boz | boo |
+`
+      const ast = parser.parse(markdown)
+      const gherkinDocument: messages.GherkinDocument = {
+        "comments": [
+          {
+            "location": {
+              "column": 3,
+              "line": 7
+            },
+            "text": undefined
+          }
+        ],
+        "feature": {
+          "children": [
+            {
+              "scenario": {
+                "description": "",
+                "examples": [],
+                "id": "3",
+                "keyword": "Scenario",
+                "location": {
+                  "column": 5,
+                  "line": 3
+                },
+                "name": "minimalistic",
+                "steps": [
+                  {
+                    "dataTable": {
+                      "location": {
+                        "column": 3,
+                        "line": 6
+                      },
+                      "rows": [
+                        {
+                          "cells": [
+                            {
+                              "location": {
+                                "column": 5,
+                                "line": 6
+                              },
+                              "value": "foo"
+                            },
+                            {
+                              "location": {
+                                "column": 11,
+                                "line": 6
+                              },
+                              "value": "bar"
+                            }
+                          ],
+                          "id": "0",
+                          "location": {
+                            "column": 3,
+                            "line": 6
+                          }
+                        },
+                        {
+                          "cells": [
+                            {
+                              "location": {
+                                "column": 5,
+                                "line": 8
+                              },
+                              "value": "boz"
+                            },
+                            {
+                              "location": {
+                                "column": 11,
+                                "line": 8
+                              },
+                              "value": "boo"
+                            }
+                          ],
+                          "id": "1",
+                          "location": {
+                            "column": 3,
+                            "line": 8
+                          }
+                        }
+                      ]
+                    },
+                    "id": "2",
+                    docString: undefined,
+                    "keyword": "Given ",
+                    "keywordType": StepKeywordType.CONTEXT,
+                    "location": {
+                      "column": 3,
+                      "line": 5
+                    },
+                    "text": "a simple data table"
+                  }
+                ],
+                "tags": []
+              }
+            }
+          ],
+          "description": "",
+          "keyword": "Feature",
+          "language": "en",
+          "location": {
+            "column": 4,
+            "line": 1
+          },
+          "name": "DataTables",
+          "tags": []
+        }
+      }
+      assert.deepStrictEqual(ast, gherkinDocument)
+    })
+
   })
+
+
 })
+
diff --git a/python/Makefile b/python/Makefile
@@ -8,8 +8,8 @@ SOURCE_FILES = $(shell find . -name "*.py" | grep -v $(GHERKIN_PARSER))
 GHERKIN = bin/gherkin
 GHERKIN_GENERATE_TOKENS = bin/gherkin-generate-tokens
 
-GOOD_FEATURE_FILES = $(shell find ../testdata/good -name "*.feature")
-BAD_FEATURE_FILES  = $(shell find ../testdata/bad -name "*.feature")
+GOOD_FEATURE_FILES = $(shell find ../testdata/good -name "*.feature" -o -name "*.feature.md")
+BAD_FEATURE_FILES  = $(shell find ../testdata/bad -name "*.feature" -o -name "*.feature.md")
 
 TOKENS       = $(patsubst ../testdata/%,acceptance/testdata/%.tokens,$(GOOD_FEATURE_FILES))
 ASTS         = $(patsubst ../testdata/%,acceptance/testdata/%.ast.ndjson,$(GOOD_FEATURE_FILES))

diff --git a/python/bin/gherkin_generate_tokens.py b/python/bin/gherkin_generate_tokens.py
@@ -7,12 +7,18 @@
 from gherkin.token_scanner import TokenScanner
 from gherkin.token_formatter_builder import TokenFormatterBuilder
 from gherkin.parser import Parser
+from gherkin.token_matcher_markdown import GherkinInMarkdownTokenMatcher
 
 files = sys.argv[1:]
 if sys.version_info < (3, 0) and os.name != 'nt':  # for Python2 unless on Windows native
     UTF8Writer = codecs.getwriter('utf8')
     sys.stdout = UTF8Writer(sys.stdout)
+
 parser = Parser(TokenFormatterBuilder())
 for file in files:
     scanner = TokenScanner(file)
-    print(parser.parse(scanner))
+
+    if(file.endswith('.md')):
+        print(parser.parse(scanner, GherkinInMarkdownTokenMatcher()) )
+    else:
+        print(parser.parse(scanner))
diff --git a/python/gherkin/stream/gherkin_events.py b/python/gherkin/stream/gherkin_events.py
@@ -3,6 +3,7 @@
 from gherkin.pickles.compiler import Compiler
 from gherkin.errors import ParserError, CompositeParserException
 from gherkin.stream.id_generator import IdGenerator
+from gherkin.token_matcher_markdown import GherkinInMarkdownTokenMatcher
 
 def create_errors(errors, uri):
     for error in errors:
@@ -28,7 +29,10 @@ def enum(self, source_event):
         source = source_event['source']['data']
 
         try:
-            gherkin_document = self.parser.parse(source)
+            matcher=None
+            if(uri.endswith('.md')):
+                matcher=GherkinInMarkdownTokenMatcher()
+            gherkin_document = self.parser.parse(source, matcher)
             gherkin_document['uri'] = uri
 
             if (self.options.print_source):

diff --git a/python/gherkin/stream/source_events.py b/python/gherkin/stream/source_events.py
@@ -5,11 +5,18 @@ def source_event(path):
         'source': {
             'uri': path,
             'data': io.open(path, 'r', encoding='utf8', newline='').read(),
-            'mediaType': 'text/x.cucumber.gherkin+plain'
+            'mediaType': _media_type(path)
         }
     }
     return event
 
+
+def _media_type(path):
+    # Only ".feature.md" is Markdown; default to plain so the result is never None.
+    if path.endswith(".feature.md"):
+        return 'text/x.cucumber.gherkin+markdown'
+    return 'text/x.cucumber.gherkin+plain'
+
 class SourceEvents:
     def __init__(self, paths):
         self.paths = paths