From 01bdb2aa909efc9eec583d099ab8d53bb498669d Mon Sep 17 00:00:00 2001
From: Vitalii Perehonchuk <brute18@gmail.com>
Date: Wed, 22 Nov 2023 16:03:03 +0200
Subject: [PATCH] feat: spellcheck action (#2506)

* feat: spellcheck action

* fix: debug only changed markdown file

* fix: run spellcheck only on one input file

* chore: test translation change

* fix: try another pattern

* fix: try singleline pattern

* fix: consider index.md only

* fix: pull_request_target

* fix: debug all changes

* fix: own translation changes script

* fix: adjust script

* fix: fetch-depth 0

* fix: close unclosed else

* feat: two separate jobs

* fix: adjust output

* fix: actions/ prefix

* fix: remove exit commands

* fix: restore exit commands

* fix: debug translation_files

* fix: debug changed_files

* fix: translation_files grep

* fix: toJSON output

* fix: set job outputs

* fix: just languagetool

* fix: restore prepare

* fix: change test change

* feature: install reviewdog

* fix: remove macros before languagetool

* fix: install pandoc

* fix: plain text check

* fix: debug md2txt

* fix: unwrap

* fix: add UA

* fix: latest lt action

* fix: error on no translation

* fix: strip code blocks

* fix: debug file name

* fix: check translation text file exists

* fix: increase server startup duration

* fix: languagetool action from master

* fix: add reporter

* fix: try less sleep time

* fix: check broad pattern

* fix: broad .txt pattern

* fix: broad .txt pattern

* broader pattern

* fix: remove preprocessing

* feat: customized action

* fix: txt file

* fix: add UA

* fix: plain JAR

* fix: setup java 8

* feature: run full LT server

* feature: LT CLI

* fix: quiet unzip

* fix: unzip to folder

* fix: pwd debug

* fix: debug ls

* fix: set LT 6.3

* fix: different steps

* fix: outer dir

* feature: reviewdog

* feature: third job

* fix: install reviewdog with curl

* fix: install tmpl

* fix: set github output

* fix: paths

* fix: disabled-rules step

* fix: EOF wrap

* fix: sophisticated EOF

* fix: no sp in tmpl

* fix: no nl in tmpl

* fix: better tmpl call

* fix: install new tmpl

* fix: bigger JSON

* fix: has_matches false-true

* feature: mustache template

* fix: yq install path

* fix: set yq binary

* fix: yq amd64

* fix: yq -Poy

* save result in json

* fix: reorder

* fix: mkdir /bin

* fix: use JSON result

* fix: reviewdog in PATH

* fix: reviewdog setup action

* fix: restore yq

* fix: debug result.json

* fix: proper download path

* fix: message.txt

* feat: create mapping and message errorformat

* fix: js modules

* fix: correct result.json reference

* fix: debug mapping

* fix: proper txt file used

* fix: proper file arg order

* fix: reviewdog fails on error

* fix: try pull_request_target

* fix: default reporter

* fix: nofilter

* fix: complex sentence end search

* fix: parse int from JSON

* fix: correct line & column check

* fix: permission to write

* fix: permission to read

* fix: PR add comment

* fix: message-path

* fix: Better comment

* fix: better comment

* chore: restore base64 glossary

* fix: filter out deleted files

* fix: more granular steps

* fix: EOF

* fix: multiline changed-files run

* fix: changed_files with EOF

* fix: translation-check step

* fix: add missing fi

* fix: save changes in txt

* fix: suppress grep error

* fix: add noop command

* chore: add another text test

* fix: oneline translations

* fix: pass.sh

* fix: get translation_files

* chore: one more translation change

* fix: multiple translations can be

* fix: error on more than one translation

* fix: enable translation-check

* chore: remove one of textual changes

* chore: remove textual changes
---
 .github/workflows/spellcheck.yml | 154 +++++++++++++++++++++++++++++++
 pass.sh                          |   8 ++
 scripts/create-file-mapping.js   |  39 ++++++++
 scripts/create-message.js        |  80 ++++++++++++++++
 4 files changed, 281 insertions(+)
 create mode 100644 .github/workflows/spellcheck.yml
 create mode 100755 pass.sh
 create mode 100644 scripts/create-file-mapping.js
 create mode 100644 scripts/create-message.js

diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml
new file mode 100644
index 0000000000..49fe00062f
--- /dev/null
+++ b/.github/workflows/spellcheck.yml
@@ -0,0 +1,175 @@
+name: spellcheck
+on: [pull_request]
+jobs:
+  # Job 1: determine which single markdown file (if any) the pull request changes.
+  prepare-translation:
+    outputs:
+      translation: ${{ steps.translation-check.outputs.translation }}
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      # List files changed between the PR base and head; deleted files are filtered out.
+      - id: changed-files
+        run: git diff --diff-filter=d --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} > ./changed_files.txt
+      - name: Check translation
+        # Keep only the markdown files among the changed files.
+        # The "only one translation" rule is enforced by the translation-check step.
+        id: translation-changes
+        run: |
+          changed_files=$(cat ./changed_files.txt)
+          if [ -z "$changed_files" ]; then
+            echo "No files changed"
+            # translation-check reads this file unconditionally, so always create it
+            : > ./translation_files.txt
+          else
+            # grep exits non-zero when nothing matches; pass.sh runs instead so
+            # that a no-match does not abort the step
+            translation_files=$(echo "$changed_files" | grep -E '\.md$' || ./pass.sh)
+            echo "$translation_files"
+            echo "$translation_files" > ./translation_files.txt
+          fi
+      # Fail if more than one markdown file changed; otherwise expose the single
+      # path (if any) as the step output behind the job's `translation` output.
+      - id: translation-check
+        run: |
+          translation_files=$(cat ./translation_files.txt)
+          if [ -z "$translation_files" ]; then
+            echo "No translation files changed"
+          else
+            if [ "$(echo "$translation_files" | wc -l)" -gt 1 ]; then
+              echo "More than one translation file changed"
+              exit 1
+            else
+              echo "translation=$translation_files" >> "$GITHUB_OUTPUT"
+            fi
+          fi
+  # Job 2: reduce the changed markdown file to plain text and run LanguageTool on it.
+  languagetool:
+    if: ${{ needs.prepare-translation.outputs.translation != '' }}
+    name: runner / languagetool
+    needs: [prepare-translation]
+    outputs:
+      has_matches: ${{ steps.check-spelling.outputs.has_matches }}
+    runs-on: ubuntu-latest
+    # The path originates from the pull request, so it is passed to the scripts
+    # through the environment rather than interpolated into the shell source.
+    env:
+      TRANSLATION_FILE: ${{ needs.prepare-translation.outputs.translation }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Remove code blocks
+        run: |
+          sed -i '/```/,/```/d' "$TRANSLATION_FILE"
+      # Strip all macros
+      - id: strip-macros
+        run: |
+          # Remove all macros with no arguments (every occurrence on a line)
+          sed -i 's/{{[a-zA-Z_-]*}}//g' "$TRANSLATION_FILE"
+          # Replace macros with one argument by that argument
+          sed -i 's/{{[[:alnum:]_-]*(\("[[:alnum:]_-]*"\))}}/\1/g' "$TRANSLATION_FILE"
+          # Replace macros with two arguments by the second argument
+          sed -i 's/{{[[:alnum:]_-]*(\("[[:alnum:]_-]*"\), \("[[:alnum:]_-]*"\))}}/\2/g' "$TRANSLATION_FILE"
+          # Replace macros with three arguments by the second argument
+          sed -i 's/{{[[:alnum:]_-]*(\("[[:alnum:]_-]*"\), \("[[:alnum:]_-]*"\), \("[[:alnum:]_-]*"\))}}/\2/g' "$TRANSLATION_FILE"
+      # Reduce markdown to plain text
+      - run: sudo apt install pandoc -y
+      - id: md2txt
+        name: Convert markdown to plain text
+        run: |
+          # Replace only the trailing .md extension, not a ".md" elsewhere in the path
+          newFileName="${TRANSLATION_FILE%.md}.txt"
+          pandoc -f markdown -t plain -o "$newFileName" "$TRANSLATION_FILE"
+          echo "translation=$newFileName" >> "$GITHUB_OUTPUT"
+          echo "$newFileName"
+      # Error if translation file is not found
+      - if: steps.md2txt.outputs.translation == ''
+        name: Check translation is found
+        run: echo "No translation file found" && exit 1
+      - uses: actions/setup-java@v2
+        with:
+          distribution: "temurin"
+          java-version: "8"
+      - name: Download LanguageTool
+        run: wget -q https://languagetool.org/download/LanguageTool-6.3.zip
+      - name: Unzip LanguageTool
+        run: unzip -qq LanguageTool-6.3.zip
+      - name: Add spelling additions
+        run: |
+          cat uk_spelling_additions.txt >> ./LanguageTool-6.3/org/languagetool/resource/uk/hunspell/spelling.txt
+          cat uk_ignore_additions.txt >> ./LanguageTool-6.3/org/languagetool/resource/uk/hunspell/ignore.txt
+      - id: disabled-rules
+        name: Determine disabled rules
+        run: echo "disabled_rules=$(cat disabled_rules.txt | tr '\n' ',')" >> "$GITHUB_OUTPUT"
+      - id: check-spelling
+        name: Check spelling
+        env:
+          DISABLED_RULES: ${{ steps.disabled-rules.outputs.disabled_rules }}
+          TEXT_FILE: ${{ steps.md2txt.outputs.translation }}
+        run: |
+          cd LanguageTool-6.3
+          java -jar languagetool-commandline.jar -d "$DISABLED_RULES" -l uk --json "../$TEXT_FILE" > ../result.json
+          matches=$(cat ../result.json | jq '.matches')
+          # Check if matches equal []
+          echo "has_matches=$(if [ "$matches" == "[]" ]; then echo "false"; else echo "true"; fi)" >> "$GITHUB_OUTPUT"
+      - name: Upload result.json
+        uses: actions/upload-artifact@v3
+        with:
+          name: result
+          path: result.json
+      - name: Upload text file
+        uses: actions/upload-artifact@v3
+        with:
+          name: text
+          path: ${{ steps.md2txt.outputs.translation }}
+  # Job 3: publish the LanguageTool findings as a PR comment, then fail the check.
+  report-spelling:
+    if: ${{ needs.languagetool.outputs.has_matches != 'false'  }}
+    name: Report spelling
+    needs: [prepare-translation, languagetool]
+    permissions:
+      contents: read
+      pull-requests: write
+    runs-on: ubuntu-latest
+    env:
+      TRANSLATION_FILE: ${{ needs.prepare-translation.outputs.translation }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - uses: actions/download-artifact@v3
+        with:
+          name: result
+          path: .
+      - uses: actions/setup-node@v4
+        with:
+          node-version: "16"
+      - uses: actions/download-artifact@v3
+        with:
+          name: text
+          path: .
+      - name: Create mapping
+        # The text artifact is expected in the working directory under its base
+        # name (index.txt for index.md), so derive that name from the markdown path.
+        run: |
+          node scripts/create-file-mapping.js "./$(basename "${TRANSLATION_FILE%.md}").txt" "$TRANSLATION_FILE" > ./mapping.json && cat ./mapping.json
+      - id: create-message
+        name: Create message
+        run: |
+          node scripts/create-message.js "$TRANSLATION_FILE" > message.txt && cat ./message.txt
+      # - uses: reviewdog/action-setup@v1
+      # - name: Send results
+      #   run: |
+      #     export REVIEWDOG_GITHUB_API_TOKEN=${{ secrets.GITHUB_TOKEN }}
+      #     cat ./message.txt | reviewdog -efm="%A%f:%l:%c:%e:%k: %m%Z" -fail-on-error -reporter=github-pr-review -filter-mode=nofilter -name="LanguageTool" -level=info
+      - name: Send results
+        uses: mshick/add-pr-comment@v2
+        with:
+          message-id: ${{ needs.prepare-translation.outputs.translation }}
+          message-path: ./message.txt
+          refresh-message-position: true
+      - name: Exit with error
+        run: echo "Spelling errors found" && cat result.json && exit 1
diff --git a/pass.sh b/pass.sh
new file mode 100755
index 0000000000..94bf7287f5
--- /dev/null
+++ b/pass.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Copy stdin to stdout line by line, leaving each line untouched:
+# no backslash processing, whitespace trimming, word splitting or globbing.
+
+while IFS= read -r line || [ -n "$line" ]; do
+  printf '%s\n' "$line"
+done
diff --git a/scripts/create-file-mapping.js b/scripts/create-file-mapping.js
new file mode 100644
index 0000000000..5eaeccd171
--- /dev/null
+++ b/scripts/create-file-mapping.js
@@ -0,0 +1,54 @@
+// Creates a mapping from a text file to the markdown file it was created from.
+//
+// Usage: node scripts/create-file-mapping.js <text-file> <markdown-file>
+//
+// Prints a JSON object to stdout. Each key is the UTF-16 offset of a
+// non-whitespace character in the text file; its value is the UTF-16 offset at
+// which that character was found in the markdown file. Characters that cannot
+// be found are reported on stderr and left out of the mapping.
+
+import { readFileSync } from "fs";
+
+const TEXT_FILE_ARG_INDEX = 2;
+const MARKDOWN_FILE_ARG_INDEX = 3;
+const textFile = process.argv[TEXT_FILE_ARG_INDEX];
+const markdownFile = process.argv[MARKDOWN_FILE_ARG_INDEX];
+
+if (!textFile || !markdownFile) {
+  // stderr, not stdout: stdout carries the JSON mapping and may be redirected
+  console.error(
+    "Usage: node scripts/create-file-mapping.js <text-file> <markdown-file>",
+  );
+  process.exit(1);
+}
+const mapping = {};
+const text = readFileSync(textFile, "utf8");
+const markdown = readFileSync(markdownFile, "utf8");
+const WHITESPACE_REGEXP = /\s/;
+// Position in the markdown from which the next character is searched, so that
+// matches only ever move forward through the file.
+let markdownIndex = 0;
+// Offset of the current rune in the text, counted in UTF-16 code units so the
+// keys line up with JavaScript string indices (and with offsets reported by
+// LanguageTool, which are presumably Java string indices — TODO confirm).
+let textOffset = 0;
+// Iterate text by Unicode runes so surrogate pairs are looked up as a whole
+// eslint-disable-next-line no-restricted-syntax
+for (const rune of text) {
+  if (!WHITESPACE_REGEXP.test(rune)) {
+    const indexInMarkdown = markdown.indexOf(rune, markdownIndex);
+    if (indexInMarkdown === -1) {
+      console.error(
+        `Could not find rune "${rune}" in markdown file "${markdownFile}"`,
+      );
+    } else {
+      mapping[textOffset] = indexInMarkdown;
+      // Skip the whole rune (2 code units for astral characters)
+      markdownIndex = indexInMarkdown + rune.length;
+    }
+  }
+  textOffset += rune.length;
+}
+
+// Write mapping to stdout
+console.log(JSON.stringify(mapping));
diff --git a/scripts/create-message.js b/scripts/create-message.js
new file mode 100644
index 0000000000..1532393854
--- /dev/null
+++ b/scripts/create-message.js
@@ -0,0 +1,122 @@
+// Read LanguageTool output from ./result.json
+// and txt-to-markdown mapping from ./mapping.json
+// and print a markdown report section for each match
+// to stdout.
+//
+// Usage: node scripts/create-message.js <markdown-file>
+
+import { readFileSync } from "fs";
+
+const MARKDOWN_FILE_ARG_INDEX = 2;
+const markdownFile = process.argv[MARKDOWN_FILE_ARG_INDEX];
+
+if (!markdownFile) {
+  console.error("Usage: node scripts/create-message.js <markdown-file>");
+  process.exit(1);
+}
+
+const results = JSON.parse(readFileSync("./result.json", "utf8"));
+/**
+ * Offsets in the text file (keys) mapped to offsets in the markdown file
+ * (values). Characters without a counterpart have no entry.
+ * @type {Record<number, number>}
+ */
+const mapping = JSON.parse(readFileSync("./mapping.json", "utf8"));
+const markdown = readFileSync(markdownFile, "utf8");
+
+// Largest text offset present in the mapping (-1 when the mapping is empty).
+// Scanning beyond it can never find an entry, so it bounds the search below.
+const lastMappedOffset = Object.keys(mapping).reduce(
+  (max, key) => Math.max(max, Number.parseInt(key, 10)),
+  -1,
+);
+
+/**
+ * Find the markdown offset of the first mapped character at or after the
+ * given text offset.
+ *
+ * @param {number} textOffset Offset in the text file.
+ * @returns {number} Offset in the markdown file, or the markdown length when
+ *   no mapped character follows (for example when a match ends the text).
+ */
+function findMarkdownOffset(textOffset) {
+  for (let probe = textOffset; probe <= lastMappedOffset; probe += 1) {
+    const mapped = Number.parseInt(mapping[probe], 10);
+    if (!Number.isNaN(mapped)) {
+      return mapped;
+    }
+  }
+  return markdown.length;
+}
+
+/**
+ * Convert an offset in the markdown file to a 1-based line and column.
+ *
+ * @param {number} offset Index into the markdown string (UTF-16 code units,
+ *   which is what the mapping stores).
+ * @returns {[number, number]} Line and column, with columns counted in whole
+ *   characters.
+ */
+function convertOffsetToLineAndColumn(offset) {
+  let line = 1;
+  let column = 1;
+  let index = 0;
+  // eslint-disable-next-line no-restricted-syntax
+  for (const rune of markdown) {
+    if (index >= offset) {
+      break;
+    }
+    if (rune === "\n") {
+      line += 1;
+      column = 1;
+    } else {
+      column += 1;
+    }
+    index += rune.length;
+  }
+  return [line, column];
+}
+
+// eslint-disable-next-line no-restricted-syntax
+for (const match of results.matches) {
+  const { context, length, message, offset, replacements, rule, sentence } =
+    match;
+  // Either end of a match can fall on a character that has no mapping entry
+  // (whitespace, for instance), so both ends snap forward to the next mapped
+  // character. The search is bounded, so a match at the very end of the text
+  // still terminates.
+  const start = findMarkdownOffset(offset);
+  const end = findMarkdownOffset(offset + length);
+  console.error(start, end);
+  const [startLine, startColumn] = convertOffsetToLineAndColumn(start);
+  const [endLine, endColumn] = convertOffsetToLineAndColumn(end);
+  if (endLine < startLine) {
+    console.error(startLine, endLine);
+    throw new Error(`Line not found in source file: ${sentence}`);
+  }
+  if (endLine === startLine && endColumn < startColumn) {
+    console.error(startColumn, endColumn);
+    throw new Error(`Column not found in source file: ${sentence}`);
+  }
+  console.log(`## ${message}\n`);
+  // The code span is closed before the line break so the location forms its
+  // own paragraph.
+  console.log(`\`${markdownFile}:${startLine}:${startColumn}\`\n`);
+  console.log(`${rule.description}:\n`);
+  console.log(
+    `> ${context.text.slice(0, context.offset)}**${context.text.slice(
+      context.offset,
+      context.offset + context.length,
+    )}**${context.text.slice(context.offset + context.length)}`,
+  );
+  console.log("Варіанти заміни:");
+  if (replacements.length > 0) {
+    // eslint-disable-next-line no-restricted-syntax
+    for (const replacement of replacements) {
+      console.log(`- ${replacement.value}`);
+    }
+  } else {
+    console.log("Немає");
+  }
+  console.log("\n");
+}