From 01bdb2aa909efc9eec583d099ab8d53bb498669d Mon Sep 17 00:00:00 2001 From: Vitalii Perehonchuk Date: Wed, 22 Nov 2023 16:03:03 +0200 Subject: [PATCH] feat: spellcheck action (#2506) * feat: spellcheck action * fix: debug only changed markdown file * fix: run spellcheck only on one input file * chore: test translation change * fix: try another pattern * fix: try singleline pattern * fix: consider index.md only * fix: pull_request_target * fix: debug all changes * fix: own translation changes script * fix: adjust script * fix: fetch-depth 0 * fix: close unclosed else * feat: two separate jobs * fix: adjust output * fix: actions/ prefix * fix: remove exit commands * fix: restore exit commands * fix: debug translation_files * fix: debug changed_files * fix: translation_files grep * fix: toJSON output * fix: set job outputs * fix: just languagetool * fix: restore prepare * fix: change test change * feature: install reviewdog * fix: remove macros before languagetool * fix: install pandoc * fix: plain text check * fix: debug md2txt * fix: unwrap * fix: add UA * fix: latest lt action * fix: error on no translation * fix: strip code blocks * fix: debug file name * fix: check translation text file exists * fix: increase server startup duration * fix: languagetool action from master * fix: add reporter * fix: try less sleep time * fix: check broad pattern * fix: broad .txt pattern * fix: broad .txt pattern * broader pattern * fix: remove preprocessing * feat: customized action * fix: txt file * fix: add UA * fix: plain JAR * fix: setup java 8 * feature: run full LT server * feature: LT CLI * fix: quiet unzip * fix: unzip to folder * fix: pwd debug * fix: debug ls * fix: set LT 6.3 * fix: different steps * fix: outer dir * feature: reviewdog * feature: third job * fix: install reviewdog with curl * fix: install tmpl * fix: set github output * fix: paths * fix: disabled-rules step * fix: EOF wrap * fix: sophisticated EOF * fix: no sp in tmpl * fix: no nl in tmpl * fix: better 
tmpl call * fix: install new tmpl * fix: bigger JSON * fix: has_matches false-true * feature: mustache template * fix: yq install path * fix: set yq binary * fix: yq amd64 * fix: yq -Poy * save result in json * fix: reorder * fix: mkdir /bin * fix: use JSON result * fix: reviewdog in PATH * fix: reviewdog setup action * fix: restore yq * fix: debug result.json * fix: proper download path * fix: message.txt * feat: create mapping and message errorformat * fix: js modules * fix: correct result.json reference * fix: debug mapping * fix: proper txt file used * fix: proper file arg order * fix: reviewdog fails on error * fix: try pull_request_target * fix: default reporter * fix: nofilter * fix: complex sentence end search * fix: parse int from JSON * fix: correct line & column check * fix: permission to write * fix: permission to read * fix: PR add comment * fix: message-path * fix: Better comment * fix: better comment * chore: restore base64 glossary * fix: filter out deleted files * fix: more granular steps * fix: EOF * fix: multiline changed-files run * fix: changed_files with EOF * fix: translation-check step * fix: add missing fi * fix: save changes in txt * fix: suppress grep error * fix: add noop command * chore: add another text test * fix: oneline translations * fix: pass.sh * fix: get translation_files * chore: one more translation change * fix: multiple translations can be * fix: error on more than one translation * fix: enable translation-check * chore: remove one of textual changes * chore: remove textual changes --- .github/workflows/spellcheck.yml | 154 +++++++++++++++++++++++++++++++ pass.sh | 8 ++ scripts/create-file-mapping.js | 39 ++++++++ scripts/create-message.js | 80 ++++++++++++++++ 4 files changed, 281 insertions(+) create mode 100644 .github/workflows/spellcheck.yml create mode 100755 pass.sh create mode 100644 scripts/create-file-mapping.js create mode 100644 scripts/create-message.js diff --git a/.github/workflows/spellcheck.yml 
# (patch metadata, continued) b/.github/workflows/spellcheck.yml new file mode 100644 index 0000000000..49fe00062f --- /dev/null +++ b/.github/workflows/spellcheck.yml @@ -0,0 +1,154 @@
#
# Spellcheck workflow: finds the single translation (Markdown) file changed by a
# pull request, reduces it to plain text, runs LanguageTool on it and posts the
# findings as a pull request comment.
name: spellcheck
on: [pull_request]
jobs:
  # Job 1: determine which translation file (if any) the pull request changed.
  prepare-translation:
    outputs:
      translation: ${{ steps.translation-check.outputs.translation }}
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # List added/modified files (deleted files are filtered out).
      - id: changed-files
        run: git diff --diff-filter=d --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} > ./changed_files.txt
      - name: Check translation
        # Collect the changed Markdown files.
        # The next step fails the workflow if there is more than one.
        id: translation-changes
        run: |
          # Always create the file: the next step reads it even when nothing changed.
          : > ./translation_files.txt
          changed_files=$(cat ./changed_files.txt)
          if [ -z "$changed_files" ]; then
            echo "No files changed"
          else
            # Anchored so that only names ending in ".md" count as translations.
            translation_files=$(echo "$changed_files" | grep -E "\.md$" || ./pass.sh)
            echo "$translation_files"
            echo "$translation_files" > ./translation_files.txt
          fi
      - id: translation-check
        run: |
          translation_files=$(cat ./translation_files.txt)
          if [ -z "$translation_files" ]; then
            echo "No translation files changed"
          else
            if [ "$(echo "$translation_files" | wc -l)" -gt 1 ]; then
              echo "More than one translation file changed"
              exit 1
            else
              echo "translation=$translation_files" >> "$GITHUB_OUTPUT"
            fi
          fi
  # Job 2: convert the translation to plain text and run LanguageTool on it.
  languagetool:
    if: ${{ needs.prepare-translation.outputs.translation != '' }}
    name: runner / languagetool
    needs: [prepare-translation]
    outputs:
      has_matches: ${{ steps.check-spelling.outputs.has_matches }}
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Remove code blocks
        env:
          # The file name comes from the pull request, so it is passed through
          # the environment instead of being expanded into the script text.
          TRANSLATION: ${{ needs.prepare-translation.outputs.translation }}
        run: |
          file="$TRANSLATION"
          sed -i '/```/,/```/d' "$file"
      # Strip all macros
      - id: strip-macros
        env:
          TRANSLATION: ${{ needs.prepare-translation.outputs.translation }}
        run: |
          file="$TRANSLATION"
          # Remove all macros with no arguments (every occurrence on a line)
          sed -i 's/{{[a-zA-Z_-]*}}//g' "$file"
          # Replace macros with one argument
          sed -i 's/{{[[:alnum:]_-]*(\("[[:alnum:]_-]*"\))}}/\1/g' "$file"
          # Replace macros with two arguments
          sed -i 's/{{[[:alnum:]_-]*(\("[[:alnum:]_-]*"\), \("[[:alnum:]_-]*"\))}}/\2/g' "$file"
          # Replace macros with more than two arguments
          sed -i 's/{{[[:alnum:]_-]*(\("[[:alnum:]_-]*"\), \("[[:alnum:]_-]*"\), \("[[:alnum:]_-]*"\))}}/\2/g' "$file"
      # Reduce markdown to plain text
      - run: sudo apt install pandoc -y
      - id: md2txt
        name: Convert markdown to plain text
        env:
          TRANSLATION: ${{ needs.prepare-translation.outputs.translation }}
        run: |
          file="$TRANSLATION"
          # Swap only the trailing ".md" extension for ".txt".
          newFileName="${file%.md}.txt"
          pandoc -f markdown -t plain -o "$newFileName" "$file"
          echo "translation=$newFileName" >> "$GITHUB_OUTPUT"
          echo "$newFileName"
      # Error if translation file is not found
      - if: steps.md2txt.outputs.translation == ''
        name: Check translation is found
        run: echo "No translation file found" && exit 1
      - uses: actions/setup-java@v2
        with:
          distribution: "temurin"
          java-version: "8"
      - name: Download LanguageTool
        run: wget -q https://languagetool.org/download/LanguageTool-6.3.zip
      - name: Unzip LanguageTool
        run: unzip -qq LanguageTool-6.3.zip
      - name: Add spelling additions
        run: |
          cat uk_spelling_additions.txt >> ./LanguageTool-6.3/org/languagetool/resource/uk/hunspell/spelling.txt
          cat uk_ignore_additions.txt >> ./LanguageTool-6.3/org/languagetool/resource/uk/hunspell/ignore.txt
      - id: disabled-rules
        name: Determine disabled rules
        run: echo "disabled_rules=$(cat disabled_rules.txt | tr '\n' ',')" >> $GITHUB_OUTPUT
      - id: check-spelling
        name: Check spelling
        env:
          TEXT_FILE: ${{ steps.md2txt.outputs.translation }}
        run: |
          cd LanguageTool-6.3
          java -jar languagetool-commandline.jar -d ${{steps.disabled-rules.outputs.disabled_rules}} -l uk --json "../$TEXT_FILE" > ../result.json
          matches=$(cat ../result.json | jq '.matches')
          # Check if matches equal []
          echo "has_matches=$(if [ "$matches" == "[]" ]; then echo "false"; else echo "true"; fi)" >> "$GITHUB_OUTPUT"
      - name: Upload result.json
        uses: actions/upload-artifact@v3
        with:
          name: result
          path: result.json
      - name: Upload text file
        uses: actions/upload-artifact@v3
        with:
          name: text
          path: ${{ steps.md2txt.outputs.translation }}
  # Job 3: turn the LanguageTool result into a pull request comment and fail.
  report-spelling:
    if: ${{ needs.languagetool.outputs.has_matches != 'false' }}
    name: Report spelling
    needs: [prepare-translation, languagetool]
    permissions:
      contents: read
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - uses: actions/download-artifact@v3
        with:
          name: result
          path: .
      - uses: actions/setup-node@v4
        with:
          node-version: "16"
      - uses: actions/download-artifact@v3
        with:
          name: text
          path: .
      - name: Create mapping
        env:
          TRANSLATION: ${{ needs.prepare-translation.outputs.translation }}
        run: |
          # The "text" artifact holds a single file named after the translation
          # (index.md -> index.txt), downloaded into the working directory.
          text_file="./$(basename "${TRANSLATION%.md}.txt")"
          node scripts/create-file-mapping.js "$text_file" "$TRANSLATION" > ./mapping.json && cat ./mapping.json
      - id: create-message
        name: Create message
        env:
          TRANSLATION: ${{ needs.prepare-translation.outputs.translation }}
        run: node scripts/create-message.js "$TRANSLATION" > message.txt && cat ./message.txt
      # - uses: reviewdog/action-setup@v1
      # - name: Send results
      #   run: |
      #     export REVIEWDOG_GITHUB_API_TOKEN=${{ secrets.GITHUB_TOKEN }}
      #     cat ./message.txt | reviewdog -efm="%A%f:%l:%c:%e:%k: %m%Z" -fail-on-error -reporter=github-pr-review -filter-mode=nofilter -name="LanguageTool" -level=info
      - name: Send results
        uses: mshick/add-pr-comment@v2
        with:
          message-id: ${{ needs.prepare-translation.outputs.translation }}
          message-path: ./message.txt
          refresh-message-position: true
      - name: Exit with error
        run: echo "Spelling errors found" && cat result.json && exit 1

# (patch metadata) diff --git a/pass.sh b/pass.sh new file mode 100755 index 0000000000..94bf7287f5 --- /dev/null +++ b/pass.sh @@ -0,0 +1,8 @@
#!/bin/bash

# Read from stdin
# and write to stdout

# IFS= and -r keep leading/trailing whitespace and backslashes intact;
# the extra test also passes through a final line that has no trailing newline.
while IFS= read -r line || [ -n "$line" ]; do
  printf '%s\n' "$line"
done

# (patch metadata, continues below) diff --git a/scripts/create-file-mapping.js b/scripts/create-file-mapping.js new file mode 100644 index
// (patch metadata, continued) 0000000000..5eaeccd171 --- /dev/null +++ b/scripts/create-file-mapping.js @@ -0,0 +1,39 @@
// This script creates a mapping from a text file to a markdown file it was created from.

import { readFileSync } from "fs";

const TEXT_FILE_ARG_INDEX = 2;
const MARKDOWN_FILE_ARG_INDEX = 3;
const WHITESPACE_REGEXP = /\s/;

/**
 * Map every non-whitespace character of `text` to the position of the same
 * character in `markdown`, scanning the markdown strictly left to right.
 *
 * Keys and values are both UTF-16 code-unit offsets (what `String#indexOf`
 * and `String#slice` use), so a character outside the BMP advances both
 * sides by two.
 * NOTE(review): this assumes LanguageTool reports match offsets in UTF-16
 * code units (Java string indices) - confirm against its JSON output.
 *
 * @param {string} text - plain text produced from the markdown
 * @param {string} markdown - markdown source the text was produced from
 * @param {string} [markdownFile] - file name, used only in diagnostics
 * @returns {Record<string, number>} text offset -> markdown offset
 */
function createFileMapping(text, markdown, markdownFile = "") {
  const mapping = {};
  let textOffset = 0;
  let markdownIndex = 0;
  // Iterate text by Unicode runes
  // eslint-disable-next-line no-restricted-syntax
  for (const rune of text) {
    if (!WHITESPACE_REGEXP.test(rune)) {
      const indexInMarkdown = markdown.indexOf(rune, markdownIndex);
      if (indexInMarkdown === -1) {
        // Best effort: report the rune and carry on with the next one.
        console.error(
          `Could not find rune "${rune}" in markdown file "${markdownFile}"`,
        );
      } else {
        mapping[textOffset] = indexInMarkdown;
        // Step over the whole rune, not just its first code unit.
        markdownIndex = indexInMarkdown + rune.length;
      }
    }
    textOffset += rune.length;
  }
  return mapping;
}

/**
 * Command-line entry point: reads both files and writes the mapping to stdout
 * as JSON.
 *
 * @param {string[]} argv - process arguments (`node script <text> <markdown>`)
 */
function main(argv) {
  const textFile = argv[TEXT_FILE_ARG_INDEX];
  const markdownFile = argv[MARKDOWN_FILE_ARG_INDEX];

  if (!textFile || !markdownFile) {
    // stderr, because stdout is the mapping itself
    console.error(
      "Usage: node scripts/create-file-mapping.js <text-file> <markdown-file>",
    );
    process.exitCode = 1;
    return;
  }

  const text = readFileSync(textFile, "utf8");
  const markdown = readFileSync(markdownFile, "utf8");

  // Write mapping to stdout
  console.log(JSON.stringify(createFileMapping(text, markdown, markdownFile)));
}

main(process.argv);

// (patch metadata) diff --git a/scripts/create-message.js b/scripts/create-message.js new file mode 100644 index 0000000000..1532393854 --- /dev/null +++ b/scripts/create-message.js @@ -0,0 +1,80 @@
// Read LanguageTool output from ./result.json
// and txt-to-markdown mapping from ./mapping.json
// and print a Markdown report section for each error
// to stdout.
import { readFileSync } from "fs";

const MARKDOWN_FILE_ARG_INDEX = 2;

/**
 * Convert a UTF-16 offset into `text` to a 1-based line and column.
 * Columns count Unicode code points, so a surrogate pair is one column.
 *
 * @param {number} offset - UTF-16 offset (as produced by `String#indexOf`)
 * @param {string} text - the text the offset points into
 * @returns {[number, number]} `[line, column]`; `[1, 1]` for offset 0 or NaN
 */
function convertOffsetToLineAndColumn(offset, text) {
  const precedingLines = text.slice(0, offset).split("\n");
  const currentLine = precedingLines[precedingLines.length - 1];
  return [precedingLines.length, Array.from(currentLine).length + 1];
}

/**
 * Find the markdown offset of the first mapped text offset at or after
 * `offset`. Whitespace has no mapping entry, hence the forward search.
 *
 * @param {Record<string, number>} mapping - text offset -> markdown offset
 * @param {number} offset - text offset to start searching from
 * @param {number} lastOffset - largest text offset present in `mapping`
 * @returns {number | undefined} markdown offset, or `undefined` when nothing
 *   at or after `offset` is mapped (the search is bounded, so it always ends)
 */
function findMappedOffset(mapping, offset, lastOffset) {
  for (let candidate = offset; candidate <= lastOffset; candidate += 1) {
    const mapped = Number.parseInt(mapping[candidate], 10);
    if (!Number.isNaN(mapped)) {
      return mapped;
    }
  }
  return undefined;
}

/**
 * Render one LanguageTool match as a Markdown section.
 *
 * @param {object} match - one element of `matches` from the LanguageTool JSON
 * @param {string} markdownFile - path shown in the location line
 * @param {Record<string, number>} mapping - text offset -> markdown offset
 * @param {string} markdown - content of `markdownFile`
 * @returns {string} Markdown text, without the final newline
 * @throws {Error} when the mapped end precedes the mapped start
 */
function formatMatch(match, markdownFile, mapping, markdown) {
  const { context, message, offset, length, replacements, rule, sentence } =
    match;
  // Integer-like keys are enumerated in ascending order, so the last is the largest.
  const mappedOffsets = Object.keys(mapping);
  const lastOffset = Number(mappedOffsets[mappedOffsets.length - 1]);

  // Fall back to the document bounds when the match lies outside the mapping
  // (for example an error at the very end of the text).
  const start = findMappedOffset(mapping, offset, lastOffset) ?? 0;
  const end =
    findMappedOffset(mapping, offset + length, lastOffset) ?? markdown.length;
  console.error(start, end);

  const [startLine, startColumn] = convertOffsetToLineAndColumn(
    start,
    markdown,
  );
  const [endLine, endColumn] = convertOffsetToLineAndColumn(end, markdown);
  if (endLine < startLine) {
    console.error(startLine, endLine);
    throw new Error(`Line not found in source file: ${sentence}`);
  }
  if (endLine === startLine && endColumn < startColumn) {
    console.error(startColumn, endColumn);
    throw new Error(`Column not found in source file: ${sentence}`);
  }

  const flaggedEnd = context.offset + context.length;
  const quotedContext = `${context.text.slice(0, context.offset)}**${context.text.slice(
    context.offset,
    flaggedEnd,
  )}**${context.text.slice(flaggedEnd)}`;
  const suggestions =
    replacements.length > 0
      ? replacements.map((replacement) => `- ${replacement.value}`)
      : ["Немає"];

  return [
    `## ${message}`,
    "",
    // The code span is closed on the same line as the location.
    `\`${markdownFile}:${startLine}:${startColumn}\``,
    "",
    `${rule.description}:`,
    "",
    `> ${quotedContext}`,
    // Blank line so the next paragraph is not absorbed into the blockquote.
    "",
    "Варіанти заміни:",
    ...suggestions,
    "",
    "",
  ].join("\n");
}

/**
 * Command-line entry point: reads ./result.json, ./mapping.json and the
 * markdown file named on the command line, then prints one section per match.
 *
 * @param {string[]} argv - process arguments (`node script <markdown-file>`)
 */
function main(argv) {
  const markdownFile = argv[MARKDOWN_FILE_ARG_INDEX];
  if (!markdownFile) {
    console.error("Usage: node scripts/create-message.js <markdown-file>");
    process.exitCode = 1;
    return;
  }

  const results = JSON.parse(readFileSync("./result.json", "utf8"));
  /**
   * @type {Record<string, number>}
   */
  const mapping = JSON.parse(readFileSync("./mapping.json", "utf8"));
  const markdown = readFileSync(markdownFile, "utf8");

  // eslint-disable-next-line no-restricted-syntax
  for (const match of results.matches) {
    console.log(formatMatch(match, markdownFile, mapping, markdown));
  }
}

main(process.argv);