Skip to content

Commit

Permalink
feat: spellcheck action (#2506)
Browse files Browse the repository at this point in the history
* feat: spellcheck action

* fix: debug only changed markdown file

* fix: run spellcheck only on one input file

* chore: test translation change

* fix: try another pattern

* fix: try singleline pattern

* fix: consider index.md only

* fix: pull_request_target

* fix: debug all changes

* fix: own translation changes script

* fix: adjust script

* fix: fetch-depth 0

* fix: close unclosed else

* feat: two separate jobs

* fix: adjust output

* fix: actions/ prefix

* fix: remove exit commands

* fix: restore exit commands

* fix: debug translation_files

* fix: debug changed_files

* fix: translation_files grep

* fix: toJSON output

* fix: set job outputs

* fix: just languagetool

* fix: restore prepare

* fix: change test change

* feature: install reviewdog

* fix: remove macros before languagetool

* fix: install pandoc

* fix: plain text check

* fix: debug md2txt

* fix: unwrap

* fix: add UA

* fix: latest lt action

* fix: error on no translation

* fix: strip code blocks

* fix: debug file name

* fix: check translation text file exists

* fix: increase server startup duration

* fix: languagetool action from master

* fix: add reporter

* fix: try less sleep time

* fix: check broad pattern

* fix: broad .txt pattern

* fix: broad .txt pattern

* broader pattern

* fix: remove preprocessing

* feat: customized action

* fix: txt file

* fix: add UA

* fix: plain JAR

* fix: setup java 8

* feature: run full LT server

* feature: LT CLI

* fix: quiet unzip

* fix: unzip to folder

* fix: pwd debug

* fix: debug ls

* fix: set LT 6.3

* fix: different steps

* fix: outer dir

* feature: reviewdog

* feature: third job

* fix: install reviewdog with curl

* fix: install tmpl

* fix: set github output

* fix: paths

* fix: disabled-rules step

* fix: EOF wrap

* fix: sophisticated EOF

* fix: no sp in tmpl

* fix: no nl in tmpl

* fix: better tmpl call

* fix: install new tmpl

* fix: bigger JSON

* fix: has_matches false-true

* feature: mustache template

* fix: yq install path

* fix: set yq binary

* fix: yq amd64

* fix: yq -Poy

* save result in json

* fix: reorder

* fix: mkdir /bin

* fix: use JSON result

* fix: reviewdog in PATH

* fix: reviewdog setup action

* fix: restore yq

* fix: debug result.json

* fix: proper download path

* fix: message.txt

* feat: create mapping and message errorformat

* fix: js modules

* fix: correct result.json reference

* fix: debug mapping

* fix: proper txt file used

* fix: proper file arg order

* fix: reviewdog fails on error

* fix: try pull_request_target

* fix: default reporter

* fix: nofilter

* fix: complex sentence end search

* fix: parse int from JSON

* fix: correct line & column check

* fix: permission to write

* fix: permission to read

* fix: PR add comment

* fix: message-path

* fix: Better comment

* fix: better comment

* chore: restore base64 glossary

* fix: filter out deleted files

* fix: more granular steps

* fix: EOF

* fix: multiline changed-files run

* fix: changed_files with EOF

* fix: translation-check step

* fix: add missing fi

* fix: save changes in txt

* fix: suppress grep error

* fix: add noop command

* chore: add another text test

* fix: oneline translations

* fix: pass.sh

* fix: get translation_files

* chore: one more translation change

* fix: multiple translations can be

* fix: error on more than one translation

* fix: enable translation-check

* chore: remove one of textual changes

* chore: remove textual changes
  • Loading branch information
undead404 authored Nov 22, 2023
1 parent 69ef1c2 commit 01bdb2a
Show file tree
Hide file tree
Showing 4 changed files with 281 additions and 0 deletions.
154 changes: 154 additions & 0 deletions .github/workflows/spellcheck.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# Spellchecks the single markdown translation changed by a pull request:
#   1. prepare-translation - find the changed *.md file (exactly one allowed)
#   2. languagetool        - reduce it to plain text and run LanguageTool (-l uk)
#   3. report-spelling     - post the findings as a PR comment and fail the run
name: spellcheck
on: [pull_request]
jobs:
  prepare-translation:
    outputs:
      # Path of the changed markdown file; empty when no markdown file changed
      translation: ${{ steps.translation-check.outputs.translation }}
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history is needed to diff the PR base against its head
          fetch-depth: 0
      - id: changed-files
        # --diff-filter=d leaves deleted files out of the list
        run: git diff --diff-filter=d --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} > ./changed_files.txt
      - name: Check translation
        # Collect the changed markdown files into ./translation_files.txt
        id: translation-changes
        run: |
          changed_files=$(cat ./changed_files.txt)
          # Always create the file so the next step can read it even when
          # nothing changed (a missing file would make `cat` fail the job)
          : > ./translation_files.txt
          if [ -z "$changed_files" ]; then
            echo "No files changed"
          else
            # Anchor the pattern so only names ending in .md are selected;
            # pass.sh keeps the pipeline successful when grep matches nothing
            translation_files=$(echo "$changed_files" | grep -E '\.md$' || ./pass.sh)
            echo "$translation_files"
            echo "$translation_files" > ./translation_files.txt
          fi
      - id: translation-check
        # Check there is only one translation change
        # If there are more than one, the workflow will fail
        run: |
          translation_files=$(cat ./translation_files.txt)
          if [ -z "$translation_files" ]; then
            echo "No translation files changed"
          else
            if [ "$(echo "$translation_files" | wc -l)" -gt 1 ]; then
              echo "More than one translation file changed"
              exit 1
            else
              echo "translation=$translation_files" >> "$GITHUB_OUTPUT"
            fi
          fi
  languagetool:
    if: ${{ needs.prepare-translation.outputs.translation != '' }}
    name: runner / languagetool
    needs: [prepare-translation]
    outputs:
      # "true" when LanguageTool reported at least one match, "false" otherwise
      has_matches: ${{ steps.check-spelling.outputs.has_matches }}
    runs-on: ubuntu-latest
    env:
      # The file name comes from the pull request, so it is handed to the shell
      # through an environment variable instead of being interpolated into the
      # script text
      TRANSLATION_FILE: ${{ needs.prepare-translation.outputs.translation }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Remove code blocks
        # Delete every line range delimited by ``` fences
        run: sed -i '/```/,/```/d' "$TRANSLATION_FILE"
      # Strip all macros
      - id: strip-macros
        run: |
          # Remove all macros with no arguments (every occurrence on a line)
          sed -i 's/{{[a-zA-Z_-]*}}//g' "$TRANSLATION_FILE"
          # Replace macros with one argument
          sed -i 's/{{[[:alnum:]_-]*(\("[[:alnum:]_-]*"\))}}/\1/g' "$TRANSLATION_FILE"
          # Replace macros with two arguments
          sed -i 's/{{[[:alnum:]_-]*(\("[[:alnum:]_-]*"\), \("[[:alnum:]_-]*"\))}}/\2/g' "$TRANSLATION_FILE"
          # Replace macros with more than two arguments
          sed -i 's/{{[[:alnum:]_-]*(\("[[:alnum:]_-]*"\), \("[[:alnum:]_-]*"\), \("[[:alnum:]_-]*"\))}}/\2/g' "$TRANSLATION_FILE"
      # Reduce markdown to plain text
      - run: sudo apt install pandoc -y
      - id: md2txt
        name: Convert markdown to plain text
        run: |
          # Swap only the trailing .md extension for .txt
          newFileName="${TRANSLATION_FILE%.md}.txt"
          pandoc -f markdown -t plain -o "$newFileName" "$TRANSLATION_FILE"
          echo "translation=$newFileName" >> "$GITHUB_OUTPUT"
          echo "$newFileName"
      # Error if translation file is not found
      - if: steps.md2txt.outputs.translation == ''
        name: Check translation is found
        run: echo "No translation file found" && exit 1
      - uses: actions/setup-java@v2
        with:
          distribution: "temurin"
          java-version: "8"
      - name: Download LanguageTool
        run: wget -q https://languagetool.org/download/LanguageTool-6.3.zip
      - name: Unzip LanguageTool
        run: unzip -qq LanguageTool-6.3.zip
      - name: Add spelling additions
        # Append the repository's own word lists to the bundled uk dictionaries
        run: |
          cat uk_spelling_additions.txt >> ./LanguageTool-6.3/org/languagetool/resource/uk/hunspell/spelling.txt
          cat uk_ignore_additions.txt >> ./LanguageTool-6.3/org/languagetool/resource/uk/hunspell/ignore.txt
      - id: disabled-rules
        name: Determine disabled rules
        # One rule id per line in disabled_rules.txt, joined with commas
        run: echo "disabled_rules=$(cat disabled_rules.txt | tr '\n' ',')" >> "$GITHUB_OUTPUT"
      - id: check-spelling
        name: Check spelling
        env:
          DISABLED_RULES: ${{ steps.disabled-rules.outputs.disabled_rules }}
          TEXT_FILE: ${{ steps.md2txt.outputs.translation }}
        run: |
          cd LanguageTool-6.3
          java -jar languagetool-commandline.jar -d "$DISABLED_RULES" -l uk --json "../$TEXT_FILE" > ../result.json
          matches=$(cat ../result.json | jq '.matches')
          # Check if matches equal []
          echo "has_matches=$(if [ "$matches" == "[]" ]; then echo "false"; else echo "true"; fi)" >> "$GITHUB_OUTPUT"
      - name: Upload result.json
        uses: actions/upload-artifact@v3
        with:
          name: result
          path: result.json
      - name: Upload text file
        uses: actions/upload-artifact@v3
        with:
          name: text
          path: ${{ steps.md2txt.outputs.translation }}
  report-spelling:
    if: ${{ needs.languagetool.outputs.has_matches != 'false' }}
    name: Report spelling
    needs: [prepare-translation, languagetool]
    permissions:
      contents: read
      # Required to post the comment on the pull request
      pull-requests: write
    runs-on: ubuntu-latest
    env:
      # Passed through the environment rather than interpolated into scripts
      TRANSLATION_FILE: ${{ needs.prepare-translation.outputs.translation }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - uses: actions/download-artifact@v3
        with:
          name: result
          path: .
      - uses: actions/setup-node@v4
        with:
          node-version: "16"
      - uses: actions/download-artifact@v3
        with:
          name: text
          path: .
      - name: Create mapping
        # NOTE(review): ./index.txt presumes the translation is named index.md
        # and that the "text" artifact is extracted flat into the workspace -
        # confirm
        run: node scripts/create-file-mapping.js ./index.txt "$TRANSLATION_FILE" > ./mapping.json && cat ./mapping.json
      - id: create-message
        name: Create message
        run: node scripts/create-message.js "$TRANSLATION_FILE" > message.txt && cat ./message.txt
      # - uses: reviewdog/action-setup@v1
      # - name: Send results
      #   run: |
      #     export REVIEWDOG_GITHUB_API_TOKEN=${{ secrets.GITHUB_TOKEN }}
      #     cat ./message.txt | reviewdog -efm="%A%f:%l:%c:%e:%k: %m%Z" -fail-on-error -reporter=github-pr-review -filter-mode=nofilter -name="LanguageTool" -level=info
      - name: Send results
        uses: mshick/add-pr-comment@v2
        with:
          message-id: ${{ needs.prepare-translation.outputs.translation }}
          message-path: ./message.txt
          refresh-message-position: true
      - name: Exit with error
        # This job only runs when matches were reported, so always fail the check
        run: echo "Spelling errors found" && cat result.json && exit 1
8 changes: 8 additions & 0 deletions pass.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash

# Copy stdin to stdout unchanged, line by line, and exit successfully.
#
# IFS= and -r keep leading/trailing whitespace and backslashes intact,
# the quoted printf avoids word splitting and glob expansion, and the
# `|| [ -n "$line" ]` clause keeps a final line that lacks a trailing newline.

while IFS= read -r line || [ -n "$line" ]; do
  printf '%s\n' "$line"
done
39 changes: 39 additions & 0 deletions scripts/create-file-mapping.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// This script creates a mapping from a text file to a markdown file it was created from.

import { readFileSync } from "fs";

const TEXT_FILE_ARG_INDEX = 2;
const MARKDOWN_FILE_ARG_INDEX = 3;
const WHITESPACE_REGEXP = /\s/;

/**
 * Map every non-whitespace rune of the plain text to the position of the same
 * rune in the markdown source, scanning the markdown strictly left to right.
 *
 * Keys are rune (code point) indices into `text`; values are string indices
 * into `markdown` as returned by `String.prototype.indexOf`.
 * NOTE(review): the keys are later looked up by LanguageTool match offsets;
 * if those are UTF-16 based, texts with astral characters will drift — confirm.
 *
 * @param {string} text - Plain text derived from the markdown.
 * @param {string} markdown - Markdown source the text was derived from.
 * @param {string} markdownLabel - Markdown file name, used in diagnostics only.
 * @returns {Record<number, number>} Text index to markdown index.
 */
function buildMapping(text, markdown, markdownLabel) {
  const mapping = {};
  let searchFrom = 0;
  Array.from(text).forEach((rune, index) => {
    if (WHITESPACE_REGEXP.test(rune)) {
      return;
    }
    const indexInMarkdown = markdown.indexOf(rune, searchFrom);
    if (indexInMarkdown === -1) {
      // Best effort: report the rune and carry on with the rest of the text
      console.error(
        `Could not find rune "${rune}" in markdown file "${markdownLabel}"`,
      );
      return;
    }
    mapping[index] = indexInMarkdown;
    // Step over the whole rune; astral runes occupy two string positions
    searchFrom = indexInMarkdown + rune.length;
  });
  return mapping;
}

const textFile = process.argv[TEXT_FILE_ARG_INDEX];
const markdownFile = process.argv[MARKDOWN_FILE_ARG_INDEX];

if (!textFile || !markdownFile) {
  // stdout carries the JSON mapping, so usage help belongs on stderr
  console.error(
    "Usage: node scripts/create-file-mapping.js <text-file> <markdown-file>",
  );
  process.exit(1);
}

const text = readFileSync(textFile, "utf8");
const markdown = readFileSync(markdownFile, "utf8");

// Write mapping to stdout
console.log(JSON.stringify(buildMapping(text, markdown, markdownFile)));
80 changes: 80 additions & 0 deletions scripts/create-message.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Read LanguageTool output from ./result.json
// and txt-to-markdown mapping from ./mapping.json
// and print an errorformat message for each error
// to stdout.

import { readFileSync } from "fs";

const MARKDOWN_FILE_ARG_INDEX = 2;
const markdownFile = process.argv[MARKDOWN_FILE_ARG_INDEX];

if (!markdownFile) {
  // stdout carries the generated message, so usage help belongs on stderr
  console.error("Usage: node scripts/create-message.js <markdown-file>");
  process.exit(1);
}

// LanguageTool JSON report; its `matches` array is iterated below
const results = JSON.parse(readFileSync("./result.json", "utf8"));
/**
 * Text offset to markdown index, as written by create-file-mapping.js.
 * @type {Record<number, number>}
 */
const mapping = JSON.parse(readFileSync("./mapping.json", "utf8"));
const markdown = readFileSync(markdownFile, "utf8");

// Markdown source split into Unicode code points
const markdownRunes = Array.from(markdown);

/**
 * Convert a string index into a 1-based [line, column] pair.
 *
 * The offset is a UTF-16 string index (the mapping stores values produced by
 * `String.prototype.indexOf`), so the text is walked rune by rune while the
 * UTF-16 position is tracked separately. Columns count runes, so a character
 * outside the BMP advances the column by one, not two.
 *
 * @param {number} offset - String index into `source`; NaN yields [1, 1].
 * @param {string} [source] - Text to measure; defaults to the markdown file.
 * @returns {[number, number]} 1-based line and column of the offset.
 */
function convertOffsetToLineAndColumn(offset, source = markdown) {
  let line = 1;
  let column = 1;
  let position = 0;
  for (const rune of source) {
    // Written as a negated `<` so that a NaN offset stops immediately
    if (!(position < offset)) {
      break;
    }
    if (rune === "\n") {
      line += 1;
      column = 1;
    } else {
      column += 1;
    }
    position += rune.length;
  }
  return [line, column];
}

// Largest text offset present in the mapping; bounds the forward scan below so
// that a match reaching the end of the text cannot loop forever.
const lastMappedOffset = Object.keys(mapping).reduce(
  (max, key) => Math.max(max, Number(key)),
  -1,
);

// Print one markdown section per LanguageTool match to stdout.
// eslint-disable-next-line no-restricted-syntax
for (const match of results.matches) {
  const { context, length, message, offset, replacements, rule, sentence } =
    match;
  const start = Number.parseInt(mapping[offset], 10);

  // The end is the markdown position of the first mapped character at or after
  // the end of the match (whitespace has no mapping entry, so skip over it).
  let end = Number.NaN;
  for (
    let endOffset = offset + length;
    endOffset <= lastMappedOffset;
    endOffset += 1
  ) {
    const candidate = Number.parseInt(mapping[endOffset], 10);
    if (!Number.isNaN(candidate)) {
      end = candidate;
      break;
    }
  }
  if (Number.isNaN(end)) {
    // Nothing is mapped after the match (it ends the text). The end position
    // is only used for the sanity checks below, so fall back to the start.
    end = start;
  }

  console.error(start, end);
  const [startLine, startColumn] = convertOffsetToLineAndColumn(start);
  const [endLine, endColumn] = convertOffsetToLineAndColumn(end);
  // Sanity checks: the end of a match must not precede its start
  if (endLine < startLine) {
    console.error(startLine, endLine);
    throw new Error(`Line not found in source file: ${sentence}`);
  }
  if (endLine === startLine && endColumn < startColumn) {
    console.error(startColumn, endColumn);
    throw new Error(`Column not found in source file: ${sentence}`);
  }
  // const errorformatLine = `${markdownFile}:${startLine}:${startColumn}:${endLine}:${endColumn}: ${message}`;
  // console.log(errorformatLine);
  console.log(`## ${message}\n`);
  console.log(`\`${markdownFile}:${startLine}:${startColumn}\n\``);
  console.log(`${rule.description}:\n`);
  // Quote the context with the offending fragment in bold
  console.log(
    `> ${context.text.slice(0, context.offset)}**${context.text.slice(
      context.offset,
      context.offset + context.length,
    )}**${context.text.slice(context.offset + context.length)}_`,
  );
  console.log("Варіанти заміни:");
  if (replacements.length > 0) {
    // eslint-disable-next-line no-restricted-syntax
    for (const replacement of replacements) {
      console.log(`- ${replacement.value}`);
    }
  } else {
    console.log("Немає");
  }
  console.log("\n");
}

0 comments on commit 01bdb2a

Please sign in to comment.