diff --git a/README.md b/README.md index 420557e..4fd2b07 100644 --- a/README.md +++ b/README.md @@ -155,6 +155,27 @@ jobs: ERROR_EMAILS: example@example.com,example2@example.com ``` +### word_diff + +Word-diff creates a report of the changes to any .docx files between `main` and the current +pull request, after converting the files to markdown + +#### Usage + +```yml +on: + pull_request: + paths: + - '**/*.docx' + +jobs: + my-workflow: + ... + steps: + - uses: SuffolkLITLab/ALActions/word_diff@main +``` + + ## Development Details Using [codeql-action](https://github.com/github/codeql-action) as diff --git a/word_diff/action.yml b/word_diff/action.yml new file mode 100644 index 0000000..13ecf30 --- /dev/null +++ b/word_diff/action.yml @@ -0,0 +1,96 @@ +name: DOCX Diff +description: | + This action compares DOCX files in a pull request with the base branch and generates a diff in HTML format. + The HTML diff files are uploaded as artifacts and can be viewed in the 'Artifacts' section of the workflow run. +jobs: + docx-diff: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Fetch all history for all branches and tags + + - name: Install pandoc and npm + run: | + sudo apt-get install -y pandoc npm + sudo npm install -g diff2html-cli + + - name: Log base branch and SHA + run: | + echo "Base branch: ${{ github.event.pull_request.base.ref }}" + echo "PR branch SHA: ${{ github.sha }}" + + - name: Convert Base DOCX to Markdown + run: | + mkdir -p converted/base + base_branch="origin/${{ github.event.pull_request.base.ref }}" + echo "Fetching base branch $base_branch" + git fetch origin ${{ github.event.pull_request.base.ref }}:${{ github.event.pull_request.base.ref }} + echo "Checking out base branch $base_branch" + git checkout $base_branch + for file in $(git diff --name-only ${{ github.sha }} $base_branch | grep '.docx'); do + echo "Converting $file from base branch" + if [[ -f "$file" ]]; then + mkdir -p "converted/base/$(dirname "$file")" + pandoc "$file" -t markdown -o "converted/base/${file%.docx}.md" + echo "Converted $file to converted/base/${file%.docx}.md" + else + echo "$file does not exist in base branch" + fi + done + + - name: Convert PR DOCX to Markdown + run: | + mkdir -p converted/head + git checkout ${{ github.sha }} + for file in $(git diff --name-only ${{ github.sha }} origin/${{ github.event.pull_request.base.ref }} | grep '.docx'); do + echo "Converting $file from PR branch" + if [[ -f "$file" ]]; then + mkdir -p "converted/head/$(dirname "$file")" + pandoc "$file" -t markdown -o "converted/head/${file%.docx}.md" + echo "Converted $file to converted/head/${file%.docx}.md" + else + echo "$file does not exist in PR branch" + fi + done + + - name: Generate and Display Diff + run: | + mkdir -p diff_output + base_branch="origin/${{ github.event.pull_request.base.ref }}" + echo "# DOCX Diffs" > diff_summary.md + for file in $(git diff --name-only ${{ github.sha }} $base_branch | grep '.docx'); do + if [[ -f "converted/base/${file%.docx}.md" && -f "converted/head/${file%.docx}.md" ]]; then + echo "Diff for $file:" + mkdir -p "diff_output/$(dirname "$file")" + diff "converted/base/${file%.docx}.md" "converted/head/${file%.docx}.md" > "diff_output/${file%.docx}.diff" || true + echo "Diff output for $file:" + cat "diff_output/${file%.docx}.diff" + if [[ -s "diff_output/${file%.docx}.diff" ]]; then + diff2html -i file -o stdout -F "diff_output/${file%.docx}.html" -- "diff_output/${file%.docx}.diff" + echo "Generated HTML diff for ${file}:" + cat "diff_output/${file%.docx}.html" + echo "## Diff for ${file}" >> diff_summary.md + else + echo "No differences found for $file" + fi + else + echo "Converted files for $file do not exist" + fi + done + + - name: Upload Diff Artifacts + uses: actions/upload-artifact@v2 + with: + name: docx-diff + path: diff_output/ + + - name: Add Diff Links to Job Summary + if: always() + run: | + echo "# DOCX Diffs" > $GITHUB_STEP_SUMMARY + echo "The diff files have been generated and uploaded as artifacts." >> $GITHUB_STEP_SUMMARY + echo "You can download and view the diff files from the 'Artifacts' section in this workflow run." >> $GITHUB_STEP_SUMMARY + cat diff_summary.md >> $GITHUB_STEP_SUMMARY