From 7fb0f7ca95c2742f0a13b4542759ba9ed725fd0d Mon Sep 17 00:00:00 2001
From: Wyatt Pearsall
Date: Thu, 7 Mar 2024 16:17:42 -0500
Subject: [PATCH] sed script

Move the sed stage that the save-ecfr and save-iregs workflows each
spelled out inline into one script, .github/workflows/sed.sh, and call
that script from both pipelines instead.

The script keeps the three expressions the workflows already used
(delete empty / space-only lines, the `s/ / /g` substitution, en dash to
ASCII hyphen) and adds a fourth that turns the right single quotation
mark (’) into an ASCII apostrophe.

---
NOTE(review): the leading whitespace of the YAML context lines in the two
workflow hunks below was reconstructed, not copied. Confirm it against
the target files, or apply with `git apply --ignore-whitespace`.

NOTE(review): both workflows invoke `./sed.sh`, but the script is created
under `.github/workflows/`. That only resolves if the step runs with
`.github/workflows` as its working directory, which is not visible in
this patch. Confirm, or call it by its repository-relative path.

NOTE(review): `s/ / /g` reads as space -> space, which would be a no-op.
Presumably the pattern side is a non-breaking space (U+00A0). Confirm the
byte is intact in the applied file.

 .github/workflows/save-ecfr.yml  | 2 +-
 .github/workflows/save-iregs.yml | 2 +-
 .github/workflows/sed.sh         | 3 +++
 3 files changed, 5 insertions(+), 2 deletions(-)
 create mode 100755 .github/workflows/sed.sh

diff --git a/.github/workflows/save-ecfr.yml b/.github/workflows/save-ecfr.yml
index 7a515cd..199f02d 100644
--- a/.github/workflows/save-ecfr.yml
+++ b/.github/workflows/save-ecfr.yml
@@ -38,7 +38,7 @@ jobs:
             curl -sSL "https://www.ecfr.gov/api/renderer/v1/content/enhanced/${{ steps.date.outputs.value }}/title-12?chapter=X&part=${PART}" |
             htmlq -r 'h1, h2, .source, .authority, .citation' |
             htmlq -t '.part' |
-            sed -e '/^ *$/d' -e 's/ / /g' -e 's/–/-/g' |
+            ./sed.sh |
             awk '{$1=$1};1' > "./ecfr/${PART}.txt"
           done
 
diff --git a/.github/workflows/save-iregs.yml b/.github/workflows/save-iregs.yml
index d391d75..34f1e24 100644
--- a/.github/workflows/save-iregs.yml
+++ b/.github/workflows/save-iregs.yml
@@ -36,7 +36,7 @@ jobs:
             curl -sSL "${CHUNKS[@]}" |
             htmlq -r '.regulation-meta, .inline-interpretation, .block__sub, .o-regulations-wayfinder' |
             htmlq -t '.u-layout-grid_main' |
-            sed -e '/^ *$/d' -e 's/ / /g' -e 's/–/-/g' |
+            ./sed.sh |
             awk '{$1=$1};1' > "./iregs/${PART}.txt"
             sleep 1
           done
diff --git a/.github/workflows/sed.sh b/.github/workflows/sed.sh
new file mode 100755
index 0000000..ef93f45
--- /dev/null
+++ b/.github/workflows/sed.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+
+sed -e '/^ *$/d' -e 's/ / /g' -e 's/–/-/g' -e "s/’/'/g"