From 1f93ff4631a1aa6c7225cb14a048b75888db24f0 Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Fri, 7 Nov 2025 22:11:21 -0500 Subject: [PATCH 1/7] update sqlfluff action to use latest version --- .github/workflows/lint_sqlfluff.yml | 34 ++++++++++++----------------- .sqlfluff | 27 ++++++++++++----------- 2 files changed, 28 insertions(+), 33 deletions(-) diff --git a/.github/workflows/lint_sqlfluff.yml b/.github/workflows/lint_sqlfluff.yml index 79d87d438..8bce932c3 100644 --- a/.github/workflows/lint_sqlfluff.yml +++ b/.github/workflows/lint_sqlfluff.yml @@ -1,20 +1,20 @@ name: SQLFluff on: - - pull_request + - push jobs: lint-mimic-iv: runs-on: ubuntu-latest steps: - name: checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Python - uses: "actions/setup-python@v2" + uses: actions/setup-python@v5 with: - python-version: "3.7" + python-version: "3.12" - name: install sqlfluff - run: "pip install sqlfluff==1.4.5" + run: "pip install sqlfluff==3.5.0" - name: Get changed files id: get_file_changes uses: trilom/file-changes-action@v1.2.4 @@ -22,28 +22,22 @@ jobs: output: ' ' - name: Get changed .sql files in mimic-iv concepts folder id: get_files_to_lint - shell: bash -l {0} + shell: bash run: | - # Set the command in the $() brackets as an output to use in later steps - echo "lintees=$( - echo \ - $(echo ${{ steps.get_file_changes.outputs.files_modified }} | - tr -s ' ' '\n' | - grep -E '^mimic-iv/concepts/.*[.]sql$' | - tr -s '\n' ' ') \ - $(echo ${{ steps.get_file_changes.outputs.files_added }} | - tr -s ' ' '\n' | - grep -E '^mimic-iv/concepts/.*[.]sql$' | - tr -s '\n' ' ') - ) >> $GITHUB_OUTPUT" + # Compose list of changed SQL files under mimic-iv/concepts and export to step output + files="$(echo "${{ steps.get_file_changes.outputs.files_modified }} ${{ steps.get_file_changes.outputs.files_added }}" \ + | tr -s ' ' '\n' \ + | grep -E '^mimic-iv/concepts/.*[.]sql$' \ + | tr -s '\n' ' ')" + echo "lintees=${files}" >> "$GITHUB_OUTPUT" - name: Lint SQL files id: sqlfluff_json if: steps.get_files_to_lint.outputs.lintees != '' - shell: bash -l {0} + shell: bash run: sqlfluff lint --format github-annotation --annotation-level failure --nofail ${{ steps.get_files_to_lint.outputs.lintees }} > annotations.json - name: Annotate - uses: yuzutech/annotations-action@v0.3.0 + uses: yuzutech/annotations-action@v0.5.0 with: repo-token: "${{ secrets.GITHUB_TOKEN }}" title: "SQLFluff Lint" diff --git a/.sqlfluff b/.sqlfluff index 28663d303..62eff0dde 100644 --- a/.sqlfluff +++ b/.sqlfluff @@ -1,20 +1,21 @@ -[sqlfluff:core] -rules = core,L019 -large_file_skip_byte_limit = 40000 +[sqlfluff] dialect = bigquery +large_file_skip_byte_limit = 40000 +exclude_rules = LT02 + +# Enable core rules plus explicit layout.commas (leading commas) +[sqlfluff:core] +rules = core,layout.commas [sqlfluff:layout:type:comma] line_position = leading -[sqlfluff:indentation] -indented_joins = false -indented_using_on = true -template_blocks_indent = false - -[sqlfluff:rules:L010] -# Keywords should be upper case +# Capitalisation rules +[sqlfluff:rules:capitalisation.keywords] capitalisation_policy = upper -[sqlfluff:rules:L030] -# Functions should be upper case -extended_capitalisation_policy = upper \ No newline at end of file +[sqlfluff:rules:capitalisation.functions] +extended_capitalisation_policy = upper + +[sqlfluff:rules:capitalisation.identifiers] +extended_capitalisation_policy = lower \ No newline at end of file From 24a571a9ee715c1af2fd7463f2b7dcd883eb024a Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Fri, 7 Nov 2025 22:22:16 -0500 Subject: [PATCH 2/7] fix pagination in ls limiting copy to first 50 tables --- mimic-iv/concepts/copy_concepts_to_versioned_schema.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mimic-iv/concepts/copy_concepts_to_versioned_schema.sh b/mimic-iv/concepts/copy_concepts_to_versioned_schema.sh index c012f6575..df0afa8fa 100644 --- a/mimic-iv/concepts/copy_concepts_to_versioned_schema.sh +++ b/mimic-iv/concepts/copy_concepts_to_versioned_schema.sh @@ -27,7 +27,7 @@ else fi echo "Copying tables from ${SOURCE_DATASET} to ${TARGET_DATASET}." -for TABLE in `bq ls ${PROJECT_ID}:${SOURCE_DATASET} | cut -d' ' -f3`; +for TABLE in `bq ls -n 500 ${PROJECT_ID}:${SOURCE_DATASET} | cut -d' ' -f3`; do # skip the first line of dashes if [[ "${TABLE:0:2}" == '--' ]]; then From 5dcb3dbc5a69e1f1d545549cc2cf739304179813 Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Fri, 7 Nov 2025 22:24:11 -0500 Subject: [PATCH 3/7] change to on PR --- .github/workflows/lint_sqlfluff.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint_sqlfluff.yml b/.github/workflows/lint_sqlfluff.yml index 8bce932c3..2bcf1d8fd 100644 --- a/.github/workflows/lint_sqlfluff.yml +++ b/.github/workflows/lint_sqlfluff.yml @@ -1,7 +1,7 @@ name: SQLFluff on: - - push + - pull_request jobs: lint-mimic-iv: From eff175f7eb2009d923e22dd2eaaec09baf762d77 Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Fri, 7 Nov 2025 22:38:54 -0500 Subject: [PATCH 4/7] fix error if no SQL files were changed --- .github/workflows/lint_sqlfluff.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/lint_sqlfluff.yml b/.github/workflows/lint_sqlfluff.yml index 2bcf1d8fd..785f4413a 100644 --- a/.github/workflows/lint_sqlfluff.yml +++ b/.github/workflows/lint_sqlfluff.yml @@ -25,10 +25,11 @@ jobs: shell: bash run: | # Compose list of changed SQL files under mimic-iv/concepts and export to step output - files="$(echo "${{ steps.get_file_changes.outputs.files_modified }} ${{ steps.get_file_changes.outputs.files_added }}" \ - | tr -s ' ' '\n' \ - | grep -E '^mimic-iv/concepts/.*[.]sql$' \ - | tr -s '\n' ' ')" + raw="${{ steps.get_file_changes.outputs.files_modified }} ${{ steps.get_file_changes.outputs.files_added }}" + # Check for mimic-iv SQL files which may have changed + filtered="$(printf '%s' "$raw" | tr -s ' ' '\n' | grep -E '^mimic-iv/concepts/.*[.]sql$' || true)" + # Turn this into a space separated list for the next step + files="$(printf '%s' "$filtered" | tr -s '\n' ' ')" echo "lintees=${files}" >> "$GITHUB_OUTPUT" - name: Lint SQL files From f9c7a56e1f1a2eda61f17e76b5d6d1572e1b19ab Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Fri, 7 Nov 2025 22:41:00 -0500 Subject: [PATCH 5/7] update a single SQL file to test sqlfluff workflow --- mimic-iv/concepts/firstday/first_day_bg_art.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mimic-iv/concepts/firstday/first_day_bg_art.sql b/mimic-iv/concepts/firstday/first_day_bg_art.sql index fb6276e99..e70aaf453 100644 --- a/mimic-iv/concepts/firstday/first_day_bg_art.sql +++ b/mimic-iv/concepts/firstday/first_day_bg_art.sql @@ -28,7 +28,8 @@ SELECT , MIN(sodium) AS sodium_min, MAX(sodium) AS sodium_max FROM `physionet-data.mimiciv_icu.icustays` ie LEFT JOIN `physionet-data.mimiciv_derived.bg` bg - ON ie.subject_id = bg.subject_id + ON + ie.subject_id = bg.subject_id AND bg.specimen = 'ART.' AND bg.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR) AND bg.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY) From 7797717950043c47764997657dbea9705adcf1b3 Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Fri, 7 Nov 2025 22:44:14 -0500 Subject: [PATCH 6/7] update a file with known sqlfluff linting issues --- mimic-iv/concepts/medication/arb.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/mimic-iv/concepts/medication/arb.sql b/mimic-iv/concepts/medication/arb.sql index d83ba10e6..6d236860c 100644 --- a/mimic-iv/concepts/medication/arb.sql +++ b/mimic-iv/concepts/medication/arb.sql @@ -14,7 +14,6 @@ WITH arb_drug AS ( END AS arb FROM `physionet-data.mimiciv_hosp.prescriptions` ) - SELECT pr.subject_id , pr.hadm_id From 55d3f95a57b33ec201d315d01999fe94a40d7a3e Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Fri, 7 Nov 2025 22:48:15 -0500 Subject: [PATCH 7/7] update to pass sqlfluff --- mimic-iv/concepts/medication/arb.sql | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/mimic-iv/concepts/medication/arb.sql b/mimic-iv/concepts/medication/arb.sql index 6d236860c..d825cb8d8 100644 --- a/mimic-iv/concepts/medication/arb.sql +++ b/mimic-iv/concepts/medication/arb.sql @@ -1,19 +1,21 @@ -WITH arb_drug AS ( +WITH arb_drug AS ( SELECT DISTINCT drug , CASE - WHEN UPPER(drug) LIKE '%AZILSARTAN%' OR UPPER(drug) LIKE '%EDARBI%' THEN 1 - WHEN UPPER(drug) LIKE '%CANDESARTAN%' OR UPPER(drug) LIKE '%ATACAND%' THEN 1 - WHEN UPPER(drug) LIKE '%IRBESARTAN%' OR UPPER(drug) LIKE '%AVAPRO%' THEN 1 - WHEN UPPER(drug) LIKE '%LOSARTAN%' OR UPPER(drug) LIKE '%COZAAR%' THEN 1 - WHEN UPPER(drug) LIKE '%OLMESARTAN%' OR UPPER(drug) LIKE '%BENICAR%' THEN 1 - WHEN UPPER(drug) LIKE '%TELMISARTAN%' OR UPPER(drug) LIKE '%MICARDIS%' THEN 1 - WHEN UPPER(drug) LIKE '%VALSARTAN%' OR UPPER(drug) LIKE '%DIOVAN%' THEN 1 - WHEN UPPER(drug) LIKE '%SACUBITRIL%' OR UPPER(drug) LIKE '%ENTRESTO%' THEN 1 - ELSE 0 + WHEN UPPER(drug) LIKE '%AZILSARTAN%' OR UPPER(drug) LIKE '%EDARBI%' + OR UPPER(drug) LIKE '%CANDESARTAN%' OR UPPER(drug) LIKE '%ATACAND%' + OR UPPER(drug) LIKE '%IRBESARTAN%' OR UPPER(drug) LIKE '%AVAPRO%' + OR UPPER(drug) LIKE '%LOSARTAN%' OR UPPER(drug) LIKE '%COZAAR%' + OR UPPER(drug) LIKE '%OLMESARTAN%' OR UPPER(drug) LIKE '%BENICAR%' + OR UPPER(drug) LIKE '%TELMISARTAN%' OR UPPER(drug) LIKE '%MICARDIS%' + OR UPPER(drug) LIKE '%VALSARTAN%' OR UPPER(drug) LIKE '%DIOVAN%' + OR UPPER(drug) LIKE '%SACUBITRIL%' OR UPPER(drug) LIKE '%ENTRESTO%' + THEN 1 + ELSE 0 END AS arb FROM `physionet-data.mimiciv_hosp.prescriptions` ) + SELECT pr.subject_id , pr.hadm_id @@ -23,8 +25,7 @@ SELECT FROM `physionet-data.mimiciv_hosp.prescriptions` pr INNER JOIN arb_drug - ON - pr.drug = arb_drug.drug + ON pr.drug = arb_drug.drug WHERE arb_drug.arb = 1 ;