.github/workflows/master.yml

name: MLflow tests

on:
  pull_request:
  push:
    branches:
      - master
      - branch-[0-9]+.[0-9]+

permissions:
  contents: read

concurrency:
  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: true

# Use `bash --noprofile --norc -exo pipefail` by default for all `run` steps in this workflow:
# https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#defaultsrun
defaults:
  run:
    shell: bash --noprofile --norc -exo pipefail {0}

env:
  MLFLOW_HOME: /home/runner/work/mlflow/mlflow
  # Note miniconda is pre-installed in the virtual environments for GitHub Actions:
  # https://github.com/actions/virtual-environments/blob/main/images/linux/scripts/installers/miniconda.sh
  MLFLOW_CONDA_HOME: /usr/share/miniconda
  SPARK_LOCAL_IP: localhost
  PIP_EXTRA_INDEX_URL: https://download.pytorch.org/whl/cpu

jobs:
  lint:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    if: github.event_name != 'pull_request' && github.event.pull_request.draft == false
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/actions/untracked
      - uses: ./.github/actions/setup-python
        id: setup-python
      - name: Add problem matchers
        run: |
          echo "::add-matcher::.github/workflows/matchers/pylint.json"
          echo "::add-matcher::.github/workflows/matchers/black.json"
          echo "::add-matcher::.github/workflows/matchers/ruff.json"
      - uses: ./.github/actions/cache-pip
      - name: Install dependencies
        run: |
          python -m venv .venv
          source .venv/bin/activate
          source ./dev/install-common-deps.sh --ml
          pip install -r requirements/lint-requirements.txt
      - uses: ./.github/actions/pipdeptree
      - name: Install pre-commit hooks
        run: |
          source .venv/bin/activate
          pre-commit install -t pre-commit -t prepare-commit-msg
      - name: Run pre-commit
        id: pre-commit
        env:
          IS_MAINTAINER: ${{ contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.pull_request.author_association )}}
        run: |
          source .venv/bin/activate
          pre-commit run --all-files

  # python-skinny tests cover a subset of mlflow functionality
  # that is meant to be supported with a smaller dependency footprint.
  # The python skinny tests cover the subset of mlflow functionality
  # while also verifying certain dependencies are omitted.
  python-skinny:
    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive
      - uses: ./.github/actions/untracked
      - uses: ./.github/actions/setup-python
      - name: Install dependencies
        run: |
          source ./dev/install-common-deps.sh --skinny
      - name: Run tests
        run: |
          ./dev/run-python-skinny-tests.sh

  python:
    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 120
    strategy:
      fail-fast: false
      matrix:
        group: [1, 2]
        include:
          - splits: 2
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive
      - uses: ./.github/actions/untracked
      - uses: ./.github/actions/free-disk-space
      - uses: ./.github/actions/setup-python
      - uses: ./.github/actions/setup-pyenv
      - uses: ./.github/actions/setup-java
      - uses: ./.github/actions/cache-pip
      - name: Install dependencies
        run: |
          python -m venv .venv
          source .venv/bin/activate
          source ./dev/install-common-deps.sh --ml
      - uses: ./.github/actions/pipdeptree
      - name: Import check
        run: |
          source .venv/bin/activate
          python tests/check_mlflow_lazily_imports_ml_packages.py
      - name: Run tests
        run: |
          source .venv/bin/activate
          source dev/setup-ssh.sh
          pytest --splits=${{ matrix.splits }} --group=${{ matrix.group }} --quiet --requires-ssh \
            --ignore-flavors --ignore=tests/examples --ignore=tests/recipes --ignore=tests/evaluate \
            tests

  database:
    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 90
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive
      - uses: ./.github/actions/untracked
      - name: Build
        run: |
          ./tests/db/compose.sh pull -q postgresql mysql mssql
          ./tests/db/compose.sh build --build-arg DEPENDENCIES="$(python setup.py -q dependencies)"
      - name: Run tests
        run: |
          set +e
          err=0
          trap 'err=1' ERR

          for service in $(./tests/db/compose.sh config --services | grep '^mlflow-')
          do
            # Set `--no-TTY` to show container logs on GitHub Actions:
            # https://github.com/actions/virtual-environments/issues/5022
            ./tests/db/compose.sh run --rm --no-TTY $service pytest \
              tests/store/tracking/test_sqlalchemy_store.py \
              tests/store/model_registry/test_sqlalchemy_store.py \
              tests/db
          done

          test $err = 0

      - name: Run migration check
        run: |
          set +e
          err=0
          trap 'err=1' ERR

          ./tests/db/compose.sh down --volumes --remove-orphans
          for service in $(./tests/db/compose.sh config --services | grep '^migration-')
          do
            ./tests/db/compose.sh run --rm --no-TTY $service
          done

          test $err = 0

      - name: Clean up
        run: |
          ./tests/db/compose.sh down --volumes --remove-orphans --rmi all

  java:
    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive
      - uses: ./.github/actions/untracked
      - uses: ./.github/actions/setup-python
      - uses: ./.github/actions/setup-java
      - name: Install dependencies
        run: |
          source ./dev/install-common-deps.sh
      - uses: ./.github/actions/pipdeptree
      - name: Run tests
        run: |
          cd mlflow/java
          mvn clean package -q

  flavors:
    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 120
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive
      - uses: ./.github/actions/untracked
      - uses: ./.github/actions/setup-python
      - uses: ./.github/actions/setup-pyenv
      - uses: ./.github/actions/setup-java
      - uses: ./.github/actions/cache-pip
      - name: Install dependencies
        run: |
          python -m venv .venv
          source .venv/bin/activate
          source ./dev/install-common-deps.sh --ml
      - uses: ./.github/actions/pipdeptree
      - name: Run tests
        run: |
          source .venv/bin/activate
          pytest \
            tests/utils/test_model_utils.py \
            tests/tracking/fluent/test_fluent_autolog.py \
            tests/autologging \
            tests/server/auth

  # It takes 9 ~ 10 minutes to run tests in `tests/models`. To make CI finish faster,
  # run these tests in a separate job.
  models:
    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 120
    strategy:
      fail-fast: false
      matrix:
        group: [1, 2]
        include:
          - splits: 2
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive
      - uses: ./.github/actions/untracked
      - uses: ./.github/actions/free-disk-space
      - uses: ./.github/actions/setup-python
      - uses: ./.github/actions/setup-pyenv
      - uses: ./.github/actions/setup-java
      - name: Install dependencies
        run: |
          source ./dev/install-common-deps.sh
          pip install pyspark
      - uses: ./.github/actions/pipdeptree
      - name: Run tests
        run: |
          pytest --splits=${{ matrix.splits }} --group=${{ matrix.group }} tests/models

  # NOTE: numpy is pinned in this suite due to its heavy reliance on shap, which internally uses
  # references to the now fully deprecated (as of 1.24.x) numpy types (i.e., np.bool).
  # When the shap cross version tests are passing in a new release version of shap, this pin should
  # be removed.
  evaluate:
    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 90
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive
      - uses: ./.github/actions/untracked
      - uses: ./.github/actions/setup-python
      - uses: ./.github/actions/setup-pyenv
      - uses: ./.github/actions/setup-java
      - name: Install dependencies
        run: |
          source ./dev/install-common-deps.sh
          pip install pyspark torch transformers
      - uses: ./.github/actions/pipdeptree
      - name: Run tests
        run: |
          dev/pytest.sh tests/evaluate

  pyfunc:
    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 120
    strategy:
      fail-fast: false
      matrix:
        group: [1, 2]
        include:
          - splits: 2
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive
      - uses: ./.github/actions/untracked
      - uses: ./.github/actions/free-disk-space
      - uses: ./.github/actions/setup-python
      - uses: ./.github/actions/setup-pyenv
      - uses: ./.github/actions/setup-java
      - name: Install dependencies
        run: |
          source ./dev/install-common-deps.sh
          pip install tensorflow 'pyspark[connect]'
      - uses: ./.github/actions/pipdeptree
      - name: Run tests
        run: |
          pytest --splits=${{ matrix.splits }} --group=${{ matrix.group }} --durations=30 \
            tests/pyfunc --ignore tests/pyfunc/test_spark_connect.py

          # test_spark_connect.py fails if it's run with ohter tests, so run it separately.
          pytest tests/pyfunc/test_spark_connect.py

  sagemaker:
    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 120
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive
      - uses: ./.github/actions/untracked
      - uses: ./.github/actions/setup-python
      - uses: ./.github/actions/setup-java
      - name: Install dependencies
        run: |
          source ./dev/install-common-deps.sh --ml
      - uses: ./.github/actions/pipdeptree
      - name: Run tests
        run: |
          pytest tests/sagemaker

  windows:
    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false
    runs-on: windows-latest
    timeout-minutes: 120
    strategy:
      fail-fast: false
      matrix:
        group: [1, 2]
        include:
          - splits: 2
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive
      - uses: ./.github/actions/untracked
      - uses: ./.github/actions/setup-python
      - uses: ./.github/actions/setup-pyenv
      - uses: ./.github/actions/setup-java
      - uses: ./.github/actions/cache-pip
      - name: Install python dependencies
        run: |
          python -m venv .venv
          source .venv/Scripts/activate
          pip install -r requirements/test-requirements.txt
          pip install --no-dependencies tests/resources/mlflow-test-plugin
          pip install -e .[extras]
          pip install pyspark
          pip install mleap
          # Install Hugging Face datasets to test Hugging Face usage with MLflow dataset tracking
          pip install datasets
          # Install TensorFlow to test TensorFlow dataset usage with MLflow dataset tracking
          pip install tensorflow
          # Install torch and transformers to test metrics
          pip install torch transformers
      - uses: ./.github/actions/pipdeptree
      - name: Download Hadoop winutils for Spark
        run: |
          git clone https://github.com/cdarlint/winutils /tmp/winutils
      - name: Run python tests
        env:
          # Starting from SQLAlchemy version 2.0, `QueuePool` is the default connection pool
          # when creating an `Engine`. `QueuePool` prevents the removal of temporary database
          # files created during tests on Windows as it keeps the DB connection open until
          # it's explicitly disposed.
          MLFLOW_SQLALCHEMYSTORE_POOLCLASS: "NullPool"
        run: |
          source .venv/Scripts/activate
          # Set Hadoop environment variables required for testing Spark integrations on Windows
          export HADOOP_HOME=/tmp/winutils/hadoop-3.2.2
          export PATH=$PATH:$HADOOP_HOME/bin
          # Run Windows tests
          pytest --splits=${{ matrix.splits }} --group=${{ matrix.group }} \
            --ignore-flavors --ignore=tests/projects --ignore=tests/examples --ignore=tests/recipes --ignore=tests/evaluate \
            tests
          # MLeap is incompatible on Windows with PySpark3.4 release. 
          # Reinstate tests when MLeap has released a fix. [ML-30491]
          # pytest tests/mleap