36 changes: 31 additions & 5 deletions .github/workflows/python.yaml
@@ -74,16 +74,20 @@ jobs:
fail-fast: false
matrix:
os: [ "ubuntu-latest", "macos-13", "windows-latest" ]
python-version: [ "3.9", "3.10", "3.11", "3.12" ]
python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
module: [ "xinference" ]
exclude:
- { os: macos-13, python-version: 3.10 }
- { os: macos-13, python-version: 3.11 }
- { os: macos-13, python-version: 3.12 }
- { os: macos-13, python-version: 3.13 }
- { os: windows-latest, python-version: 3.10 }
- { os: windows-latest, python-version: 3.11 }
- { os: windows-latest, python-version: 3.12 }
include:
- { os: self-hosted, module: gpu, python-version: 3.9}
- { os: macos-latest, module: metal, python-version: "3.10" }
- { os: macos-latest, python-version: "3.13" }

steps:
- name: Check out code
@@ -99,15 +103,21 @@ jobs:
python-version: ${{ matrix.python-version }}
activate-environment: ${{ env.CONDA_ENV }}

# Important for python == 3.12
# Important for python == 3.12 and 3.13
- name: Update pip and setuptools
if: ${{ matrix.python-version == '3.12' }}
if: ${{ matrix.python-version == '3.12' || matrix.python-version == '3.13' }}
run: |
python -m pip install -U pip setuptools

# Install torch separately for Python 3.13
- name: Install torch for Python 3.13
if: ${{ matrix.python-version == '3.13' }}
run: |
python -m pip install torch torchvision torchaudio

- name: Install numpy
if: |
(startsWith(matrix.os, 'macos') && (matrix.python-version == '3.12' || matrix.python-version == '3.9')) ||
(startsWith(matrix.os, 'macos') && (matrix.python-version == '3.13' || matrix.python-version == '3.9')) ||
(startsWith(matrix.os, 'windows') && matrix.python-version == '3.9')
run: |
python -m pip install "numpy<2"
@@ -139,7 +149,9 @@ jobs:
pip install "transformers<4.49"
pip install attrdict
pip install "timm>=0.9.16"
pip install torch torchvision
if [ "${{ matrix.python-version }}" != "3.13" ]; then
pip install torch torchvision
fi
pip install accelerate
pip install sentencepiece
pip install transformers_stream_generator
@@ -158,9 +170,22 @@
fi
working-directory: .

- name: Clean up disk
if: |
(startsWith(matrix.os, 'ubuntu'))
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
df -h

- name: Test with pytest
env:
MODULE: ${{ matrix.module }}
PYTORCH_MPS_HIGH_WATERMARK_RATIO: 1.0
PYTORCH_MPS_LOW_WATERMARK_RATIO: 0.2
run: |
if [ "$MODULE" == "gpu" ]; then
${{ env.SELF_HOST_PYTHON }} -m pip install -U -e ".[audio]"
@@ -296,6 +321,7 @@ jobs:
--ignore xinference/model/llm/sglang \
--ignore xinference/client/tests/test_client.py \
--ignore xinference/client/tests/test_async_client.py \
--ignore xinference/model/llm/mlx \
xinference

fi
8 changes: 8 additions & 0 deletions xinference/__init__.py
@@ -12,6 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os

# Configure MPS memory management to avoid the "invalid low watermark ratio" error seen with newer PyTorch builds on Python 3.13+
if os.environ.get("PYTORCH_MPS_HIGH_WATERMARK_RATIO") is None:
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "1.0"
if os.environ.get("PYTORCH_MPS_LOW_WATERMARK_RATIO") is None:
os.environ["PYTORCH_MPS_LOW_WATERMARK_RATIO"] = "0.2"

from . import _version

__version__ = _version.get_versions()["version"]
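For context on the guard above: the watermark ratios are read from the environment by PyTorch's MPS allocator, so they need to be present in os.environ before torch is first used, which is presumably why the PR sets them at the very top of the package __init__. Below is a minimal standalone sketch of the same pattern; the configure_mps_watermarks helper is hypothetical and not part of the PR, and it assumes torch is installed.

import os


def configure_mps_watermarks() -> dict:
    """Set default MPS watermark ratios without clobbering user overrides."""
    defaults = {
        "PYTORCH_MPS_HIGH_WATERMARK_RATIO": "1.0",
        "PYTORCH_MPS_LOW_WATERMARK_RATIO": "0.2",
    }
    for key, value in defaults.items():
        # setdefault mirrors the explicit `is None` checks in the diff:
        # existing values win, defaults only fill the gaps.
        os.environ.setdefault(key, value)
    return {key: os.environ[key] for key in defaults}


if __name__ == "__main__":
    print(configure_mps_watermarks())
    import torch  # noqa: F401  -- imported only after the env vars are in place

Using os.environ.setdefault keeps any values exported by the CI workflow (or by a user) intact, which matches the intent of the explicit `is None` checks in the diff.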
1 change: 1 addition & 0 deletions xinference/core/tests/test_metrics.py
@@ -124,6 +124,7 @@ async def test_disable_metrics_exporter_server(disable_metrics, setup_cluster):
requests.get(metrics_exporter_address)


@pytest.mark.timeout(300) # 5-minute timeout to prevent hanging on Python 3.13
async def test_metrics_exporter_data(setup_cluster):
endpoint, metrics_exporter_address, supervisor_address = setup_cluster

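One dependency note on the new marker: @pytest.mark.timeout(...) is provided by the pytest-timeout plugin, so the test environment is assumed to have it installed; without the plugin the marker has no effect. A trivial sketch of the pattern outside this test suite (placeholder test, not from the PR):

import time

import pytest


@pytest.mark.timeout(300)  # provided by pytest-timeout; fail instead of hanging forever
def test_finishes_within_limit():
    time.sleep(0.1)  # stand-in for work that could stall on Python 3.13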
12 changes: 11 additions & 1 deletion xinference/device_utils.py
@@ -108,7 +108,17 @@ def empty_cache():
if torch.cuda.is_available():
torch.cuda.empty_cache()
if torch.backends.mps.is_available():
torch.mps.empty_cache()
try:
torch.mps.empty_cache()
except RuntimeError as e:
# Handle known MPS memory management issues with newer PyTorch on Python 3.13+
if "invalid low watermark ratio" in str(e):
# This is a known issue with newer PyTorch builds on macOS (Python 3.13+).
# We can safely ignore this error as it doesn't affect functionality.
pass
else:
# Re-raise other RuntimeErrors
raise
if is_xpu_available():
torch.xpu.empty_cache()
if is_npu_available():
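A possible way to exercise the new fallback without Apple-silicon hardware is to force the failure path with mocks. A minimal pytest sketch, assuming unittest.mock and that empty_cache is importable from xinference.device_utils as shown in the diff (the tests themselves are not part of the PR):

from unittest.mock import patch

import pytest

from xinference.device_utils import empty_cache


def test_empty_cache_swallows_mps_watermark_error():
    # Pretend MPS is available and make torch.mps.empty_cache raise the known error.
    with patch("torch.backends.mps.is_available", return_value=True), patch(
        "torch.mps.empty_cache",
        side_effect=RuntimeError("invalid low watermark ratio 0.2"),
    ):
        empty_cache()  # should not raise: this specific RuntimeError is ignored


def test_empty_cache_reraises_other_runtime_errors():
    with patch("torch.backends.mps.is_available", return_value=True), patch(
        "torch.mps.empty_cache", side_effect=RuntimeError("something else")
    ):
        with pytest.raises(RuntimeError, match="something else"):
            empty_cache()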
59 changes: 53 additions & 6 deletions xinference/model/embedding/tests/test_embedding_models.py
@@ -222,11 +222,16 @@ def test_register_custom_embedding():


def test_register_fault_embedding():
import warnings

from ....constants import XINFERENCE_MODEL_DIR
from .. import _install

os.makedirs(os.path.join(XINFERENCE_MODEL_DIR, "v2", "embedding"), exist_ok=True)
file_path = os.path.join(XINFERENCE_MODEL_DIR, "v2", "embedding/GTE.json")
embedding_dir = os.path.join(XINFERENCE_MODEL_DIR, "v2", "embedding")

os.makedirs(embedding_dir, exist_ok=True)
file_path = os.path.join(embedding_dir, "GTE.json")

data = {
"model_name": "GTE",
"model_hub": "huggingface",
@@ -247,11 +252,53 @@ def test_register_fault_embedding():
with open(file_path, "w") as f:
json.dump(data, f, indent=4)

with pytest.warns(UserWarning) as record:
all_warnings = []

def custom_warning_handler(
message, category, filename, lineno, file=None, line=None
):
warning_info = {
"message": str(message),
"category": category.__name__,
"filename": filename,
"lineno": lineno,
}
all_warnings.append(warning_info)

old_showwarning = warnings.showwarning
warnings.showwarning = custom_warning_handler

try:
_install()
assert any(
"Invalid model URI /new_data/cache/gte-Qwen2" in str(r.message) for r in record
)

warnings.showwarning = old_showwarning

with pytest.warns(UserWarning) as record:
_install()

found_warning = False
for warning in record:
message = str(warning.message)
if (
"has error" in message
and (
"Invalid model URI" in message
or "Model URI cannot be a relative path" in message
)
and "/new_data/cache/gte-Qwen2" in message
):
found_warning = True
break

assert (
found_warning
), f"Expected warning about invalid model URI not found. Warnings: {[str(w.message) for w in record]}"

finally:
warnings.showwarning = old_showwarning

if os.path.exists(file_path):
os.remove(file_path)
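As a note on the approach above: replacing warnings.showwarning and then re-running _install() under pytest.warns makes the assertion robust, at the cost of some bookkeeping. The standard-library warnings.catch_warnings(record=True) context manager is a more compact way to capture everything a call emits, if a single pass is enough. A minimal sketch follows; the collect_warning_messages helper is hypothetical and not part of the PR.

import warnings


def collect_warning_messages(func):
    """Run func and return the message of every warning it emitted."""
    with warnings.catch_warnings(record=True) as captured:
        warnings.simplefilter("always")  # make sure nothing is filtered out
        func()
    return [str(w.message) for w in captured]


# Usage against the same call exercised in the test, e.g.:
#   messages = collect_warning_messages(_install)
#   assert any("Invalid model URI" in m for m in messages)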


def test_convert_ids_to_tokens():