Migrate lenlp packaging from setuptools-rust to maturin.

Summary
  * CI: replace .github/workflows/wheels.yml with the maturin-generated
    .github/workflows/publish.yml. The new workflow builds Linux, Windows
    and macOS wheels plus an sdist, and runs `maturin upload` only when the
    ref is a tag.
  * Packaging: delete setup.py, Manifest.in, pytest.ini and
    lenlp/__version__.py; add pyproject.toml, which now carries the project
    metadata (version 1.0.6), the maturin settings and the pytest options.
  * Layout: move the Python package from lenlp/ to python/lenlp/ and rename
    the native module from `rslenlp` to `lenlp._rslenlp` (Cargo [package]
    and [lib] names, the #[pymodule] function and every Python import).

Changes made during review
  * pyproject.toml: add `readme = "README.md"`. setup.py passed README.md
    as long_description; without this key the published package has no
    long description.
  * pyproject.toml: drop the `[tool.include]` table. maturin's include
    option is the `include` key inside `[tool.maturin]`, so the table was
    never read. NOTE(review): build an sdist once and confirm rust/ and
    README.md are still in it.
  * pyproject.toml: separate `[tool.maturin]` from the previous table with
    a blank line and end the file with a newline.
  * Cargo.toml: remove `[tool.setuptools.packages]` without adding
    `[tool.maturin]`. maturin takes that table from pyproject.toml, which
    already sets the same `features`; in Cargo.toml it is only an unused
    manifest key.
  * The `index` post-image ids of Cargo.toml and pyproject.toml were not
    recomputed after these edits.

Open questions
  * NOTE(review): lenlp/__init__.py is moved unchanged while
    lenlp/__version__.py is deleted; confirm __init__.py does not import it.
  * NOTE(review): requires-python stays at ">=3.8", but scikit-learn >= 1.5.0
    and scipy >= 1.13.1 presumably need Python >= 3.9; confirm and align.

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..25b7497
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,138 @@
+# This file is autogenerated by maturin v1.5.1
+# To update, run
+#
+#    maturin generate-ci github
+#
+name: CI
+
+on:
+  push:
+    branches:
+      - main
+      - master
+    tags:
+      - '*'
+  pull_request:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  linux:
+    runs-on: ${{ matrix.platform.runner }}
+    strategy:
+      matrix:
+        platform:
+          - runner: ubuntu-latest
+            target: x86_64
+          - runner: ubuntu-latest
+            target: x86
+          - runner: ubuntu-latest
+            target: aarch64
+          - runner: ubuntu-latest
+            target: armv7
+          - runner: ubuntu-latest
+            target: s390x
+          - runner: ubuntu-latest
+            target: ppc64le
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+      - name: Build wheels
+        uses: PyO3/maturin-action@v1
+        with:
+          target: ${{ matrix.platform.target }}
+          args: --release --out dist --find-interpreter
+          sccache: 'true'
+          manylinux: auto
+      - name: Upload wheels
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels-linux-${{ matrix.platform.target }}
+          path: dist
+
+  windows:
+    runs-on: ${{ matrix.platform.runner }}
+    strategy:
+      matrix:
+        platform:
+          - runner: windows-latest
+            target: x64
+          - runner: windows-latest
+            target: x86
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          architecture: ${{ matrix.platform.target }}
+      - name: Build wheels
+        uses: PyO3/maturin-action@v1
+        with:
+          target: ${{ matrix.platform.target }}
+          args: --release --out dist --find-interpreter
+          sccache: 'true'
+      - name: Upload wheels
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels-windows-${{ matrix.platform.target }}
+          path: dist
+
+  macos:
+    runs-on: ${{ matrix.platform.runner }}
+    strategy:
+      matrix:
+        platform:
+          - runner: macos-latest
+            target: x86_64
+          - runner: macos-14
+            target: aarch64
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+      - name: Build wheels
+        uses: PyO3/maturin-action@v1
+        with:
+          target: ${{ matrix.platform.target }}
+          args: --release --out dist --find-interpreter
+          sccache: 'true'
+      - name: Upload wheels
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels-macos-${{ matrix.platform.target }}
+          path: dist
+
+  sdist:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Build sdist
+        uses: PyO3/maturin-action@v1
+        with:
+          command: sdist
+          args: --out dist
+      - name: Upload sdist
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels-sdist
+          path: dist
+
+  release:
+    name: Release
+    runs-on: ubuntu-latest
+    if: "startsWith(github.ref, 'refs/tags/')"
+    needs: [linux, windows, macos, sdist]
+    steps:
+      - uses: actions/download-artifact@v4
+      - name: Publish to PyPI
+        uses: PyO3/maturin-action@v1
+        env:
+          MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_PASSWORD }}
+        with:
+          command: upload
+          args: --non-interactive --skip-existing wheels-*/*
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
deleted file mode 100644
index 2551414..0000000
--- a/.github/workflows/wheels.yml
+++ /dev/null
@@ -1,231 +0,0 @@
-name: Build and Publish Wheels
-
-on:
-  push:
-    branches:
-      - main
-      - "release/*"
-
-jobs:
-  build-linux:
-    runs-on: ubuntu-latest
-
-    strategy:
-      matrix:
-        python-version: ["38", "39", "310", "311"]
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      - name: Install Docker
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y \
-            ca-certificates \
-            curl \
-            gnupg \
-            lsb-release
-          sudo mkdir -p /etc/apt/keyrings
-          curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
-          echo \
-            "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \
-            $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
-          sudo apt-get update
-          sudo apt-get install -y docker-ce docker-ce-cli containerd.io
-
-      - name: Build the package in manylinux2014 docker container
-        run: |
-          docker run --rm \
-            -v $(pwd):/io quay.io/pypa/manylinux2014_x86_64 \
-            /bin/bash -c '
-              curl https://sh.rustup.rs -sSf | sh -s -- -y
-              source $HOME/.cargo/env
-              export PATH=$PATH:$HOME/.cargo/bin
-              rustc --version
-              for PYBIN in /opt/python/cp${PYTHON_VERSION}*/bin; do
-                "${PYBIN}/python" -m venv /venv
-                source /venv/bin/activate
-                pip install setuptools-rust==1.9.0 wheel
-                cd /io
-                python setup.py bdist_wheel
-                deactivate
-              done
-            ' \
-            --env PYTHON_VERSION=${{ matrix.python-version }}
-
-      - name: Rename wheels
-        run: |
-          sudo mkdir -p renamed_wheels
-          sudo chown -R $USER:$(id -gn) renamed_wheels
-          for whl in dist/*.whl; do
-            sudo mv "$whl" "renamed_wheels/$(basename "$whl" | sed 's/linux/manylinux2014/')"
-          done
-
-      - name: Upload artifacts
-        uses: actions/upload-artifact@v3
-        with:
-          name: dist-manylinux2014-${{ matrix.python-version }}
-          path: renamed_wheels/*
-
-  build-macos:
-    runs-on: macos-latest
-
-    strategy:
-      matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install Rust
-        run: brew install rust
-
-      - name: Install package dependencies
-        run: pip install setuptools-rust==1.9.0
-
-      - name: Install build tools
-        run: pip install wheel
-
-      - name: Build the package
-        run: python setup.py sdist bdist_wheel
-
-      - name: Upload artifacts
-        uses: actions/upload-artifact@v3
-        with:
-          name: dist-macos-${{ matrix.python-version }}
-          path: dist/*
-
-  build-windows:
-    runs-on: windows-latest
-
-    strategy:
-      matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install Rust
-        run: rustup default stable
-
-      - name: Install package dependencies
-        run: pip install setuptools-rust==1.9.0
-
-      - name: Install build tools
-        run: pip install wheel
-
-      - name: Build the package
-        run: python setup.py sdist bdist_wheel
-
-      - name: Upload artifacts
-        uses: actions/upload-artifact@v3
-        with:
-          name: dist-windows-${{ matrix.python-version }}
-          path: dist/*
-
-  publish:
-    needs: [build-linux, build-macos, build-windows]
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Download Linux artifacts for Python 3.8
-        uses: actions/download-artifact@v3
-        with:
-          name: dist-manylinux2014-38
-          path: dist/linux/3.8
-
-      - name: Download Linux artifacts for Python 3.9
-        uses: actions/download-artifact@v3
-        with:
-          name: dist-manylinux2014-39
-          path: dist/linux/3.9
-
-      - name: Download Linux artifacts for Python 3.10
-        uses: actions/download-artifact@v3
-        with:
-          name: dist-manylinux2014-310
-          path: dist/linux/3.10
-
-      - name: Download Linux artifacts for Python 3.11
-        uses: actions/download-artifact@v3
-        with:
-          name: dist-manylinux2014-311
-          path: dist/linux/3.11
-
-      - name: Download macOS artifacts for Python 3.8
-        uses: actions/download-artifact@v3
-        with:
-          name: dist-macos-3.8
-          path: dist/macos/3.8
-
-      - name: Download macOS artifacts for Python 3.9
-        uses: actions/download-artifact@v3
-        with:
-          name: dist-macos-3.9
-          path: dist/macos/3.9
-
-      - name: Download macOS artifacts for Python 3.10
-        uses: actions/download-artifact@v3
-        with:
-          name: dist-macos-3.10
-          path: dist/macos/3.10
-
-      - name: Download macOS artifacts for Python 3.11
-        uses: actions/download-artifact@v3
-        with:
-          name: dist-macos-3.11
-          path: dist/macos/3.11
-
-      - name: Download Windows artifacts for Python 3.8
-        uses: actions/download-artifact@v3
-        with:
-          name: dist-windows-3.8
-          path: dist/windows/3.8
-
-      - name: Download Windows artifacts for Python 3.9
-        uses: actions/download-artifact@v3
-        with:
-          name: dist-windows-3.9
-          path: dist/windows/3.9
-
-      - name: Download Windows artifacts for Python 3.10
-        uses: actions/download-artifact@v3
-        with:
-          name: dist-windows-3.10
-          path: dist/windows/3.10
-
-      - name: Download Windows artifacts for Python 3.11
-        uses: actions/download-artifact@v3
-        with:
-          name: dist-windows-3.11
-          path: dist/windows/3.11
-
-      - name: Combine all artifacts
-        run: mkdir -p final_dist && find dist/ -type f -exec cp {} final_dist/ \;
-
-      - name: Set up Python 3.11
-        uses: actions/setup-python@v4
-        with:
-          python-version: "3.11"
-
-      - name: Install twine
-        run: pip install twine
-
-      - name: Publish to PyPI
-        env:
-          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
-          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
-        run: twine upload final_dist/*
diff --git a/Cargo.toml b/Cargo.toml
index 281ee96..8de5683 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,9 +1,9 @@
 [package]
-name = "lenlp"
+name = "_rslenlp"
 edition = "2021"
 
 [lib]
-name = "rslenlp"
+name = "_rslenlp"
 crate-type = ["cdylib"]
 path = "rust/lib.rs"
 
@@ -23,5 +23,2 @@ opt-level = 0
 [profile.release]
 opt-level = 3
-
-[tool.setuptools.packages]
-find = { where = ["lenlp"] }
diff --git a/Manifest.in b/Manifest.in
deleted file mode 100644
index b96db57..0000000
--- a/Manifest.in
+++ /dev/null
@@ -1,2 +0,0 @@
-include Cargo.toml
-recursive include rust/*.rs
\ No newline at end of file
diff --git a/lenlp/__version__.py b/lenlp/__version__.py
deleted file mode 100644
index e986d53..0000000
--- a/lenlp/__version__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-VERSION = (1, 0, 5)
-
-__version__ = ".".join(map(str, VERSION))
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..660d3a5
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,75 @@
+[build-system]
+requires = ["maturin >= 1.5.1"]
+build-backend = "maturin"
+
+[project]
+name = "lenlp"
+version = "1.0.6"
+description = "Natural Language Processing library for Python from Rust."
+# Long description for the package index; setup.py used to read README.md for this.
+readme = "README.md"
+
+authors = [
+    {name = "Raphael Sourty", email = "raphael.sourty@gmail.com"},
+]
+
+
+keywords = []
+
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Rust",
+    "Operating System :: OS Independent",
+]
+
+requires-python = ">=3.8"
+
+dependencies = [
+    "scikit-learn >= 1.5.0",
+    "scipy >= 1.13.1",
+]
+
+[project.urls]
+Homepage = "https://github.com/raphaelsty/lenlp"
+Documentation = "https://github.com/raphaelsty/lenlp"
+Repository = "https://github.com/raphaelsty/lenlp"
+
+[project.optional-dependencies]
+dev = [
+    "maturin >= 1.5.1",
+    "pytest-cov >= 5.0.0",
+    "pytest >= 7.4.4",
+    "ruff >= 0.1.15",
+]
+
+[tool.maturin]
+bindings = "pyo3"
+features = ["pyo3/extension-module"]
+# The pure-Python package lives under python/ (python/lenlp/...).
+python-source = "python"
+# Last segment matches the [lib] name in Cargo.toml and the #[pymodule] function.
+module-name = "lenlp._rslenlp"
+
+[tool.pytest.ini_options]
+filterwarnings = [
+    "ignore::DeprecationWarning",
+    "ignore::RuntimeWarning",
+    "ignore::UserWarning",
+]
+addopts = [
+    "--doctest-modules",
+    "--verbose",
+    "-ra",
+    "--cov-config=.coveragerc",
+    "-m not web and not slow",
+]
+doctest_optionflags = ["NORMALIZE_WHITESPACE", "NUMBER"]
+norecursedirs = [
+    "build",
+    "docs",
+    "node_modules",
+]
+markers = [
+    "web: tests that require using the Internet",
+    "slow: tests that take a long time to run",
+]
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index 500d872..0000000
--- a/pytest.ini
+++ /dev/null
@@ -1,19 +0,0 @@
-[pytest]
-filterwarnings =
-    ignore::DeprecationWarning
-    ignore::RuntimeWarning
-    ignore::UserWarning
-addopts =
-    --doctest-modules
-    --verbose
-    -ra
-    --cov-config=.coveragerc
-    -m "not web and not slow"
-doctest_optionflags = NORMALIZE_WHITESPACE NUMBER
-norecursedirs =
-    build
-    docs
-    node_modules
-markers =
-    web: tests that require using the Internet
-    slow: tests that take a long time to run
\ No newline at end of file
diff --git a/lenlp/__init__.py b/python/lenlp/__init__.py
similarity index 100%
rename from lenlp/__init__.py
rename to python/lenlp/__init__.py
diff --git a/lenlp/analyzer/__init__.py b/python/lenlp/analyzer/__init__.py
similarity index 100%
rename from lenlp/analyzer/__init__.py
rename to python/lenlp/analyzer/__init__.py
diff --git a/lenlp/analyzer/analyze.py b/python/lenlp/analyzer/analyze.py
similarity index 95%
rename from lenlp/analyzer/analyze.py
rename to python/lenlp/analyzer/analyze.py
index ae2ccb6..d5bcb40 100644
--- a/lenlp/analyzer/analyze.py
+++ b/python/lenlp/analyzer/analyze.py
@@ -1,4 +1,4 @@
-from rslenlp import rschar_ngrams_many, rschar_wb_ngrams_many, rssplit_words_many
+from lenlp._rslenlp import rschar_ngrams_many, rschar_wb_ngrams_many, rssplit_words_many
 
 __all__ = ["analyze"]
 
diff --git a/lenlp/counter/__init__.py b/python/lenlp/counter/__init__.py
similarity index 100%
rename from lenlp/counter/__init__.py
rename to python/lenlp/counter/__init__.py
diff --git a/lenlp/counter/count.py b/python/lenlp/counter/count.py
similarity index 99%
rename from lenlp/counter/count.py
rename to python/lenlp/counter/count.py
index 1cfd565..eab36f3 100644
--- a/lenlp/counter/count.py
+++ b/python/lenlp/counter/count.py
@@ -1,4 +1,4 @@
-from rslenlp import (
+from lenlp._rslenlp import (
     rsvectorize_char_ngrams_many,
     rsvectorize_char_wb_ngrams_many,
     rsvectorize_split_words_many,
diff --git a/lenlp/flash/__init__.py b/python/lenlp/flash/__init__.py
similarity index 100%
rename from lenlp/flash/__init__.py
rename to python/lenlp/flash/__init__.py
diff --git a/lenlp/flash/flash_text.py b/python/lenlp/flash/flash_text.py
similarity index 96%
rename from lenlp/flash/flash_text.py
rename to python/lenlp/flash/flash_text.py
index 670f00d..ecbfc44 100644
--- a/lenlp/flash/flash_text.py
+++ b/python/lenlp/flash/flash_text.py
@@ -1,4 +1,4 @@
-from rslenlp import RSKeywordProcessor
+from lenlp._rslenlp import RSKeywordProcessor
 
 __all__ = ["FlashText"]
 
diff --git a/lenlp/normalizer/__init__.py b/python/lenlp/normalizer/__init__.py
similarity index 100%
rename from lenlp/normalizer/__init__.py
rename to python/lenlp/normalizer/__init__.py
diff --git a/lenlp/normalizer/normalize.py b/python/lenlp/normalizer/normalize.py
similarity index 88%
rename from lenlp/normalizer/normalize.py
rename to python/lenlp/normalizer/normalize.py
index d73e207..2318b9c 100644
--- a/lenlp/normalizer/normalize.py
+++ b/python/lenlp/normalizer/normalize.py
@@ -1,4 +1,4 @@
-from rslenlp import rsnormalize, rsnormalize_many
+from lenlp._rslenlp import rsnormalize, rsnormalize_many
 
 __all__ = ["normalize"]
 
diff --git a/lenlp/sparse/__init__.py b/python/lenlp/sparse/__init__.py
similarity index 100%
rename from lenlp/sparse/__init__.py
rename to python/lenlp/sparse/__init__.py
diff --git a/lenlp/sparse/bm25_vectorizer.py b/python/lenlp/sparse/bm25_vectorizer.py
similarity index 100%
rename from lenlp/sparse/bm25_vectorizer.py
rename to python/lenlp/sparse/bm25_vectorizer.py
diff --git a/lenlp/sparse/count_vectorizer.py b/python/lenlp/sparse/count_vectorizer.py
similarity index 98%
rename from lenlp/sparse/count_vectorizer.py
rename to python/lenlp/sparse/count_vectorizer.py
index 9067b63..2288809 100644
--- a/lenlp/sparse/count_vectorizer.py
+++ b/python/lenlp/sparse/count_vectorizer.py
@@ -1,6 +1,7 @@
-from rslenlp import SparseMatrixBuilder
 from scipy.sparse import csr_matrix
 
+from lenlp._rslenlp import SparseMatrixBuilder
+
 __all__ = ["CountVectorizer"]
 
 
diff --git a/lenlp/sparse/tfidf_vectorizer.py b/python/lenlp/sparse/tfidf_vectorizer.py
similarity index 100%
rename from lenlp/sparse/tfidf_vectorizer.py
rename to python/lenlp/sparse/tfidf_vectorizer.py
diff --git a/rust/lib.rs b/rust/lib.rs
index 6a8b48a..084881d 100644
--- a/rust/lib.rs
+++ b/rust/lib.rs
@@ -9,7 +9,7 @@ mod rsstop_words;
 mod rsvectorizer;
 
 #[pymodule]
-fn rslenlp(_py: Python, m: &PyModule) -> PyResult<()> {
+fn _rslenlp(_py: Python, m: &PyModule) -> PyResult<()> {
     rsanalyzer::register_functions(m)?;
     rscounter::register_functions(m)?;
     rsflashtext::register_functions(m)?;
diff --git a/setup.py b/setup.py
deleted file mode 100644
index cd0900a..0000000
--- a/setup.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import setuptools
-from setuptools_rust import Binding, RustExtension
-
-from lenlp.__version__ import __version__
-
-with open(file="README.md", mode="r", encoding="utf-8") as fh:
-    long_description = fh.read()
-
-base_packages = ["scikit-learn >= 1.5.0", "scipy >= 1.13.1"]
-dev = ["maturin >= 1.5.1", "pytest-cov >= 5.0.0", "pytest >= 7.4.4", "ruff >= 0.1.15"]
-
-
-setuptools.setup(
-    name="lenlp",
-    version=f"{__version__}",
-    author="Raphael Sourty",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    author_email="raphael.sourty@gmail.com",
-    url="https://github.com/raphaelsty/lenlp",
-    download_url="https://github.com/raphaelsty/lenlp/archive/v_01.tar.gz",
-    keywords=[],
-    packages=setuptools.find_packages(),
-    install_requires=base_packages,
-    extras_require={"dev": base_packages + dev},
-    classifiers=[
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Rust",
-        "Operating System :: OS Independent",
-    ],
-    python_requires=">=3.8",
-    rust_extensions=[RustExtension("rslenlp", binding=Binding.PyO3)],
-    setup_requires=["setuptools-rust>=1.9.0"],
-)