From e52404793274728df29b3f0ddbdbc52a64720967 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Sun, 18 Dec 2022 20:46:39 +0900 Subject: [PATCH 01/19] adjust numpy and scipy versions for Py3.10 and 3.11 in travis.yml --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8937c0c74c..b60acd7db5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -49,11 +49,11 @@ matrix: - os: linux env: - MB_PYTHON_VERSION=3.10 - - BUILD_DEPENDS="numpy==1.19.3 scipy==1.7.0" + - BUILD_DEPENDS="numpy==1.22.2 scipy==1.8.0" - os: linux env: - MB_PYTHON_VERSION=3.11 - - BUILD_DEPENDS="numpy==1.19.3 scipy==1.7.0" + - BUILD_DEPENDS="numpy==1.23.2 scipy==1.9.2" before_install: - source multibuild/common_utils.sh From 5c5bd60c9cbcaf52629e055a67b5c13dcb747fd0 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Sun, 18 Dec 2022 21:44:24 +0900 Subject: [PATCH 02/19] ensure importlib-metadata is up-to-date --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index b60acd7db5..c4ca530155 100644 --- a/.travis.yml +++ b/.travis.yml @@ -49,11 +49,11 @@ matrix: - os: linux env: - MB_PYTHON_VERSION=3.10 - - BUILD_DEPENDS="numpy==1.22.2 scipy==1.8.0" + - BUILD_DEPENDS="numpy==1.22.2 scipy==1.8.0 importlib-metadata==5.1.0" - os: linux env: - MB_PYTHON_VERSION=3.11 - - BUILD_DEPENDS="numpy==1.23.2 scipy==1.9.2" + - BUILD_DEPENDS="numpy==1.23.2 scipy==1.9.2 importlib-metadata==5.1.0" before_install: - source multibuild/common_utils.sh From 9c1183d6a439c6bb404962dbf02e963a52c89de6 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Mon, 19 Dec 2022 16:23:10 +0900 Subject: [PATCH 03/19] disable nsmlib tests for Py3.10 and .11 --- setup.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index deace40c59..b111bd154f 100644 --- a/setup.py +++ b/setup.py @@ -282,10 +282,13 @@ def run(self): ] if not sys.platform.lower().startswith("win") and 
sys.version_info[:2] < (3, 11): - core_testenv.extend([ - 'POT', - 'nmslib', - ]) + core_testenv.append('POT') + +if not sys.platform.lower().startswith("win") and sys.version_info[:2] < (3, 10): + # + # nmslib wheels not available for Python 3.10 and 3.11 as of Dec 2022 + # + core_testenv.append('nmslib') # Add additional requirements for testing on Linux that are skipped on Windows. linux_testenv = core_testenv[:] + visdom_req From 6cfeb32ed48375b8911b0a82240ba0d07069057e Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Mon, 19 Dec 2022 16:31:52 +0900 Subject: [PATCH 04/19] get rid of nmslib test dependency in travis.yml --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c4ca530155..e8f175b2ca 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,7 +26,7 @@ env: # them here for now. They'll get picked up by the multibuild stuff # running in multibuild/common_utils.sh. # - - TEST_DEPENDS="pytest mock cython nmslib POT testfixtures python-levenshtein==0.12.0 visdom==0.1.8.9 scikit-learn" + - TEST_DEPENDS="pytest mock cython POT testfixtures python-levenshtein==0.12.0 visdom==0.1.8.9 scikit-learn" matrix: # From 38e7273c37782c7f8ac1706f5274135968bc167b Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Mon, 19 Dec 2022 16:57:10 +0900 Subject: [PATCH 05/19] get rid of visdom from .travis.yml it's causing the Py3.10 wheel graviton build to fail --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index e8f175b2ca..532d8aeb6f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,7 +26,7 @@ env: # them here for now. They'll get picked up by the multibuild stuff # running in multibuild/common_utils.sh. 
# - - TEST_DEPENDS="pytest mock cython POT testfixtures python-levenshtein==0.12.0 visdom==0.1.8.9 scikit-learn" + - TEST_DEPENDS="pytest mock cython POT testfixtures python-levenshtein==0.12.0 scikit-learn" matrix: # From f3b32fc5270bb61bbe7ac1e7758a8b6474c1dc31 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Mon, 19 Dec 2022 18:17:57 +0900 Subject: [PATCH 06/19] let multibuild pick DOCKER_TEST_IMAGE as advised here: https://github.com/multi-build/multibuild/issues/492#issuecomment-1357307275 --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 532d8aeb6f..236dadbe3a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,7 +19,6 @@ env: - PLAT=aarch64 - MB_ML_VER=2014 - SKIP_NETWORK_TESTS=1 - - DOCKER_TEST_IMAGE=multibuild/xenial_arm64v8 # # The contents of this file mirror the linux_testenv list # in gensim's setup.py. I can't think of a better way to include From f67ab877b7501c93626fef974ce0ad0dbcaa6dd7 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Mon, 19 Dec 2022 20:17:45 +0900 Subject: [PATCH 07/19] continue using xenial to build 3.8 aarch64 wheels --- .travis.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.travis.yml b/.travis.yml index 236dadbe3a..62f64d3d2d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,10 +32,15 @@ matrix: # See .github/workflows/build-wheels.yml for a discussion of why we # handle numpy versions explicitly. # + # Usually, multibuild is clever enough to pick the right docker test + # image. That is not the case for Py3.8, so we specify the image + # ourselves. 
+ # - os: linux env: - MB_PYTHON_VERSION=3.8 - BUILD_DEPENDS="numpy==1.19.2 scipy==1.7.0" + - DOCKER_TEST_IMAGE=multibuild/xenial_arm64v - os: linux env: - MB_PYTHON_VERSION=3.9 From 7ae8bdc668e8f41bd6de7dc3f419f9fbff564c88 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Mon, 19 Dec 2022 20:23:09 +0900 Subject: [PATCH 08/19] correct DOCKER_TEST_IMAGE env var --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 62f64d3d2d..6502f68e57 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,7 +40,7 @@ matrix: env: - MB_PYTHON_VERSION=3.8 - BUILD_DEPENDS="numpy==1.19.2 scipy==1.7.0" - - DOCKER_TEST_IMAGE=multibuild/xenial_arm64v + - DOCKER_TEST_IMAGE=multibuild/xenial_arm64v8 - os: linux env: - MB_PYTHON_VERSION=3.9 From 23f54886339889846ea0c4b264afee0d8810c8c8 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Wed, 21 Dec 2022 09:39:22 +0900 Subject: [PATCH 09/19] bump develop version --- docs/src/conf.py | 2 +- gensim/__init__.py | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/src/conf.py b/docs/src/conf.py index 4d202faa8f..0ba6816c09 100644 --- a/docs/src/conf.py +++ b/docs/src/conf.py @@ -63,7 +63,7 @@ # The short X.Y version. version = '4.3.0' # The full version, including alpha/beta/rc tags. -release = '4.3.0' +release = '4.3.1.dev0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/gensim/__init__.py b/gensim/__init__.py index e7c59b6bd6..064bffdfb1 100644 --- a/gensim/__init__.py +++ b/gensim/__init__.py @@ -4,7 +4,7 @@ """ -__version__ = '4.3.0' +__version__ = '4.3.1.dev0' import logging diff --git a/setup.py b/setup.py index deace40c59..d2aac65422 100644 --- a/setup.py +++ b/setup.py @@ -352,7 +352,7 @@ def run(self): setup( name='gensim', - version='4.3.0', + version='4.3.1.dev0', description='Python framework for fast Vector Space Modelling', long_description=LONG_DESCRIPTION, From f260d1e4fe47ba9388b2a792e587106e4c4b179a Mon Sep 17 00:00:00 2001 From: Thien Tran Date: Fri, 17 Feb 2023 22:45:00 +0800 Subject: [PATCH 10/19] check hs and negative. add tests (#3443) --- gensim/models/word2vec.py | 15 +++++++++++++-- gensim/test/test_word2vec.py | 15 ++++++++++++++- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index d4a4ba992e..a3fe865b7a 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -286,11 +286,11 @@ def __init__( Training algorithm: 1 for skip-gram; otherwise CBOW. hs : {0, 1}, optional If 1, hierarchical softmax will be used for model training. - If 0, and `negative` is non-zero, negative sampling will be used. + If 0, hierarchical softmax will not be used for model training. negative : int, optional If > 0, negative sampling will be used, the int for negative specifies how many "noise words" should be drawn (usually between 5-20). - If set to 0, no negative sampling is used. + If 0, negative sampling will not be used. ns_exponent : float, optional The exponent used to shape the negative sampling distribution. A value of 1.0 samples exactly in proportion to the frequencies, 0.0 samples all words equally, while a negative value samples low-frequency words more @@ -1536,6 +1536,17 @@ def _check_training_sanity(self, epochs=0, total_examples=None, total_words=None If the combination of input parameters is inconsistent. 
""" + if (not self.hs) and (not self.negative): + raise ValueError( + "You must set either 'hs' or 'negative' to be positive for proper training. " + "When both 'hs=0' and 'negative=0', there will be no training." + ) + if self.hs and self.negative: + logger.warning( + "Both hierarchical softmax and negative sampling are activated. " + "This is probably a mistake. You should set either 'hs=0' " + "or 'negative=0' to disable one of them. " + ) if self.alpha > self.min_alpha_yet_reached: logger.warning("Effective 'alpha' higher than previous training cycles") diff --git a/gensim/test/test_word2vec.py b/gensim/test/test_word2vec.py index a07cf08b10..74639af865 100644 --- a/gensim/test/test_word2vec.py +++ b/gensim/test/test_word2vec.py @@ -888,7 +888,7 @@ def test_predict_output_word(self): self.assertRaises(RuntimeError, binary_model_with_neg.predict_output_word, ['system', 'human']) # negative sampling scheme not used - model_without_neg = word2vec.Word2Vec(sentences, min_count=1, negative=0) + model_without_neg = word2vec.Word2Vec(sentences, min_count=1, hs=1, negative=0) self.assertRaises(RuntimeError, model_without_neg.predict_output_word, ['system', 'human']) # passing indices instead of words in context @@ -1033,6 +1033,19 @@ def test_train_warning(self, loglines): warning = "Effective 'alpha' higher than previous training cycles" self.assertTrue(warning in str(loglines)) + @log_capture() + def test_train_hs_and_neg(self, loglines): + """ + Test if ValueError is raised when both hs=0 and negative=0 + Test if warning is raised if both hs and negative are activated + """ + with self.assertRaises(ValueError): + word2vec.Word2Vec(sentences, min_count=1, hs=0, negative=0) + + word2vec.Word2Vec(sentences, min_count=1, hs=1, negative=5) + warning = "Both hierarchical softmax and negative sampling are activated." 
+ self.assertTrue(warning in str(loglines)) + def test_train_with_explicit_param(self): model = word2vec.Word2Vec(vector_size=2, min_count=1, hs=1, negative=0) model.build_vocab(sentences) From 63fce182abc7d1e1e64fe7074dc0ed2c0eb90b3e Mon Sep 17 00:00:00 2001 From: monash849 <62815453+monash849@users.noreply.github.com> Date: Fri, 3 Mar 2023 14:02:43 +0200 Subject: [PATCH 11/19] fix: conversion of cosine distance to cosine similarity is incorrect (#3441) Co-authored-by: Mona Shaheen --- gensim/similarities/annoy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/similarities/annoy.py b/gensim/similarities/annoy.py index e586b2d2e3..688985ca51 100644 --- a/gensim/similarities/annoy.py +++ b/gensim/similarities/annoy.py @@ -185,4 +185,4 @@ def most_similar(self, vector, num_neighbors): ids, distances = self.index.get_nns_by_vector( vector, num_neighbors, include_distances=True) - return [(self.labels[ids[i]], 1 - distances[i] / 2) for i in range(len(ids))] + return [(self.labels[ids[i]], 1 - distances[i] ** 2 / 2) for i in range(len(ids))] From b4f0e0bdb6b946dd378219eb47bde60f59ea7baf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A1bor=20Lipt=C3=A1k?= Date: Fri, 3 Mar 2023 07:03:30 -0500 Subject: [PATCH 12/19] Update Python version in docs (#3446) --- docs/src/_templates/indexcontent.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/_templates/indexcontent.html b/docs/src/_templates/indexcontent.html index 396f48b50d..6d5562daaa 100644 --- a/docs/src/_templates/indexcontent.html +++ b/docs/src/_templates/indexcontent.html @@ -145,10 +145,10 @@

Code dependencies

Gensim runs on Linux, Windows and Mac OS X, and should run on any other platform that supports - Python 3.6+ and NumPy. Gensim depends on the following software:

+ Python 3.8+ and NumPy. Gensim depends on the following software:

  • - Python, tested with versions 3.6, 3.7 and 3.8. + Python, tested with versions 3.8, 3.9, 3.10 and 3.11.
  • NumPy for number crunching. From 0f63f353d1dc663516b2c46a8ec9756bbc741d65 Mon Sep 17 00:00:00 2001 From: Nicolas Karolak Date: Mon, 6 Mar 2023 08:19:23 +0100 Subject: [PATCH 13/19] ci: enable arm64/aarch64 wheel builds (#3448) * ci: move linters into reusable workflow Move lint steps into its own workflow that can be called from other workflows (`on.workflow_dispatch`). * ci: enable builds for macos-arm64 and linux-aarch64 * ci: replace multibuild by qemu+cibuildwheel * ci: configure dependabot for github-actions This allow dependabot to create PR to update thrid-party GitHub Actions. * ci: run wheel test only on cp311 and native archs --- .github/dependabot.yml | 6 + .github/workflows/build-wheels.yml | 351 ++++------------------------- .github/workflows/linters.yml | 32 +++ .github/workflows/tests.yml | 25 +- .gitmodules | 3 - multibuild | 1 - 6 files changed, 81 insertions(+), 337 deletions(-) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/linters.yml delete mode 100644 .gitmodules delete mode 160000 multibuild diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000..8ac6b8c498 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml index dab4957b2f..31544d6b26 100644 --- a/.github/workflows/build-wheels.yml +++ b/.github/workflows/build-wheels.yml @@ -9,328 +9,59 @@ on: - cron: '0 0 * * sun,wed' jobs: - # - # The linters job duplicates tests.yml, can't think of a way to avoid this right now. 
- # - linters: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Setup up Python ${{ matrix.python }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python }} - - - name: Update pip - run: python -m pip install -U pip - - - name: Install dependencies - run: python -m pip install flake8 flake8-rst - - - name: Run flake8 linter (source) - run: flake8 --ignore E12,W503 --max-line-length 120 --show-source gensim - # - name: Run flake8 linter (documentation) - # run: flake8 --ignore E202,E402,E302,E305,F821 --max-line-length 120 --filename '*.py,*.rst' docs - - - name: Check Sphinx Gallery cache - run: python docs/src/check_gallery.py + linters: + uses: ./.github/workflows/linters.yml - multibuild: - timeout-minutes: 35 + build_wheels: + needs: linters + name: Build wheels on ${{ matrix.os }} runs-on: ${{ matrix.os }} defaults: run: shell: bash - - needs: [linters] - - strategy: - fail-fast: false - matrix: - include: - # - # We want the _oldest_ possible manylinux version to ensure our - # wheels work on the widest possible range of distros. Version 1 - # seems to break for certain Python versions under Linux and Windows, - # so we use 2010, which is the next oldest. - # - # When selecting the numpy version to build against, we need to satisfy - # two conditions. First, we want the wheel to be available for the - # version of Python we're building against, because building numpy - # wheels on our own is too much work. - # - # Second, in order to guarantee compatibility with the greatest range - # of numpy versions, we want to build against the oldest possible numpy - # version, as long as it's 1.17.0 or newer. Building versions earlier - # than 1.17.0 yields wheels that are incompatible with some newer - # versions of numpy. See https://github.com/RaRe-Technologies/gensim/issues/3226 - # for details. 
- # - # The logic for numpy version selection is based on - # https://github.com/scipy/oldest-supported-numpy/blob/master/setup.cfg - # with the exception that we enforce the minimum version to be 1.17.0. - # - - os: ubuntu-latest - manylinux-version: 2010 - python-version: "3.8" - build-depends: numpy==1.17.3 - - - os: ubuntu-latest - manylinux-version: 2010 - python-version: "3.9" - build-depends: numpy==1.19.3 - - - os: ubuntu-latest - manylinux-version: 2014 - python-version: "3.10" - build-depends: numpy==1.22.2 scipy==1.8.0 - - - os: ubuntu-latest - manylinux-version: 2014 - python-version: "3.11" - build-depends: numpy==1.23.2 scipy==1.9.2 - - - os: macos-latest - travis-os-name: osx - manylinux-version: 1 - python-version: "3.8" - build-depends: numpy==1.17.3 - - - os: macos-latest - travis-os-name: osx - manylinux-version: 1 - python-version: "3.9" - build-depends: numpy==1.19.3 - - - os: macos-latest - travis-os-name: osx - manylinux-version: 1 - python-version: "3.10" - build-depends: numpy==1.22.2 scipy==1.8.0 - - - os: macos-latest - travis-os-name: osx - manylinux-version: 1 - python-version: "3.11" - build-depends: numpy==1.23.2 scipy==1.9.2 - - env: - SKIP_NETWORK_TESTS: 1 - TEST_DEPENDS: pytest mock testfixtures - BUILD_DEPENDS: ${{ matrix.build-depends }} - - # - # For multibuild - # - BUILD_COMMIT: HEAD - DOCKER_TEST_IMAGE: multibuild/xenial_x86_64 - MB_ML_VER: ${{ matrix.manylinux-version }} - MB_PYTHON_VERSION: ${{ matrix.python-version }} # MB_PYTHON_VERSION is needed by Multibuild - PKG_NAME: gensim - PLAT: x86_64 - REPO_DIR: gensim - TRAVIS_OS_NAME: ${{ matrix.travis-os-name }} - UNICODE_WIDTH: 32 - - steps: - - uses: actions/checkout@v3 - with: - submodules: recursive - fetch-depth: 0 - - name: Print environment variables - run: | - echo "PLAT: ${PLAT}" - echo "MB_ML_VER: ${MB_ML_VER}" - echo "DOCKER_TEST_IMAGE: ${DOCKER_TEST_IMAGE}" - echo "TEST_DEPENDS: ${TEST_DEPENDS}" - echo "TRAVIS_OS_NAME: ${TRAVIS_OS_NAME}" - echo 
"SKIP_NETWORK_TESTS: ${SKIP_NETWORK_TESTS}" - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install virtualenv - - name: Build Wheel - run: | - echo ::group::Set up Multibuild - source multibuild/common_utils.sh - source multibuild/travis_steps.sh - source config.sh - echo ::endgroup:: - echo ::group::Before install - before_install - echo ::endgroup:: - echo ::group::Build wheel - find . -type f -name "*.egg" -exec rm -v {} \; - build_wheel $REPO_DIR ${{ matrix.PLAT }} - echo ::endgroup:: - - - name: Prepare for testing - run: | - # - # FIXME: Why are these eggs here? - # - # These eggs prevent the wheel from building and running on Py3.10 - # - find . -type f -name "*.egg" -exec rm -v {} \; - python -m venv test_environment - - # - # Multibuild has a test step but it essentially just installs the wheel - # and runs the test, and requires a lot of magic to get it working. - # It also does not work under Windows. - # So, we create our own simple test step here. - # - - name: Install and Test Wheel - run: | - . test_environment/bin/activate - python -m pip install --upgrade pip - pip install pytest testfixtures mock - pip install wheelhouse/*.whl - cd test_environment - python -c 'import gensim;print(gensim.__version__)' - # - # This part relies on the wheel containing tests and required data. - # If we remove that from the wheel, we'll need to rewrite this step. - # - pytest -rfxEXs --durations=20 --disable-warnings --showlocals --pyargs gensim - - - name: Upload wheels to s3://gensim-wheels - # - # Only do this if the credentials are set. - # This means that PRs will still build wheels, but not upload them. - # (PRs do not have access to secrets). - # - # The always() ensures this step runs even if a previous step fails. - # We want to upload wheels whenever possible (even if e.g. 
tests failed) - # because we don't want an innocuous test failure from blocking a release. - # - if: ${{ always() && env.WHEELHOUSE_UPLOADER_USERNAME && env.WHEELHOUSE_UPLOADER_SECRET }} - run: | - python -m pip install wheelhouse-uploader - ls wheelhouse/*.whl - python -m wheelhouse_uploader upload --local-folder wheelhouse/ --no-ssl-check gensim-wheels --provider S3 --no-enable-cdn - env: - WHEELHOUSE_UPLOADER_USERNAME: ${{ secrets.AWS_ACCESS_KEY_ID }} - WHEELHOUSE_UPLOADER_SECRET: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - - - # - # The build process for windows is different to that of Linux and MacOS. - # First, we cannot use multibuild (it does not support Windows). - # This means we have to write our own building and testing steps, but in a - # way it's simpler, because we don't need to care about configuring - # multibuild ourselves. - # Second, the syntax to enable virtual environments, etc. is different. - # - build_windows: - timeout-minutes: 35 - runs-on: windows-latest - defaults: - run: - shell: bash - - needs: [linters] - strategy: fail-fast: false matrix: - include: - - python-version: "3.8" - build-depends: numpy==1.17.3 - - - python-version: "3.9" - build-depends: numpy==1.19.3 - - - python-version: "3.10" - build-depends: numpy==1.22.2 scipy==1.8.0 - - - python-version: "3.11" - build-depends: numpy==1.23.2 scipy==1.9.2 - - env: - SKIP_NETWORK_TESTS: 1 - TEST_DEPENDS: pytest mock testfixtures - BUILD_DEPENDS: ${{ matrix.build-depends }} - + os: [ubuntu-20.04, windows-2019, macos-11] steps: - - uses: actions/checkout@v3 - with: - submodules: recursive - fetch-depth: 0 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install virtualenv - - name: Build Wheel - run: | - echo ::group::Set up dependencies - python --version - python -c "import struct; print(struct.calcsize('P') * 8)" - 
python -m pip install -U pip setuptools wheel wheelhouse_uploader ${{ env.BUILD_DEPENDS }} - echo ::endgroup:: - echo ::group::Build wheel - python setup.py bdist_wheel - echo ::endgroup - echo ::group::Install run - ls dist - python continuous_integration/install_wheel.py - echo ::endgroup:: - # - # For consistency with the multibuild step. The wheel uploader expects - # the wheels to be under wheelhouse. - # - mv dist wheelhouse + - name: Checkout + uses: actions/checkout@v3 - - name: Prepare for testing - run: | + - name: Set up QEMU + if: runner.os == 'Linux' + uses: docker/setup-qemu-action@v2 + with: + platforms: all + + - name: Build wheels + uses: pypa/cibuildwheel@v2.12.0 + env: + CIBW_ARCHS_LINUX: x86_64 aarch64 + CIBW_ARCHS_MACOS: x86_64 arm64 + CIBW_ARCHS_WINDOWS: AMD64 x86 ARM64 + CIBW_BEFORE_BUILD: pip install numpy scipy + CIBW_SKIP: pp* cp36-* cp37-* *-win32 *_i686 *-musllinux_* + CIBW_TEST_COMMAND: pytest -rfxEXs --durations=20 --disable-warnings --showlocals --pyargs gensim + CIBW_TEST_REQUIRES: pytest testfixtures mock + CIBW_TEST_SKIP: cp38* cp39* cp310* *_aarch64 *_arm64 *_universal2 + + - name: Upload wheels to s3://gensim-wheels # - # FIXME: Why are these eggs here? + # Only do this if the credentials are set. + # This means that PRs will still build wheels, but not upload them. + # (PRs do not have access to secrets). # - # These eggs prevent the wheel from building and running on Py3.10 + # The always() ensures this step runs even if a previous step fails. + # We want to upload wheels whenever possible (even if e.g. tests failed) + # because we don't want an innocuous test failure from blocking a release. # - find . 
-type f -name "*.egg" -exec rm -v {} \; - python -m venv test_environment - - # - # We need a separate testing step for windows because the command for - # activating the virtual environment is slightly different - # - - name: Install and Test Wheel (Windows) - run: | - test_environment/Scripts/activate.bat - python -m pip install --upgrade pip - pip install pytest testfixtures mock - pip install wheelhouse/*.whl - cd test_environment - python -c 'import gensim;print(gensim.__version__)' - pytest -rfxEXs --durations=20 --disable-warnings --showlocals --pyargs gensim - - - name: Upload wheels to s3://gensim-wheels - # - # Only do this if the credentials are set. - # This means that PRs will still build wheels, but not upload them. - # (PRs do not have access to secrets). - # - # The always() ensures this step runs even if a previous step fails. - # We want to upload wheels whenever possible (even if e.g. tests failed) - # because we don't want an innocuous test failure from blocking a release. 
- # - if: ${{ always() && env.WHEELHOUSE_UPLOADER_USERNAME && env.WHEELHOUSE_UPLOADER_SECRET }} - run: | - python -m pip install wheelhouse-uploader - ls wheelhouse/*.whl - python -m wheelhouse_uploader upload --local-folder wheelhouse/ --no-ssl-check gensim-wheels --provider S3 --no-enable-cdn - env: - WHEELHOUSE_UPLOADER_USERNAME: ${{ secrets.AWS_ACCESS_KEY_ID }} - WHEELHOUSE_UPLOADER_SECRET: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + if: ${{ always() && env.WHEELHOUSE_UPLOADER_USERNAME && env.WHEELHOUSE_UPLOADER_SECRET }} + run: | + python -m pip install wheelhouse-uploader + ls wheelhouse/*.whl + python -m wheelhouse_uploader upload --local-folder wheelhouse/ --no-ssl-check gensim-wheels --provider S3 --no-enable-cdn + env: + WHEELHOUSE_UPLOADER_USERNAME: ${{ secrets.AWS_ACCESS_KEY_ID }} + WHEELHOUSE_UPLOADER_SECRET: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/linters.yml b/.github/workflows/linters.yml new file mode 100644 index 0000000000..05693fd252 --- /dev/null +++ b/.github/workflows/linters.yml @@ -0,0 +1,32 @@ +name: Linters + +on: + workflow_call: + +jobs: + + linters: + runs-on: ubuntu-latest + steps: + + - uses: actions/checkout@v3 + + - name: Setup up Python ${{ matrix.python }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python }} + + - name: Update pip + run: python -m pip install -U pip + + - name: Install dependencies + run: python -m pip install flake8 flake8-rst + + - name: Run flake8 linter (source) + run: flake8 --ignore E12,W503 --max-line-length 120 --show-source gensim + + # - name: Run flake8 linter (documentation) + # run: flake8 --ignore E202,E402,E302,E305,F821 --max-line-length 120 --filename '*.py,*.rst' docs + + - name: Check Sphinx Gallery cache + run: python docs/src/check_gallery.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9d52759538..4a1d90f808 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,30 +6,9 @@ on: branches: 
[ develop ] jobs: - linters: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Setup up Python ${{ matrix.python }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python }} - - name: Update pip - run: python -m pip install -U pip - - - name: Install dependencies - run: python -m pip install flake8 flake8-rst - - - name: Run flake8 linter (source) - run: flake8 --ignore E12,W503 --max-line-length 120 --show-source gensim - - # - name: Run flake8 linter (documentation) - # run: flake8 --ignore E202,E402,E302,E305,F821 --max-line-length 120 --filename '*.py,*.rst' docs - - - name: Check Sphinx Gallery cache - run: python docs/src/check_gallery.py + linters: + uses: ./.github/workflows/linters.yml docs: name: build documentation diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 52a1b1716c..0000000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "multibuild"] - path = multibuild - url = https://github.com/multi-build/multibuild diff --git a/multibuild b/multibuild deleted file mode 160000 index 8f9229a442..0000000000 --- a/multibuild +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 8f9229a442dcaad809ecde96e23f872e56d6ad60 From 7002a606f4c328d1de023a6e5310b2049db4c38b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Mar 2023 17:17:54 +0900 Subject: [PATCH 14/19] Bump codecov/codecov-action from 2 to 3 (#3450) Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 2 to 3. - [Release notes](https://github.com/codecov/codecov-action/releases) - [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/codecov/codecov-action/compare/v2...v3) --- updated-dependencies: - dependency-name: codecov/codecov-action dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4a1d90f808..b63b5d89b2 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -138,7 +138,7 @@ jobs: - name: Upload coverage to Codecov if: matrix.coverage == true - uses: codecov/codecov-action@v2 + uses: codecov/codecov-action@v3 with: fail_ci_if_error: true files: ./coverage.xml From cfb1a62a0f188cc6a6f0befaa2eb3eca4ce9f6e8 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Tue, 7 Mar 2023 21:37:38 +0900 Subject: [PATCH 15/19] remove unused dependency, handle ImportError (#3447) --- gensim/models/flsamodel.py | 23 +++++++++++++++++++---- setup.py | 1 - 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/gensim/models/flsamodel.py b/gensim/models/flsamodel.py index 9ed815abe1..be873ccb08 100644 --- a/gensim/models/flsamodel.py +++ b/gensim/models/flsamodel.py @@ -5,15 +5,17 @@ @author: 20200016 """ -import math from collections import Counter -import warnings -import pickle import itertools +import math +import pickle +import sys +import warnings + import numpy as np from scipy.sparse.linalg import svds from scipy.sparse import dok_matrix -from pyfume import Clustering + import gensim.corpora as corpora from gensim.models.coherencemodel import CoherenceModel from gensim.models import Word2Vec @@ -699,6 +701,19 @@ def _create_partition_matrix( ------- numpy.array : float """ + + try: + from pyfume import Clustering + except ImportError: + msg = ( + "FlsaModel requires pyfume; install manually via " + "`pip install pyfume` or otherwise" + ) + print('-' * len(msg), file=sys.stderr) + print(msg, file=sys.stderr) + print('-' * len(msg), file=sys.stderr) + raise + clusterer = Clustering.Clusterer( nr_clus=number_of_clusters, data=data, diff --git a/setup.py 
b/setup.py index 43a1c358fe..e0dc4e8881 100644 --- a/setup.py +++ b/setup.py @@ -344,7 +344,6 @@ def run(self): NUMPY_STR, 'scipy >= 1.7.0', 'smart_open >= 1.8.1', - 'FuzzyTM >= 0.4.0' ] setup_requires = [NUMPY_STR] From e7d77a122f9b9ff5409c4e1f6e9901221cee1e1d Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Thu, 9 Mar 2023 15:42:54 +0900 Subject: [PATCH 16/19] update change log for 3.4.1 (#3451) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * update change log for 3.4.1 * Update CHANGELOG.md Co-authored-by: Radim Řehůřek --------- Co-authored-by: Radim Řehůřek --- CHANGELOG.md | 19 +++++++++++++++++++ release/generate_changelog.py | 15 +++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a1fb190013..c6c84c5448 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,25 @@ Changes ======= +## 4.3.1, 2022-03-08 + +### :red_circle: Bug fixes + +* [#3447](https://github.com/RaRe-Technologies/gensim/pull/3447): Remove unused FuzzyTM dependency, handle ImportError, by [@mpenkov](https://github.com/mpenkov) +* [#3441](https://github.com/RaRe-Technologies/gensim/pull/3441): Fix changed calculation of cosine distance in Annoy wrapper, by [@monash849](https://github.com/monash849) + +### :+1: Improvements + +* [#3443](https://github.com/RaRe-Technologies/gensim/pull/3443): Sanity check for `hs` and `negative` in Word2Vec, by [@gau-nernst](https://github.com/gau-nernst) + +### 🔮 Testing, CI, housekeeping + +* [#3448](https://github.com/RaRe-Technologies/gensim/pull/3448): Enable arm64/aarch64 wheel builds, by [@nikaro](https://github.com/nikaro) + +### :books: Tutorial and doc improvements + +* [#3446](https://github.com/RaRe-Technologies/gensim/pull/3446): Update Python version in docs, by [@gliptak](https://github.com/gliptak) + ## 4.3.0, 2022-12-17 ### :star2: New Features diff --git a/release/generate_changelog.py b/release/generate_changelog.py index 
62ca7b329b..1f930d1f38 100644 --- a/release/generate_changelog.py +++ b/release/generate_changelog.py @@ -8,10 +8,12 @@ """Generate changelog entries for all PRs merged since the last release.""" import re import requests +import sys import time def throttle_get(*args, seconds=10, **kwargs): + print(args, kwargs, file=sys.stderr) result = requests.get(*args, **kwargs) result.raise_for_status() @@ -39,13 +41,16 @@ def iter_merged_prs(since=release_timestamp): ) pulls = get.json() - if not pulls: - break + count = 0 for i, pr in enumerate(pulls): if pr['merged_at'] and pr['merged_at'] > since: + count += 1 yield pr + if count == 0: + break + page += 1 @@ -60,12 +65,18 @@ def iter_closed_issues(since=release_timestamp): if not issues: break + count = 0 for i, issue in enumerate(issues): # # In the github API, all pull requests are issues, but not vice versa. # if 'pull_request' not in issue and issue['closed_at'] > since: + count += 1 yield issue + + if count == 0: + break + page += 1 From 6b35d00e01e8625a2954e67ee877b79db3a35de8 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Thu, 9 Mar 2023 15:45:57 +0900 Subject: [PATCH 17/19] bumped version to 4.3.1 --- docs/src/conf.py | 2 +- gensim/__init__.py | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/src/conf.py b/docs/src/conf.py index 0ba6816c09..4c99f90267 100644 --- a/docs/src/conf.py +++ b/docs/src/conf.py @@ -63,7 +63,7 @@ # The short X.Y version. version = '4.3.0' # The full version, including alpha/beta/rc tags. -release = '4.3.1.dev0' +release = '4.3.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/gensim/__init__.py b/gensim/__init__.py index 064bffdfb1..ce8dcafde1 100644 --- a/gensim/__init__.py +++ b/gensim/__init__.py @@ -4,7 +4,7 @@ """ -__version__ = '4.3.1.dev0' +__version__ = '4.3.1' import logging diff --git a/setup.py b/setup.py index e0dc4e8881..ee53578e43 100644 --- a/setup.py +++ b/setup.py @@ -354,7 +354,7 @@ def run(self): setup( name='gensim', - version='4.3.1.dev0', + version='4.3.1', description='Python framework for fast Vector Space Modelling', long_description=LONG_DESCRIPTION, From faca509cf9a8733417ec6eb2271ae07568b8ce7c Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Thu, 9 Mar 2023 15:46:47 +0900 Subject: [PATCH 18/19] bump date in change log --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6c84c5448..b78ff30f78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ Changes ======= -## 4.3.1, 2022-03-08 +## 4.3.1, 2023-03-09 ### :red_circle: Bug fixes From 35c586e6611385113ccd9bbd41a859d540be4d41 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Thu, 9 Mar 2023 15:48:19 +0900 Subject: [PATCH 19/19] bump versions --- docs/src/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/conf.py b/docs/src/conf.py index 4c99f90267..6f45e76f70 100644 --- a/docs/src/conf.py +++ b/docs/src/conf.py @@ -61,7 +61,7 @@ # built documents. # # The short X.Y version. -version = '4.3.0' +version = '4.3' # The full version, including alpha/beta/rc tags. release = '4.3.1'