From 0bb0cb360e524b19e5f7d1347669bfe792f08430 Mon Sep 17 00:00:00 2001 From: Fhrozen Date: Sat, 23 Aug 2025 18:27:34 +0900 Subject: [PATCH 1/7] updates for new python versions --- .github/dependabot.yml | 8 ++++++++ .github/workflows/python-package.yml | 2 +- README.md | 6 +++--- setup.py | 8 ++++---- 4 files changed, 16 insertions(+), 8 deletions(-) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..208770e --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,8 @@ +version: 2 +updates: + # Maintain dependencies for GitHub Actions + # Enable version updates for github-actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index a410810..a62582f 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.7', '3.8', '3.9', '3.10'] + python-version: ['3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v2 diff --git a/README.md b/README.md index d98cf2b..7296300 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # CTC segmentation -[![build status](https://github.com/lumaku/ctc-segmentation/actions/workflows/python-package.yml/badge.svg)](https://github.com/lumaku/ctc-segmentation/actions/workflows/python-package.yml) +[![build status](https://github.com/espnet/ctc-segmentation/actions/workflows/python-package.yml/badge.svg)](https://github.com/espnet/ctc-segmentation/actions/workflows/python-package.yml) [![version](https://img.shields.io/pypi/v/ctc-segmentation)](https://pypi.org/project/ctc-segmentation/) [![AUR](https://img.shields.io/aur/version/python-ctc-segmentation-git)](https://aur.archlinux.org/packages/python-ctc-segmentation-git) [![downloads](https://img.shields.io/pypi/dm/ctc-segmentation)](https://pypi.org/project/ctc-segmentation/) @@ -19,7 +19,7 @@ The CTC segmentation package is not standalone, as it needs a neural network wit * In ESPnet 1 as corpus recipe: [Alignment script](https://github.com/espnet/espnet/blob/master/espnet/bin/asr_align.py), [Example recipe](https://github.com/espnet/espnet/tree/master/egs/tedlium2/align1), [Demo](https://github.com/espnet/espnet#ctc-segmentation-demo ) * In ESPnet 2, as script or directly as python interface: [Alignment script](https://github.com/espnet/espnet/blob/master/espnet2/bin/asr_align.py), [Demo](https://github.com/espnet/espnet#ctc-segmentation-demo ) * In Nvidia NeMo as dataset creation tool: [Documentation](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/tools/ctc_segmentation.html), [Example](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tools/CTC_Segmentation_Tutorial.ipynb) -* In Speechbrain, as python interface: [Alignment module](https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/alignment/ctc_segmentation.py), [Examples](https://gist.github.com/lumaku/75eca1c86d9467a54888d149dc7b84f1) +* In Speechbrain, as python interface: [Alignment module](https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/alignment/ctc_segmentation.py), [Examples](https://gist.github.com/espnet/75eca1c86d9467a54888d149dc7b84f1) It can also be used with other frameworks: @@ -145,7 +145,7 @@ pip install ctc-segmentation * From source: ```sh -git clone https://github.com/lumaku/ctc-segmentation +git clone 
https://github.com/espnet/ctc-segmentation cd ctc-segmentation cythonize -3 ctc_segmentation/ctc_segmentation_dyn.pyx python setup.py build diff --git a/setup.py b/setup.py index eb9c6f8..d85834d 100644 --- a/setup.py +++ b/setup.py @@ -29,14 +29,14 @@ It can be combined with CTC-based ASR models. This package includes the core functions. -https://github.com/lumaku/ctc-segmentation +https://github.com/espnet/ctc-segmentation """ setup( name="ctc_segmentation", - version="1.7.4", + version="1.7.5", - python_requires='>=3.6', + python_requires='>=3.9', packages=find_packages(exclude=["tests"]), setup_requires=["numpy"], install_requires=["setuptools", "numpy", "Cython"], @@ -49,7 +49,7 @@ "Dominik Winkelbauer ", description="CTC segmentation to align utterances within " "large audio files.", - url="https://github.com/lumaku/ctc-segmentation", + url="https://github.com/espnet/ctc-segmentation", long_description_content_type="text/markdown", long_description=package_information, From 511911a5e57c7a1ddc9632184226a747cc64536c Mon Sep 17 00:00:00 2001 From: Fhrozen Date: Sat, 23 Aug 2025 18:32:23 +0900 Subject: [PATCH 2/7] add precommit --- .pre-commit-config.yaml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..35d7e05 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,22 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + +- repo: https://github.com/psf/black + rev: 25.1.0 + hooks: + - id: black + exclude: ^(doc) + +- repo: https://github.com/pycqa/isort + rev: 6.0.1 + hooks: + - id: isort + exclude: ^(doc) From 998d02d47cb1e9ee66b6148711ca8182eafb44a2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 23 Aug 2025 09:32:31 +0000 Subject: [PATCH 3/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- MANIFEST.in | 1 - Makefile | 5 ++- README.md | 38 +++++++++++------------ ctc_segmentation/__init__.py | 10 +++--- ctc_segmentation/ctc_segmentation.py | 1 + ctc_segmentation/ctc_segmentation_dyn.pyx | 2 ++ setup.py | 23 ++++++-------- tests/test_ctc_segmentation.py | 9 ++---- 8 files changed, 41 insertions(+), 48 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 28a98c9..ba04882 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1 @@ include ctc_segmentation/ctc_segmentation_dyn.pyx - diff --git a/Makefile b/Makefile index 7f353f6..4191579 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ clean: upload: twine upload dist/* - + test: cd tests; python -c "import test_ctc_segmentation as test; test.test_ctc_segmentation()" cd tests; python -c "import test_ctc_segmentation as test; test.test_determine_utterance_segments()" @@ -23,7 +23,7 @@ test: # To test the various installation methods: github: cd /; pip install git+https://github.com/lumaku/ctc-segmentation --user - + pip: cd /; pip install ctc-segmentation --user @@ -32,4 +32,3 @@ local: rm: cd /; pip uninstall -y ctc-segmentation - diff --git a/README.md b/README.md index 7296300..574b3dc 100644 --- a/README.md +++ b/README.md @@ -59,29 +59,29 @@ def align_with_transcript( with torch.no_grad(): logits = 
model(inputs.input_values).logits.cpu()[0] probs = torch.nn.functional.softmax(logits,dim=-1) - + # Tokenize transcripts vocab = tokenizer.get_vocab() inv_vocab = {v:k for k,v in vocab.items()} unk_id = vocab[""] - + tokens = [] for transcript in transcripts: assert len(transcript) > 0 tok_ids = tokenizer(transcript.replace("\n"," ").lower())['input_ids'] tok_ids = np.array(tok_ids,dtype=np.int) tokens.append(tok_ids[tok_ids != unk_id]) - + # Align char_list = [inv_vocab[i] for i in range(len(inv_vocab))] config = ctc_segmentation.CtcSegmentationParameters(char_list=char_list) config.index_duration = audio.shape[0] / probs.size()[0] / samplerate - + ground_truth_mat, utt_begin_indices = ctc_segmentation.prepare_token_list(config, tokens) timings, char_probs, state_list = ctc_segmentation.ctc_segmentation(config, probs.numpy(), ground_truth_mat) segments = ctc_segmentation.determine_utterance_segments(config, utt_begin_indices, char_probs, timings, transcripts) return [{"text" : t, "start" : p[0], "end" : p[1], "conf" : p[2]} for t,p in zip(transcripts, segments)] - + def get_word_timestamps( audio : np.ndarray, samplerate : int = SAMPLERATE, @@ -95,38 +95,38 @@ def get_word_timestamps( with torch.no_grad(): logits = model(inputs.input_values).logits.cpu()[0] probs = torch.nn.functional.softmax(logits,dim=-1) - + predicted_ids = torch.argmax(logits, dim=-1) pred_transcript = processor.decode(predicted_ids) - + # Split the transcription into words words = pred_transcript.split(" ") - + # Align vocab = tokenizer.get_vocab() inv_vocab = {v:k for k,v in vocab.items()} char_list = [inv_vocab[i] for i in range(len(inv_vocab))] config = ctc_segmentation.CtcSegmentationParameters(char_list=char_list) config.index_duration = audio.shape[0] / probs.size()[0] / samplerate - + ground_truth_mat, utt_begin_indices = ctc_segmentation.prepare_text(config, words) timings, char_probs, state_list = ctc_segmentation.ctc_segmentation(config, probs.numpy(), ground_truth_mat) segments = ctc_segmentation.determine_utterance_segments(config, utt_begin_indices, char_probs, timings, words) return [{"text" : w, "start" : p[0], "end" : p[1], "conf" : p[2]} for w,p in zip(words, segments)] print(align_with_transcript(audio,transcripts)) -# [{'text': 'A MAN SAID TO THE UNIVERSE', 'start': 0.08124999999999993, 'end': 2.034375, 'conf': 0.0}, +# [{'text': 'A MAN SAID TO THE UNIVERSE', 'start': 0.08124999999999993, 'end': 2.034375, 'conf': 0.0}, # {'text': 'SIR I EXIST', 'start': 2.3260775862068965, 'end': 4.078771551724138, 'conf': 0.0}] print(get_word_timestamps(audio)) -# [{'text': 'a', 'start': 0.08124999999999993, 'end': 0.5912715517241378, 'conf': 0.9999501323699951}, -# {'text': 'man', 'start': 0.5912715517241378, 'end': 0.9219827586206896, 'conf': 0.9409108982174931}, -# {'text': 'said', 'start': 0.9219827586206896, 'end': 1.2326508620689656, 'conf': 0.7700278702302796}, -# {'text': 'to', 'start': 1.2326508620689656, 'end': 1.3529094827586206, 'conf': 0.5094435178226225}, -# {'text': 'the', 'start': 1.3529094827586206, 'end': 1.4831896551724135, 'conf': 0.4580493446392211}, -# {'text': 'universe', 'start': 1.4831896551724135, 'end': 2.034375, 'conf': 0.9285054256219009}, -# {'text': 'sir', 'start': 2.3260775862068965, 'end': 3.036530172413793, 'conf': 0.0}, -# {'text': 'i', 'start': 3.036530172413793, 'end': 3.347198275862069, 'conf': 0.7995760873559864}, +# [{'text': 'a', 'start': 0.08124999999999993, 'end': 0.5912715517241378, 'conf': 0.9999501323699951}, +# {'text': 'man', 'start': 0.5912715517241378, 'end': 
0.9219827586206896, 'conf': 0.9409108982174931}, +# {'text': 'said', 'start': 0.9219827586206896, 'end': 1.2326508620689656, 'conf': 0.7700278702302796}, +# {'text': 'to', 'start': 1.2326508620689656, 'end': 1.3529094827586206, 'conf': 0.5094435178226225}, +# {'text': 'the', 'start': 1.3529094827586206, 'end': 1.4831896551724135, 'conf': 0.4580493446392211}, +# {'text': 'universe', 'start': 1.4831896551724135, 'end': 2.034375, 'conf': 0.9285054256219009}, +# {'text': 'sir', 'start': 2.3260775862068965, 'end': 3.036530172413793, 'conf': 0.0}, +# {'text': 'i', 'start': 3.036530172413793, 'end': 3.347198275862069, 'conf': 0.7995760873559864}, # {'text': 'exist', 'start': 3.347198275862069, 'end': 4.078771551724138, 'conf': 0.0}] ``` @@ -229,7 +229,7 @@ For examples, see the `prepare_*` functions in `ctc_segmentation.py`, or the exa ### Segments clean-up -Segments that were written to a `segments` file can be filtered using the confidence score. This is the minium confidence score in log space as described in the paper. +Segments that were written to a `segments` file can be filtered using the confidence score. This is the minium confidence score in log space as described in the paper. Utterances with a low confidence score are discarded in a data clean-up. This parameter may need adjustment depending on dataset, ASR model and used text conversion. diff --git a/ctc_segmentation/__init__.py b/ctc_segmentation/__init__.py index 07406c2..a819ca4 100644 --- a/ctc_segmentation/__init__.py +++ b/ctc_segmentation/__init__.py @@ -1,8 +1,6 @@ """Import all functions of the CTC segmentation package.""" -from .ctc_segmentation import ctc_segmentation -from .ctc_segmentation import CtcSegmentationParameters -from .ctc_segmentation import determine_utterance_segments -from .ctc_segmentation import prepare_text -from .ctc_segmentation import prepare_tokenized_text -from .ctc_segmentation import prepare_token_list + +from .ctc_segmentation import (CtcSegmentationParameters, ctc_segmentation, + determine_utterance_segments, prepare_text, + prepare_token_list, prepare_tokenized_text) from .partitioning import get_partitions diff --git a/ctc_segmentation/ctc_segmentation.py b/ctc_segmentation/ctc_segmentation.py index a984550..ca19025 100755 --- a/ctc_segmentation/ctc_segmentation.py +++ b/ctc_segmentation/ctc_segmentation.py @@ -18,6 +18,7 @@ """ import logging + import numpy as np logger = logging.getLogger("ctc_segmentation") diff --git a/ctc_segmentation/ctc_segmentation_dyn.pyx b/ctc_segmentation/ctc_segmentation_dyn.pyx index 343d1a5..ce06ccd 100755 --- a/ctc_segmentation/ctc_segmentation_dyn.pyx +++ b/ctc_segmentation/ctc_segmentation_dyn.pyx @@ -13,7 +13,9 @@ For a description, see https://arxiv.org/abs/2007.09127 """ import logging + import numpy as np + cimport numpy as np diff --git a/setup.py b/setup.py index d85834d..f088051 100644 --- a/setup.py +++ b/setup.py @@ -1,25 +1,26 @@ -from setuptools import setup, find_packages, Extension -from setuptools.command.build_ext import build_ext import numpy - +from setuptools import Extension, find_packages, setup +from setuptools.command.build_ext import build_ext try: from Cython.Build import cythonize + USE_CYTHON = True except ImportError: USE_CYTHON = False # https://cython.readthedocs.io/en/latest/src/userguide/source_files_and_compilation.html -ext = '.pyx' if USE_CYTHON else '.c' +ext = ".pyx" if USE_CYTHON else ".c" extensions = [ Extension( name="ctc_segmentation.ctc_segmentation_dyn", - sources=["ctc_segmentation/ctc_segmentation_dyn"+ext], + 
sources=["ctc_segmentation/ctc_segmentation_dyn" + ext], include_dirs=[numpy.get_include()], ) ] if USE_CYTHON: from Cython.Build import cythonize + extensions = cythonize(extensions) package_information = """ @@ -35,22 +36,18 @@ setup( name="ctc_segmentation", version="1.7.5", - - python_requires='>=3.9', + python_requires=">=3.9", packages=find_packages(exclude=["tests"]), setup_requires=["numpy"], install_requires=["setuptools", "numpy", "Cython"], tests_require=["pytest", "torch"], zip_safe=False, ext_modules=extensions, - cmdclass={'build_ext': build_ext}, - + cmdclass={"build_ext": build_ext}, author="Ludwig Kuerzinger , " - "Dominik Winkelbauer ", - description="CTC segmentation to align utterances within " - "large audio files.", + "Dominik Winkelbauer ", + description="CTC segmentation to align utterances within " "large audio files.", url="https://github.com/espnet/ctc-segmentation", - long_description_content_type="text/markdown", long_description=package_information, ) diff --git a/tests/test_ctc_segmentation.py b/tests/test_ctc_segmentation.py index fae34f7..fe222a6 100644 --- a/tests/test_ctc_segmentation.py +++ b/tests/test_ctc_segmentation.py @@ -7,12 +7,9 @@ """Test functions for CTC segmentation.""" import numpy as np -from ctc_segmentation import ctc_segmentation -from ctc_segmentation import CtcSegmentationParameters -from ctc_segmentation import determine_utterance_segments -from ctc_segmentation import prepare_text -from ctc_segmentation import prepare_tokenized_text -from ctc_segmentation import prepare_token_list +from ctc_segmentation import (CtcSegmentationParameters, ctc_segmentation, + determine_utterance_segments, prepare_text, + prepare_token_list, prepare_tokenized_text) def test_ctcsegmentationparameters(): From c9a39864ed7fdb4697bea437c51f79f7128a77a5 Mon Sep 17 00:00:00 2001 From: Fhrozen Date: Sat, 23 Aug 2025 18:36:08 +0900 Subject: [PATCH 4/7] updates --- .github/workflows/python-package.yml | 2 +- ctc_segmentation/__init__.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index a62582f..2125edd 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -27,7 +27,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install . 
- pip install flake8 pytest wheel torch + pip install flake8 pytest wheel torch --index-url https://download.pytorch.org/whl/cpu - name: Test with pytest run: | pytest diff --git a/ctc_segmentation/__init__.py b/ctc_segmentation/__init__.py index a819ca4..a88b219 100644 --- a/ctc_segmentation/__init__.py +++ b/ctc_segmentation/__init__.py @@ -1,6 +1,11 @@ """Import all functions of the CTC segmentation package.""" -from .ctc_segmentation import (CtcSegmentationParameters, ctc_segmentation, - determine_utterance_segments, prepare_text, - prepare_token_list, prepare_tokenized_text) +from .ctc_segmentation import ( + CtcSegmentationParameters, + ctc_segmentation, + determine_utterance_segments, + prepare_text, + prepare_token_list, + prepare_tokenized_text, +) from .partitioning import get_partitions From 1758b4016dda220a3d4df5442ea17d936dcf9499 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 23 Aug 2025 09:36:16 +0000 Subject: [PATCH 5/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ctc_segmentation/__init__.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/ctc_segmentation/__init__.py b/ctc_segmentation/__init__.py index a88b219..a819ca4 100644 --- a/ctc_segmentation/__init__.py +++ b/ctc_segmentation/__init__.py @@ -1,11 +1,6 @@ """Import all functions of the CTC segmentation package.""" -from .ctc_segmentation import ( - CtcSegmentationParameters, - ctc_segmentation, - determine_utterance_segments, - prepare_text, - prepare_token_list, - prepare_tokenized_text, -) +from .ctc_segmentation import (CtcSegmentationParameters, ctc_segmentation, + determine_utterance_segments, prepare_text, + prepare_token_list, prepare_tokenized_text) from .partitioning import get_partitions From ff4202f8ab41227c10b153faa5e74b32997ef7a7 Mon Sep 17 00:00:00 2001 From: Fhrozen Date: Sat, 23 Aug 2025 18:38:32 +0900 Subject: [PATCH 6/7] updates on isort --- ctc_segmentation/__init__.py | 11 ++++++++--- setup.cfg | 2 ++ tests/test_ctc_segmentation.py | 11 ++++++++--- 3 files changed, 18 insertions(+), 6 deletions(-) create mode 100644 setup.cfg diff --git a/ctc_segmentation/__init__.py b/ctc_segmentation/__init__.py index a819ca4..a88b219 100644 --- a/ctc_segmentation/__init__.py +++ b/ctc_segmentation/__init__.py @@ -1,6 +1,11 @@ """Import all functions of the CTC segmentation package.""" -from .ctc_segmentation import (CtcSegmentationParameters, ctc_segmentation, - determine_utterance_segments, prepare_text, - prepare_token_list, prepare_tokenized_text) +from .ctc_segmentation import ( + CtcSegmentationParameters, + ctc_segmentation, + determine_utterance_segments, + prepare_text, + prepare_token_list, + prepare_tokenized_text, +) from .partitioning import get_partitions diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..c76db01 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[isort] +profile = black diff --git a/tests/test_ctc_segmentation.py b/tests/test_ctc_segmentation.py index fe222a6..74d91cd 100644 --- a/tests/test_ctc_segmentation.py +++ b/tests/test_ctc_segmentation.py @@ -7,9 +7,14 @@ """Test functions for CTC segmentation.""" import numpy as np -from ctc_segmentation import (CtcSegmentationParameters, ctc_segmentation, - determine_utterance_segments, prepare_text, - prepare_token_list, prepare_tokenized_text) +from ctc_segmentation import ( + CtcSegmentationParameters, + ctc_segmentation, + determine_utterance_segments, + 
prepare_text, + prepare_token_list, + prepare_tokenized_text, +) def test_ctcsegmentationparameters(): From 537404bcdde5e64e5cc353f9d92ce8421c61cc11 Mon Sep 17 00:00:00 2001 From: Fhrozen Date: Sat, 23 Aug 2025 18:44:15 +0900 Subject: [PATCH 7/7] update github action --- .github/workflows/python-package.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 2125edd..9e95cf6 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -27,7 +27,8 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install . - pip install flake8 pytest wheel torch --index-url https://download.pytorch.org/whl/cpu + pip install torch --index-url https://download.pytorch.org/whl/cpu + pip install flake8 pytest wheel - name: Test with pytest run: | pytest
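
A quick way to exercise the new tooling locally after applying this series (a sketch only, assuming `pre-commit` is available on the machine; the patches configure it via `.pre-commit-config.yaml` and pre-commit.ci but do not add it to the package's own dependencies). The install lines below are taken from the updated `python-package.yml`; the ordering is just a suggestion:

```sh
# Run the hooks added in PATCH 2/7 and 6/7 once over the whole tree
# (trailing-whitespace, end-of-file-fixer, check-yaml, check-added-large-files,
#  black, and isort with the black profile from setup.cfg)
python -m pip install --upgrade pip pre-commit
pre-commit run --all-files

# Mirror the CI job from PATCH 4/7 and 7/7: build the package, pull the
# CPU-only torch wheel from the PyTorch index, then run the test suite
python -m pip install .
pip install torch --index-url https://download.pytorch.org/whl/cpu
pip install flake8 pytest wheel
pytest
```

A local run of these commands should match what the 3.9-3.12 matrix executes on ubuntu-latest in CI.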