8 changes: 8 additions & 0 deletions .github/dependabot.yml
@@ -0,0 +1,8 @@
version: 2
updates:
# Maintain dependencies for GitHub Actions
# Enable version updates for github-actions
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
5 changes: 3 additions & 2 deletions .github/workflows/python-package.yml
@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.7', '3.8', '3.9', '3.10']
python-version: ['3.9', '3.10', '3.11', '3.12']

steps:
- uses: actions/checkout@v2
@@ -27,7 +27,8 @@
run: |
python -m pip install --upgrade pip
python -m pip install .
pip install flake8 pytest wheel torch
pip install torch --index-url https://download.pytorch.org/whl/cpu
pip install flake8 pytest wheel
- name: Test with pytest
run: |
pytest
22 changes: 22 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,22 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files

- repo: https://github.com/psf/black
rev: 25.1.0
hooks:
- id: black
exclude: ^(doc)

- repo: https://github.com/pycqa/isort
rev: 6.0.1
hooks:
- id: isort
exclude: ^(doc)
1 change: 0 additions & 1 deletion MANIFEST.in
@@ -1,2 +1 @@
include ctc_segmentation/ctc_segmentation_dyn.pyx

5 changes: 2 additions & 3 deletions Makefile
@@ -11,7 +11,7 @@ clean:

upload:
twine upload dist/*

test:
cd tests; python -c "import test_ctc_segmentation as test; test.test_ctc_segmentation()"
cd tests; python -c "import test_ctc_segmentation as test; test.test_determine_utterance_segments()"
@@ -23,7 +23,7 @@ test:
# To test the various installation methods:
github:
cd /; pip install git+https://github.com/lumaku/ctc-segmentation --user

pip:
cd /; pip install ctc-segmentation --user

@@ -32,4 +32,3 @@ local:

rm:
cd /; pip uninstall -y ctc-segmentation

44 changes: 22 additions & 22 deletions README.md
@@ -1,7 +1,7 @@
# CTC segmentation

<!-- Badges -->
[![build status](https://github.com/lumaku/ctc-segmentation/actions/workflows/python-package.yml/badge.svg)](https://github.com/lumaku/ctc-segmentation/actions/workflows/python-package.yml)
[![build status](https://github.com/espnet/ctc-segmentation/actions/workflows/python-package.yml/badge.svg)](https://github.com/espnet/ctc-segmentation/actions/workflows/python-package.yml)
[![version](https://img.shields.io/pypi/v/ctc-segmentation)](https://pypi.org/project/ctc-segmentation/)
[![AUR](https://img.shields.io/aur/version/python-ctc-segmentation-git)](https://aur.archlinux.org/packages/python-ctc-segmentation-git)
[![downloads](https://img.shields.io/pypi/dm/ctc-segmentation)](https://pypi.org/project/ctc-segmentation/)
@@ -19,7 +19,7 @@ The CTC segmentation package is not standalone, as it needs a neural network wit
* In ESPnet 1 as corpus recipe: [Alignment script](https://github.com/espnet/espnet/blob/master/espnet/bin/asr_align.py), [Example recipe](https://github.com/espnet/espnet/tree/master/egs/tedlium2/align1), [Demo](https://github.com/espnet/espnet#ctc-segmentation-demo)
* In ESPnet 2, as a script or directly as a Python interface: [Alignment script](https://github.com/espnet/espnet/blob/master/espnet2/bin/asr_align.py), [Demo](https://github.com/espnet/espnet#ctc-segmentation-demo)
* In Nvidia NeMo as dataset creation tool: [Documentation](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/tools/ctc_segmentation.html), [Example](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tools/CTC_Segmentation_Tutorial.ipynb)
* In Speechbrain, as python interface: [Alignment module](https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/alignment/ctc_segmentation.py), [Examples](https://gist.github.com/lumaku/75eca1c86d9467a54888d149dc7b84f1)
* In Speechbrain, as python interface: [Alignment module](https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/alignment/ctc_segmentation.py), [Examples](https://gist.github.com/espnet/75eca1c86d9467a54888d149dc7b84f1)

It can also be used with other frameworks:

@@ -59,29 +59,29 @@ def align_with_transcript(
with torch.no_grad():
logits = model(inputs.input_values).logits.cpu()[0]
probs = torch.nn.functional.softmax(logits,dim=-1)

# Tokenize transcripts
vocab = tokenizer.get_vocab()
inv_vocab = {v:k for k,v in vocab.items()}
unk_id = vocab["<unk>"]

tokens = []
for transcript in transcripts:
assert len(transcript) > 0
tok_ids = tokenizer(transcript.replace("\n"," ").lower())['input_ids']
        tok_ids = np.array(tok_ids, dtype=int)
tokens.append(tok_ids[tok_ids != unk_id])

# Align
char_list = [inv_vocab[i] for i in range(len(inv_vocab))]
config = ctc_segmentation.CtcSegmentationParameters(char_list=char_list)
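    # index_duration is the duration of one CTC output frame in seconds:
    # (number of audio samples) / (number of CTC frames) / (sample rate)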
config.index_duration = audio.shape[0] / probs.size()[0] / samplerate

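    # prepare_token_list aligns ground truth that is already tokenized into model token IDs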
ground_truth_mat, utt_begin_indices = ctc_segmentation.prepare_token_list(config, tokens)
timings, char_probs, state_list = ctc_segmentation.ctc_segmentation(config, probs.numpy(), ground_truth_mat)
segments = ctc_segmentation.determine_utterance_segments(config, utt_begin_indices, char_probs, timings, transcripts)
return [{"text" : t, "start" : p[0], "end" : p[1], "conf" : p[2]} for t,p in zip(transcripts, segments)]

def get_word_timestamps(
audio : np.ndarray,
samplerate : int = SAMPLERATE,
@@ -95,38 +95,38 @@ def get_word_timestamps(
with torch.no_grad():
logits = model(inputs.input_values).logits.cpu()[0]
probs = torch.nn.functional.softmax(logits,dim=-1)

predicted_ids = torch.argmax(logits, dim=-1)
pred_transcript = processor.decode(predicted_ids)

# Split the transcription into words
words = pred_transcript.split(" ")

# Align
vocab = tokenizer.get_vocab()
inv_vocab = {v:k for k,v in vocab.items()}
char_list = [inv_vocab[i] for i in range(len(inv_vocab))]
config = ctc_segmentation.CtcSegmentationParameters(char_list=char_list)
config.index_duration = audio.shape[0] / probs.size()[0] / samplerate

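    # prepare_text matches the ground-truth words character by character
    # against the model's character vocabulary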
ground_truth_mat, utt_begin_indices = ctc_segmentation.prepare_text(config, words)
timings, char_probs, state_list = ctc_segmentation.ctc_segmentation(config, probs.numpy(), ground_truth_mat)
segments = ctc_segmentation.determine_utterance_segments(config, utt_begin_indices, char_probs, timings, words)
return [{"text" : w, "start" : p[0], "end" : p[1], "conf" : p[2]} for w,p in zip(words, segments)]

print(align_with_transcript(audio,transcripts))
# [{'text': 'A MAN SAID TO THE UNIVERSE', 'start': 0.08124999999999993, 'end': 2.034375, 'conf': 0.0},
# {'text': 'SIR I EXIST', 'start': 2.3260775862068965, 'end': 4.078771551724138, 'conf': 0.0}]

print(get_word_timestamps(audio))
# [{'text': 'a', 'start': 0.08124999999999993, 'end': 0.5912715517241378, 'conf': 0.9999501323699951},
# {'text': 'man', 'start': 0.5912715517241378, 'end': 0.9219827586206896, 'conf': 0.9409108982174931},
# {'text': 'said', 'start': 0.9219827586206896, 'end': 1.2326508620689656, 'conf': 0.7700278702302796},
# {'text': 'to', 'start': 1.2326508620689656, 'end': 1.3529094827586206, 'conf': 0.5094435178226225},
# {'text': 'the', 'start': 1.3529094827586206, 'end': 1.4831896551724135, 'conf': 0.4580493446392211},
# {'text': 'universe', 'start': 1.4831896551724135, 'end': 2.034375, 'conf': 0.9285054256219009},
# {'text': 'sir', 'start': 2.3260775862068965, 'end': 3.036530172413793, 'conf': 0.0},
# {'text': 'i', 'start': 3.036530172413793, 'end': 3.347198275862069, 'conf': 0.7995760873559864},
# {'text': 'exist', 'start': 3.347198275862069, 'end': 4.078771551724138, 'conf': 0.0}]
```
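The example above is excerpted, so several names (`model`, `processor`, `tokenizer`, `inputs`, `audio`, `transcripts`, `SAMPLERATE`) come from elided setup code. A minimal sketch of that setup, assuming a Hugging Face wav2vec 2.0 CTC model; the model name and input file are illustrative choices, not part of this package:

```python
import soundfile as sf
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

SAMPLERATE = 16000  # assumed: English wav2vec 2.0 models expect 16 kHz audio
MODEL_NAME = "facebook/wav2vec2-base-960h"  # hypothetical model choice

processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
tokenizer = processor.tokenizer
model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)

# Hypothetical input: a mono 16 kHz wav file and its known transcripts
audio, _ = sf.read("speech.wav")
transcripts = ["A MAN SAID TO THE UNIVERSE", "SIR I EXIST"]

# The functions above read `inputs.input_values`; presumably built like this:
inputs = processor(audio, sampling_rate=SAMPLERATE, return_tensors="pt")
```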

@@ -145,7 +145,7 @@ pip install ctc-segmentation

* From source:
```sh
git clone https://github.com/lumaku/ctc-segmentation
git clone https://github.com/espnet/ctc-segmentation
cd ctc-segmentation
cythonize -3 ctc_segmentation/ctc_segmentation_dyn.pyx
python setup.py build
@@ -229,7 +229,7 @@ For examples, see the `prepare_*` functions in `ctc_segmentation.py`, or the exa

### Segments clean-up

Segments that were written to a `segments` file can be filtered using the confidence score. This is the minimum confidence score in log space, as described in the paper.

Utterances with a low confidence score are discarded during data clean-up. This parameter may need adjustment depending on the dataset, the ASR model, and the text conversion used.
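A rough sketch of such a filter, reusing `align_with_transcript` from the usage example above; the threshold value is a placeholder to tune, not a recommendation:

```python
MIN_CONFIDENCE = 0.5  # placeholder threshold; tune per dataset and model

aligned = align_with_transcript(audio, transcripts)
kept = [seg for seg in aligned if seg["conf"] >= MIN_CONFIDENCE]
dropped = [seg for seg in aligned if seg["conf"] < MIN_CONFIDENCE]
```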

15 changes: 9 additions & 6 deletions ctc_segmentation/__init__.py
@@ -1,8 +1,11 @@
"""Import all functions of the CTC segmentation package."""
from .ctc_segmentation import ctc_segmentation
from .ctc_segmentation import CtcSegmentationParameters
from .ctc_segmentation import determine_utterance_segments
from .ctc_segmentation import prepare_text
from .ctc_segmentation import prepare_tokenized_text
from .ctc_segmentation import prepare_token_list

from .ctc_segmentation import (
CtcSegmentationParameters,
ctc_segmentation,
determine_utterance_segments,
prepare_text,
prepare_token_list,
prepare_tokenized_text,
)
from .partitioning import get_partitions
1 change: 1 addition & 0 deletions ctc_segmentation/ctc_segmentation.py
@@ -18,6 +18,7 @@
"""

import logging

import numpy as np

logger = logging.getLogger("ctc_segmentation")
2 changes: 2 additions & 0 deletions ctc_segmentation/ctc_segmentation_dyn.pyx
@@ -13,7 +13,9 @@ For a description, see https://arxiv.org/abs/2007.09127
"""

import logging

import numpy as np

cimport numpy as np


2 changes: 2 additions & 0 deletions setup.cfg
@@ -0,0 +1,2 @@
[isort]
profile = black
29 changes: 13 additions & 16 deletions setup.py
@@ -1,25 +1,26 @@
from setuptools import setup, find_packages, Extension
from setuptools.command.build_ext import build_ext
import numpy

from setuptools import Extension, find_packages, setup
from setuptools.command.build_ext import build_ext

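# Build from the .pyx source when Cython is available; otherwise fall back
# to the pre-generated C file.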
try:
from Cython.Build import cythonize

USE_CYTHON = True
except ImportError:
USE_CYTHON = False

# https://cython.readthedocs.io/en/latest/src/userguide/source_files_and_compilation.html
ext = '.pyx' if USE_CYTHON else '.c'
ext = ".pyx" if USE_CYTHON else ".c"
extensions = [
Extension(
name="ctc_segmentation.ctc_segmentation_dyn",
sources=["ctc_segmentation/ctc_segmentation_dyn"+ext],
sources=["ctc_segmentation/ctc_segmentation_dyn" + ext],
include_dirs=[numpy.get_include()],
)
]
if USE_CYTHON:
from Cython.Build import cythonize

extensions = cythonize(extensions)

package_information = """
@@ -29,28 +30,24 @@
It can be combined with CTC-based ASR models.
This package includes the core functions.

https://github.com/lumaku/ctc-segmentation
https://github.com/espnet/ctc-segmentation
"""

setup(
name="ctc_segmentation",
version="1.7.4",

python_requires='>=3.6',
version="1.7.5",
python_requires=">=3.9",
packages=find_packages(exclude=["tests"]),
setup_requires=["numpy"],
install_requires=["setuptools", "numpy", "Cython"],
tests_require=["pytest", "torch"],
zip_safe=False,
ext_modules=extensions,
cmdclass={'build_ext': build_ext},

cmdclass={"build_ext": build_ext},
author="Ludwig Kuerzinger <ludwig.kuerzinger@tum.de>, "
"Dominik Winkelbauer <dominik.winkelbauer@tum.de>",
description="CTC segmentation to align utterances within "
"large audio files.",
url="https://github.com/lumaku/ctc-segmentation",

"Dominik Winkelbauer <dominik.winkelbauer@tum.de>",
description="CTC segmentation to align utterances within " "large audio files.",
url="https://github.com/espnet/ctc-segmentation",
long_description_content_type="text/markdown",
long_description=package_information,
)
14 changes: 8 additions & 6 deletions tests/test_ctc_segmentation.py
@@ -7,12 +7,14 @@
"""Test functions for CTC segmentation."""
import numpy as np

from ctc_segmentation import ctc_segmentation
from ctc_segmentation import CtcSegmentationParameters
from ctc_segmentation import determine_utterance_segments
from ctc_segmentation import prepare_text
from ctc_segmentation import prepare_tokenized_text
from ctc_segmentation import prepare_token_list
from ctc_segmentation import (
CtcSegmentationParameters,
ctc_segmentation,
determine_utterance_segments,
prepare_text,
prepare_token_list,
prepare_tokenized_text,
)


def test_ctcsegmentationparameters():