feat: use pysubs2 to gen ass subtitle #4

Merged · 6 commits · Aug 19, 2024
2 changes: 1 addition & 1 deletion .github/workflows/CI-test.yml
@@ -48,7 +48,7 @@ jobs:
- name: Test
run: |
pip install numpy==1.26.4
pip install pre-commit pytest mypy ruff types-requests pytest-cov coverage pydantic openai openai-whisper requests beautifulsoup4 tenacity pysrt
pip install pre-commit pytest mypy ruff types-requests pytest-cov coverage pydantic openai openai-whisper requests beautifulsoup4 tenacity pysubs2

make lint
make test
4 changes: 3 additions & 1 deletion .gitignore
@@ -161,5 +161,7 @@ cython_debug/
.idea/
/.ruff_cache/

/assets/*.srt
/assets/*.mkv
/assets/*.mp3
/assets/*.srt
/assets/*.ass
29 changes: 16 additions & 13 deletions README.md
@@ -25,7 +25,7 @@ pip install openai-whisper

### Command Line Usage

`yuisub` can be used from the command line to generate bilingual SRT files. Here's how to use it:
`yuisub` can be used from the command line to generate bilingual ASS files. Here's how to use it:

```bash
yuisub -h # Displays help message
@@ -38,29 +38,32 @@
### Example

```python3
from yuisub import bilingual, from_file
from yuisub import translate, bilingual, load
from yuisub.a2t import WhisperModel

# srt from audio
# sub from audio
model = WhisperModel(name="medium", device="cuda")
segs = model.transcribe(audio="path/to/audio.mp3")
srt = model.gen_srt(segs)
sub = model.transcribe(audio="path/to/audio.mp3")

# srt from file
# srt = from_file("path/to/input.srt")
# sub from file
# sub = load("path/to/input.srt")

# Generate bilingual SRT
srt_zh, srt_bilingual = bilingual(
srt=srt,
# generate bilingual subtitle
sub_zh = translate(
sub=sub,
model="gpt_model_name",
api_key="your_openai_api_key",
base_url="api_url",
bangumi_url="https://bangumi.tv/subject/424883/"
)
sub_bilingual = bilingual(
sub_origin=sub,
sub_zh=sub_zh
)

# Save the SRT files
srt_zh.save("path/to/output.zh.srt")
srt_bilingual.save("path/to/output.bilingual.srt")
# save the ASS files
sub_zh.save("path/to/output.zh.ass")
sub_bilingual.save("path/to/output.bilingual.ass")
```

### License
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -39,14 +39,14 @@ license = "GPL-3.0-only"
name = "yuisub"
readme = "README.md"
repository = "https://github.com/TensoRaws/yuisub"
version = "0.0.3"
version = "0.0.4"

# Requirements
[tool.poetry.dependencies]
beautifulsoup4 = "*"
openai = "*"
pydantic = "*"
pysrt = "*"
pysubs2 = "*"
python = "^3.9"
requests = "*"
tenacity = "*"
36 changes: 0 additions & 36 deletions tests/test_srt.py

This file was deleted.

36 changes: 36 additions & 0 deletions tests/test_sub.py
@@ -0,0 +1,36 @@
import os

import pytest

from tests import util
from yuisub.a2t import WhisperModel
from yuisub.sub import bilingual, load, translate


def test_sub() -> None:
sub = load(util.TEST_ENG_SRT)
sub.save(util.projectPATH / "assets" / "test.en.ass")


def test_audio() -> None:
model = WhisperModel(name=util.MODEL_NAME, device=util.DEVICE)

sub = model.transcribe(audio=str(util.TEST_AUDIO))
sub.save(util.projectPATH / "assets" / "test.audio.ass")


@pytest.mark.skipif(os.environ.get("GITHUB_ACTIONS") == "true", reason="Skipping test when running on CI")
def test_bilingual() -> None:
sub = load(util.TEST_ENG_SRT)

sub_zh = translate(
sub=sub,
model=util.OPENAI_MODEL,
api_key=util.OPENAI_API_KEY,
base_url=util.OPENAI_BASE_URL,
bangumi_url=util.BANGUMI_URL,
)
sub_bilingual = bilingual(sub_origin=sub, sub_zh=sub_zh)

sub_zh.save(util.projectPATH / "assets" / "test.zh.ass")
sub_bilingual.save(util.projectPATH / "assets" / "test.bilingual.ass")
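
These tests only write ASS files into assets/; if you want to eyeball what they produced, a quick hedged sketch follows (the relative path is assumed from the test above, pysubs2 must be installed, and the script is run from the project root):

```python3
import pysubs2

# Load one of the files written by test_sub(); path is relative to the project root.
subs = pysubs2.load("assets/test.en.ass")

# Print the first few events to confirm timing and text survived the conversion.
for event in subs[:3]:
    print(event.start, event.end, event.text)  # start/end are in milliseconds
```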
2 changes: 1 addition & 1 deletion yuisub/__init__.py
@@ -1,4 +1,4 @@
from yuisub.bangumi import bangumi # noqa: F401
from yuisub.llm import Translator # noqa: F401
from yuisub.prompt import ORIGIN, ZH # noqa: F401
from yuisub.srt import bilingual, from_file # noqa: F401
from yuisub.sub import bilingual, load, translate # noqa: F401
32 changes: 16 additions & 16 deletions yuisub/__main__.py
@@ -1,19 +1,19 @@
import argparse
import sys

from yuisub.srt import bilingual, from_file
from yuisub.sub import bilingual, load, translate

# ffmpeg -i test.mkv -c:a mp3 -map 0:a:0 test.mp3
# ffmpeg -i test.mkv -map 0:s:0 eng.srt

parser = argparse.ArgumentParser()
parser.description = "Generate bilingual SRT files from audio or SRT input."
parser.description = "Generate Bilingual Subtitle from audio or subtitle file"
# input
parser.add_argument("-a", "--AUDIO", type=str, help="Path to the audio file", required=False)
parser.add_argument("-s", "--SRT", type=str, help="Path to the input SRT file", required=False)
# srt output
parser.add_argument("-oz", "--OUTPUT_ZH", type=str, help="Path to save the Chinese SRT file", required=False)
parser.add_argument("-ob", "--OUTPUT_BILINGUAL", type=str, help="Path to save the bilingual SRT file", required=False)
parser.add_argument("-s", "--SUB", type=str, help="Path to the input Subtitle file", required=False)
# subtitle output
parser.add_argument("-oz", "--OUTPUT_ZH", type=str, help="Path to save the Chinese ASS file", required=False)
parser.add_argument("-ob", "--OUTPUT_BILINGUAL", type=str, help="Path to save the bilingual ASS file", required=False)
# openai gpt
parser.add_argument("-om", "--OPENAI_MODEL", type=str, help="Openai model name", required=True)
parser.add_argument("-api", "--OPENAI_API_KEY", type=str, help="Openai API key", required=True)
@@ -28,8 +28,8 @@


def main() -> None:
if args.AUDIO and args.SRT:
raise ValueError("Please provide only one input file, either audio or SRT.")
if args.AUDIO and args.SUB:
raise ValueError("Please provide only one input file, either audio or subtitle file")

if not args.OUTPUT_ZH and not args.OUTPUT_BILINGUAL:
raise ValueError("Please provide output paths for the subtitles.")
@@ -53,26 +53,26 @@ def main() -> None:

model = WhisperModel(name=_MODEL, device=_DEVICE)

segs = model.transcribe(audio=args.AUDIO)

srt = model.gen_srt(segs=segs)
sub = model.transcribe(audio=args.AUDIO)

else:
srt = from_file(args.SRT)
sub = load(args.SUB)

srt_zh, srt_bilingual = bilingual(
srt=srt,
sub_zh = translate(
sub=sub,
model=args.OPENAI_MODEL,
api_key=args.OPENAI_API_KEY,
base_url=args.OPENAI_BASE_URL,
bangumi_url=args.BANGUMI_URL,
)

sub_bilingual = bilingual(sub_origin=sub, sub_zh=sub_zh)

if args.OUTPUT_ZH:
srt_zh.save(args.OUTPUT_ZH)
sub_zh.save(args.OUTPUT_ZH)

if args.OUTPUT_BILINGUAL:
srt_bilingual.save(args.OUTPUT_BILINGUAL)
sub_bilingual.save(args.OUTPUT_BILINGUAL)


if __name__ == "__main__":
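
Pieced together from the fragments visible above (the collapsed parts of __main__.py are not reproduced here), the rewritten CLI flow reduces to roughly the sketch below. Argument parsing and the optional outputs are simplified, and the Whisper model name, device, and output paths are assumptions, not values taken from the file:

```python3
# Editor's sketch of the dispatch implied by the diff above, not the file's actual contents.
from typing import Optional

from yuisub.a2t import WhisperModel
from yuisub.sub import bilingual, load, translate


def run(audio: Optional[str] = None, sub_path: Optional[str] = None, **openai_kwargs) -> None:
    if audio and sub_path:
        raise ValueError("Please provide only one input file, either audio or subtitle file")

    if audio:
        # Whisper path: transcribe straight to a pysubs2 SSAFile (name/device assumed).
        sub = WhisperModel(name="medium", device="cuda").transcribe(audio=audio)
    else:
        # Subtitle path: load an existing subtitle file.
        sub = load(sub_path)

    # openai_kwargs: model, api_key, base_url, bangumi_url (see translate() usage above).
    sub_zh = translate(sub=sub, **openai_kwargs)
    sub_bilingual = bilingual(sub_origin=sub, sub_zh=sub_zh)

    sub_zh.save("output.zh.ass")               # output paths are placeholders
    sub_bilingual.save("output.bilingual.ass")
```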
45 changes: 5 additions & 40 deletions yuisub/a2t.py
@@ -1,24 +1,10 @@
from typing import List, Optional, Tuple, Union
from typing import Optional, Tuple, Union

import numpy as np
import pysrt
import pysubs2
import torch
import whisper
from pydantic import BaseModel
from pysrt import SubRipFile


class Segment(BaseModel):
id: int
seek: int
start: float
end: float
text: str
tokens: List[int]
temperature: float
avg_logprob: float
compression_ratio: float
no_speech_prob: float
from pysubs2 import SSAFile


class WhisperModel:
@@ -40,7 +26,7 @@ def transcribe(
word_timestamps: bool = False,
prepend_punctuations: str = "\"'“¿([{-",
append_punctuations: str = "\"'.。,,!!??::”)]}、",
) -> List[Segment]:
) -> SSAFile:
result = self.model.transcribe(
audio=audio,
verbose=verbose,
@@ -54,25 +40,4 @@ def transcribe(
prepend_punctuations=prepend_punctuations,
append_punctuations=append_punctuations,
)
segments: List[Segment] = [Segment(**seg) for seg in result["segments"]]
return segments

@staticmethod
def gen_srt(segs: List[Segment]) -> SubRipFile:
line_out: str = ""
for s in segs:
segment_id = s.id + 1
start_time = format_time(s.start)
end_time = format_time(s.end)
text = s.text

line_out += f"{segment_id}\n{start_time} --> {end_time}\n{text.lstrip()}\n\n"
subs = pysrt.from_string(line_out)
return subs


def format_time(seconds: float) -> str:
minutes, seconds = divmod(seconds, 60)
hours, minutes = divmod(minutes, 60)
milliseconds = (seconds - int(seconds)) * 1000
return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d},{int(milliseconds):03d}"
return pysubs2.load_from_whisper(result)
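
The core of the rewritten transcribe() is that pysubs2 can build a subtitle file directly from Whisper's output, which is what replaces the deleted gen_srt()/format_time() helpers. A minimal standalone sketch of that path (the model size and file paths are placeholders):

```python3
import pysubs2
import whisper

# Transcribe with openai-whisper; the result is a dict containing a "segments" list.
model = whisper.load_model("medium")
result = model.transcribe("path/to/audio.mp3")

# pysubs2 converts the Whisper segments into an SSAFile in one call,
# and save() picks the ASS format from the .ass extension.
subs = pysubs2.load_from_whisper(result)
subs.save("path/to/output.ass")
```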
80 changes: 0 additions & 80 deletions yuisub/srt.py

This file was deleted.
