From c58f1bebc5aff13cdd7364eb315ff15abe7988a5 Mon Sep 17 00:00:00 2001 From: Tohru <65994850+Tohrusky@users.noreply.github.com> Date: Mon, 19 Aug 2024 19:57:00 +0800 Subject: [PATCH] doc: add README.md (#3) * update tests --- README.md | 68 +++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 2 +- tests/test_llm.py | 24 +++++++++------- tests/test_srt.py | 8 +++--- tests/util.py | 7 ++++- yuisub/__main__.py | 7 +++-- 6 files changed, 97 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index bcb5add..6a78ddc 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,69 @@ # yuisub -Auto translation of new anime episodes based on Yui-MHCP001 +[![codecov](https://codecov.io/gh/TensoRaws/yuisub/branch/main/graph/badge.svg?token=B2TNKYN4O4)](https://codecov.io/gh/TensoRaws/yuisub) +[![CI-test](https://github.com/TensoRaws/yuisub/actions/workflows/CI-test.yml/badge.svg)](https://github.com/TensoRaws/yuisub/actions/workflows/CI-test.yml) +[![Release-pypi](https://github.com/TensoRaws/yuisub/actions/workflows/Release-pypi.yml/badge.svg)](https://github.com/TensoRaws/yuisub/actions/workflows/Release-pypi.yml) +[![PyPI version](https://badge.fury.io/py/yuisub.svg)](https://badge.fury.io/py/yuisub) +![GitHub](https://img.shields.io/github/license/TensoRaws/yuisub) + +Auto translation of new anime episodes based on ~~Yui-MHCP001~~ LLM + +### Install + +Make sure you have Python >= 3.9 installed on your system + +```bash +pip install yuisub +``` + +If you want to use the `a2t` module, you need to install `Whisper` first + +```bash +# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 +pip install openai-whisper +``` + +### Command Line Usage + +`yuisub` can be used from the command line to generate bilingual SRT files. 
Here's how to use it: + +```bash +yuisub -h # Displays help message +``` + +### Library + +`yuisub` can also be used as a library + +### Example + +```python3 +from yuisub import bilingual, from_file +from yuisub.a2t import WhisperModel + +# srt from audio +model = WhisperModel(name="medium", device="cuda") +segs = model.transcribe(audio="path/to/audio.mp3") +srt = model.gen_srt(segs) + +# srt from file +# srt = from_file("path/to/input.srt") + +# Generate bilingual SRT +srt_zh, srt_bilingual = bilingual( + srt=srt, + model="gpt_model_name", + api_key="your_openai_api_key", + base_url="api_url", + bangumi_url="https://bangumi.tv/subject/424883/" +) + +# Save the SRT files +srt_zh.save("path/to/output.zh.srt") +srt_bilingual.save("path/to/output.bilingual.srt") +``` + +### License + +This project is licensed under the BSD 3-Clause - see +the [LICENSE file](https://github.com/TohruskyDev/yuisub/blob/main/LICENSE) for details. diff --git a/pyproject.toml b/pyproject.toml index 74cb246..3208479 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ license = "GPL-3.0-only" name = "yuisub" readme = "README.md" repository = "https://github.com/TensoRaws/yuisub" -version = "0.0.2" +version = "0.0.3" # Requirements [tool.poetry.dependencies] diff --git a/tests/test_llm.py b/tests/test_llm.py index de061d2..82f66f6 100644 --- a/tests/test_llm.py +++ b/tests/test_llm.py @@ -12,7 +12,7 @@ def test_llm_none() -> None: - t = Translator(model="deepseek-chat", api_key=util.API_KEY, base_url="https://api.deepseek.com") + t = Translator(model=util.OPENAI_MODEL, api_key=util.OPENAI_API_KEY, base_url=util.OPENAI_BASE_URL) print(t.system_prompt) res = asyncio.run(t.ask(ORIGIN(origin=""))) assert res.zh == "" @@ -20,7 +20,11 @@ def test_llm_none() -> None: @pytest.mark.skipif(os.environ.get("GITHUB_ACTIONS") == "true", reason="Skipping test when running on CI") def test_llm() -> None: - t = Translator(model="deepseek-chat", api_key=util.API_KEY, 
base_url="https://api.deepseek.com") + t = Translator( + model=util.OPENAI_MODEL, + api_key=util.OPENAI_API_KEY, + base_url=util.OPENAI_BASE_URL, + ) print(t.system_prompt) res = asyncio.run(t.ask(origin)) print(res.zh) @@ -29,10 +33,10 @@ def test_llm() -> None: @pytest.mark.skipif(os.environ.get("GITHUB_ACTIONS") == "true", reason="Skipping test when running on CI") def test_llm_bangumi() -> None: t = Translator( - model="deepseek-chat", - api_key=util.API_KEY, - base_url="https://api.deepseek.com", - bangumi_url="https://bangumi.tv/subject/424883/", + model=util.OPENAI_MODEL, + api_key=util.OPENAI_API_KEY, + base_url=util.OPENAI_BASE_URL, + bangumi_url=util.BANGUMI_URL, ) print(t.system_prompt) res = asyncio.run(t.ask(origin)) @@ -42,10 +46,10 @@ def test_llm_bangumi() -> None: @pytest.mark.skipif(os.environ.get("GITHUB_ACTIONS") == "true", reason="Skipping test when running on CI") def test_llm_bangumi_2() -> None: t = Translator( - model="deepseek-chat", - api_key=util.API_KEY, - base_url="https://api.deepseek.com", - bangumi_url="https://bangumi.tv/subject/424883/", + model=util.OPENAI_MODEL, + api_key=util.OPENAI_API_KEY, + base_url=util.OPENAI_BASE_URL, + bangumi_url=util.BANGUMI_URL, ) print(t.system_prompt) s = ORIGIN( diff --git a/tests/test_srt.py b/tests/test_srt.py index cde77c5..4197002 100644 --- a/tests/test_srt.py +++ b/tests/test_srt.py @@ -26,10 +26,10 @@ def test_bilingual() -> None: srt_zh, srt_zh_jp = bilingual( srt=srt, - model="deepseek-chat", - api_key=util.API_KEY, - base_url="https://api.deepseek.com", - bangumi_url="https://bangumi.tv/subject/424883/", + model=util.OPENAI_MODEL, + api_key=util.OPENAI_API_KEY, + base_url=util.OPENAI_BASE_URL, + bangumi_url=util.BANGUMI_URL, ) srt_zh.save(util.projectPATH / "assets" / "test.zh.srt") diff --git a/tests/util.py b/tests/util.py index dde94d7..22793dd 100644 --- a/tests/util.py +++ b/tests/util.py @@ -1,3 +1,4 @@ +import os from pathlib import Path import torch @@ -10,4 +11,8 @@ DEVICE = 
"cuda" if torch.cuda.is_available() else "cpu" MODEL_NAME = "medium" if DEVICE == "cuda" else "tiny" -API_KEY = "sk-" +BANGUMI_URL = "https://bangumi.tv/subject/424883/" + +OPENAI_MODEL = str(os.getenv("OPENAI_MODEL")) if os.getenv("OPENAI_MODEL") else "deepseek-chat" +OPENAI_BASE_URL = str(os.getenv("OPENAI_BASE_URL")) if os.getenv("OPENAI_BASE_URL") else "https://api.deepseek.com" +OPENAI_API_KEY = str(os.getenv("OPENAI_API_KEY")) if os.getenv("OPENAI_API_KEY") else "sk-" diff --git a/yuisub/__main__.py b/yuisub/__main__.py index 94746e8..4d58c1f 100644 --- a/yuisub/__main__.py +++ b/yuisub/__main__.py @@ -68,8 +68,11 @@ def main() -> None: bangumi_url=args.BANGUMI_URL, ) - srt_zh.save(args.OUTPUT_ZH) - srt_bilingual.save(args.OUTPUT_BILINGUAL) + if args.OUTPUT_ZH: + srt_zh.save(args.OUTPUT_ZH) + + if args.OUTPUT_BILINGUAL: + srt_bilingual.save(args.OUTPUT_BILINGUAL) if __name__ == "__main__":