Skip to content

Commit

Permalink
normalizer and processor moved into generator
Browse files Browse the repository at this point in the history
  • Loading branch information
skelly37 committed Mar 2, 2025
1 parent 6da7f79 commit 0e287c8
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 236 deletions.
97 changes: 0 additions & 97 deletions Preprocessing/AudioNormalizer.py

This file was deleted.

86 changes: 0 additions & 86 deletions Preprocessing/AudioProcessor.py

This file was deleted.

96 changes: 60 additions & 36 deletions preprocessor/__main__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
from pathlib import Path
import sys

from transciption_generator import TranscriptionGenerator
from video_transcoder import VideoTranscoder
Expand All @@ -10,6 +11,21 @@
parse_multi_mode_args,
)

MODE_WORKERS = {
"all": [
VideoTranscoder,
TranscriptionGenerator,
],

"transcode": [
VideoTranscoder,
],

"transcribe": [
TranscriptionGenerator,
],
}


def generate_parser_modes() -> ParserModes:
parser_modes = {
Expand All @@ -24,13 +40,39 @@ def generate_parser_modes() -> ParserModes:
(
"--transcription_jsons_dir", {
"type": Path,
"default": "", # todo
"default": TranscriptionGenerator.DEFAULT_OUTPUT_DIR,
"help": "Path for output transcriptions JSONs",
},
),

# todo

(
"--model", {
"type": str,
"default": TranscriptionGenerator.DEFAULT_MODEL,
"help": "Whisper model to use",
},
),
(
"--language", {
"type": str,
"default": TranscriptionGenerator.DEFAULT_LANGUAGE,
"help": "Language to use",
},
),
(
"--device", {
"type": str,
"default": TranscriptionGenerator.DEFAULT_DEVICE,
"help": "Device to use",
},
),
(
"--extra_json_keys_to_remove", {
"type": str,
"nargs": "*",
"default": [],
"help": "Additional keys to remove from JSONs",
},
),
],

"transcode": [
Expand All @@ -53,35 +95,35 @@ def generate_parser_modes() -> ParserModes:
"type": lambda x: Resolution[x.upper()],
"choices": list(Resolution),
"default": Resolution.R1080P,
"help": "Target resolution for all videos.",
"help": "Target resolution for all videos",
},
),
(
"--codec", {
"type": str,
"default": VideoTranscoder.DEFAULT_CODEC,
"help": "Video codec.",
"help": "Video codec",
},
),
(
"--preset", {
"type": str,
"default": VideoTranscoder.DEFAULT_PRESET,
"help": "FFmpeg preset.",
"help": "FFmpeg preset",
},
),
(
"--crf", {
"type": int,
"default": VideoTranscoder.DEFAULT_CRF,
"help": "Quality (lower = better).",
"help": "Quality (lower = better)",
},
),
(
"--gop-size", {
"type": float,
"default": VideoTranscoder.DEFAULT_GOP_SIZE,
"help": "Keyframe interval in seconds.",
"help": "Keyframe interval in seconds",
},
),
],
Expand All @@ -100,42 +142,24 @@ def generate_parser_modes() -> ParserModes:

return parser_modes


if __name__ == "__main__":
logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.DEBUG)


args = parse_multi_mode_args(
description="Generate JSON audio transcriptions or transcode videos to an acceptable resolution.",
modes = generate_parser_modes(),
)

sys.exit(0)

mode_workers = {
"all": [
VideoTranscoder,
TranscriptionGenerator,
],

"transcode": [
VideoTranscoder,
],

"transcribe": [
TranscriptionGenerator,
],
}

print(args)


# add defaults from classes here

exit(0)


#TranscriptionGenerator(args.videos, args.transcription_jsons_dir).transcribe()
#VideoTranscoder(args.videos, args.transcoded_videos_dir).transcode()
return_codes = []
for worker in MODE_WORKERS[args["mode"]]:
return_codes.append(
worker(args).work(),
)
# split two paths to be async

# pass transcriptions to elastic
# split two paths to be async

sys.exit(max(return_codes))
Loading

0 comments on commit 0e287c8

Please sign in to comment.