Skip to content

Commit

Permalink
more common sense to that
Browse files Browse the repository at this point in the history
  • Loading branch information
skelly37 committed Mar 2, 2025
1 parent 4767dd6 commit 781e06a
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 14 deletions.
9 changes: 0 additions & 9 deletions Preprocessing/utils.py

This file was deleted.

16 changes: 12 additions & 4 deletions preprocessor/__main__.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@
import argparse
import logging
from pathlib import Path

from transciption_generator import TranscriptionGenerator
from video_transcoder import VideoTranscoder


# Command-line entry point of the preprocessor: configures logging, parses the
# CLI arguments, then runs transcription followed by video transcoding.
# NOTE(review): this hunk is a rendered diff with indentation stripped; the
# `argparse.FileType` arguments and the `Path` arguments below look like the
# pre- and post-commit versions of the same three options — confirm against
# the repository before relying on either set.
def main() -> None:
if __name__ == "__main__":
# Root logger: DEBUG level, "<timestamp> | <level> | <message>" line format.
logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.DEBUG)

parser = argparse.ArgumentParser()
# File-object based variants: argparse opens the given path ("r" / "w") itself.
parser.add_argument("videos", type=argparse.FileType("r"), help="Path to input videos for preprocessing")
parser.add_argument("--transcoded-videos-dir", "-v", type=argparse.FileType("w"), default="transcoded_videos", help="Path for output videos after transcoding")
parser.add_argument("--transcription-jsons-dir", "-j", type=argparse.FileType("w"), default="transcriptions", help="Path for output transcriptions JSONs")
# Path based variant: the positional `videos` argument is parsed into a pathlib.Path.
parser.add_argument("videos", type=Path, help="Path to input videos for preprocessing")

# TODO: introduce two subparsers to split the arguments
#       (presumably transcription vs. transcoding options — confirm).
# TODO: take the argument defaults from the classes' own default values
#       instead of hard-coding them here.
# Output locations; the string defaults are converted by `type=Path` like
# command-line values, so both attributes end up as pathlib.Path objects.
parser.add_argument("--transcoded-videos-dir", "-v", type=Path, default="transcoded_videos", help="Path for output videos after transcoding")
parser.add_argument("--transcription-jsons-dir", "-j", type=Path, default="transcriptions", help="Path for output transcriptions JSONs")

args = parser.parse_args()


# Both steps read the same input path and run sequentially: transcription first,
# then transcoding, each writing to its own output location.
TranscriptionGenerator(args.videos, args.transcription_jsons_dir).transcribe()
VideoTranscoder(args.videos, args.transcoded_videos_dir).transcode()

# TODO: pass the generated transcriptions on to elastic
#       (presumably Elasticsearch — confirm).
# TODO: split the two paths above (transcription and transcoding) so that
#       they can run asynchronously instead of one after the other.

2 changes: 2 additions & 0 deletions preprocessor/transciption_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ def __init__(self, input_videos: Path, output_jsons: Path):
self.__input_videos = input_videos
self.__output_jsons = output_jsons

# normalizer -> audio processor -> json processor

def transcribe(self) -> None:
self.__get_best_audio_path()
self.__normalize()
Expand Down
23 changes: 22 additions & 1 deletion preprocessor/video_transcoder.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,31 @@
from pathlib import Path

class VideoTranscoder:
def __init__(self, input_videos: Path, output_videos: Path):
DEFAULT_CODEC: str = "h264_nvenc"
DEFAULT_PRESET: str = "slow"
DEFAULT_CRF: int = 31
DEFAULT_GOP_SIZE: float = 0.5

# Store the input/output locations and the transcoding settings on the instance.
#
# Parameters:
#   input_videos  -- path to the input videos (the CLI passes its `videos` argument).
#   output_videos -- path for the output videos after transcoding.
#   codec, preset, crf, gop_size -- transcoding settings; each defaults to the
#       matching class constant (DEFAULT_CODEC "h264_nvenc", DEFAULT_PRESET "slow",
#       DEFAULT_CRF 31, DEFAULT_GOP_SIZE 0.5).
# NOTE(review): how these settings are consumed is not visible in this hunk, and
# the unit of gop_size is not shown (presumably seconds, given the float 0.5) — confirm.
def __init__(
self,
input_videos: Path,
output_videos: Path,
codec: str = DEFAULT_CODEC,
preset: str = DEFAULT_PRESET,
crf: int = DEFAULT_CRF,
gop_size: float = DEFAULT_GOP_SIZE,
):
# Double-underscore attributes are name-mangled, i.e. private to this class.
self.__input_videos = input_videos
self.__output_videos = output_videos

# Transcoding settings, stored as passed in; no validation happens here.
self.__codec = codec
self.__preset = preset
self.__crf = crf
self.__gop_size = gop_size


# video converter

def transcode(self):
self.__prepare_videos()
self.__do_transcoding()
Expand Down

0 comments on commit 781e06a

Please sign in to comment.