diff --git a/configs/hunyuan_settings.py b/configs/hunyuan_settings.py
new file mode 100644
index 0000000..3f84236
--- /dev/null
+++ b/configs/hunyuan_settings.py
@@ -0,0 +1,70 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings
+from typing import Literal
+
+
+class VideoGenSettings(BaseSettings):
+    """
+    Configuration settings for video generation using the Hunyuan model.
+
+    Inherits from Pydantic BaseSettings to support loading from environment variables
+    and .env files with the prefix VIDEO_GEN_.
+
+    Attributes:
+        model_path (str): Path to the pretrained model directory
+        prompt (str): Text description of the video to generate
+        resolution (str): Video dimensions in "WxH" format (e.g., "1280x720")
+        video_length (int): Number of frames to generate (65 for 2s, 129 for 5s)
+        seed (int): Random seed for generation (-1 for a random seed)
+        num_inference_steps (int): Number of denoising steps (1-100)
+        guidance_scale (float): Classifier-free guidance scale (1.0-20.0)
+        flow_shift (float): Flow shift parameter for motion control (0.0-10.0)
+        embedded_guidance_scale (float): Scale for embedded guidance (1.0-20.0)
+        output_dir (str): Directory path for saving generated videos
+    """
+    model_path: str = Field(..., description="Path to the model")
+    prompt: str = Field(
+        default="A cat walks on the grass, realistic style.",
+        description="Prompt for video generation"
+    )
+    resolution: Literal[
+        "1280x720", "720x1280", "1104x832", "832x1104", "960x960",
+        "960x544", "544x960", "832x624", "624x832", "720x720"
+    ] = Field(default="1280x720", description="Video resolution")
+    video_length: Literal[65, 129] = Field(
+        default=129,
+        description="Video length in frames (65 for 2s, 129 for 5s)"
+    )
+    seed: int = Field(default=-1, description="Random seed (-1 for random)")
+    num_inference_steps: int = Field(
+        default=50,
+        ge=1,
+        le=100,
+        description="Number of inference steps"
+    )
+    guidance_scale: float = Field(
+        default=1.0,
+        ge=1.0,
+        le=20.0,
+        description="Guidance scale"
+    )
+    flow_shift: float = Field(
+        default=7.0,
+        ge=0.0,
+        le=10.0,
+        description="Flow shift"
+    )
+    embedded_guidance_scale: float = Field(
+        default=6.0,
+        ge=1.0,
+        le=20.0,
+        description="Embedded guidance scale"
+    )
+    output_dir: str = Field(
+        default="outputs",
+        description="Directory to save generated videos"
+    )
+
+    class Config:
+        env_file = ".env"
+        env_prefix = "VIDEO_GEN_"
\ No newline at end of file
diff --git a/scripts/hunyuan_video_inference.py b/scripts/hunyuan_video_inference.py
new file mode 100644
index 0000000..c953091
--- /dev/null
+++ b/scripts/hunyuan_video_inference.py
@@ -0,0 +1,108 @@
+"""
+Hunyuan Video Generation Script
+
+This script provides functionality for generating videos using the Hunyuan AI model.
+It handles model initialization, video generation configuration, and output saving.
+
+The script can be configured via environment variables (with the VIDEO_GEN_ prefix) or a .env file.
+"""
+
+import os
+import time
+from datetime import datetime
+from pathlib import Path
+from loguru import logger
+from hunyuan.hyvideo.utils.file_utils import save_videos_grid
+from hunyuan.hyvideo.config import parse_args
+from hunyuan.hyvideo.inference import HunyuanVideoSampler
+from configs.hunyuan_settings import VideoGenSettings
+
+
+
+def initialize_model(model_path: str):
+    """
+    Initialize the Hunyuan video generation model.
+
+    Args:
+        model_path (str): Path to the directory containing the model files
+
+    Returns:
+        HunyuanVideoSampler: Initialized video generation model
+
+    Raises:
+        ValueError: If the model_path directory doesn't exist
+    """
+    args = parse_args()
+    models_root_path = Path(model_path)
+    if not models_root_path.exists():
+        raise ValueError(f"model_path does not exist: {models_root_path}")
+    hunyuan_video_sampler = HunyuanVideoSampler.from_pretrained(models_root_path, args=args)
+    return hunyuan_video_sampler
+
+def generate_video(
+    model,
+    settings: VideoGenSettings
+):
+    """
+    Generate a video using the Hunyuan model based on the provided settings.
+
+    Args:
+        model (HunyuanVideoSampler): Initialized Hunyuan video model
+        settings (VideoGenSettings): Configuration settings for video generation
+
+    Returns:
+        str: Path to the generated video file
+
+    Notes:
+        - The video is saved in the specified output directory
+        - The filename includes the timestamp, seed, and truncated prompt
+        - Videos are saved as MP4 files at 24 FPS
+    """
+    seed = None if settings.seed == -1 else settings.seed
+    width, height = settings.resolution.split("x")
+    width, height = int(width), int(height)
+    negative_prompt = ""
+
+    outputs = model.predict(
+        prompt=settings.prompt,
+        height=height,
+        width=width,
+        video_length=settings.video_length,
+        seed=seed,
+        negative_prompt=negative_prompt,
+        infer_steps=settings.num_inference_steps,
+        guidance_scale=settings.guidance_scale,
+        num_videos_per_prompt=1,
+        flow_shift=settings.flow_shift,
+        batch_size=1,
+        embedded_guidance_scale=settings.embedded_guidance_scale
+    )
+
+    samples = outputs['samples']
+    sample = samples[0].unsqueeze(0)
+    save_path = os.path.join(os.getcwd(), settings.output_dir)
+    os.makedirs(save_path, exist_ok=True)
+    time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%H:%M:%S")
+    video_path = f"{save_path}/{time_flag}_seed{outputs['seeds'][0]}_{outputs['prompts'][0][:100].replace('/','')}.mp4"
+    save_videos_grid(sample, video_path, fps=24)
+    logger.info(f"Sample saved to: {video_path}")
+
+    return video_path
+
+def main():
+    """
+    Main entry point for the video generation script.
+
+    Workflow:
+    1. Loads configuration from environment/settings
+    2. Initializes the Hunyuan model
+    3. Generates the video based on the settings
+    4. Prints the path to the generated video
+    """
+    settings = VideoGenSettings()
+    model = initialize_model(settings.model_path)
+    video_path = generate_video(model=model, settings=settings)
+    print(f"Video generated successfully at: {video_path}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
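Usage sketch (illustrative, not part of the diff): assuming the repository root is on PYTHONPATH and pydantic-settings is installed, the VIDEO_GEN_ prefix configured above lets any field of VideoGenSettings be overridden from environment variables. The values below are placeholders, not real paths or tuned parameters:

import os

# Hypothetical overrides for illustration; the model path is a placeholder, not a real checkpoint.
os.environ["VIDEO_GEN_MODEL_PATH"] = "/path/to/hunyuan/ckpts"
os.environ["VIDEO_GEN_RESOLUTION"] = "960x544"
os.environ["VIDEO_GEN_NUM_INFERENCE_STEPS"] = "30"

from configs.hunyuan_settings import VideoGenSettings

# Environment variables (and any .env file) take precedence over the field defaults.
settings = VideoGenSettings()
print(settings.resolution, settings.num_inference_steps)  # 960x544 30

The script itself would then be run as python scripts/hunyuan_video_inference.py, which constructs VideoGenSettings the same way and passes the resolved values to the sampler.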