For running model inference locally with DiffSynth-Studio, see the link above.
This LoRA is for art-style control. Recommended resolutions: 480*832 or 832*480 (swap width and height in the code below for landscape output).
The later versions are also quite good and worth trying.
Below is the Python code needed to run inference with DiffSynth-Studio:
import torch
from diffsynth import ModelManager, WanVideoPipeline, save_video, VideoData
model_manager = ModelManager(torch_dtype=torch.bfloat16, device="cpu")  # torch.bfloat16 for higher quality; torch.float16 for higher speed
model_manager.load_models([
    [
        "Wan2.1-T2V-14B/diffusion_pytorch_model-00001-of-00006.safetensors",
        "Wan2.1-T2V-14B/diffusion_pytorch_model-00002-of-00006.safetensors",
        "Wan2.1-T2V-14B/diffusion_pytorch_model-00003-of-00006.safetensors",
        "Wan2.1-T2V-14B/diffusion_pytorch_model-00004-of-00006.safetensors",
        "Wan2.1-T2V-14B/diffusion_pytorch_model-00005-of-00006.safetensors",
        "Wan2.1-T2V-14B/diffusion_pytorch_model-00006-of-00006.safetensors",
    ],
    "Wan2.1-T2V-14B/models_t5_umt5-xxl-enc-bf16.pth",
    "Wan2.1-T2V-14B/Wan2.1_VAE.pth",
])  # paths of the models to load (the sharded DiT weights are passed as a nested list)
model_manager.load_lora("", lora_alpha=1.0)  # path to the LoRA file goes here
pipe = WanVideoPipeline.from_model_manager(model_manager, device="cuda")
pipe.enable_vram_management(num_persistent_param_in_dit=None)  # None keeps all DiT weights resident on the GPU
video = pipe(
    prompt="",  # positive prompt goes here
    negative_prompt="过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多",
    num_inference_steps=20,  # default is 50; lowered to speed up generation
    seed=0,
    width=480,   # output resolution (use 832*480 for landscape)
    height=832,
    tiled=True,  # tiled processing lowers per-pass VRAM usage and improves video quality
)
save_video(video, "video.mp4", fps=30, quality=5)
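
If your GPU runs out of memory with all weights resident, `num_persistent_param_in_dit` can instead be set to an integer to cap how many DiT parameters stay on the GPU, with the rest offloaded during sampling. A minimal sketch, reusing the same `pipe` as above; the cap value is illustrative and should be tuned to your card:

# Sketch: trade generation speed for lower VRAM by capping persistent DiT parameters.
# ~6 billion resident parameters is an example value, not a tuned recommendation.
pipe.enable_vram_management(num_persistent_param_in_dit=6*10**9)

With a lower cap, generation is slower because weights are shuttled between CPU and GPU during sampling, but the pipeline fits on cards with less VRAM.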