Not long ago, OpenAI Sora went viral on the strength of its stunning video generation results, standing out from the crowd of text-to-video models and drawing worldwide attention.
Soon after, the Colossal-AI team released a new open-source solution, "Open-Sora 1.0", covering the entire training pipeline, including data processing, all training details, and model checkpoints, joining hands with AI enthusiasts around the world to advance a new era of video creation.
For details, see: https://hpc-ai.com/blog/open-sora-v1.0
🔹 This case must be run on a Pytorch-1.8 GPU-V100 flavor or higher.
🔹 Clicking Run in ModelArts takes you into ModelArts CodeLab, where you need to log in with a Huawei Cloud account. If you do not have one, register an account and complete real-name verification; refer to 《ModelArts准备工作_简易版》 for both steps. After logging in, wait a moment and you will enter the CodeLab runtime environment.
🔹 If Out Of Memory occurs, check whether your parameter settings are too high; lower the parameters and restart the kernel, or switch to a higher-spec resource to work around it ❗❗❗
import os
import moxing as mox

# Copy the Open-Sora source code and model weights from OBS if not already present
if not os.path.exists('Open-Sora'):
    mox.file.copy_parallel('obs://modelbox-course/Open-Sora', 'Open-Sora')
if not os.path.exists('Open-Sora/opensora/models/pretrained-model'):
    mox.file.copy_parallel('obs://modelbox-course/pretrained-model', 'Open-Sora/opensora/models/pretrained-model')
if not os.path.exists('Open-Sora/opensora/models/sd-vae-ft-ema'):
    mox.file.copy_parallel('obs://modelbox-course/sd-vae-ft-ema', 'Open-Sora/opensora/models/sd-vae-ft-ema')
if not os.path.exists('Open-Sora/opensora/models/text_encoder/t5-v1_1-xxl'):
    mox.file.copy_parallel('obs://modelbox-course/t5-v1_1-xxl', 'Open-Sora/opensora/models/text_encoder/t5-v1_1-xxl')
# frpc binary for Gradio sharing and a patched t5.py used later
if not os.path.exists('/home/ma-user/work/frpc_linux_amd64'):
    mox.file.copy_parallel('obs://modelarts-labs-bj4-v2/course/ModelBox/frpc_linux_amd64', '/home/ma-user/work/frpc_linux_amd64')
if not os.path.exists('/home/ma-user/work/t5.py'):
    mox.file.copy_parallel('obs://modelbox-course/t5.py', '/home/ma-user/work/t5.py')
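The copies can take a while. As an optional sanity check (not part of the original notebook), you can list each model directory to confirm the assets landed:

# Optional: confirm the model assets were copied from OBS.
import os

for path in [
    'Open-Sora/opensora/models/pretrained-model',
    'Open-Sora/opensora/models/sd-vae-ft-ema',
    'Open-Sora/opensora/models/text_encoder/t5-v1_1-xxl',
]:
    print(path, '->', os.listdir(path)[:5] if os.path.exists(path) else 'MISSING')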
This case requires Python 3.10.10 or later, so we first create a virtual environment:
!/home/ma-user/anaconda3/bin/conda clean -i
!/home/ma-user/anaconda3/bin/conda create -n python-3.10.10 python=3.10.10 -y --override-channels --channel https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
!/home/ma-user/anaconda3/envs/python-3.10.10/bin/pip install ipykernel
import json
import os

# Kernel spec for the new python-3.10.10 environment
data = {
    "display_name": "python-3.10.10",
    "env": {
        "PATH": "/home/ma-user/anaconda3/envs/python-3.10.10/bin:/home/ma-user/anaconda3/envs/python-3.7.10/bin:/modelarts/authoring/notebook-conda/bin:/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/home/ma-user/modelarts/ma-cli/bin:/home/ma-user/modelarts/ma-cli/bin:/home/ma-user/anaconda3/envs/PyTorch-1.8/bin"
    },
    "language": "python",
    "argv": [
        "/home/ma-user/anaconda3/envs/python-3.10.10/bin/python",
        "-m",
        "ipykernel",
        "-f",
        "{connection_file}"
    ]
}

# Register the environment as a Jupyter kernel
if not os.path.exists("/home/ma-user/anaconda3/share/jupyter/kernels/python-3.10.10/"):
    os.mkdir("/home/ma-user/anaconda3/share/jupyter/kernels/python-3.10.10/")
with open('/home/ma-user/anaconda3/share/jupyter/kernels/python-3.10.10/kernel.json', 'w') as f:
    json.dump(data, f, indent=4)
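As an optional check, you can read the kernel spec back to confirm it was written correctly:

# Optional: read the kernel spec back for verification.
import json

with open('/home/ma-user/anaconda3/share/jupyter/kernels/python-3.10.10/kernel.json') as f:
    print(json.dumps(json.load(f), indent=4))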
!conda env list
Once creation completes, wait a moment or refresh the page, then click the kernel selector in the upper-right corner and choose python-3.10.10.
!python -V
Python 3.10.10
!nvidia-smi
!pip install --upgrade pip
!pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 xformers==0.0.22
!pip install gradio MoviePy -i https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn
!cp /home/ma-user/work/frpc_linux_amd64 /home/ma-user/anaconda3/envs/python-3.10.10/lib/python3.10/site-packages/gradio/frpc_linux_amd64_v0.2
!chmod +x /home/ma-user/anaconda3/envs/python-3.10.10/lib/python3.10/site-packages/gradio/frpc_linux_amd64_v0.2
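Gradio's public-URL sharing relies on this frpc binary. An optional check that it is in place and executable:

# Optional: verify the frpc binary Gradio needs for share=True.
import os

frpc = '/home/ma-user/anaconda3/envs/python-3.10.10/lib/python3.10/site-packages/gradio/frpc_linux_amd64_v0.2'
print('exists:', os.path.exists(frpc), '| executable:', os.access(frpc, os.X_OK))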
%cd Open-Sora
/home/ma-user/work/ma_share/open-sora/Open-Sora
!pip install colossalai==0.3.6 accelerate==0.29.2 diffusers==0.27.2 ftfy==6.2.0 gdown==5.1.0 mmengine==0.10.3 pre-commit==3.7.0 pyav==12.0.5 tensorboard==2.16.2 timm==0.9.16 transformers==4.39.3 wandb==0.16.6 numpy==1.26.4
!pip install .
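As an optional sanity check (assuming the package installed cleanly), confirm that opensora is importable from the new environment:

# Optional: the opensora package should now be importable.
import opensora
print(opensora.__file__)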
%%writefile configs/opensora/inference/16x256x256_test.py
num_frames = 16
fps = 24 // 3
image_size = (256, 256)

# Define model
model = dict(
    type="STDiT-XL/2",
    space_scale=0.5,
    time_scale=1.0,
    enable_flashattn=False,
    enable_layernorm_kernel=False,
    from_pretrained="./opensora/models/pretrained-model/OpenSora-v1-HQ-16x256x256.pth",
)
vae = dict(
    type="VideoAutoencoderKL",
    from_pretrained="./opensora/models/sd-vae-ft-ema",
    micro_batch_size=4,
)
text_encoder = dict(
    type="t5",
    from_pretrained="./opensora/models/text_encoder/t5-v1_1-xxl",
    model_max_length=120,
)
scheduler = dict(
    type="iddpm",
    num_sampling_steps=100,
    cfg_scale=7.0,
    cfg_channel=3,  # or None
)
dtype = "fp16"

# Others
batch_size = 1
seed = 42
prompt_path = "./assets/texts/t2v_samples.txt"
save_dir = "./outputs/samples/"
Writing configs/opensora/inference/16x256x256_test.py
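If you want to sanity-check the config before running inference, mmengine can load such Python-style configs directly; a minimal optional sketch:

# Optional: load the config with mmengine and print the key fields.
from mmengine import Config

cfg = Config.fromfile('configs/opensora/inference/16x256x256_test.py')
print(cfg.model.type, cfg.num_frames, cfg.image_size, cfg.dtype)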
!cp /home/ma-user/work/t5.py /home/ma-user/anaconda3/envs/python-3.10.10/lib/python3.10/site-packages/opensora/models/text_encoder/t5.py
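Inference reads its prompts from the prompt_path set in the config. Optionally, preview the first few prompts that will be generated:

# Optional: preview the text prompts that inference will consume.
with open('assets/texts/t2v_samples.txt') as f:
    for line in f.readlines()[:3]:
        print(line.strip())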
!torchrun --standalone --nproc_per_node 1 scripts/inference.py configs/opensora/inference/16x256x256_test.py
The generated videos are saved in the Open-Sora/outputs folder. Let's pick one at random to view:
import os
import random
from moviepy.editor import VideoFileClip
from IPython.display import Image

# Directory where the generated videos are stored
video_root = 'outputs/samples'
# List all files
videos = os.listdir(video_root)
# Pick one video at random
video = random.sample(videos, 1)[0]
# Input path of the video
video_path = os.path.join(video_root, video)
# Load the original video
clip = VideoFileClip(video_path)
# Save it as a GIF
clip.write_gif("output_animation.gif", fps=10)
# Display the result
Image(open('output_animation.gif', 'rb').read())
MoviePy - Building file output_animation.gif with imageio.
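Converting to GIF is only for easy inline display; as an optional alternative, IPython can embed the mp4 directly (assuming your notebook front end supports HTML5 video):

# Optional alternative: embed the generated mp4 directly instead of a GIF.
from IPython.display import Video

Video(video_path, embed=True, width=512)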
%%writefile scripts/inference-gradio.py
import os

import gradio as gr
import torch
import colossalai
import torch.distributed as dist
from mmengine.runner import set_random_seed

from opensora.datasets import save_sample
from opensora.registry import MODELS, SCHEDULERS, build_module
from opensora.utils.config_utils import parse_configs
from opensora.utils.misc import to_torch_dtype
from opensora.acceleration.parallel_states import set_sequence_parallel_group
from colossalai.cluster import DistCoordinator


def main():
    # ======================================================
    # 1. cfg and init distributed env
    # ======================================================
    cfg = parse_configs(training=False)
    print(cfg)

    # init distributed
    colossalai.launch_from_torch({})
    coordinator = DistCoordinator()

    if coordinator.world_size > 1:
        set_sequence_parallel_group(dist.group.WORLD)
        enable_sequence_parallelism = True
    else:
        enable_sequence_parallelism = False

    # ======================================================
    # 2. runtime variables
    # ======================================================
    torch.set_grad_enabled(False)
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = to_torch_dtype(cfg.dtype)
    set_random_seed(seed=cfg.seed)

    # ======================================================
    # 3. build model & load weights
    # ======================================================
    # 3.1. build model
    input_size = (cfg.num_frames, *cfg.image_size)
    vae = build_module(cfg.vae, MODELS)
    latent_size = vae.get_latent_size(input_size)
    text_encoder = build_module(cfg.text_encoder, MODELS, device=device)  # T5 must be fp32
    model = build_module(
        cfg.model,
        MODELS,
        input_size=latent_size,
        in_channels=vae.out_channels,
        caption_channels=text_encoder.output_dim,
        model_max_length=text_encoder.model_max_length,
        dtype=dtype,
        enable_sequence_parallelism=enable_sequence_parallelism,
    )
    text_encoder.y_embedder = model.y_embedder  # hack for classifier-free guidance

    # 3.2. move to device & eval
    vae = vae.to(device, dtype).eval()
    model = model.to(device, dtype).eval()

    # 3.3. build scheduler
    scheduler = build_module(cfg.scheduler, SCHEDULERS)

    # 3.4. support for multi-resolution
    model_args = dict()
    if cfg.multi_resolution:
        image_size = cfg.image_size
        hw = torch.tensor([image_size], device=device, dtype=dtype).repeat(cfg.batch_size, 1)
        ar = torch.tensor([[image_size[0] / image_size[1]]], device=device, dtype=dtype).repeat(cfg.batch_size, 1)
        model_args["data_info"] = dict(ar=ar, hw=hw)

    # ======================================================
    # 4. inference
    # ======================================================
    # 4.1. inference code
    @torch.no_grad()
    def run_inference(prompt_text):
        save_dir = cfg.save_dir
        torch.cuda.empty_cache()
        print("Prompt:", prompt_text)
        os.makedirs(save_dir, exist_ok=True)
        samples = scheduler.sample(
            model,
            text_encoder,
            z_size=(vae.out_channels, *latent_size),
            prompts=[prompt_text],
            device=device,
            additional_args=model_args,
        )
        samples = vae.decode(samples.to(dtype))
        save_path = os.path.join(save_dir, "sample")
        saved_path = save_sample(samples[0], fps=cfg.fps, save_path=save_path)
        return saved_path

    # 4.2. clear input
    def reset_user_input():
        return gr.update(value='')

    # 4.3. gradio app
    with gr.Blocks() as demo:
        gr.HTML("""<h1 align="center">Open-Sora 1.0 Text-to-Video</h1>""")
        with gr.Row():
            with gr.Column():
                prompt_text = gr.Textbox(label="Prompt", placeholder="Describe your video here", lines=4)
                submit_button = gr.Button("Generate video")
            with gr.Column():
                output_video = gr.Video(width=512, height=512)
        submit_button.click(run_inference, [prompt_text], [output_video], show_progress=True)
        submit_button.click(reset_user_input, [], [prompt_text])
        gr.Examples(
            examples=[
                ["A vibrant underwater scene. A group of blue fish, with yellow fins, are swimming around a coral reef. The coral reef is a mix of brown and green, providing a natural habitat for the fish. The water is a deep blue, indicating a depth of around 30 feet. The fish are swimming in a circular pattern around the coral reef, indicating a sense of motion and activity. The overall scene is a beautiful representation of marine life."],
                ["A serene night scene in a forested area. The first frame shows a tranquil lake reflecting the star-filled sky above. The second frame reveals a beautiful sunset, casting a warm glow over the landscape. The third frame showcases the night sky, filled with stars and a vibrant Milky Way galaxy. The video is a time-lapse, capturing the transition from day to night, with the lake and forest serving as a constant backdrop. The style of the video is naturalistic, emphasizing the beauty of the night sky and the peacefulness of the forest."],
                ["A serene underwater scene featuring a sea turtle swimming through a coral reef. The turtle, with its greenish-brown shell, is the main focus of the video, swimming gracefully towards the right side of the frame. The coral reef, teeming with life, is visible in the background, providing a vibrant and colorful backdrop to the turtle's journey. Several small fish, darting around the turtle, add a sense of movement and dynamism to the scene. The video is shot from a slightly elevated angle, providing a comprehensive view of the turtle's surroundings. The overall style of the video is calm and peaceful, capturing the beauty and tranquility of the underwater world."]
            ],
            inputs=[prompt_text]
        )

    demo.queue().launch(share=True, inbrowser=True)


if __name__ == "__main__":
    main()
Writing scripts/inference-gradio.py
!torchrun --standalone --nproc_per_node 1 scripts/inference-gradio.py configs/opensora/inference/16x256x256_test.py
Run the Gradio UI; once it starts successfully, click the link after "Running on public URL" to try it out!
| Version | Version ID | Release time | Status | Notes |
| --- | --- | --- | --- | --- |
| 19.0.0 | fW3mxD | 2024-06-27 14:46 | Completed | -- |