🔹 This notebook requires a P100 (or higher) GPU flavor. Make sure the runtime flavor matches; you can switch flavors as shown in the figure below.
🔹 Clicking "Run in ModelArts" opens ModelArts CodeLab. You need to log in with a Huawei Cloud account; if you do not have one, register and complete real-name verification by following the guide 《ModelArts准备工作_简易版》. After logging in, wait a moment and the CodeLab runtime will open.
🔹 If an Out Of Memory error occurs, check whether your parameter settings are too high; lower them and restart the kernel, or switch to a higher-spec resource. ❗❗❗
import os
import moxing as mox

# download the sample videos from OBS if they are not already present locally
if not os.path.exists('hand_gesture'):
    mox.file.copy_parallel('obs://modelbox-course/hand_gesture', 'hand_gesture')
INFO:root:Using MoXing-v2.1.0.5d9c87c8-5d9c87c8 INFO:root:Using OBS-Python-SDK-3.20.9.1
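Before continuing, you can optionally confirm that the copy from OBS succeeded. The snippet below is a small sanity check that is not part of the original notebook; it simply counts the downloaded .mp4 files (the sample dataset contains 108 clips).

import glob
# count the video files copied into the hand_gesture directory
print('Downloaded videos: {}'.format(len(glob.glob('hand_gesture/*.mp4'))))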
!conda clean -i
/home/ma-user/anaconda3/lib/python3.7/site-packages/requests/__init__.py:91: RequestsDependencyWarning: urllib3 (1.26.12) or chardet (3.0.4) doesn't match a supported version! RequestsDependencyWarning) Note: you may need to restart the kernel to use updated packages.
!conda install cudatoolkit=11.3.1 cudnn=8.2.1 -y --override-channels --channel https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
!pip install --upgrade pip
!pip install tensorflow==2.5.0 imageio -i https://repo.myhuaweicloud.com/repository/pypi/simple --trusted-host repo.myhuaweicloud.com
!pip install opencv-python
Looking in indexes: http://repo.myhuaweicloud.com/repository/pypi/simple Requirement already satisfied: opencv-python in /home/ma-user/anaconda3/envs/TensorFlow-2.1/lib/python3.7/site-packages (4.1.2.30) Requirement already satisfied: numpy>=1.14.5 in /home/ma-user/anaconda3/envs/TensorFlow-2.1/lib/python3.7/site-packages (from opencv-python) (1.19.5) DEPRECATION: moxing-framework 2.1.0.5d9c87c8 has a non-standard version number. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of moxing-framework or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063
After the installation finishes, be sure to click Kernel -> Restart Kernel in the top-left corner to restart the kernel.
import cv2
import glob
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
%matplotlib inline
2024-11-24 13:24:53.816929: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0 /home/ma-user/anaconda3/envs/TensorFlow-2.1/lib/python3.7/site-packages/requests/__init__.py:104: RequestsDependencyWarning: urllib3 (1.26.12) or chardet (5.2.0)/charset_normalizer (2.0.12) doesn't match a supported version! RequestsDependencyWarning)
Print the TensorFlow version and list the available GPUs
print('Tensorflow version: {}'.format(tf.__version__))
print('GPU available: {}'.format(tf.config.list_physical_devices('GPU')))
Tensorflow version: 2.5.0 GPU available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
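If you run into the Out Of Memory issue mentioned at the top, one optional mitigation (not part of the original notebook) is to let TensorFlow allocate GPU memory on demand instead of reserving it all up front:

# optional: allocate GPU memory on demand to reduce the chance of OOM errors;
# this must run before any tensors are placed on the GPU
for gpu in tf.config.list_physical_devices('GPU'):
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # raised if the GPU has already been initialized
        print(e)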
Build the video input pipeline and read the class labels
videos = glob.glob('hand_gesture/*.mp4')
np.random.shuffle(videos)
labels = [int(video.split('_')[-2]) for video in videos]
videos[:5], len(videos), labels[:5], len(labels)
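The label is taken from the second-to-last underscore-separated field of each file name. The file name below is purely hypothetical and only illustrates the parsing rule:

# hypothetical file name, used only to show how the label is parsed
example = 'hand_gesture/sample_3_001.mp4'
print(example.split('_'))           # ['hand', 'gesture/sample', '3', '001.mp4']
print(int(example.split('_')[-2]))  # 3, i.e. the class label of this clip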
Show the class distribution
from collections import Counter
counts = Counter(labels)
print(counts)
plt.figure(figsize=(8, 4))
plt.bar(counts.keys(), counts.values())
plt.xlabel('Class label')
plt.ylabel('Number of samples')
plt.title('Class distribution in videos')
plt.show()
Counter({0: 34, 6: 13, 2: 13, 5: 12, 4: 12, 3: 12, 1: 12})
Center-crop the image to a square
def crop_center_square(img):
    h, w = img.shape[:2]
    square_w = min(h, w)
    start_x = w // 2 - square_w // 2
    end_x = start_x + square_w
    start_y = h // 2 - square_w // 2
    end_y = start_y + square_w
    result = img[start_y:end_y, start_x:end_x]
    return result
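A quick, optional sanity check of the cropping helper on a dummy frame (not part of the original notebook):

# a 480x640 dummy frame should be cropped to a centered 480x480 square
dummy = np.zeros((480, 640, 3), dtype=np.uint8)
print(crop_center_square(dummy).shape)  # expected: (480, 480, 3)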
MAX_SEQUENCE_LENGTH = 40  # number of frames kept per video
IMG_SIZE = 299            # input resolution expected by InceptionResNetV2
NUM_FEATURES = 1536       # dimensionality of the pooled InceptionResNetV2 features
Frame extraction and preprocessing
def load_video(file_name):
    cap = cv2.VideoCapture(file_name)
    # sample one frame every frame_interval frames
    frame_interval = 4
    frames = []
    count = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # keep one frame out of every frame_interval
        if count % frame_interval == 0:
            # center crop
            frame = crop_center_square(frame)
            # resize
            frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
            # BGR -> RGB: reorder channels [0,1,2] -> [2,1,0]
            frame = frame[:, :, [2, 1, 0]]
            frames.append(frame)
        count += 1
    cap.release()
    return np.array(frames)
Display a sample video
import random
import imageio
from IPython.display import Image
# class id -> gesture name (invalid gesture, swipe up, swipe down, swipe left, swipe right, open, close, zoom in, zoom out)
label_to_name = {0:'无效手势', 1:'上滑', 2:'下滑', 3:'左滑', 4:'右滑', 5:'打开', 6:'关闭', 7:'放大', 8:'缩小'}
print(label_to_name.get(labels[0]))
frames = load_video(videos[0])
frames = frames[:MAX_SEQUENCE_LENGTH].astype(np.uint8)
imageio.mimsave('test.gif', frames, duration=10)
display(Image(open('test.gif', 'rb').read()))
frames.shape
打开
(33, 299, 299, 3)
Create the image feature extractor
def get_feature_extractor():
    feature_extractor = keras.applications.inception_resnet_v2.InceptionResNetV2(
        weights='imagenet',
        include_top=False,
        pooling='avg',
        input_shape=(IMG_SIZE, IMG_SIZE, 3)
    )
    preprocess_input = keras.applications.inception_resnet_v2.preprocess_input
    inputs = keras.Input((IMG_SIZE, IMG_SIZE, 3))
    preprocessed = preprocess_input(inputs)
    outputs = feature_extractor(preprocessed)
    model = keras.Model(inputs, outputs, name='feature_extractor')
    return model
feature_extractor = get_feature_extractor()
feature_extractor.summary()
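Before extracting features for the whole dataset, it can be worth confirming that the extractor produces NUM_FEATURES-dimensional vectors. This optional check is not part of the original notebook:

# one dummy frame should map to a single 1536-dimensional feature vector
dummy_frames = np.zeros((1, IMG_SIZE, IMG_SIZE, 3), dtype=np.float32)
print(feature_extractor.predict(dummy_frames).shape)  # expected: (1, 1536)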
Extract per-frame features for every video
def load_data(videos, labels):
    video_features = []
    for video in tqdm(videos):
        frames = load_video(video)
        counts = len(frames)
        # if the video has fewer than MAX_SEQUENCE_LENGTH frames
        if counts < MAX_SEQUENCE_LENGTH:
            # pad with blank frames
            diff = MAX_SEQUENCE_LENGTH - counts
            # all-zero numpy array
            padding = np.zeros((diff, IMG_SIZE, IMG_SIZE, 3))
            # concatenate
            frames = np.concatenate((frames, padding))
        # keep the first MAX_SEQUENCE_LENGTH frames
        frames = frames[:MAX_SEQUENCE_LENGTH, :]
        # extract features for all frames in one batch
        video_feature = feature_extractor.predict(frames)
        video_features.append(video_feature)
    return np.array(video_features), np.array(labels)
video_features, classes = load_data(videos, labels)
video_features.shape, classes.shape
((108, 40, 1536), (108,))
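Feature extraction is the slowest step of the notebook. If you plan to rerun later cells, you can optionally cache the arrays to disk; the file names below are arbitrary choices, not part of the original notebook:

# optional caching so the InceptionResNetV2 pass does not have to be repeated
np.save('video_features.npy', video_features)
np.save('classes.npy', classes)
# later runs can restore them with:
# video_features, classes = np.load('video_features.npy'), np.load('classes.npy')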
batch_size = 16
dataset = tf.data.Dataset.from_tensor_slices((video_features, classes))
dataset = dataset.shuffle(len(videos))
# hold out roughly 20% of the videos for testing
test_count = int(len(videos) * 0.2)
train_count = len(videos) - test_count
dataset_train = dataset.skip(test_count).cache().repeat()
dataset_test = dataset.take(test_count).cache().repeat()
train_dataset = dataset_train.shuffle(train_count).batch(batch_size)
test_dataset = dataset_test.shuffle(test_count).batch(batch_size)
train_dataset, train_count, test_dataset, test_count
(<BatchDataset shapes: ((None, 40, 1536), (None,)), types: (tf.float32, tf.int64)>, 87, <BatchDataset shapes: ((None, 40, 1536), (None,)), types: (tf.float32, tf.int64)>, 21)
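As an optional check, you can peek at one training batch to confirm the shapes the Transformer will receive:

# each batch should be (batch_size, MAX_SEQUENCE_LENGTH, NUM_FEATURES) features
# together with a vector of integer labels
for feats, labs in train_dataset.take(1):
    print(feats.shape, labs.shape)  # e.g. (16, 40, 1536) (16,)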
# positional encoding
class PositionalEmbedding(layers.Layer):
    def __init__(self, seq_length, output_dim):
        super().__init__()
        # position indices 0 .. seq_length-1
        self.positions = tf.range(start=0, limit=seq_length)
        self.positional_embedding = layers.Embedding(input_dim=seq_length, output_dim=output_dim)

    def call(self, x):
        # embed the positions
        positions_embedding = self.positional_embedding(self.positions)
        # add the positional embeddings to the input features
        return x + positions_embedding
# Transformer encoder
class TransformerEncoder(layers.Layer):
    def __init__(self, num_heads, embed_dim):
        super().__init__()
        self.p_embedding = PositionalEmbedding(MAX_SEQUENCE_LENGTH, NUM_FEATURES)
        self.attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim, dropout=0.1)
        self.layernorm = layers.LayerNormalization()

    def call(self, x):
        # positional embedding
        positional_embedding = self.p_embedding(x)
        # self attention
        attention_out = self.attention(
            query=positional_embedding,
            value=positional_embedding,
            key=positional_embedding,
            attention_mask=None
        )
        # layer norm with residual connection
        output = self.layernorm(positional_embedding + attention_out)
        return output
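The encoder preserves the shape of its input, (batch, MAX_SEQUENCE_LENGTH, NUM_FEATURES). A small optional sketch to verify this on random data:

# the encoder output should have the same shape as its input
dummy = tf.random.normal((2, MAX_SEQUENCE_LENGTH, NUM_FEATURES))
print(TransformerEncoder(2, NUM_FEATURES)(dummy).shape)  # expected: (2, 40, 1536)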
def video_cls_model(class_vocab):
    # number of classes
    classes_num = len(class_vocab)
    # define the model
    model = keras.Sequential([
        layers.InputLayer(input_shape=(MAX_SEQUENCE_LENGTH, NUM_FEATURES)),
        TransformerEncoder(2, NUM_FEATURES),
        layers.GlobalMaxPooling1D(),
        layers.Dropout(0.1),
        layers.Dense(classes_num, activation="softmax")
    ])
    # compile the model
    model.compile(optimizer=keras.optimizers.Adam(1e-5),
                  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy'])
    return model
# instantiate the model
model = video_cls_model(np.unique(labels))
# print the model structure
model.summary()
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
# save the best checkpoint (by validation loss)
checkpoint = ModelCheckpoint(filepath='best.h5', monitor='val_loss', save_weights_only=True, save_best_only=True, verbose=1, mode='min')
# early stopping
earlyStopping = EarlyStopping(monitor='loss', patience=50, mode='min', baseline=None)
# reduce the learning rate when the loss plateaus
rlp = ReduceLROnPlateau(monitor='loss', factor=0.7, patience=30, min_lr=1e-15, mode='min', verbose=1)
history = model.fit(train_dataset,
                    epochs=1000,
                    steps_per_epoch=train_count // batch_size,
                    validation_steps=test_count // batch_size,
                    validation_data=test_dataset,
                    callbacks=[checkpoint, earlyStopping, rlp])
plt.plot(history.epoch, history.history['loss'], 'r', label='loss')
plt.plot(history.epoch, history.history['val_loss'], 'g--', label='val_loss')
plt.title('VIT Model')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
<matplotlib.legend.Legend at 0x7fccd57fe610>
plt.plot(history.epoch, history.history['accuracy'], 'r', label='acc')
plt.plot(history.epoch, history.history['val_accuracy'], 'g--', label='val_acc')
plt.title('VIT Model')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
<matplotlib.legend.Legend at 0x7fccd5583190>
Load the best weights from training
model.load_weights('best.h5')
Evaluate the model
model.evaluate(dataset.batch(batch_size))
7/7 [==============================] - 0s 8ms/step - loss: 0.6422 - accuracy: 0.8519
[0.6422051787376404, 0.8518518805503845]
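To see which gestures the model confuses, an optional follow-up (not in the original notebook) is to compute a confusion matrix over the cached per-video features:

# rows: true labels, columns: predicted labels, over all 108 samples
pred = np.argmax(model.predict(video_features), axis=-1)
print(tf.math.confusion_matrix(classes, pred).numpy())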
Save the model
model.save('saved_model')
2024-11-24 13:28:24.755623: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them. WARNING:absl:Found untraced functions such as positional_embedding_layer_call_fn, positional_embedding_layer_call_and_return_conditional_losses, multi_head_attention_layer_call_fn, multi_head_attention_layer_call_and_return_conditional_losses, layer_normalization_layer_call_fn while saving (showing 5 of 50). These functions will not be directly callable after loading.
INFO:tensorflow:Assets written to: saved_model/assets
import random
# load the saved model
model = tf.keras.models.load_model('saved_model')
# class labels
label_to_name = {0:'无效手势', 1:'上滑', 2:'下滑', 3:'左滑', 4:'右滑', 5:'打开', 6:'关闭', 7:'放大', 8:'缩小'}

# extract features for a single video
def getVideoFeat(frames):
    frames_count = len(frames)
    # if the video has fewer than MAX_SEQUENCE_LENGTH frames
    if frames_count < MAX_SEQUENCE_LENGTH:
        # pad with blank frames
        diff = MAX_SEQUENCE_LENGTH - frames_count
        # all-zero numpy array
        padding = np.zeros((diff, IMG_SIZE, IMG_SIZE, 3))
        # concatenate
        frames = np.concatenate((frames, padding))
    # keep the first MAX_SEQUENCE_LENGTH frames
    frames = frames[:MAX_SEQUENCE_LENGTH, :]
    # per-frame video features, shape (MAX_SEQUENCE_LENGTH, 1536)
    video_feat = feature_extractor.predict(frames)
    return video_feat
# run prediction on a randomly chosen video
def testVideo():
    test_file = random.sample(videos, 1)[0]
    label = test_file.split('_')[-2]

    print('文件名:{}'.format(test_file))
    print('真实类别:{}'.format(label_to_name.get(int(label))))

    # read the video frames
    frames = load_video(test_file)
    # keep the first MAX_SEQUENCE_LENGTH frames for display
    frames = frames[:MAX_SEQUENCE_LENGTH].astype(np.uint8)
    # save as a GIF
    imageio.mimsave('animation.gif', frames, duration=10)
    # extract features
    feat = getVideoFeat(frames)
    # model inference
    prob = model.predict(tf.expand_dims(feat, axis=0))[0]
    print('预测类别:')
    for i in np.argsort(prob)[::-1][:5]:
        print('{}: {}%'.format(label_to_name[i], round(prob[i]*100, 2)))
    return display(Image(open('animation.gif', 'rb').read()))
Run inference on a few videos
for i in range(20):
    testVideo()
Dynamic gesture recognition sample data
This dataset contains 108 short dynamic-gesture videos covering actions such as swipe up, swipe down, swipe left, and swipe right.