-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\gather_d_grad_cpu_kernel.cc annotated

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// project headers
#include "backend/kernel_compiler/cpu/gather_d_grad_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
namespace {            // anonymous namespace
// compute the number of elements described by a shape
size_t get_element_num(const std::vector<size_t> &shape) {
  size_t size = 1;  // initialize the accumulator
  for (size_t i = 0; i < shape.size(); i++) {  // multiply all dimensions together
    size *= shape[i];
  }
  return size;
}

// recursive copy task: walks every output coordinate and scatters the gradient
template <typename T, typename I>
void GatherDGradCopyTask(size_t cur, std::vector<size_t> *pos, T *input, I *index, const int &dim, T *output,
                         const std::vector<size_t> &output_shape, const std::vector<size_t> &out_cargo_size,
                         const std::vector<size_t> &input_cargo_size) {
  for (size_t i = 0; i < output_shape[cur]; ++i) {
    (*pos)[cur] = i;
    if (cur == output_shape.size() - 1) {
      size_t input_offset = 0;
      size_t out_offset = 0;
      // output offset
      for (size_t j = 0; j < output_shape.size(); ++j) {
        out_offset += (*pos)[j] * out_cargo_size[j];
      }
      // input offset: swap the coordinate along `dim` for the stored index
      size_t cur_index = (*pos)[dim];
      (*pos)[dim] = index[out_offset];
      for (size_t j = 0; j < output_shape.size(); ++j) {
        input_offset += (*pos)[j] * input_cargo_size[j];
      }
      // accumulate the gradient
      input[input_offset] += output[out_offset];
      (*pos)[dim] = cur_index;
    } else {
      // recurse into the next dimension
      GatherDGradCopyTask(cur + 1, pos, input, index, dim, output, output_shape, out_cargo_size, input_cargo_size);
    }
  }
}
}  // namespace

// validate the input shapes and cache the kernel attributes
template <typename I, typename T>
void GatherDGradCPUKernel<I, T>::InitKernel(const CNodePtr &kernel_node) {
  index_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
  input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
  if (input_shape_ != index_shape_) {
    MS_LOG(EXCEPTION) << "Invalid shape size, input and index shape should be equal";
  }
  axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, DIM);       // the gather dimension
  output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);  // shape of the output
}

template <typename I, typename T>
bool GatherDGradCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                        const std::vector<kernel::AddressPtr> & /*workspace*/,
                                        const std::vector<kernel::AddressPtr> &outputs) {
  size_t input_size = get_element_num(input_shape_) * sizeof(T);    // input buffer size in bytes
  size_t index_size = get_element_num(index_shape_) * sizeof(I);    // index buffer size in bytes
  size_t output_size = get_element_num(output_shape_) * sizeof(T);  // output buffer size in bytes
  if (inputs[0]->size != index_size || inputs[1]->size != input_size || outputs[0]->size != output_size) {
    MS_LOG(EXCEPTION) << "invalid input or output data size!";
    return false;
  }
  auto index = reinterpret_cast<I *>(inputs[0]->addr);  // index buffer
  auto input = reinterpret_cast<T *>(inputs[1]->addr);  // incoming gradient buffer
  auto out = reinterpret_cast<T *>(outputs[0]->addr);   // output buffer
  int output_rank = SizeToInt(output_shape_.size());
  if (axis_ >= output_rank || axis_ < -output_rank) {
    MS_LOG(EXCEPTION) << "The value of 'axis_' should be in [" << -output_rank << ", " << output_rank
                      << "], but got: " << axis_;
    return false;
  }
  if (axis_ < 0) {
    axis_ = axis_ + SizeToInt(output_shape_.size());  // wrap a negative axis
  }

  // check the index values
  index_size = get_element_num(index_shape_);
  int max_index = SizeToInt(output_shape_[axis_]);
  for (size_t i = 0; i < index_size; ++i) {
    if (index[i] >= max_index || index[i] < -max_index) {
      MS_LOG(EXCEPTION) << "The value of index should be in [" << -max_index << ", " << max_index
                        << "], but got: " << index[i];
      return false;
    }
    if (index[i] < 0) {
      index[i] = max_index + index[i];  // wrap negative indices
    }
  }

  auto out_size = get_element_num(output_shape_);
  memset_s(out, out_size * sizeof(T), 0x00, out_size * sizeof(T));

  // strides ("cargo sizes") of the output shape
  std::vector<size_t> out_cargo_size = std::vector<size_t>(output_shape_.size(), 1);
  for (int i = out_cargo_size.size() - 2; i >= 0; --i) {
    out_cargo_size[i] = output_shape_[i + 1] * out_cargo_size[i + 1];
  }
  // strides of the input shape
  std::vector<size_t> input_cargo_size = std::vector<size_t>(input_shape_.size(), 1);
  for (int i = input_cargo_size.size() - 2; i >= 0; --i) {
    input_cargo_size[i] = input_shape_[i + 1] * input_cargo_size[i + 1];
  }

  // scatter the incoming gradient back into the zeroed output buffer
  std::vector<size_t> pos(index_shape_.size(), 0);
  GatherDGradCopyTask(0, &pos, out, index, axis_, input, index_shape_, input_cargo_size, out_cargo_size);
  return true;
}
}  // namespace kernel
}  // namespace mindspore
```
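The "cargo size" vectors are plain row-major strides: cargo[i] is how many elements one step along dimension i skips, so the flat offset of a coordinate is the dot product of position and stride. A standalone sketch of that computation (hypothetical helper name, not part of the kernel):

```c++
#include <cstddef>
#include <iostream>
#include <vector>

// Row-major strides: stride[i] is the product of all dimensions after i.
std::vector<size_t> CargoSize(const std::vector<size_t> &shape) {
  std::vector<size_t> cargo(shape.size(), 1);
  for (int i = static_cast<int>(cargo.size()) - 2; i >= 0; --i) {
    cargo[i] = shape[i + 1] * cargo[i + 1];
  }
  return cargo;
}

int main() {
  for (size_t s : CargoSize({2, 3, 4})) {
    std::cout << s << ' ';  // prints "12 4 1"
  }
}
```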
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\gather_d_cpu_kernel.cc annotated

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "backend/kernel_compiler/cpu/gather_d_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
namespace {            // anonymous namespace
// compute the number of elements described by a shape
size_t get_element_num(const std::vector<size_t> &shape) {
  size_t size = 1;
  for (size_t i = 0; i < shape.size(); i++) {  // multiply all dimensions together
    size *= shape[i];
  }
  return size;  // total element count
}

// recursive copy task shared by the forward and reverse directions
template <typename T, typename I>
void CopyTask(size_t cur, std::vector<size_t> *pos, T *input, const I *index, const int &dim, T *output,
              const std::vector<size_t> &output_shape, const std::vector<size_t> &out_cargo_size,
              const std::vector<size_t> &input_cargo_size, bool reverse) {
  for (size_t i = 0; i < output_shape[cur]; ++i) {
    (*pos)[cur] = i;
    if (cur == output_shape.size() - 1) {
      size_t input_offset = 0;
      size_t out_offset = 0;
      // output offset
      for (size_t j = 0; j < output_shape.size(); ++j) {
        out_offset += (*pos)[j] * out_cargo_size[j];
      }
      // input offset
      size_t cur_index = (*pos)[dim];
      (*pos)[dim] = index[out_offset];
      for (size_t j = 0; j < output_shape.size(); ++j) {
        input_offset += (*pos)[j] * input_cargo_size[j];
      }
      // copy in the requested direction
      if (reverse) {
        input[input_offset] = output[out_offset];
      } else {
        output[out_offset] = input[input_offset];
      }
      (*pos)[dim] = cur_index;
    } else {
      // recurse into the next dimension
      CopyTask(cur + 1, pos, input, index, dim, output, output_shape, out_cargo_size, input_cargo_size, reverse);
    }
  }
}
}  // namespace

template <typename T, typename I>
void GatherDCPUKernel<T, I>::InitKernel(const CNodePtr &kernel_node) {
  input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
  index_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
  if (input_shape_.size() != index_shape_.size()) {  // ranks of input and index must match
    MS_LOG(EXCEPTION) << "Invalid shape size, shape size of input: " << input_shape_.size()
                      << ", and index: " << index_shape_.size() << " should be equal";
  }
  output_shape_ = index_shape_;  // the output has the same shape as the index
}

template <typename T, typename I>
bool GatherDCPUKernel<T, I>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                    const std::vector<kernel::AddressPtr> & /*workspace*/,
                                    const std::vector<kernel::AddressPtr> &outputs) {
  size_t input_size = get_element_num(input_shape_) * sizeof(T);    // input buffer size in bytes
  size_t index_size = get_element_num(index_shape_) * sizeof(I);    // index buffer size in bytes
  size_t dim_size = sizeof(int);                                    // the dim input is a single int
  size_t output_size = get_element_num(output_shape_) * sizeof(T);  // output buffer size in bytes
  if (inputs[0]->size != input_size || inputs[1]->size != dim_size || inputs[2]->size != index_size ||
      outputs[0]->size != output_size) {
    MS_LOG(EXCEPTION) << "invalid input or output data size!";
    return false;
  }
  auto input = reinterpret_cast<T *>(inputs[0]->addr);      // input buffer
  auto dim = reinterpret_cast<int32_t *>(inputs[1]->addr);  // gather dimension
  auto index = reinterpret_cast<I *>(inputs[2]->addr);      // index buffer
  auto output = reinterpret_cast<T *>(outputs[0]->addr);    // output buffer
  int32_t input_rank = SizeToInt(input_shape_.size());      // rank of the input
  if (dim[0] >= input_rank || dim[0] < -input_rank) {
    MS_LOG(EXCEPTION) << "The value of 'dim' should be in [" << -input_rank << ", " << input_rank
                      << "], but got: " << dim[0];
    return false;
  }
  if (dim[0] < 0) {
    dim[0] = static_cast<int>(dim[0] + input_rank);  // wrap a negative dim
  }

  // check the index values
  int max_index = SizeToInt(input_shape_[dim[0]]);
  index_size = get_element_num(index_shape_);
  for (size_t i = 0; i < index_size; ++i) {
    if (index[i] >= max_index || index[i] < -max_index) {
      MS_LOG(EXCEPTION) << "The value of index should be in [" << -max_index << ", " << max_index
                        << "], but got: " << index[i];
      return false;
    }
    if (index[i] < 0) {
      index[i] = max_index + index[i];  // wrap negative indices
    }
  }

  // strides of the output shape
  std::vector<size_t> out_cargo_size = std::vector<size_t>(output_shape_.size(), 1);
  for (int i = out_cargo_size.size() - 2; i >= 0; --i) {
    out_cargo_size[i] = output_shape_[i + 1] * out_cargo_size[i + 1];
  }
  // strides of the input shape
  std::vector<size_t> input_cargo_size = std::vector<size_t>(input_shape_.size(), 1);
  for (int i = input_cargo_size.size() - 2; i >= 0; --i) {
    input_cargo_size[i] = input_shape_[i + 1] * input_cargo_size[i + 1];
  }

  // run the copy task in the forward direction
  std::vector<size_t> pos(index_shape_.size(), 0);
  int copy_dim = *dim;
  CopyTask<T, I>(0, &pos, input, index, copy_dim, output, output_shape_, out_cargo_size, input_cargo_size, false);
  return true;
}
}  // namespace kernel
}  // namespace mindspore
```
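Stripped of the recursion, CopyTask in the forward direction implements the usual GatherD rule: every output coordinate is copied from the input coordinate obtained by swapping the position along `dim` for the value stored in `index`. A self-contained 2-D illustration (plain arrays rather than the kernel's buffers):

```c++
#include <iostream>

int main() {
  // GatherD with dim = 1 on a 2x3 input: output[i][j] = input[i][index[i][j]].
  int input[2][3] = {{10, 11, 12}, {20, 21, 22}};
  int index[2][3] = {{2, 1, 0}, {0, 0, 2}};
  int output[2][3];
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) {
      output[i][j] = input[i][index[i][j]];
    }
  }
  // Row 0 becomes {12, 11, 10}; row 1 becomes {20, 20, 22}.
  for (auto &row : output) {
    for (int v : row) std::cout << v << ' ';
    std::cout << '\n';
  }
}
```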
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\gather_cpu_kernel.cc annotated

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// project headers
#include "backend/kernel_compiler/cpu/gather_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "nnacl/gather_parameter.h"
#include "nnacl/base/gather_base.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
void GatherV2CPUKernel::InitKernel(const CNodePtr &kernel_node) {  // initialize the kernel
  CheckParam(kernel_node);  // validate the arguments
  input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);    // input shape
  indices_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);  // indices shape
  output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);           // output shape
  axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);               // read the axis attribute
  if (axis_ < 0) {  // wrap a negative axis
    axis_ = axis_ + SizeToLong(input_shape_.size());
  }
  axis_ += 4 - input_shape_.size();  // shift the axis after the shapes are padded to 4-D
  CPUKernelUtils::ExpandDimsTo4(&input_shape_);
  CPUKernelUtils::ExpandDimsTo4(&output_shape_);
}

int GatherV2CPUKernel::GatherLaunch(int8_t *input_data, int8_t *output_data, size_t size) {
  int in_rank = input_shape_.size();              // rank of the input
  int indices_element_size = 1;                   // total number of indices
  const int limit = input_shape_.at(axis_);       // extent of the gathered axis
  size_t data_size = sizeof(kNumberTypeFloat32);  // byte width of one element
  int outer_size = 1, inner_size = 1;
  for (int i = 0; i < axis_; ++i) {  // product of the dimensions before the axis
    outer_size *= input_shape_.at(i);
  }
  for (int i = axis_ + 1; i < in_rank; ++i) {  // product of the dimensions after the axis
    inner_size *= input_shape_.at(i);
  }
  for (size_t i = 0; i < indices_shape_.size(); i++) {  // count the indices
    indices_element_size *= indices_shape_.at(i);
  }
  int stride = UP_DIV(outer_size, size);       // outer rows per worker (ceiling division)
  auto task = [&](size_t start, size_t end) {  // work item for ParallelFor
    for (size_t i = start; i < end; i++) {
      int8_t *int8_in = input_data;    // input bytes
      int8_t *int8_out = output_data;  // output bytes
      int count = MSMIN(stride, static_cast<int>(outer_size - stride * i));
      if (count <= 0) {  // the tail worker may have no rows left
        return;
      }
      auto thread_stride = stride * i;  // first outer row handled by this worker
      int8_in += thread_stride * limit * inner_size * data_size;                  // input bytes to skip
      int8_out += thread_stride * indices_element_size * inner_size * data_size;  // output bytes to skip
      auto error_code = Gather(int8_in, count, inner_size, limit, indices_data_, indices_element_size, int8_out,
                               sizeof(float));
      if (error_code != 0) {
        MS_LOG(ERROR) << "GatherRun error task_id[" << i << "] error_code[" << error_code << "]";
      }
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
  return 0;
}

// launch the kernel
bool GatherV2CPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                               const std::vector<kernel::AddressPtr> & /*workspace*/,
                               const std::vector<kernel::AddressPtr> &outputs) {
  int8_t *input_tensor = reinterpret_cast<int8_t *>(inputs[0]->addr);  // input tensor
  indices_data_ = reinterpret_cast<int32_t *>(inputs[1]->addr);        // indices
  int8_t *output_addr = reinterpret_cast<int8_t *>(outputs[0]->addr);  // output buffer
  size_t size = (outputs[0]->size > 0) ? static_cast<size_t>(outputs[0]->size / sizeof(int8_t)) : 1;
  GatherLaunch(input_tensor, output_addr, size);
  return true;
}

// validate the arguments
void GatherV2CPUKernel::CheckParam(const CNodePtr &kernel_node) {
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  if (input_shape.size() > 4) {  // only rank 4 or lower is supported
    MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but GatherV2CPUKernel olny support 4d or lower.";
  }
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 2) {  // exactly two inputs are expected
    MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but GatherV2CPUKernel needs 2.";
  }
}
}  // namespace kernel
}  // namespace mindspore
```
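GatherLaunch divides the outer dimension among the ParallelFor workers with UP_DIV (ceiling division), so the last workers can receive a short or even empty slice; that is what the `count <= 0` early return guards against. The partition arithmetic on its own (a sketch with made-up sizes):

```c++
#include <algorithm>
#include <cstdio>

#define UP_DIV(x, y) (((x) + (y) - 1) / (y))  // ceiling division

int main() {
  int outer_size = 10, workers = 4;
  int stride = UP_DIV(outer_size, workers);  // 3 outer rows per worker
  for (int i = 0; i < workers; ++i) {
    int count = std::min(stride, outer_size - stride * i);
    if (count <= 0) continue;  // trailing workers may get nothing
    std::printf("worker %d handles rows [%d, %d)\n", i, stride * i, stride * i + count);
  }
}
```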
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\embedding_look_up_cpu_kernel.cc annotated

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// standard headers
#include <string>
#include <thread>
// project headers
#include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "ir/primitive.h"
#include "common/thread_pool.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
namespace {            // anonymous namespace
// copy the table rows selected by one slice of indices; out-of-range rows become zeros
template <typename T>
void LookUpTableTask(const float *input_addr, const T *indices_addr, float *output_addr, size_t indices_lens,
                     size_t outer_dim_size, T offset, size_t first_dim_size) {
  auto type_size = sizeof(float);            // byte width of one element
  size_t lens = outer_dim_size * type_size;  // byte length of one table row
  for (size_t i = 0; i < indices_lens; ++i) {
    T index = indices_addr[i] - offset;  // table row selected by this index
    if (index >= 0 && index < SizeToInt(first_dim_size)) {
      size_t pos = index * outer_dim_size;  // element offset of the selected row
      auto ret = memcpy_s(output_addr, (indices_lens - i) * lens, input_addr + pos, lens);
      if (ret != EOK) {
        MS_LOG(EXCEPTION) << "LookUpTable task memcpy failed.";
      }
    } else {
      auto ret = memset_s(output_addr, (indices_lens - i) * lens, 0, lens);
      if (ret != EOK) {
        MS_LOG(EXCEPTION) << "LookUpTable task memset failed.";
      }
    }
    output_addr += outer_dim_size;  // advance to the next output row
  }
}
}  // namespace

void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  CheckParam(kernel_node);
  node_wpt_ = kernel_node;
  std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  if (input_shape.empty()) {
    MS_LOG(EXCEPTION) << "param must be at least 1D";  // the table must be at least one-dimensional
  }
  first_dim_size_ = input_shape[0];  // number of table rows
  outer_dim_size_ = 1;
  for (size_t i = 1; i < input_shape.size(); ++i) {  // elements per table row
    outer_dim_size_ *= input_shape[i];
  }
  indices_lens_ = 1;
  std::vector<size_t> indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
  for (const auto &shape : indices_shape) {  // total number of indices
    indices_lens_ *= shape;
  }
  indices_data_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 1);  // data type of the indices
  if (AnfAlgo::HasNodeAttr(kAttrOffset, kernel_node)) {
    offset_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, kAttrOffset);
  }
}

// launch the typed kernel
template <typename T>
void EmbeddingLookUpCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
                                            const std::vector<kernel::AddressPtr> &outputs) {
  if (!node_wpt_.expired()) {
    auto node_ = node_wpt_.lock();
    if (!node_) {
      MS_LOG(EXCEPTION) << "node_wpt_ is expired.";  // the node is no longer alive
    }
    std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 0);
    if (input_shape.empty()) {
      MS_LOG(EXCEPTION) << "param must be at least 1D";
    }
    first_dim_size_ = input_shape[0];
    outer_dim_size_ = 1;
    for (size_t i = 1; i < input_shape.size(); ++i) {
      outer_dim_size_ *= input_shape[i];
    }
    indices_lens_ = 1;
    std::vector<size_t> indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 1);
    for (const auto &shape : indices_shape) {  // recompute the number of indices
      indices_lens_ *= shape;
    }
  }
  auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);    // table address
  auto indices_addr = reinterpret_cast<T *>(inputs[1]->addr);      // indices address
  auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);  // output address
  size_t thread_num = indices_lens_ / 10000 + 1;  // roughly one thread per 10000 indices
  auto max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum();  // pool capacity
  thread_num = thread_num > max_thread_num ? max_thread_num : thread_num;  // clamp to the pool capacity
  std::vector<common::Task> tasks;
  size_t task_proc_lens = (indices_lens_ + thread_num - 1) / thread_num;  // indices per task
  size_t i;
  size_t task_offset = 0;  // first index handled by the next task
  MS_LOG(DEBUG) << "indices_lens_: " << indices_lens_ << " one task proc lens:" << task_proc_lens;
  for (i = 0; i < thread_num; i++) {
    if (task_offset >= indices_lens_) {  // every index is already assigned
      break;
    }
    MS_LOG(DEBUG) << "task_offset: " << task_offset << " task_proc_lenss:" << task_proc_lens;
    auto task = [input_addr, indices_addr, output_addr, task_offset, task_proc_lens, this]() {
      LookUpTableTask(input_addr, indices_addr + task_offset, output_addr + task_offset * outer_dim_size_,
                      task_proc_lens, outer_dim_size_, offset_, first_dim_size_);
      return common::SUCCESS;
    };
    tasks.emplace_back(task);
    task_offset += task_proc_lens;
    if (task_offset + task_proc_lens > indices_lens_) {  // shrink the last slice
      task_proc_lens = indices_lens_ - task_offset;
    }
  }
  common::ThreadPool::GetInstance().SyncRun(tasks);
}

// dispatch on the index data type
bool EmbeddingLookUpCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                      const std::vector<kernel::AddressPtr> & /*workspace*/,
                                      const std::vector<kernel::AddressPtr> &outputs) {
  if (indices_data_type_ == kNumberTypeInt32) {
    LaunchKernel<int>(inputs, outputs);
  } else {
    LaunchKernel<int64_t>(inputs, outputs);
  }
  return true;
}

void EmbeddingLookUpCPUKernel::CheckParam(const CNodePtr &kernel_node) {
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  if (input_shape.size() > 4) {  // only rank 4 or lower is supported
    MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size()
                      << ", but EmbeddingLookUpCPUKernel only support 4d or lower.";
  }
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 2) {  // exactly two inputs are expected
    MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookUpCPUKernel needs 2.";
  }
}
}  // namespace kernel
}  // namespace mindspore
```
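LookUpTableTask treats the first input as a table of first_dim_size rows, each outer_dim_size elements long: every (offset-shifted) index copies one row, and an out-of-range index produces a zero row. The same semantics without the raw pointers and memcpy_s, as a hedged sketch:

```c++
#include <cstddef>
#include <vector>

// Simplified model of the lookup: one output row per index,
// zeros when the shifted index falls outside the table.
std::vector<float> LookUp(const std::vector<std::vector<float>> &table,
                          const std::vector<int> &indices, int offset) {
  size_t row_len = table[0].size();
  std::vector<float> out;
  for (int idx : indices) {
    int row = idx - offset;
    if (row >= 0 && row < static_cast<int>(table.size())) {
      out.insert(out.end(), table[row].begin(), table[row].end());  // copy the row
    } else {
      out.insert(out.end(), row_len, 0.0f);  // out of range: zero fill
    }
  }
  return out;
}
```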
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\embedding_look_up_comm_grad_cpu_kernel.cc annotated

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// standard headers
#include <thread>
// project headers
#include "backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "runtime/device/cpu/mpi/mpi_interface.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
void EmbeddingLookUpCommGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  CheckParam(kernel_node);
  split_num_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "split_num");  // number of slices
  MS_LOG(INFO) << "split_num: " << split_num_;  // log the slice count
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);  // input shape
  if (input_shape[0] % split_num_ != 0) {  // the first dimension must divide evenly into the slices
    MS_LOG(EXCEPTION) << "Input shape[0] is " << input_shape[0] << ", but it must be multiple of split_num.";
  }
}

bool EmbeddingLookUpCommGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                              const std::vector<kernel::AddressPtr> & /*workspace*/,
                                              const std::vector<kernel::AddressPtr> &outputs) {
#if defined(_WIN32) || defined(_WIN64)
  auto start_time = std::chrono::steady_clock::now();  // record the start time
#else
  struct timeval start_time, end_time;  // start and end timestamps
  (void)gettimeofday(&start_time, nullptr);
#endif
  auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
  auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
  size_t input_size = inputs[0]->size;
  size_t output_size = outputs[0]->size;
  MS_LOG(DEBUG) << "input addr: " << input_addr << "input size: " << input_size;     // log the input buffer
  MS_LOG(DEBUG) << "output addr: " << output_addr << "output size: " << output_size; // log the output buffer
  memset_s(output_addr, output_size, 0, output_size);
  const std::vector<int> &rank_group = {0, 1, 2, 3, 4, 5, 6, 7};  // ranks taking part in the all-gather
  size_t input_split_lens = input_size / LongToSize(split_num_) / sizeof(float_t);    // floats per input slice
  size_t output_split_lens = output_size / LongToSize(split_num_) / sizeof(float_t);  // floats per output slice
  for (int64_t i = 0; i < split_num_; i++) {  // all-gather each slice independently
    MPIAllGather(input_addr + i * input_split_lens, output_addr + i * output_split_lens, rank_group,
                 input_split_lens);
  }
#if defined(_WIN32) || defined(_WIN64)
  auto end_time = std::chrono::steady_clock::now();  // record the end time
  std::chrono::duration<double, std::ratio<1, 1000000>> cost = end_time - start_time;  // elapsed microseconds
  MS_LOG(INFO) << "EmbeddingLookUpCommGradCPUKernel, used time: " << cost.count() << " us";
#else
  (void)gettimeofday(&end_time, nullptr);
  uint64_t time = 1000000 * static_cast<uint64_t>(end_time.tv_sec - start_time.tv_sec);
  time += static_cast<uint64_t>(end_time.tv_usec - start_time.tv_usec);
  MS_LOG(INFO) << "EmbeddingLookUpCommGradCPUKernel, used time: " << time << " us";
#endif
  return true;
}

void EmbeddingLookUpCommGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 1) {  // exactly one input is expected
    MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookUpCommGradCPUKernel needs 1.";
  }
}
}  // namespace kernel
}  // namespace mindspore
```
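Each of the split_num_ slices is gathered independently, so slice i of every rank's input lands in slice i of the output; the per-slice lengths come from dividing the byte sizes by the slice count and the element width. The offset arithmetic on its own (hypothetical sizes, not taken from the kernel):

```c++
#include <cstdio>

int main() {
  size_t input_size = 4096, output_size = 32768;  // bytes; output holds 8 ranks' worth
  long long split_num = 4;
  size_t in_lens = input_size / split_num / sizeof(float);    // 256 floats per input slice
  size_t out_lens = output_size / split_num / sizeof(float);  // 2048 floats per output slice
  for (long long i = 0; i < split_num; ++i) {
    std::printf("slice %lld: input offset %zu, output offset %zu\n", i, i * in_lens, i * out_lens);
  }
}
```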
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\eltwise_grad_cpu_kernel.cc annotated (part 2)

```c++
// gradient of arctangent: dx = dy / (1 + x^2)
template <typename T>
void EltWiseGradCPUKernel<T>::AtanGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  for (size_t i = start; i < end; i++) {
    T dividend = input2[i];                 // incoming gradient dy
    T divisor = 1 + input1[i] * input1[i];  // 1 + x^2
    if (divisor == 0) {
      if (dividend == 0) {  // 0 / 0: no meaningful value
        out[i] = std::numeric_limits<T>::quiet_NaN();
        continue;
      }
      if (std::numeric_limits<T>::has_infinity) {
        out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
      } else {
        out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
      }
      continue;
    }
    out[i] = dividend / divisor;
  }
}

// gradient of inverse hyperbolic sine: dx = dy / sqrt(1 + x^2)
template <typename T>
void EltWiseGradCPUKernel<T>::AsinhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  for (size_t i = start; i < end; i++) {
    T dividend = input2[i];
    T divisor = sqrt(1 + input1[i] * input1[i]);
    if (divisor == 0) {
      if (dividend == 0) {  // 0 / 0: no meaningful value
        out[i] = std::numeric_limits<T>::quiet_NaN();
        continue;
      }
      if (std::numeric_limits<T>::has_infinity) {
        out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
      } else {
        out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
      }
      continue;
    }
    out[i] = dividend / divisor;
  }
}

// gradient of inverse hyperbolic cosine: dx = dy / sqrt(x^2 - 1)
template <typename T>
void EltWiseGradCPUKernel<T>::AcoshGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  for (size_t i = start; i < end; i++) {
    T dividend = input2[i];
    T divisor = sqrt(input1[i] * input1[i] - 1);
    if (divisor == 0) {
      if (dividend == 0) {  // 0 / 0: no meaningful value
        out[i] = std::numeric_limits<T>::quiet_NaN();
        continue;
      }
      if (std::numeric_limits<T>::has_infinity) {
        out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
      } else {
        out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
      }
      continue;
    }
    out[i] = dividend / divisor;
  }
}

// initialize the kernel
template <typename T>
void EltWiseGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);                  // the node must not be null
  kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);  // cache the kernel name
}

template <typename T>
bool EltWiseGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                     const std::vector<kernel::AddressPtr> &,
                                     const std::vector<kernel::AddressPtr> &outputs) {
  // table mapping the node name to the member function that computes its gradient
  static const std::map<std::string,
                        std::function<void(EltWiseGradCPUKernel *, const T *, const T *, T *, size_t, size_t)>>
    elt_map{{"ReluGrad", &EltWiseGradCPUKernel<T>::ReluGrad},       {"ReLU6Grad", &EltWiseGradCPUKernel<T>::ReLU6Grad},
            {"SigmoidGrad", &EltWiseGradCPUKernel<T>::SigmoidGrad}, {"AbsGrad", &EltWiseGradCPUKernel<T>::AbsGrad},
            {"TanhGrad", &EltWiseGradCPUKernel<T>::TanhGrad},       {"SqrtGrad", &EltWiseGradCPUKernel<T>::SqrtGrad},
            {"GeLUGrad", &EltWiseGradCPUKernel<T>::GeluGrad},       {"AsinGrad", &EltWiseGradCPUKernel<T>::AsinGrad},
            {"ACosGrad", &EltWiseGradCPUKernel<T>::ACosGrad},       {"AtanGrad", &EltWiseGradCPUKernel<T>::AtanGrad},
            {"AsinhGrad", &EltWiseGradCPUKernel<T>::AsinhGrad},     {"AcoshGrad", &EltWiseGradCPUKernel<T>::AcoshGrad}};
  const auto *input1 = reinterpret_cast<T *>(inputs[0]->addr);  // first input buffer
  const auto *input2 = reinterpret_cast<T *>(inputs[1]->addr);  // second input buffer
  auto *output = reinterpret_cast<T *>(outputs[0]->addr);       // output buffer
  size_t count = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1;
  CPUKernelUtils::ParallelFor(
    std::bind(elt_map.at(kernel_name_), this, input1, input2, output, std::placeholders::_1, std::placeholders::_2),
    count);
  return true;
}
}  // namespace kernel
}  // namespace mindspore
```
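All three kernels apply the textbook inverse-function derivatives (atan'(x) = 1/(1+x^2), asinh'(x) = 1/sqrt(1+x^2), acosh'(x) = 1/sqrt(x^2-1)); the zero-divisor branches handle the poles. A quick finite-difference check of the arctangent rule (standalone sketch):

```c++
#include <cmath>
#include <cstdio>

// Backward rule of atan: dx = dy / (1 + x * x).
float AtanGradOne(float x, float dy) { return dy / (1.0f + x * x); }

int main() {
  // Compare against a central finite difference at x = 0.5.
  float x = 0.5f, eps = 1e-3f;
  float numeric = (std::atan(x + eps) - std::atan(x - eps)) / (2 * eps);
  std::printf("analytic = %f, numeric = %f\n", AtanGradOne(x, 1.0f), numeric);
}
```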
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\eltwise_grad_cpu_kernel.cc annotated (part 1)

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// standard headers
#include <map>
#include <string>
// project headers
#include "backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h"
#include "common/thread_pool.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "nnacl/fp32_grad/activation_grad.h"
#include "nnacl/fp32_grad/arithmetic_grad.h"
#include "nnacl/errorcode.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
template <typename T>
void EltWiseGradCPUKernel<T>::ReluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  if constexpr (std::is_same_v<T, float>) {
    int ret = ::ReluGrad(input1 + start, input2 + start, end - start, out + start);  // nnacl implementation
    if (ret == NNACL_ERR) {
      MS_LOG(EXCEPTION) << "ReLUGrad failed.";
    }
  } else {
    MS_LOG(EXCEPTION) << "ReLUGrad only support float";  // only float is supported
  }
}

template <typename T>
void EltWiseGradCPUKernel<T>::ReLU6Grad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  if constexpr (std::is_same_v<T, float>) {
    int ret = ::Relu6Grad(input1 + start, input2 + start, end - start, out + start);
    if (ret == NNACL_ERR) {
      MS_LOG(EXCEPTION) << "ReLU6Grad failed.";
    }
  } else {
    MS_LOG(EXCEPTION) << "ReLU6Grad only support float";  // only float is supported
  }
}

template <typename T>
void EltWiseGradCPUKernel<T>::AbsGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  if constexpr (std::is_same_v<T, float>) {
    int ret = ::ElementAbsGrad(input1 + start, input2 + start, out + start, end - start);
    if (ret == NNACL_ERR) {
      MS_LOG(EXCEPTION) << "AbsGrad failed.";
    }
  } else {
    MS_LOG(EXCEPTION) << "AbsGrad only support float";  // only float is supported
  }
}

template <typename T>
void EltWiseGradCPUKernel<T>::SigmoidGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  if constexpr (std::is_same_v<T, float>) {
    int ret = ::SigmoidGrad(input2 + start, input1 + start, end - start, out + start);
    if (ret == NNACL_ERR) {
      MS_LOG(EXCEPTION) << "SigmoidGrad failed.";
    }
  } else {
    MS_LOG(EXCEPTION) << "SigmoidGrad only support float";  // only float is supported
  }
}

// sqrt gradient: dx = dy / (2 * y), where input1 holds y = sqrt(x)
template <typename T>
void EltWiseGradCPUKernel<T>::SqrtGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  for (size_t i = start; i < end; i++) {
    out[i] = input2[i] / (input1[i] * 2);
  }
}

template <typename T>
void EltWiseGradCPUKernel<T>::TanhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  if constexpr (std::is_same_v<T, float>) {
    int ret = ::TanhGrad(input2 + start, input1 + start, end - start, out + start);
    if (ret == NNACL_ERR) {
      MS_LOG(EXCEPTION) << "TanhGrad failed.";
    }
  } else {
    MS_LOG(EXCEPTION) << "TanhGrad only support float";  // only float is supported
  }
}

// gradient of the tanh approximation of GeLU
template <typename T>
void EltWiseGradCPUKernel<T>::GeluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  for (size_t i = start; i < end; i++) {
    T x = input2[i];  // forward input
    auto double_x = static_cast<T>(x);
    T tanh_res = (T)std::tanh(0.7978845608 * (double_x + 0.044715 * double_x * double_x * double_x));
    T mul_right = (T)(0.7978845608 + 0.1070322244 * double_x * double_x);
    T y_res = (((T)1.0 + tanh_res) + x * ((T)1.0 - tanh_res * tanh_res) * mul_right) / (T)2.0;
    out[i] = input1[i] * y_res;  // scale by the incoming gradient
  }
}

// gradient of arcsine: dx = dy / sqrt(1 - x^2)
template <typename T>
void EltWiseGradCPUKernel<T>::AsinGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  for (size_t i = start; i < end; i++) {
    T dividend = input2[i];  // incoming gradient dy
    T divisor = sqrt(1 - input1[i] * input1[i]);
    if (divisor == 0) {  // divisor is zero
      if (dividend == 0) {  // 0 / 0: no meaningful value
        out[i] = std::numeric_limits<T>::quiet_NaN();
        continue;
      }
      if (std::numeric_limits<T>::has_infinity) {
        out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
      } else {
        out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
      }
      continue;
    }
    out[i] = dividend / divisor;  // dy divided by the derivative's denominator
  }
}

// gradient of arccosine: dx = -dy / sqrt(1 - x^2)
template <typename T>
void EltWiseGradCPUKernel<T>::ACosGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  for (size_t i = start; i < end; i++) {
    T dividend = -input2[i];  // negated incoming gradient
    T divisor = sqrt(1 - input1[i] * input1[i]);
    if (divisor == 0) {
      if (dividend == 0) {
        out[i] = std::numeric_limits<T>::quiet_NaN();
        continue;
      }
      if (std::numeric_limits<T>::has_infinity) {
        out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
      } else {
        out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
      }
      continue;
    }
    out[i] = dividend / divisor;
  }
}
```
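GeluGrad differentiates the tanh approximation gelu(x) ~ 0.5 * x * (1 + tanh(c * (x + a * x^3))) with c = sqrt(2/pi) and a = 0.044715; the constant 0.7978845608 is c, and 0.1070322244 is 3 * a * c, the derivative of the tanh argument. The same expression in standalone form (a sketch, not the kernel's template):

```c++
#include <cmath>

// Gradient of the tanh-based GeLU approximation:
// gelu(x) ~ 0.5 * x * (1 + tanh(c * (x + a * x^3))), c = sqrt(2/pi), a = 0.044715.
double GeluGradOne(double x, double dy) {
  const double c = 0.7978845608;  // sqrt(2 / pi)
  const double k = 0.1070322244;  // 3 * a * c
  double t = std::tanh(c * (x + 0.044715 * x * x * x));
  double arg_grad = c + k * x * x;  // derivative of the tanh argument
  return dy * ((1.0 + t) + x * (1.0 - t * t) * arg_grad) / 2.0;
}
```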
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\cumsum_cpu_kernel.cc annotated (part 2)

```c++
// copy template: write the staged results back into the input buffer
template <typename T>
void CumSumCPUKernel::Copy(T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride, size_t stride2,
                           size_t start, size_t end) {
  for (size_t i = start; i < end; i++) {
    size_t k1 = i / dim2 % dim0;  // outer coordinate
    size_t k2 = i % dim2;         // inner coordinate
    size_t offset = k1 * stride + k2;
    for (size_t j = 0; j < dim1; ++j) {
      size_t read_index = j * stride2 + offset;
      input[read_index] = output[read_index];
    }
  }
}

// cumulative sum running from the back of the axis
template <typename T>
void CumSumCPUKernel::CumSumKernelReverse(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2,
                                          size_t stride, size_t stride2, size_t start, size_t end) {
  for (size_t i = start; i < end; i++) {
    size_t k1 = i / dim2 % dim0;
    size_t k2 = i % dim2;
    size_t offset = k1 * stride + k2;
    for (int j = SizeToInt(dim1 - 1); j >= 0; --j) {  // walk the axis backwards
      size_t read_index = j * stride2 + offset;
      if (j == SizeToInt(dim1 - 1)) {
        output[read_index] = input[read_index];  // the last element starts the running sum
      } else {
        size_t read_index2 = (j + 1) * stride2 + offset;  // previously summed element
        output[read_index] = output[read_index2] + input[read_index];
      }
    }
  }
}

// cumulative sum running from the front of the axis
template <typename T>
void CumSumCPUKernel::CumSumKernel(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride,
                                   size_t stride2, size_t start, size_t end) {
  for (size_t i = start; i < end; i++) {
    size_t k1 = i / dim2 % dim0;
    size_t k2 = i % dim2;
    size_t offset = k1 * stride + k2;
    for (size_t j = 0; j < dim1; ++j) {  // walk the axis forwards
      size_t read_index = j * stride2 + offset;
      if (j == 0) {
        output[read_index] = input[read_index];  // the first element starts the running sum
      } else {
        size_t read_index2 = (j - 1) * stride2 + offset;
        output[read_index] = output[read_index2] + input[read_index];
      }
    }
  }
}

// dispatch the cumulative sum according to the exclusive/reverse flags
template <typename T>
void CumSumCPUKernel::LaunchCumSum(const T *input, T *output, T *workspace, size_t start, size_t end) {
  start = start / dims_[1];  // convert element positions into (outer, inner) slice positions
  end = end / dims_[1];
  if (exclusive_) {
    if (reverse_) {
      RightMove(input, output, dims_[0], dims_[1], dims_[2], stride_, stride2_, start, end);
      Copy(workspace, output, dims_[0], dims_[1], dims_[2], stride_, stride2_, start, end);
      CumSumKernelReverse(workspace, output, dims_[0], dims_[1], dims_[2], stride_, stride2_, start, end);
    } else {
      LeftMove(input, output, dims_[0], dims_[1], dims_[2], stride_, stride2_, start, end);
      Copy(workspace, output, dims_[0], dims_[1], dims_[2], stride_, stride2_, start, end);
      CumSumKernel(workspace, output, dims_[0], dims_[1], dims_[2], stride_, stride2_, start, end);
    }
  } else {
    if (reverse_) {
      CumSumKernelReverse(input, output, dims_[0], dims_[1], dims_[2], stride_, stride2_, start, end);
    } else {
      CumSumKernel(input, output, dims_[0], dims_[1], dims_[2], stride_, stride2_, start, end);
    }
  }
  return;
}

// launch the typed kernel
template <typename T>
void CumSumCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
                                   const std::vector<kernel::AddressPtr> &workspace,
                                   const std::vector<kernel::AddressPtr> &outputs) {
  auto input = reinterpret_cast<T *>(inputs[0]->addr);    // input buffer
  auto ws = reinterpret_cast<T *>(workspace[0]->addr);    // workspace buffer
  auto output = reinterpret_cast<T *>(outputs[0]->addr);  // output buffer
  // multithreading: roughly 128 elements per thread, capped by the hardware concurrency
  size_t lens = inputs[0]->size > 0 ? static_cast<size_t>(inputs[0]->size / sizeof(T)) : 1;
  auto max_thread_num = std::thread::hardware_concurrency();
  size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num;
  MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num;
  std::vector<std::thread> threads;
  threads.reserve(thread_num);
  size_t start = 0;       // first element of the next chunk
  if (thread_num < 1) {   // a thread count below one is invalid
    MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num;
    return;
  }
  size_t once_compute_size = (lens + thread_num - 1) / thread_num;  // elements per thread
  if (once_compute_size < 1) {
    MS_LOG(ERROR) << "Invalid value: once_compute_size " << once_compute_size;
    return;
  }
  while (start < lens) {
    size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size);
    threads.emplace_back(std::thread(&CumSumCPUKernel::LaunchCumSum<T>, this, input, output, ws, start, end));
    start += once_compute_size;
  }
  for (size_t i = 0; i < threads.size(); ++i) {  // wait for every worker
    threads[i].join();
  }
  return;
}

// validate the arguments
void CumSumCPUKernel::CheckParam(const CNodePtr &kernel_node) {
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);  // number of input tensors
  if (input_num != 1) {
    MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but CumSumGpuKernel needs 1.";
  }
}
}  // namespace kernel
}  // namespace mindspore
```
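With the stride bookkeeping removed, CumSumKernel is the plain prefix-sum recurrence out[0] = in[0], out[j] = out[j-1] + in[j]; the reverse kernel runs it from the other end. For example:

```c++
#include <cstdio>

int main() {
  int in[5] = {1, 2, 3, 4, 5}, out[5];
  out[0] = in[0];
  for (int j = 1; j < 5; ++j) out[j] = out[j - 1] + in[j];
  for (int v : out) std::printf("%d ", v);  // 1 3 6 10 15
}
```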
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\cumsum_cpu_kernel.cc annotated (part 1)

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// standard headers
#include <thread>
// project headers
#include "backend/kernel_compiler/cpu/cumsum_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
void CumSumCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  CheckParam(kernel_node);
  shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);                 // input shape
  dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);              // input data type
  axis_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "axis"));  // cumsum axis
  dst_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
  exclusive_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "exclusive");
  reverse_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "reverse");
  int input_dim_length = SizeToInt(shape_.size());
  if (axis_ >= input_dim_length) {  // the axis must lie inside the input rank
    MS_LOG(EXCEPTION) << "Axis out of bounds.";
  }
  while (axis_ < 0) {  // wrap a negative axis
    axis_ += input_dim_length;
  }
}

template <typename T>
void CumSumCPUKernel::InitWorkspaceSize() {
  input_size_0_ = sizeof(T);  // workspace bytes = element size times element count
  for (size_t i = 0; i < shape_.size(); i++) {
    input_size_0_ *= shape_[i];
  }
  workspace_size_list_.emplace_back(input_size_0_);  // register the workspace requirement
}

// pick the workspace size that matches the data type
void CumSumCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
  CPUKernel::InitInputOutputSize(kernel_node);
  if (dtype_ == kNumberTypeFloat32) {
    InitWorkspaceSize<float>();
  } else if (dtype_ == kNumberTypeFloat16) {
    InitWorkspaceSize<float16>();
  } else if (dtype_ == kNumberTypeInt32) {
    InitWorkspaceSize<int>();
  } else if (dtype_ == kNumberTypeInt8) {
    InitWorkspaceSize<int8_t>();
  } else if (dtype_ == kNumberTypeUInt8) {
    InitWorkspaceSize<uint8_t>();
  }
}

// dispatch the launch on the data type
bool CumSumCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                             const std::vector<kernel::AddressPtr> &workspace,
                             const std::vector<kernel::AddressPtr> &outputs) {
  Reshape();
  if (dtype_ == kNumberTypeFloat32) {
    LaunchKernel<float>(inputs, workspace, outputs);
  } else if (dtype_ == kNumberTypeFloat16) {
    LaunchKernel<float16>(inputs, workspace, outputs);
  } else if (dtype_ == kNumberTypeInt32) {
    LaunchKernel<int>(inputs, workspace, outputs);
  } else if (dtype_ == kNumberTypeInt8) {
    LaunchKernel<int8_t>(inputs, workspace, outputs);
  } else if (dtype_ == kNumberTypeUInt8) {
    LaunchKernel<uint8_t>(inputs, workspace, outputs);
  }
  return true;
}

// collapse the input into (outer, axis, inner) logical dimensions
void CumSumCPUKernel::Reshape() {
  dims_[0] = 1;
  dims_[1] = shape_[IntToSize(axis_)];  // extent of the cumsum axis
  dims_[2] = 1;
  for (size_t i = 0; i < IntToSize(axis_); i++) {  // product of the dimensions before the axis
    dims_[0] *= shape_[i];
  }
  for (size_t i = IntToSize(axis_) + 1; i < shape_.size(); i++) {  // product of the dimensions after the axis
    dims_[2] *= shape_[i];
  }
  stride_ = dims_[1] * dims_[2];
  stride2_ = dims_[2];
  return;
}

// left-shift template (exclusive mode): out[0] = 0, out[j] = in[j - 1]
template <typename T>
void CumSumCPUKernel::LeftMove(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride,
                               size_t stride2, size_t start, size_t end) {
  for (size_t i = start; i < end; i++) {
    size_t k1 = i / dim2 % dim0;
    size_t k2 = i % dim2;
    size_t offset = k1 * stride + k2;
    for (size_t j = 0; j < dim1; ++j) {
      size_t read_index = j * stride2 + offset;
      if (j == 0) {
        output[read_index] = (T)0;
      } else {
        size_t read_index2 = (j - 1) * stride2 + offset;
        output[read_index] = input[read_index2];
      }
    }
  }
}

// right-shift template (reverse exclusive mode): out[last] = 0, out[j] = in[j + 1]
template <typename T>
void CumSumCPUKernel::RightMove(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride,
                                size_t stride2, size_t start, size_t end) {
  for (size_t i = start; i < end; i++) {
    size_t k1 = i / dim2 % dim0;
    size_t k2 = i % dim2;
    size_t offset = k1 * stride + k2;
    for (int j = SizeToInt(dim1 - 1); j >= 0; --j) {
      size_t read_index = j * stride2 + offset;
      if (j == SizeToInt(dim1 - 1)) {
        output[read_index] = (T)0;
      } else {
        size_t read_index2 = (j + 1) * stride2 + offset;
        output[read_index] = input[read_index2];
      }
    }
  }
}
```
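Reshape collapses any tensor into three logical dimensions (outer, axis, inner); an element with outer index k1, axis position j, and inner index k2 then lives at flat offset k1*stride_ + j*stride2_ + k2, which is exactly the read_index formula used by the move and sum templates. A worked example for shape (2, 3, 4) and axis = 1:

```c++
#include <cstddef>
#include <cstdio>

int main() {
  size_t shape[3] = {2, 3, 4};          // dims_[0] = 2, dims_[1] = 3, dims_[2] = 4
  size_t stride = shape[1] * shape[2];  // 12: one step along the outer dimension
  size_t stride2 = shape[2];            // 4:  one step along the cumsum axis
  size_t k1 = 1, j = 2, k2 = 3;         // element [1][2][3]
  std::printf("flat index = %zu\n", k1 * stride + j * stride2 + k2);  // 23
}
```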
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\ctcloss_cpu_kernel.cc annotated (part 1)

```c++
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// project headers
#include "backend/kernel_compiler/cpu/ctcloss_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
void CTCLossCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  CheckParam(kernel_node);
  probs_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);  // shape of the probabilities
  indice_dims_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);  // shape of the label indices
  labels_dims_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);  // shape of the label values
  dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);     // data type of the probabilities
  // validate the ranks
  if (probs_shape_.size() != 3) {
    MS_LOG(EXCEPTION) << "Probs dims: " << probs_shape_.size() << " not support.";
  }
  if (labels_dims_.size() != 1) {
    MS_LOG(EXCEPTION) << "Labels dims: " << labels_dims_.size() << " not support.";
  }
  if (indice_dims_.size() != 2) {
    MS_LOG(EXCEPTION) << "Labels indice dims: " << indice_dims_.size() << " not support.";
  }
  preprocess_collapse_repeated_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "preprocess_collapse_repeated");
  ctc_merge_repeated_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "ctc_merge_repeated");
  ignore_longer_outputs_than_inputs_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "ignore_longer_outputs_than_inputs");
  max_time_ = probs_shape_[0];    // number of time steps
  batch_size_ = probs_shape_[1];  // batch size
  num_class_ = probs_shape_[2];   // number of classes
  blank_index_ = num_class_ - 1;  // the blank symbol is the last class
}

bool CTCLossCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                              const std::vector<kernel::AddressPtr> & /*workspace*/,
                              const std::vector<kernel::AddressPtr> &outputs) {
  // dispatch on the data type
  if (dtype_ == kNumberTypeFloat16) {
    LaunchKernel<float16>(inputs, outputs);
  } else if (dtype_ == kNumberTypeFloat32) {
    LaunchKernel<float>(inputs, outputs);
  }
  return true;
}

// numerically stable log(exp(logprob1) + exp(logprob2))
template <typename T>
inline T LogSumExp(T logprob1, T logprob2) {
  T kLogZero_ = -std::numeric_limits<T>::infinity();  // log(0)
  if (logprob1 == kLogZero_) {
    return logprob2;
  } else if (logprob2 == kLogZero_) {
    return logprob1;
  } else {
    return (logprob1 > logprob2) ? logprob1 + log1p(exp(logprob2 - logprob1))
                                 : logprob2 + log1p(exp(logprob1 - logprob2));
  }
}

// compute the CTC forward variables (log alpha)
template <typename TT>
void CTCLossCPUKernel::CalculateFwdVar(const std::vector<uint32_t> &label_with_blank,
                                       const std::vector<std::vector<TT>> &y,
                                       std::vector<std::vector<TT>> *log_alpha_b) {
  int U = label_with_blank.size();   // length of the blank-extended label
  int T = (*log_alpha_b)[0].size();  // number of time steps
  TT kLogZero_ = -std::numeric_limits<TT>::infinity();
  (*log_alpha_b)[0][0] = log(y[blank_index_][0]);
  auto label_0 = (label_with_blank.size() > 1) ? label_with_blank[1] : blank_index_;
  if (label_with_blank.size() > 1) {
    (*log_alpha_b)[1][0] = log(y[label_0][0]);
  }
  for (int t = 1; t < T; ++t) {
    // restrict u to the label positions reachable at time t
    int low = std::max(0, U - (2 * (T - t)));
    int high = std::min(U, 2 * (t + 1));
    for (int u = low; u < high; ++u) {
      auto sum_log_alpha_b = kLogZero_;
      if (ctc_merge_repeated_ || label_with_blank[u] == blank_index_) {
        sum_log_alpha_b = (*log_alpha_b)[u][t - 1];  // stay on the same position
      }
      if (u > 0) {
        sum_log_alpha_b = LogSumExp(sum_log_alpha_b, (*log_alpha_b)[u - 1][t - 1]);  // advance one position
      }
      if (u > 1) {  // skip over a blank, unless the neighbours are repeated and merged
        bool matching_labels_merge = ctc_merge_repeated_ && (label_with_blank[u] == label_with_blank[u - 2]);
        if (label_with_blank[u] != blank_index_ && !matching_labels_merge) {
          sum_log_alpha_b = LogSumExp(sum_log_alpha_b, (*log_alpha_b)[u - 2][t - 1]);
        }
      }
      (*log_alpha_b)[u][t] = log(y[label_with_blank[u]][t]) + sum_log_alpha_b;
    }
  }
}

// compute the CTC backward variables (log beta)
template <typename TT>
void CTCLossCPUKernel::CalculateBwdVar(const std::vector<uint32_t> &label_with_blank,
                                       const std::vector<std::vector<TT>> &y,
                                       std::vector<std::vector<TT>> *log_beta_b) {
  int T = (*log_beta_b)[0].size();
  int U = label_with_blank.size();
  if (U > 1) {
    for (int u = U - 2; u < U; ++u) {
      (*log_beta_b)[u][T - 1] = TT(0);
    }
  } else {
    (*log_beta_b)[0][T - 1] = TT(0);
    (*log_beta_b)[0][T - 2] = TT(0);
  }
  for (int t = T - 2; t >= 0; --t) {
    int low = std::max(0, U - (2 * (T - t)));
    int high = std::min(U, 2 * (t + 1));
    for (int u = low; u < high; ++u) {
      if (ctc_merge_repeated_ || label_with_blank[u] == blank_index_) {
        (*log_beta_b)[u][t] =
          LogSumExp((*log_beta_b)[u][t], (*log_beta_b)[u][t + 1] + TT(log(y[label_with_blank[u]][t + 1])));
      }
      if (u + 1 < U) {
        (*log_beta_b)[u][t] =
          LogSumExp((*log_beta_b)[u][t], (*log_beta_b)[u + 1][t + 1] + TT(log(y[label_with_blank[u + 1]][t + 1])));
      }
      if (u + 2 < U) {
        bool matching_labels_merge = ctc_merge_repeated_ && (label_with_blank[u] == label_with_blank[u + 2]);
        if (label_with_blank[u] != blank_index_ && !matching_labels_merge) {
          (*log_beta_b)[u][t] =
            LogSumExp((*log_beta_b)[u][t], (*log_beta_b)[u + 2][t + 1] + TT(log(y[label_with_blank[u + 2]][t + 1])));
        }
      }
    }
  }
}
```
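LogSumExp is the standard trick for adding probabilities stored in log space: log(e^a + e^b) = max + log1p(exp(min - max)), which never exponentiates a large positive number. A standalone check:

```c++
#include <cmath>
#include <cstdio>
#include <limits>

double LogSumExp(double a, double b) {
  const double kLogZero = -std::numeric_limits<double>::infinity();
  if (a == kLogZero) return b;
  if (b == kLogZero) return a;
  return (a > b) ? a + std::log1p(std::exp(b - a)) : b + std::log1p(std::exp(a - b));
}

int main() {
  // exp(-1000) underflows to 0 in double, yet the log-space sum survives:
  std::printf("%f\n", LogSumExp(-1000.0, -1001.0));  // about -999.686
}
```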
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\ctcloss_cpu_kernel.cc annotated (part 3)

```c++
// build a row x col matrix filled with init_value
template <typename T>
void MatrixfromVector(uint32_t row, uint32_t col, std::vector<std::vector<T>> *array2D, const T init_value) {
  array2D->resize(row);
  for (size_t i = 0; i < row; ++i) {
    (*array2D)[i].resize(col, init_value);
  }
}

// typed kernel launch
template <typename T>
void CTCLossCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
                                    const std::vector<kernel::AddressPtr> &outputs) {
  auto inputs_addr = reinterpret_cast<T *>(inputs[0]->addr);                  // probabilities
  auto labels_indices_addr = reinterpret_cast<uint64_t *>(inputs[1]->addr);   // sparse label indices
  auto labels_values_addr = reinterpret_cast<uint32_t *>(inputs[2]->addr);    // flat label values
  auto sequence_length_addr = reinterpret_cast<uint32_t *>(inputs[3]->addr);  // per-batch sequence lengths
  auto loss_addr = reinterpret_cast<T *>(outputs[0]->addr);                   // per-batch loss
  auto gradient_addr = reinterpret_cast<T *>(outputs[1]->addr);               // gradient w.r.t. the inputs

  std::vector<std::vector<uint32_t>> label_batch;
  std::vector<std::vector<uint32_t>> labels_with_blank;
  std::vector<uint64_t> each_label_length;
  label_batch.resize(batch_size_);
  labels_with_blank.resize(batch_size_);
  each_label_length.resize(batch_size_, 0);
  T kLogZero_ = -std::numeric_limits<T>::infinity();

  // validate the sequence lengths
  for (size_t b = 0; b < batch_size_; ++b) {
    if (sequence_length_addr[b] == uint32_t(0)) {
      MS_LOG(EXCEPTION) << "Sequence length should > 0, but gets " << sequence_length_addr[b];
    }
    if (sequence_length_addr[b] > max_time_) {  // max_time_ must bound every sequence length
      MS_LOG(EXCEPTION) << "Max time should be greater than sequence length, but gets " << max_time_ << " "
                        << sequence_length_addr[b];
    }
  }
  for (size_t i = 0; i < indice_dims_[0]; ++i) {  // count the labels that belong to each batch
    each_label_length[labels_indices_addr[i * 2]]++;
  }

  // convert the (label_indices, label_values) sparse form into per-batch label lists
  uint64_t cum_sum = 0;
  for (size_t b = 0; b < batch_size_; ++b) {
    std::vector<uint32_t> *b_value = &label_batch[b];
    for (size_t l = 0; l < each_label_length[b]; ++l) {
      b_value->push_back(labels_values_addr[cum_sum + l]);
    }
    cum_sum += each_label_length[b];
  }

  // interleave the blank symbol into every label sequence
  GenLableWithBlank(sequence_length_addr, label_batch, &labels_with_blank);

  for (size_t b = 0; b < batch_size_; ++b) {
    std::vector<uint32_t> label_with_blank = labels_with_blank[b];
    // y_b: [num_class, sequence_length]
    std::vector<std::vector<T>> y_b;
    std::vector<std::vector<T>> dy;
    std::vector<std::vector<T>> log_alpha_b;
    std::vector<std::vector<T>> log_beta_b;
    MatrixfromVector(num_class_, sequence_length_addr[b], &y_b, kLogZero_);
    MatrixfromVector(y_b.size(), y_b[0].size(), &dy, T(0));
    MatrixfromVector(label_with_blank.size(), sequence_length_addr[b], &log_alpha_b, kLogZero_);
    MatrixfromVector(label_with_blank.size(), sequence_length_addr[b], &log_beta_b, kLogZero_);
    InnerSoftMax(inputs_addr, &y_b, sequence_length_addr[b], num_class_, batch_size_, b);
    CalculateFwdVar(label_with_blank, y_b, &log_alpha_b);
    CalculateBwdVar(label_with_blank, y_b, &log_beta_b);

    // total log-probability of the label: sum over the alignment states
    T log_pzx = kLogZero_;
    for (size_t u = 0; u < label_with_blank.size(); ++u) {
      log_pzx = LogSumExp(log_pzx, log_alpha_b[u][0] + log_beta_b[u][0]);
    }
    loss_addr[b] = -log_pzx;
    CalculateGrad(label_with_blank, y_b, log_alpha_b, log_beta_b, log_pzx, &dy);

    for (size_t t = 0; t < sequence_length_addr[b]; ++t) {  // write the gradient back in [T, B, C] layout
      for (size_t c = 0; c < num_class_; ++c) {
        gradient_addr[t * batch_size_ * num_class_ + b * num_class_ + c] = dy[c][t];
      }
    }
  }
}

void CTCLossCPUKernel::CheckParam(const CNodePtr &kernel_node) {
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 4) {
    MS_LOG(EXCEPTION) << "CTCLossCPUKernel needs 4 inputs, but gets " << input_num;
  }
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  if (output_num != 2) {
    MS_LOG(EXCEPTION) << "CTCLossCPUKernel expects 2 outputs, but gets" << output_num;
  }
}
}  // namespace kernel
}  // namespace mindspore
```
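The labels arrive in sparse form: each row of labels_indices is a (batch, position) pair and labels_values is the flat value list, so the kernel first counts how many labels each batch owns and then slices the value array in order. The same two passes in miniature:

```c++
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  // labels_indices rows are (batch, position); labels_values is the flat label list.
  size_t indices[3][2] = {{0, 0}, {0, 1}, {1, 0}};
  unsigned values[3] = {5, 3, 7};
  std::vector<size_t> each_label_length(2, 0);
  for (auto &ip : indices) each_label_length[ip[0]]++;  // labels per batch: {2, 1}
  std::vector<std::vector<unsigned>> label_batch(2);
  size_t cum_sum = 0;
  for (size_t b = 0; b < 2; ++b) {
    for (size_t l = 0; l < each_label_length[b]; ++l) {
      label_batch[b].push_back(values[cum_sum + l]);
    }
    cum_sum += each_label_length[b];
  }
  std::printf("batch 0: %u %u | batch 1: %u\n", label_batch[0][0], label_batch[0][1], label_batch[1][0]);
}
```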
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\ctcloss_cpu_kernel.cc annotated (part 2)

```c++
// compute the final gradient: dy = y - exp(log_alpha + log_beta - log_pzx)
template <typename TT>
void CTCLossCPUKernel::CalculateGrad(const std::vector<uint32_t> &label_with_blank,
                                     const std::vector<std::vector<TT>> &y,
                                     const std::vector<std::vector<TT>> &log_alpha_b,
                                     const std::vector<std::vector<TT>> &log_beta_b, const TT log_pzx,
                                     std::vector<std::vector<TT>> *dy) {
  auto dy_b = dy;  // alias for the output matrix
  TT kLogZero_ = -std::numeric_limits<TT>::infinity();
  if (log_pzx == kLogZero_) {
    MS_LOG(INFO) << "No valid path found";  // the label cannot be aligned to the input
    return;
  }
  size_t L = y.size();                 // number of classes
  size_t T = y[0].size();              // number of time steps
  size_t U = label_with_blank.size();  // length of the blank-extended label
  for (size_t t = 0; t < T; ++t) {
    std::vector<TT> prob_sum(L, kLogZero_);
    for (size_t u = 0; u < U; ++u) {  // accumulate the posterior of each class at time t
      uint32_t l = label_with_blank[u];
      prob_sum[l] = LogSumExp(prob_sum[l], log_alpha_b[u][t] + log_beta_b[u][t]);
    }
    for (size_t l = 0; l < L; ++l) {
      (*dy_b)[l][t] = y[l][t] - exp(prob_sum[l] - log_pzx);
    }
  }
}

// interleave the blank symbol around every label
void CTCLossCPUKernel::GenLableWithBlank(uint32_t *seq_len, const std::vector<std::vector<uint32_t>> &batch_label,
                                         std::vector<std::vector<uint32_t>> *label_with_blank) {
  for (size_t b = 0; b < batch_size_; ++b) {
    std::vector<uint32_t> l;
    const std::vector<uint32_t> &label = batch_label[b];
    bool has_blank = false;
    for (size_t i = 0; i < label.size(); ++i) {
      if (i == 0 || !preprocess_collapse_repeated_ || label[i] != label[i - 1]) {
        if (label[i] >= num_class_ - 1) {
          has_blank = true;
        } else {
          if (has_blank) {  // a blank must not sit between two valid labels
            MS_LOG(EXCEPTION) << "Invalid labels(index >= num_class - 1) should not appear between two valid labels";
          }
          l.push_back(label[i]);  // keep the valid label
        }
      }
    }
    if (!ignore_longer_outputs_than_inputs_) {
      if (l.size() > seq_len[b]) {  // the input must be long enough to emit every label
        MS_LOG(EXCEPTION) << "Input time(sequence length) should greater than output size(label length), but gets "
                          << seq_len[b] << " " << l.size();
      }
    }
    (*label_with_blank)[b].reserve(2 * l.size() + 1);
    for (auto l_i : l) {
      (*label_with_blank)[b].push_back(blank_index_);
      (*label_with_blank)[b].push_back(l_i);
    }
    (*label_with_blank)[b].push_back(blank_index_);
  }
}

// softmax over the classes at each time step, computed in a numerically stable way
template <typename T>
void InnerSoftMax(T *inputs_addr, std::vector<std::vector<T>> *softmax_probs, const uint32_t sequence_length,
                  size_t num_class, size_t batch_size, size_t b) {
  for (size_t t = 0; t < sequence_length; ++t) {
    T maxCoeff(T(0));  // running maximum logit (for stability)
    T sumCoeff(T(0));  // normalizer
    for (size_t c = 0; c < num_class; ++c) {  // find the largest logit at this time step
      if (inputs_addr[t * batch_size * num_class + b * num_class + c] > maxCoeff) {
        maxCoeff = inputs_addr[t * batch_size * num_class + b * num_class + c];
      }
    }
    for (size_t c = 0; c < num_class; ++c) {  // exponentiate the shifted logits and accumulate the sum
      sumCoeff += exp(inputs_addr[t * batch_size * num_class + b * num_class + c] - maxCoeff);
      (*softmax_probs)[c][t] = exp(inputs_addr[t * batch_size * num_class + b * num_class + c] - maxCoeff);
    }
    for (size_t c = 0; c < num_class; ++c) {  // normalize
      (*softmax_probs)[c][t] /= sumCoeff;
    }
  }
}
```
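GenLableWithBlank interleaves the blank symbol around every remaining label, so a length-n label becomes length 2n+1; with num_class = 5 (blank index 4), the label {1, 2} becomes {4, 1, 4, 2, 4}:

```c++
#include <cstdio>
#include <vector>

int main() {
  std::vector<unsigned> label = {1, 2}, with_blank;
  unsigned blank = 4;  // blank_index_ = num_class - 1
  with_blank.reserve(2 * label.size() + 1);
  for (unsigned l : label) {
    with_blank.push_back(blank);
    with_blank.push_back(l);
  }
  with_blank.push_back(blank);
  for (unsigned v : with_blank) std::printf("%u ", v);  // 4 1 4 2 4
}
```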
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\cpu_kernel_factory.cc annotated

```c++
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
// standard headers
#include <memory>
#include <string>
// project headers
#include "runtime/device/kernel_info.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
CPUKernelFactory &CPUKernelFactory::GetInstance() {  // singleton accessor
  static CPUKernelFactory instance;  // the one static instance
  return instance;
}

void CPUKernelFactory::Register(const std::string &kernel_name, const KernelAttr &kernel_attr,
                                CPUKernelCreator &&kernel_creator) {
  (void)name_to_attr_creator_[kernel_name].emplace_back(kernel_attr, kernel_creator);  // record the (attr, creator) pair
#if !defined(_WIN32) && !defined(_WIN64)
  MS_LOG(DEBUG) << "CPUKernelFactory register operator: " << kernel_name;  // log the registration
#endif
}

// create a kernel instance by name
std::shared_ptr<CPUKernel> CPUKernelFactory::Create(const std::string &kernel_name, const CNodePtr &apply_kernel) {
  auto kernel_info = dynamic_cast<device::KernelInfo *>(apply_kernel->kernel_info());  // kernel info of the node
  MS_EXCEPTION_IF_NULL(kernel_info);  // the kernel info must not be null
  const KernelBuildInfo *kernel_build_Info = kernel_info->select_kernel_build_info();  // selected build info
  MS_EXCEPTION_IF_NULL(kernel_build_Info);  // the build info must not be null
  std::pair<bool, size_t> ret_pair = CPUKernelAttrCheck(kernel_name, *kernel_build_Info);
  if (ret_pair.first) {
    return (name_to_attr_creator_.find(kernel_name)->second)[ret_pair.second].second();
  }
  return nullptr;  // no matching registration
}

std::pair<bool, size_t> CPUKernelFactory::CPUKernelAttrCheck(const std::string &kernel_name,
                                                             const KernelBuildInfo &kernel_info) {
  auto iter = name_to_attr_creator_.find(kernel_name);  // look up the kernel name
  if (iter == name_to_attr_creator_.end()) {
    MS_LOG(INFO) << "Not registered CPU kernel: op[" << kernel_name << "]!";  // unregistered kernel
    return std::make_pair(false, 0);
  }
  auto creators = iter->second;  // registered (attr, creator) pairs
  for (size_t index = 0; index < creators.size(); ++index) {  // look for an attribute that matches
    auto attr_creator = creators[index];
    if (CPUKernelSingleAttrCheck(attr_creator.first, kernel_info)) {
      return std::make_pair(true, index);  // found: return its position
    }
  }
  return std::make_pair(false, 0);
}

// check one registered attribute against the node's build info
bool CPUKernelFactory::CPUKernelSingleAttrCheck(const KernelAttr &kernel_attr, const KernelBuildInfo &kernel_info) {
  for (size_t i = 0; i < kernel_info.GetInputNum(); ++i) {  // compare every input device type
    auto dtype = kernel_attr.GetAllSame() ? kernel_attr.GetInputAttr(0).first : kernel_attr.GetInputAttr(i).first;
    if (kernel_info.GetInputDeviceType(i) != dtype) {  // mismatch: log and reject
      MS_LOG(DEBUG) << "input index:" << i << ", kernel info type:" << kernel_info.GetInputDeviceType(i)
                    << ", register type:" << dtype;
      return false;
    }
  }
  for (size_t i = 0; i < kernel_info.GetOutputNum(); ++i) {  // compare every output device type
    auto dtype = kernel_attr.GetAllSame() ? kernel_attr.GetOutputAttr(0).first : kernel_attr.GetOutputAttr(i).first;
    if (kernel_info.GetOutputDeviceType(i) != dtype) {  // mismatch: log and reject
      MS_LOG(DEBUG) << "output index:" << i << ", kernel info type:" << kernel_info.GetOutputDeviceType(i)
                    << ", register type:" << dtype;
      return false;
    }
  }
  return true;
}

// list every attribute registered for a kernel name
std::vector<KernelAttr> CPUKernelFactory::GetSupportedKernelAttrList(const std::string &kernel_name) {
  std::vector<KernelAttr> result;
  auto iter = name_to_attr_creator_.find(kernel_name);  // look up the kernel name
  if (iter == name_to_attr_creator_.end()) {  // warn about an unregistered kernel
    MS_LOG(WARNING) << "Not registered CPU kernel: op[" << kernel_name << "]!";
    return result;
  }
  auto creators = iter->second;
  for (size_t index = 0; index < creators.size(); ++index) {  // collect every registered attribute
    auto attr_creator = creators[index];
    result.push_back(attr_creator.first);
  }
  return result;
}
}  // namespace kernel
}  // namespace mindspore
```
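The factory is a name-to-list-of-(attribute, creator) registry; Create walks the list and instantiates the first entry whose device types match the node's build info. The shape of the pattern in miniature (toy types, not the MindSpore classes):

```c++
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct Kernel { virtual ~Kernel() = default; };
struct AddKernel : Kernel {};

class Factory {
 public:
  using Creator = std::function<std::shared_ptr<Kernel>()>;
  static Factory &GetInstance() {
    static Factory instance;  // one registry for the whole process
    return instance;
  }
  void Register(const std::string &name, Creator c) { creators_[name].push_back(std::move(c)); }
  std::shared_ptr<Kernel> Create(const std::string &name) {
    auto it = creators_.find(name);
    // A real attribute check would pick a matching entry; here we take the first.
    return (it == creators_.end() || it->second.empty()) ? nullptr : it->second.front()();
  }

 private:
  std::map<std::string, std::vector<Creator>> creators_;
};

int main() {
  Factory::GetInstance().Register("Add", [] { return std::make_shared<AddKernel>(); });
  auto k = Factory::GetInstance().Create("Add");  // non-null shared_ptr
}
```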
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\cpu_kernel.cc code annotations (2)

```c++
BroadcastIterator::BroadcastIterator(std::vector<size_t> input_shape_a, std::vector<size_t> input_shape_b,
                                     std::vector<size_t> output_shape)
    : input_shape_a_(std::move(input_shape_a)),
      input_shape_b_(std::move(input_shape_b)),
      output_shape_(std::move(output_shape)) {
  output_dimension_ = SizeToInt(output_shape_.size());  // store the output rank as int for the iterator
  BroadcastShape();
  // allocate the stride buffers
  input_strides_a_.resize(output_dimension_);
  input_strides_b_.resize(output_dimension_);
  input_back_strides_a_.resize(output_dimension_);
  input_back_strides_b_.resize(output_dimension_);
  coordinates_.resize(output_dimension_);
  InitStrides();
}

void BroadcastIterator::SetPos(size_t pos) {
  // decompose the flat position into per-axis coordinates, last axis first
  for (int i = output_dimension_ - 1; i >= 0 && pos != 0; --i) {
    coordinates_[i] = pos % output_shape_[i];
    input_pos_[0] += coordinates_[i] * input_strides_a_[i];
    input_pos_[1] += coordinates_[i] * input_strides_b_[i];
    pos /= output_shape_[i];
  }
}

void BroadcastIterator::GenNextPos() {
  // advance to the next output coordinate
  for (int i = output_dimension_ - 1; i >= 0; --i) {
    if (coordinates_[i] + 1 == output_shape_[i]) {
      // carry: reset this axis and step the input offsets back
      coordinates_[i] = 0;
      input_pos_[0] -= input_back_strides_a_[i];
      input_pos_[1] -= input_back_strides_b_[i];
    } else {
      ++coordinates_[i];
      input_pos_[0] += input_strides_a_[i];
      input_pos_[1] += input_strides_b_[i];
      break;
    }
  }
}

// pad both input shapes with leading 1s up to the broadcast (output) rank
void BroadcastIterator::BroadcastShape() {
  int input_dimension_a = input_shape_a_.size();
  if (input_dimension_a < output_dimension_) {
    input_shape_a_.insert(input_shape_a_.begin(), output_dimension_ - input_dimension_a, 1);
  }
  int input_dimension_b = input_shape_b_.size();
  if (input_dimension_b < output_dimension_) {
    input_shape_b_.insert(input_shape_b_.begin(), output_dimension_ - input_dimension_b, 1);
  }
}

void BroadcastIterator::InitStrides() {
  input_strides_a_[output_dimension_ - 1] = 1;
  input_strides_b_[output_dimension_ - 1] = 1;
  for (int i = output_dimension_ - 2; i >= 0; --i) {
    input_strides_a_[i] = input_shape_a_[i + 1] * input_strides_a_[i + 1];
    input_strides_b_[i] = input_shape_b_[i + 1] * input_strides_b_[i + 1];
    input_back_strides_a_[i + 1] = (input_shape_a_[i + 1] - 1) * input_strides_a_[i + 1];
    input_back_strides_b_[i + 1] = (input_shape_b_[i + 1] - 1) * input_strides_b_[i + 1];
  }
  // update the broadcast strides:
  // when an axis has extent 1, its stride becomes 0 so the same element is reused
  std::transform(input_strides_a_.begin(), input_strides_a_.end(), input_shape_a_.begin(),
                 input_strides_a_.begin(), [](const auto &a, const auto &b) { return b == 1 ? 0 : a; });
  std::transform(input_strides_b_.begin(), input_strides_b_.end(), input_shape_b_.begin(),
                 input_strides_b_.begin(), [](const auto &a, const auto &b) { return b == 1 ? 0 : a; });
}

TransposeIterator::TransposeIterator(std::vector<size_t> output_shape, std::vector<size_t> axes,
                                     const std::vector<size_t> &input_shape)
    : shape_(std::move(output_shape)), axes_(std::move(axes)) {
  // compute the input strides
  dimension_ = shape_.size();
  std::vector<size_t> strides(dimension_, 1);
  for (int i = dimension_ - 2; i >= 0; --i) {
    strides[i] = input_shape[i + 1] * strides[i + 1];
  }
  // permute shape and strides by the axes, then compute the back strides
  strides_.resize(dimension_);
  back_strides_.resize(dimension_);
  for (int i = dimension_ - 1; i >= 0; --i) {
    strides_[i] = strides[axes_[i]];
    back_strides_[i] = (shape_[i] - 1) * strides_[i];
  }
  // coordinates are derived from pos
  coordinates_.resize(dimension_);
}

void TransposeIterator::SetPos(size_t pos) {
  for (int i = dimension_ - 1; i >= 0 && pos != 0; --i) {
    coordinates_[i] = pos % shape_[i];
    pos_ += coordinates_[i] * strides_[i];
    pos /= shape_[i];
  }
}

void TransposeIterator::GenNextPos() {
  for (int i = dimension_ - 1; i >= 0; --i) {
    if (coordinates_[i] + 1 == shape_[i]) {
      coordinates_[i] = 0;
      pos_ -= back_strides_[i];
    } else {
      coordinates_[i]++;
      pos_ += strides_[i];
      break;
    }
  }
}
}  // namespace kernel
}  // namespace mindspore
```
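The key idea in `InitStrides` is the zero-stride trick: an axis of extent 1 gets stride 0, so advancing along that axis re-reads the same element, which is exactly what broadcasting requires. Below is a minimal standalone sketch of that trick under simple assumptions (plain row-major arrays; `BroadcastStrides` is a hypothetical helper, not a MindSpore function):

```C++
#include <cstdio>
#include <vector>

// Compute row-major strides for `shape`, then zero the stride of every
// axis with extent 1 so that axis is "broadcast" (its element is reused).
std::vector<size_t> BroadcastStrides(const std::vector<size_t> &shape) {
  std::vector<size_t> strides(shape.size(), 1);
  for (int i = static_cast<int>(shape.size()) - 2; i >= 0; --i) {
    strides[i] = shape[i + 1] * strides[i + 1];
  }
  for (size_t i = 0; i < shape.size(); ++i) {
    if (shape[i] == 1) strides[i] = 0;  // the zero-stride broadcast trick
  }
  return strides;
}

int main() {
  // a has shape (2, 1), b has shape (1, 3); the broadcast output is (2, 3)
  std::vector<size_t> shape_a{2, 1}, shape_b{1, 3}, out_shape{2, 3};
  double a[] = {10.0, 20.0};
  double b[] = {1.0, 2.0, 3.0};
  auto sa = BroadcastStrides(shape_a);  // {1, 0}
  auto sb = BroadcastStrides(shape_b);  // {0, 1}
  for (size_t i = 0; i < out_shape[0]; ++i) {
    for (size_t j = 0; j < out_shape[1]; ++j) {
      double v = a[i * sa[0] + j * sa[1]] + b[i * sb[0] + j * sb[1]];
      std::printf("%g ", v);  // prints: 11 12 13 / 21 22 23
    }
    std::printf("\n");
  }
  return 0;
}
```

`BroadcastIterator` layers back-strides on top of this so `GenNextPos` can advance in constant time per step instead of re-deriving both input offsets from the full coordinate vector.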
-
```C++
// include guard: #ifndef prevents double inclusion
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_MOD_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_MOD_H_
// standard library headers
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <vector>
// project header
#include "backend/kernel_compiler/ascend_kernel_mod.h"
// nested namespaces
namespace mindspore {
namespace kernel {
class HostKernelMod : public AscendKernelMod {
  // public members of HostKernelMod
 public:
  HostKernelMod() = default;
  ~HostKernelMod() override = default;
  const std::vector<size_t> &GetInputSizeList() const override;
  const std::vector<size_t> &GetOutputSizeList() const override;
  const std::vector<size_t> &GetWorkspaceSizeList() const override;
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
  std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
                                   const std::vector<AddressPtr> &, uint32_t) override;
  device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override = 0;
  bool Init(const AnfNodePtr &anf_node);

  // protected members of HostKernelMod
 protected:
  AnfNodePtr anf_node_;
  std::string op_name_;
  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
};
// type aliases
using HostKernelModPtr = std::shared_ptr<HostKernelMod>;
using HostKernelModPtrList = std::vector<HostKernelModPtr>;
using HostKernelCreater = std::function<std::shared_ptr<HostKernelMod>()>;
// the HostKernelFactory class
class HostKernelFactory {
  HostKernelFactory() = default;
  ~HostKernelFactory() = default;

  // public methods of HostKernelFactory
 public:
  static HostKernelFactory &Get();
  void Registe(const string &name, HostKernelCreater &&fun);
  static std::shared_ptr<HostKernelMod> Get(const string &name);

  // private members of HostKernelFactory
 private:
  std::map<string, HostKernelCreater> hostKernelMap_;
};
// the _HostKernelRegister class: registers a creator on construction
class _HostKernelRegister {
  // public methods of _HostKernelRegister
 public:
  _HostKernelRegister(const string &name, HostKernelCreater &&fun) {
    HostKernelFactory::Get().Registe(name, std::move(fun));
  }
  ~_HostKernelRegister() = default;
};
// the registration macro: defines a file-scope _HostKernelRegister whose
// constructor stores a creator lambda for the kernel class in the factory
#define _MS_HOST_REG_KERNEL_REG(KNAME, clazz)                                                     \
  static_assert(std::is_base_of<HostKernelMod, clazz>::value, " must be base of HostKernelMod"); \
  static const _HostKernelRegister g_##KNAME##_##_kernel_reg(#KNAME, []() {                      \
    std::shared_ptr<clazz> ptr = nullptr; /* smart pointer to the kernel */                       \
    ptr = std::make_shared<clazz>();                                                              \
    MS_EXCEPTION_IF_NULL(ptr);                                                                    \
    return ptr;                                                                                   \
  });

#define MS_HOST_REG_KERNEL(KNAME, clazz) _MS_HOST_REG_KERNEL_REG(KNAME, clazz)
}  // namespace kernel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_MOD_H_
```

host_kernel_mod.cc (detailed explanation)

```C++
// the matching header
#include "backend/kernel_compiler/host/host_kernel_mod.h"
// standard library headers
#include <memory>
#include <string>
#include <utility>
#include <vector>
// project headers
#include "runtime/mem.h"
#include "utils/ms_context.h"
#include "runtime/device/kernel_runtime.h"
#include "runtime/device/ascend/executor/host_dynamic_kernel.h"
// nested namespaces
namespace mindspore {
namespace kernel {
void HostKernelFactory::Registe(const std::string &name, HostKernelCreater &&fun) {
  // insert into the private hostKernelMap_.
  // std::move marks fun as movable so its resources can be transferred rather
  // than copied; it is equivalent to a static_cast to an rvalue reference type.
  hostKernelMap_.emplace(name, std::move(fun));
}

std::shared_ptr<HostKernelMod> HostKernelFactory::Get(const std::string &name) {
  const auto &map = Get().hostKernelMap_;  // the singleton's registry
  auto it = map.find(name);
  if (it != map.end() && it->second) {  // found a creator: invoke it
    return (it->second)();
  }
  return nullptr;
}

// singleton accessor
HostKernelFactory &HostKernelFactory::Get() {
  static HostKernelFactory instance;
  return instance;
}

// trivial accessors returning the cached size lists of HostKernelMod
const std::vector<size_t> &HostKernelMod::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &HostKernelMod::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &HostKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }

bool HostKernelMod::Init(const AnfNodePtr &anf_node) {
  MS_EXCEPTION_IF_NULL(anf_node);
  // compute the byte size of every input and output
  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
  for (size_t i = 0; i < input_num; i++) {  // walk the inputs
    std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
    TypePtr type_ptr = TypeIdToType(AnfAlgo::GetInputDeviceDataType(anf_node, i));
    MS_EXCEPTION_IF_NULL(type_ptr);
    int64_t size_i = 1;  // element count, accumulated with overflow checks
    for (size_t j = 0; j < shape_i.size(); j++) {
      size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
    }
    size_t type_byte = GetTypeByte(type_ptr);
    if (type_byte == 0) {  // unknown element size: fail
      return false;
    }
    size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
    input_size_list_.push_back(LongToSize(size_i));
  }
  // walk the outputs the same way as the inputs above
  for (size_t i = 0; i < output_num; i++) {
    std::vector<size_t> shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
    TypePtr type_ptr = TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, i));
    MS_EXCEPTION_IF_NULL(type_ptr);
    int64_t size_i = 1;
    for (size_t j = 0; j < shape_i.size(); j++) {
      size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
    }
    size_t type_byte = GetTypeByte(type_ptr);
    if (type_byte == 0) {
      return false;
    }
    size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
    output_size_list_.push_back(LongToSize(size_i));
  }
  return true;
}

// deliberately empty bodies: host kernels do their real work in
// GenDynamicKernel, so Launch and GenTask only need to exist and succeed
bool HostKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                           const std::vector<AddressPtr> &outputs, void *stream_ptr) {
  return true;
}
std::vector<TaskInfoPtr> HostKernelMod::GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
                                                const std::vector<AddressPtr> &, uint32_t) {
  return {};
}
}  // namespace kernel
}  // namespace mindspore
```
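`MS_HOST_REG_KERNEL` works through static registration: expanding the macro defines a file-scope `_HostKernelRegister` whose constructor runs before `main()` and stores a creator lambda in the factory map. The sketch below reproduces that idiom with invented names (`DemoKernel`, `DemoFactory`, `DEMO_REG_KERNEL`); it illustrates the mechanism and is not MindSpore code.

```C++
#include <cstdio>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>

// Illustrative stand-ins; none of these are MindSpore names.
struct DemoKernel {
  virtual ~DemoKernel() = default;
  virtual void Run() = 0;
};

class DemoFactory {
 public:
  using Creator = std::function<std::shared_ptr<DemoKernel>()>;
  static DemoFactory &Get() {
    static DemoFactory instance;  // function-local static singleton
    return instance;
  }
  void Register(const std::string &name, Creator &&fun) { registry_.emplace(name, std::move(fun)); }
  std::shared_ptr<DemoKernel> Create(const std::string &name) {
    auto it = registry_.find(name);
    return it != registry_.end() ? it->second() : nullptr;
  }

 private:
  std::map<std::string, Creator> registry_;
};

// The registrar: every static instance's constructor runs before main(),
// so each DEMO_REG_KERNEL use self-registers its class with the factory.
struct DemoRegister {
  DemoRegister(const std::string &name, DemoFactory::Creator &&fun) {
    DemoFactory::Get().Register(name, std::move(fun));
  }
};

#define DEMO_REG_KERNEL(KNAME, clazz)                                                          \
  static_assert(std::is_base_of<DemoKernel, clazz>::value, #clazz " must derive DemoKernel");  \
  static const DemoRegister g_##KNAME##_reg(#KNAME, [] { return std::make_shared<clazz>(); });

struct PrintKernel : DemoKernel {
  void Run() override { std::printf("PrintKernel::Run\n"); }
};
DEMO_REG_KERNEL(Print, PrintKernel)

int main() {
  auto k = DemoFactory::Get().Create("Print");
  if (k != nullptr) k->Run();  // prints "PrintKernel::Run"
  return 0;
}
```

The `static_assert` plays the same role as in `_MS_HOST_REG_KERNEL_REG`: it rejects registration of a class that does not derive from the kernel base at compile time, before any factory lookup can go wrong at runtime.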