-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\gather_d_grad_cpu_kernel.cc annotated

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// project headers
#include "backend/kernel_compiler/cpu/gather_d_grad_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
namespace {            // anonymous namespace
// compute the number of elements described by a shape
size_t get_element_num(const std::vector<size_t> &shape) {
  size_t size = 1;  // initialize the accumulator
  for (size_t i = 0; i < shape.size(); i++) {  // multiply all dimensions together
    size *= shape[i];
  }
  return size;
}

// recursive copy task: walks every output coordinate and scatters the gradient
template <typename T, typename I>
void GatherDGradCopyTask(size_t cur, std::vector<size_t> *pos, T *input, I *index, const int &dim, T *output,
                         const std::vector<size_t> &output_shape, const std::vector<size_t> &out_cargo_size,
                         const std::vector<size_t> &input_cargo_size) {
  for (size_t i = 0; i < output_shape[cur]; ++i) {
    (*pos)[cur] = i;
    if (cur == output_shape.size() - 1) {
      size_t input_offset = 0;
      size_t out_offset = 0;
      // output offset
      for (size_t j = 0; j < output_shape.size(); ++j) {
        out_offset += (*pos)[j] * out_cargo_size[j];
      }
      // input offset: swap the coordinate along `dim` for the stored index
      size_t cur_index = (*pos)[dim];
      (*pos)[dim] = index[out_offset];
      for (size_t j = 0; j < output_shape.size(); ++j) {
        input_offset += (*pos)[j] * input_cargo_size[j];
      }
      // accumulate the gradient
      input[input_offset] += output[out_offset];
      (*pos)[dim] = cur_index;
    } else {
      // recurse into the next dimension
      GatherDGradCopyTask(cur + 1, pos, input, index, dim, output, output_shape, out_cargo_size, input_cargo_size);
    }
  }
}
}  // namespace

// validate the input shapes and cache the kernel attributes
template <typename I, typename T>
void GatherDGradCPUKernel<I, T>::InitKernel(const CNodePtr &kernel_node) {
  index_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
  input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
  if (input_shape_ != index_shape_) {
    MS_LOG(EXCEPTION) << "Invalid shape size, input and index shape should be equal";
  }
  axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, DIM);       // the gather dimension
  output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);  // shape of the output
}

template <typename I, typename T>
bool GatherDGradCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                        const std::vector<kernel::AddressPtr> & /*workspace*/,
                                        const std::vector<kernel::AddressPtr> &outputs) {
  size_t input_size = get_element_num(input_shape_) * sizeof(T);    // input buffer size in bytes
  size_t index_size = get_element_num(index_shape_) * sizeof(I);    // index buffer size in bytes
  size_t output_size = get_element_num(output_shape_) * sizeof(T);  // output buffer size in bytes
  if (inputs[0]->size != index_size || inputs[1]->size != input_size || outputs[0]->size != output_size) {
    MS_LOG(EXCEPTION) << "invalid input or output data size!";
    return false;
  }
  auto index = reinterpret_cast<I *>(inputs[0]->addr);  // index buffer
  auto input = reinterpret_cast<T *>(inputs[1]->addr);  // incoming gradient buffer
  auto out = reinterpret_cast<T *>(outputs[0]->addr);   // output buffer
  int output_rank = SizeToInt(output_shape_.size());
  if (axis_ >= output_rank || axis_ < -output_rank) {
    MS_LOG(EXCEPTION) << "The value of 'axis_' should be in [" << -output_rank << ", " << output_rank
                      << "], but got: " << axis_;
    return false;
  }
  if (axis_ < 0) {
    axis_ = axis_ + SizeToInt(output_shape_.size());  // wrap a negative axis
  }

  // check the index values
  index_size = get_element_num(index_shape_);
  int max_index = SizeToInt(output_shape_[axis_]);
  for (size_t i = 0; i < index_size; ++i) {
    if (index[i] >= max_index || index[i] < -max_index) {
      MS_LOG(EXCEPTION) << "The value of index should be in [" << -max_index << ", " << max_index
                        << "], but got: " << index[i];
      return false;
    }
    if (index[i] < 0) {
      index[i] = max_index + index[i];  // wrap negative indices
    }
  }

  auto out_size = get_element_num(output_shape_);
  memset_s(out, out_size * sizeof(T), 0x00, out_size * sizeof(T));

  // strides ("cargo sizes") of the output shape
  std::vector<size_t> out_cargo_size = std::vector<size_t>(output_shape_.size(), 1);
  for (int i = out_cargo_size.size() - 2; i >= 0; --i) {
    out_cargo_size[i] = output_shape_[i + 1] * out_cargo_size[i + 1];
  }
  // strides of the input shape
  std::vector<size_t> input_cargo_size = std::vector<size_t>(input_shape_.size(), 1);
  for (int i = input_cargo_size.size() - 2; i >= 0; --i) {
    input_cargo_size[i] = input_shape_[i + 1] * input_cargo_size[i + 1];
  }

  // scatter the incoming gradient back into the zeroed output buffer
  std::vector<size_t> pos(index_shape_.size(), 0);
  GatherDGradCopyTask(0, &pos, out, index, axis_, input, index_shape_, input_cargo_size, out_cargo_size);
  return true;
}
}  // namespace kernel
}  // namespace mindspore
```
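The "cargo size" vectors are plain row-major strides: cargo[i] is how many elements one step along dimension i skips, so the flat offset of a coordinate is the dot product of position and stride. A standalone sketch of that computation (hypothetical helper name, not part of the kernel):

```c++
#include <cstddef>
#include <iostream>
#include <vector>

// Row-major strides: stride[i] is the product of all dimensions after i.
std::vector<size_t> CargoSize(const std::vector<size_t> &shape) {
  std::vector<size_t> cargo(shape.size(), 1);
  for (int i = static_cast<int>(cargo.size()) - 2; i >= 0; --i) {
    cargo[i] = shape[i + 1] * cargo[i + 1];
  }
  return cargo;
}

int main() {
  for (size_t s : CargoSize({2, 3, 4})) {
    std::cout << s << ' ';  // prints "12 4 1"
  }
}
```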
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\gather_d_cpu_kernel.cc annotated

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "backend/kernel_compiler/cpu/gather_d_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
namespace {            // anonymous namespace
// compute the number of elements described by a shape
size_t get_element_num(const std::vector<size_t> &shape) {
  size_t size = 1;
  for (size_t i = 0; i < shape.size(); i++) {  // multiply all dimensions together
    size *= shape[i];
  }
  return size;  // total element count
}

// recursive copy task shared by the forward and reverse directions
template <typename T, typename I>
void CopyTask(size_t cur, std::vector<size_t> *pos, T *input, const I *index, const int &dim, T *output,
              const std::vector<size_t> &output_shape, const std::vector<size_t> &out_cargo_size,
              const std::vector<size_t> &input_cargo_size, bool reverse) {
  for (size_t i = 0; i < output_shape[cur]; ++i) {
    (*pos)[cur] = i;
    if (cur == output_shape.size() - 1) {
      size_t input_offset = 0;
      size_t out_offset = 0;
      // output offset
      for (size_t j = 0; j < output_shape.size(); ++j) {
        out_offset += (*pos)[j] * out_cargo_size[j];
      }
      // input offset
      size_t cur_index = (*pos)[dim];
      (*pos)[dim] = index[out_offset];
      for (size_t j = 0; j < output_shape.size(); ++j) {
        input_offset += (*pos)[j] * input_cargo_size[j];
      }
      // copy in the requested direction
      if (reverse) {
        input[input_offset] = output[out_offset];
      } else {
        output[out_offset] = input[input_offset];
      }
      (*pos)[dim] = cur_index;
    } else {
      // recurse into the next dimension
      CopyTask(cur + 1, pos, input, index, dim, output, output_shape, out_cargo_size, input_cargo_size, reverse);
    }
  }
}
}  // namespace

template <typename T, typename I>
void GatherDCPUKernel<T, I>::InitKernel(const CNodePtr &kernel_node) {
  input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
  index_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
  if (input_shape_.size() != index_shape_.size()) {  // ranks of input and index must match
    MS_LOG(EXCEPTION) << "Invalid shape size, shape size of input: " << input_shape_.size()
                      << ", and index: " << index_shape_.size() << " should be equal";
  }
  output_shape_ = index_shape_;  // the output has the same shape as the index
}

template <typename T, typename I>
bool GatherDCPUKernel<T, I>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                    const std::vector<kernel::AddressPtr> & /*workspace*/,
                                    const std::vector<kernel::AddressPtr> &outputs) {
  size_t input_size = get_element_num(input_shape_) * sizeof(T);    // input buffer size in bytes
  size_t index_size = get_element_num(index_shape_) * sizeof(I);    // index buffer size in bytes
  size_t dim_size = sizeof(int);                                    // the dim input is a single int
  size_t output_size = get_element_num(output_shape_) * sizeof(T);  // output buffer size in bytes
  if (inputs[0]->size != input_size || inputs[1]->size != dim_size || inputs[2]->size != index_size ||
      outputs[0]->size != output_size) {
    MS_LOG(EXCEPTION) << "invalid input or output data size!";
    return false;
  }
  auto input = reinterpret_cast<T *>(inputs[0]->addr);      // input buffer
  auto dim = reinterpret_cast<int32_t *>(inputs[1]->addr);  // gather dimension
  auto index = reinterpret_cast<I *>(inputs[2]->addr);      // index buffer
  auto output = reinterpret_cast<T *>(outputs[0]->addr);    // output buffer
  int32_t input_rank = SizeToInt(input_shape_.size());      // rank of the input
  if (dim[0] >= input_rank || dim[0] < -input_rank) {
    MS_LOG(EXCEPTION) << "The value of 'dim' should be in [" << -input_rank << ", " << input_rank
                      << "], but got: " << dim[0];
    return false;
  }
  if (dim[0] < 0) {
    dim[0] = static_cast<int>(dim[0] + input_rank);  // wrap a negative dim
  }

  // check the index values
  int max_index = SizeToInt(input_shape_[dim[0]]);
  index_size = get_element_num(index_shape_);
  for (size_t i = 0; i < index_size; ++i) {
    if (index[i] >= max_index || index[i] < -max_index) {
      MS_LOG(EXCEPTION) << "The value of index should be in [" << -max_index << ", " << max_index
                        << "], but got: " << index[i];
      return false;
    }
    if (index[i] < 0) {
      index[i] = max_index + index[i];  // wrap negative indices
    }
  }

  // strides of the output shape
  std::vector<size_t> out_cargo_size = std::vector<size_t>(output_shape_.size(), 1);
  for (int i = out_cargo_size.size() - 2; i >= 0; --i) {
    out_cargo_size[i] = output_shape_[i + 1] * out_cargo_size[i + 1];
  }
  // strides of the input shape
  std::vector<size_t> input_cargo_size = std::vector<size_t>(input_shape_.size(), 1);
  for (int i = input_cargo_size.size() - 2; i >= 0; --i) {
    input_cargo_size[i] = input_shape_[i + 1] * input_cargo_size[i + 1];
  }

  // run the copy task in the forward direction
  std::vector<size_t> pos(index_shape_.size(), 0);
  int copy_dim = *dim;
  CopyTask<T, I>(0, &pos, input, index, copy_dim, output, output_shape_, out_cargo_size, input_cargo_size, false);
  return true;
}
}  // namespace kernel
}  // namespace mindspore
```
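Stripped of the recursion, CopyTask in the forward direction implements the usual GatherD rule: every output coordinate is copied from the input coordinate obtained by swapping the position along `dim` for the value stored in `index`. A self-contained 2-D illustration (plain arrays rather than the kernel's buffers):

```c++
#include <iostream>

int main() {
  // GatherD with dim = 1 on a 2x3 input: output[i][j] = input[i][index[i][j]].
  int input[2][3] = {{10, 11, 12}, {20, 21, 22}};
  int index[2][3] = {{2, 1, 0}, {0, 0, 2}};
  int output[2][3];
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) {
      output[i][j] = input[i][index[i][j]];
    }
  }
  // Row 0 becomes {12, 11, 10}; row 1 becomes {20, 20, 22}.
  for (auto &row : output) {
    for (int v : row) std::cout << v << ' ';
    std::cout << '\n';
  }
}
```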
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\gather_cpu_kernel.cc annotated

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// project headers
#include "backend/kernel_compiler/cpu/gather_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "nnacl/gather_parameter.h"
#include "nnacl/base/gather_base.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
void GatherV2CPUKernel::InitKernel(const CNodePtr &kernel_node) {  // initialize the kernel
  CheckParam(kernel_node);  // validate the arguments
  input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);    // input shape
  indices_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);  // indices shape
  output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);           // output shape
  axis_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);               // read the axis attribute
  if (axis_ < 0) {  // wrap a negative axis
    axis_ = axis_ + SizeToLong(input_shape_.size());
  }
  axis_ += 4 - input_shape_.size();  // shift the axis after the shapes are padded to 4-D
  CPUKernelUtils::ExpandDimsTo4(&input_shape_);
  CPUKernelUtils::ExpandDimsTo4(&output_shape_);
}

int GatherV2CPUKernel::GatherLaunch(int8_t *input_data, int8_t *output_data, size_t size) {
  int in_rank = input_shape_.size();              // rank of the input
  int indices_element_size = 1;                   // total number of indices
  const int limit = input_shape_.at(axis_);       // extent of the gathered axis
  size_t data_size = sizeof(kNumberTypeFloat32);  // byte width of one element
  int outer_size = 1, inner_size = 1;
  for (int i = 0; i < axis_; ++i) {  // product of the dimensions before the axis
    outer_size *= input_shape_.at(i);
  }
  for (int i = axis_ + 1; i < in_rank; ++i) {  // product of the dimensions after the axis
    inner_size *= input_shape_.at(i);
  }
  for (size_t i = 0; i < indices_shape_.size(); i++) {  // count the indices
    indices_element_size *= indices_shape_.at(i);
  }
  int stride = UP_DIV(outer_size, size);       // outer rows per worker (ceiling division)
  auto task = [&](size_t start, size_t end) {  // work item for ParallelFor
    for (size_t i = start; i < end; i++) {
      int8_t *int8_in = input_data;    // input bytes
      int8_t *int8_out = output_data;  // output bytes
      int count = MSMIN(stride, static_cast<int>(outer_size - stride * i));
      if (count <= 0) {  // the tail worker may have no rows left
        return;
      }
      auto thread_stride = stride * i;  // first outer row handled by this worker
      int8_in += thread_stride * limit * inner_size * data_size;                  // input bytes to skip
      int8_out += thread_stride * indices_element_size * inner_size * data_size;  // output bytes to skip
      auto error_code = Gather(int8_in, count, inner_size, limit, indices_data_, indices_element_size, int8_out,
                               sizeof(float));
      if (error_code != 0) {
        MS_LOG(ERROR) << "GatherRun error task_id[" << i << "] error_code[" << error_code << "]";
      }
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
  return 0;
}

// launch the kernel
bool GatherV2CPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                               const std::vector<kernel::AddressPtr> & /*workspace*/,
                               const std::vector<kernel::AddressPtr> &outputs) {
  int8_t *input_tensor = reinterpret_cast<int8_t *>(inputs[0]->addr);  // input tensor
  indices_data_ = reinterpret_cast<int32_t *>(inputs[1]->addr);        // indices
  int8_t *output_addr = reinterpret_cast<int8_t *>(outputs[0]->addr);  // output buffer
  size_t size = (outputs[0]->size > 0) ? static_cast<size_t>(outputs[0]->size / sizeof(int8_t)) : 1;
  GatherLaunch(input_tensor, output_addr, size);
  return true;
}

// validate the arguments
void GatherV2CPUKernel::CheckParam(const CNodePtr &kernel_node) {
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  if (input_shape.size() > 4) {  // only rank 4 or lower is supported
    MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size() << ", but GatherV2CPUKernel olny support 4d or lower.";
  }
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 2) {  // exactly two inputs are expected
    MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but GatherV2CPUKernel needs 2.";
  }
}
}  // namespace kernel
}  // namespace mindspore
```
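GatherLaunch divides the outer dimension among the ParallelFor workers with UP_DIV (ceiling division), so the last workers can receive a short or even empty slice; that is what the `count <= 0` early return guards against. The partition arithmetic on its own (a sketch with made-up sizes):

```c++
#include <algorithm>
#include <cstdio>

#define UP_DIV(x, y) (((x) + (y) - 1) / (y))  // ceiling division

int main() {
  int outer_size = 10, workers = 4;
  int stride = UP_DIV(outer_size, workers);  // 3 outer rows per worker
  for (int i = 0; i < workers; ++i) {
    int count = std::min(stride, outer_size - stride * i);
    if (count <= 0) continue;  // trailing workers may get nothing
    std::printf("worker %d handles rows [%d, %d)\n", i, stride * i, stride * i + count);
  }
}
```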
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\embedding_look_up_cpu_kernel.cc annotated

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// standard headers
#include <string>
#include <thread>
// project headers
#include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "ir/primitive.h"
#include "common/thread_pool.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
namespace {            // anonymous namespace
// copy the table rows selected by one slice of indices; out-of-range rows become zeros
template <typename T>
void LookUpTableTask(const float *input_addr, const T *indices_addr, float *output_addr, size_t indices_lens,
                     size_t outer_dim_size, T offset, size_t first_dim_size) {
  auto type_size = sizeof(float);            // byte width of one element
  size_t lens = outer_dim_size * type_size;  // byte length of one table row
  for (size_t i = 0; i < indices_lens; ++i) {
    T index = indices_addr[i] - offset;  // table row selected by this index
    if (index >= 0 && index < SizeToInt(first_dim_size)) {
      size_t pos = index * outer_dim_size;  // element offset of the selected row
      auto ret = memcpy_s(output_addr, (indices_lens - i) * lens, input_addr + pos, lens);
      if (ret != EOK) {
        MS_LOG(EXCEPTION) << "LookUpTable task memcpy failed.";
      }
    } else {
      auto ret = memset_s(output_addr, (indices_lens - i) * lens, 0, lens);
      if (ret != EOK) {
        MS_LOG(EXCEPTION) << "LookUpTable task memset failed.";
      }
    }
    output_addr += outer_dim_size;  // advance to the next output row
  }
}
}  // namespace

void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  CheckParam(kernel_node);
  node_wpt_ = kernel_node;
  std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  if (input_shape.empty()) {
    MS_LOG(EXCEPTION) << "param must be at least 1D";  // the table must be at least one-dimensional
  }
  first_dim_size_ = input_shape[0];  // number of table rows
  outer_dim_size_ = 1;
  for (size_t i = 1; i < input_shape.size(); ++i) {  // elements per table row
    outer_dim_size_ *= input_shape[i];
  }
  indices_lens_ = 1;
  std::vector<size_t> indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
  for (const auto &shape : indices_shape) {  // total number of indices
    indices_lens_ *= shape;
  }
  indices_data_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 1);  // data type of the indices
  if (AnfAlgo::HasNodeAttr(kAttrOffset, kernel_node)) {
    offset_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, kAttrOffset);
  }
}

// launch the typed kernel
template <typename T>
void EmbeddingLookUpCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
                                            const std::vector<kernel::AddressPtr> &outputs) {
  if (!node_wpt_.expired()) {
    auto node_ = node_wpt_.lock();
    if (!node_) {
      MS_LOG(EXCEPTION) << "node_wpt_ is expired.";  // the node is no longer alive
    }
    std::vector<size_t> input_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 0);
    if (input_shape.empty()) {
      MS_LOG(EXCEPTION) << "param must be at least 1D";
    }
    first_dim_size_ = input_shape[0];
    outer_dim_size_ = 1;
    for (size_t i = 1; i < input_shape.size(); ++i) {
      outer_dim_size_ *= input_shape[i];
    }
    indices_lens_ = 1;
    std::vector<size_t> indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 1);
    for (const auto &shape : indices_shape) {  // recompute the number of indices
      indices_lens_ *= shape;
    }
  }
  auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);    // table address
  auto indices_addr = reinterpret_cast<T *>(inputs[1]->addr);      // indices address
  auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);  // output address
  size_t thread_num = indices_lens_ / 10000 + 1;  // roughly one thread per 10000 indices
  auto max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum();  // pool capacity
  thread_num = thread_num > max_thread_num ? max_thread_num : thread_num;  // clamp to the pool capacity
  std::vector<common::Task> tasks;
  size_t task_proc_lens = (indices_lens_ + thread_num - 1) / thread_num;  // indices per task
  size_t i;
  size_t task_offset = 0;  // first index handled by the next task
  MS_LOG(DEBUG) << "indices_lens_: " << indices_lens_ << " one task proc lens:" << task_proc_lens;
  for (i = 0; i < thread_num; i++) {
    if (task_offset >= indices_lens_) {  // every index is already assigned
      break;
    }
    MS_LOG(DEBUG) << "task_offset: " << task_offset << " task_proc_lenss:" << task_proc_lens;
    auto task = [input_addr, indices_addr, output_addr, task_offset, task_proc_lens, this]() {
      LookUpTableTask(input_addr, indices_addr + task_offset, output_addr + task_offset * outer_dim_size_,
                      task_proc_lens, outer_dim_size_, offset_, first_dim_size_);
      return common::SUCCESS;
    };
    tasks.emplace_back(task);
    task_offset += task_proc_lens;
    if (task_offset + task_proc_lens > indices_lens_) {  // shrink the last slice
      task_proc_lens = indices_lens_ - task_offset;
    }
  }
  common::ThreadPool::GetInstance().SyncRun(tasks);
}

// dispatch on the index data type
bool EmbeddingLookUpCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                      const std::vector<kernel::AddressPtr> & /*workspace*/,
                                      const std::vector<kernel::AddressPtr> &outputs) {
  if (indices_data_type_ == kNumberTypeInt32) {
    LaunchKernel<int>(inputs, outputs);
  } else {
    LaunchKernel<int64_t>(inputs, outputs);
  }
  return true;
}

void EmbeddingLookUpCPUKernel::CheckParam(const CNodePtr &kernel_node) {
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  if (input_shape.size() > 4) {  // only rank 4 or lower is supported
    MS_LOG(EXCEPTION) << "Input dims is " << input_shape.size()
                      << ", but EmbeddingLookUpCPUKernel only support 4d or lower.";
  }
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 2) {  // exactly two inputs are expected
    MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookUpCPUKernel needs 2.";
  }
}
}  // namespace kernel
}  // namespace mindspore
```
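LookUpTableTask treats the first input as a table of first_dim_size rows, each outer_dim_size elements long: every (offset-shifted) index copies one row, and an out-of-range index produces a zero row. The same semantics without the raw pointers and memcpy_s, as a hedged sketch:

```c++
#include <cstddef>
#include <vector>

// Simplified model of the lookup: one output row per index,
// zeros when the shifted index falls outside the table.
std::vector<float> LookUp(const std::vector<std::vector<float>> &table,
                          const std::vector<int> &indices, int offset) {
  size_t row_len = table[0].size();
  std::vector<float> out;
  for (int idx : indices) {
    int row = idx - offset;
    if (row >= 0 && row < static_cast<int>(table.size())) {
      out.insert(out.end(), table[row].begin(), table[row].end());  // copy the row
    } else {
      out.insert(out.end(), row_len, 0.0f);  // out of range: zero fill
    }
  }
  return out;
}
```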
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\embedding_look_up_comm_grad_cpu_kernel.cc annotated

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// standard headers
#include <thread>
// project headers
#include "backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "runtime/device/cpu/mpi/mpi_interface.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
void EmbeddingLookUpCommGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  CheckParam(kernel_node);
  split_num_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "split_num");  // number of slices
  MS_LOG(INFO) << "split_num: " << split_num_;  // log the slice count
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);  // input shape
  if (input_shape[0] % split_num_ != 0) {  // the first dimension must divide evenly into the slices
    MS_LOG(EXCEPTION) << "Input shape[0] is " << input_shape[0] << ", but it must be multiple of split_num.";
  }
}

bool EmbeddingLookUpCommGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                              const std::vector<kernel::AddressPtr> & /*workspace*/,
                                              const std::vector<kernel::AddressPtr> &outputs) {
#if defined(_WIN32) || defined(_WIN64)
  auto start_time = std::chrono::steady_clock::now();  // record the start time
#else
  struct timeval start_time, end_time;  // start and end timestamps
  (void)gettimeofday(&start_time, nullptr);
#endif
  auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
  auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
  size_t input_size = inputs[0]->size;
  size_t output_size = outputs[0]->size;
  MS_LOG(DEBUG) << "input addr: " << input_addr << "input size: " << input_size;     // log the input buffer
  MS_LOG(DEBUG) << "output addr: " << output_addr << "output size: " << output_size; // log the output buffer
  memset_s(output_addr, output_size, 0, output_size);
  const std::vector<int> &rank_group = {0, 1, 2, 3, 4, 5, 6, 7};  // ranks taking part in the all-gather
  size_t input_split_lens = input_size / LongToSize(split_num_) / sizeof(float_t);    // floats per input slice
  size_t output_split_lens = output_size / LongToSize(split_num_) / sizeof(float_t);  // floats per output slice
  for (int64_t i = 0; i < split_num_; i++) {  // all-gather each slice independently
    MPIAllGather(input_addr + i * input_split_lens, output_addr + i * output_split_lens, rank_group,
                 input_split_lens);
  }
#if defined(_WIN32) || defined(_WIN64)
  auto end_time = std::chrono::steady_clock::now();  // record the end time
  std::chrono::duration<double, std::ratio<1, 1000000>> cost = end_time - start_time;  // elapsed microseconds
  MS_LOG(INFO) << "EmbeddingLookUpCommGradCPUKernel, used time: " << cost.count() << " us";
#else
  (void)gettimeofday(&end_time, nullptr);
  uint64_t time = 1000000 * static_cast<uint64_t>(end_time.tv_sec - start_time.tv_sec);
  time += static_cast<uint64_t>(end_time.tv_usec - start_time.tv_usec);
  MS_LOG(INFO) << "EmbeddingLookUpCommGradCPUKernel, used time: " << time << " us";
#endif
  return true;
}

void EmbeddingLookUpCommGradCPUKernel::CheckParam(const CNodePtr &kernel_node) {
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 1) {  // exactly one input is expected
    MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but EmbeddingLookUpCommGradCPUKernel needs 1.";
  }
}
}  // namespace kernel
}  // namespace mindspore
```
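Each of the split_num_ slices is gathered independently, so slice i of every rank's input lands in slice i of the output; the per-slice lengths come from dividing the byte sizes by the slice count and the element width. The offset arithmetic on its own (hypothetical sizes, not taken from the kernel):

```c++
#include <cstdio>

int main() {
  size_t input_size = 4096, output_size = 32768;  // bytes; output holds 8 ranks' worth
  long long split_num = 4;
  size_t in_lens = input_size / split_num / sizeof(float);    // 256 floats per input slice
  size_t out_lens = output_size / split_num / sizeof(float);  // 2048 floats per output slice
  for (long long i = 0; i < split_num; ++i) {
    std::printf("slice %lld: input offset %zu, output offset %zu\n", i, i * in_lens, i * out_lens);
  }
}
```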
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\eltwise_grad_cpu_kernel.cc annotated (part 2)

```c++
// gradient of arctangent: dx = dy / (1 + x^2)
template <typename T>
void EltWiseGradCPUKernel<T>::AtanGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  for (size_t i = start; i < end; i++) {
    T dividend = input2[i];                 // incoming gradient dy
    T divisor = 1 + input1[i] * input1[i];  // 1 + x^2
    if (divisor == 0) {
      if (dividend == 0) {  // 0 / 0: no meaningful value
        out[i] = std::numeric_limits<T>::quiet_NaN();
        continue;
      }
      if (std::numeric_limits<T>::has_infinity) {
        out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
      } else {
        out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
      }
      continue;
    }
    out[i] = dividend / divisor;
  }
}

// gradient of inverse hyperbolic sine: dx = dy / sqrt(1 + x^2)
template <typename T>
void EltWiseGradCPUKernel<T>::AsinhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  for (size_t i = start; i < end; i++) {
    T dividend = input2[i];
    T divisor = sqrt(1 + input1[i] * input1[i]);
    if (divisor == 0) {
      if (dividend == 0) {  // 0 / 0: no meaningful value
        out[i] = std::numeric_limits<T>::quiet_NaN();
        continue;
      }
      if (std::numeric_limits<T>::has_infinity) {
        out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
      } else {
        out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
      }
      continue;
    }
    out[i] = dividend / divisor;
  }
}

// gradient of inverse hyperbolic cosine: dx = dy / sqrt(x^2 - 1)
template <typename T>
void EltWiseGradCPUKernel<T>::AcoshGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  for (size_t i = start; i < end; i++) {
    T dividend = input2[i];
    T divisor = sqrt(input1[i] * input1[i] - 1);
    if (divisor == 0) {
      if (dividend == 0) {  // 0 / 0: no meaningful value
        out[i] = std::numeric_limits<T>::quiet_NaN();
        continue;
      }
      if (std::numeric_limits<T>::has_infinity) {
        out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
      } else {
        out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
      }
      continue;
    }
    out[i] = dividend / divisor;
  }
}

// initialize the kernel
template <typename T>
void EltWiseGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);                  // the node must not be null
  kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);  // cache the kernel name
}

template <typename T>
bool EltWiseGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                     const std::vector<kernel::AddressPtr> &,
                                     const std::vector<kernel::AddressPtr> &outputs) {
  // table mapping the node name to the member function that computes its gradient
  static const std::map<std::string,
                        std::function<void(EltWiseGradCPUKernel *, const T *, const T *, T *, size_t, size_t)>>
    elt_map{{"ReluGrad", &EltWiseGradCPUKernel<T>::ReluGrad},       {"ReLU6Grad", &EltWiseGradCPUKernel<T>::ReLU6Grad},
            {"SigmoidGrad", &EltWiseGradCPUKernel<T>::SigmoidGrad}, {"AbsGrad", &EltWiseGradCPUKernel<T>::AbsGrad},
            {"TanhGrad", &EltWiseGradCPUKernel<T>::TanhGrad},       {"SqrtGrad", &EltWiseGradCPUKernel<T>::SqrtGrad},
            {"GeLUGrad", &EltWiseGradCPUKernel<T>::GeluGrad},       {"AsinGrad", &EltWiseGradCPUKernel<T>::AsinGrad},
            {"ACosGrad", &EltWiseGradCPUKernel<T>::ACosGrad},       {"AtanGrad", &EltWiseGradCPUKernel<T>::AtanGrad},
            {"AsinhGrad", &EltWiseGradCPUKernel<T>::AsinhGrad},     {"AcoshGrad", &EltWiseGradCPUKernel<T>::AcoshGrad}};
  const auto *input1 = reinterpret_cast<T *>(inputs[0]->addr);  // first input buffer
  const auto *input2 = reinterpret_cast<T *>(inputs[1]->addr);  // second input buffer
  auto *output = reinterpret_cast<T *>(outputs[0]->addr);       // output buffer
  size_t count = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1;
  CPUKernelUtils::ParallelFor(
    std::bind(elt_map.at(kernel_name_), this, input1, input2, output, std::placeholders::_1, std::placeholders::_2),
    count);
  return true;
}
}  // namespace kernel
}  // namespace mindspore
```
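All three kernels apply the textbook inverse-function derivatives (atan'(x) = 1/(1+x^2), asinh'(x) = 1/sqrt(1+x^2), acosh'(x) = 1/sqrt(x^2-1)); the zero-divisor branches handle the poles. A quick finite-difference check of the arctangent rule (standalone sketch):

```c++
#include <cmath>
#include <cstdio>

// Backward rule of atan: dx = dy / (1 + x * x).
float AtanGradOne(float x, float dy) { return dy / (1.0f + x * x); }

int main() {
  // Compare against a central finite difference at x = 0.5.
  float x = 0.5f, eps = 1e-3f;
  float numeric = (std::atan(x + eps) - std::atan(x - eps)) / (2 * eps);
  std::printf("analytic = %f, numeric = %f\n", AtanGradOne(x, 1.0f), numeric);
}
```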
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\eltwise_grad_cpu_kernel.cc annotated (part 1)

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// standard headers
#include <map>
#include <string>
// project headers
#include "backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h"
#include "common/thread_pool.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "nnacl/fp32_grad/activation_grad.h"
#include "nnacl/fp32_grad/arithmetic_grad.h"
#include "nnacl/errorcode.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
template <typename T>
void EltWiseGradCPUKernel<T>::ReluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  if constexpr (std::is_same_v<T, float>) {
    int ret = ::ReluGrad(input1 + start, input2 + start, end - start, out + start);  // nnacl implementation
    if (ret == NNACL_ERR) {
      MS_LOG(EXCEPTION) << "ReLUGrad failed.";
    }
  } else {
    MS_LOG(EXCEPTION) << "ReLUGrad only support float";  // only float is supported
  }
}

template <typename T>
void EltWiseGradCPUKernel<T>::ReLU6Grad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  if constexpr (std::is_same_v<T, float>) {
    int ret = ::Relu6Grad(input1 + start, input2 + start, end - start, out + start);
    if (ret == NNACL_ERR) {
      MS_LOG(EXCEPTION) << "ReLU6Grad failed.";
    }
  } else {
    MS_LOG(EXCEPTION) << "ReLU6Grad only support float";  // only float is supported
  }
}

template <typename T>
void EltWiseGradCPUKernel<T>::AbsGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  if constexpr (std::is_same_v<T, float>) {
    int ret = ::ElementAbsGrad(input1 + start, input2 + start, out + start, end - start);
    if (ret == NNACL_ERR) {
      MS_LOG(EXCEPTION) << "AbsGrad failed.";
    }
  } else {
    MS_LOG(EXCEPTION) << "AbsGrad only support float";  // only float is supported
  }
}

template <typename T>
void EltWiseGradCPUKernel<T>::SigmoidGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  if constexpr (std::is_same_v<T, float>) {
    int ret = ::SigmoidGrad(input2 + start, input1 + start, end - start, out + start);
    if (ret == NNACL_ERR) {
      MS_LOG(EXCEPTION) << "SigmoidGrad failed.";
    }
  } else {
    MS_LOG(EXCEPTION) << "SigmoidGrad only support float";  // only float is supported
  }
}

// sqrt gradient: dx = dy / (2 * y), where input1 holds y = sqrt(x)
template <typename T>
void EltWiseGradCPUKernel<T>::SqrtGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  for (size_t i = start; i < end; i++) {
    out[i] = input2[i] / (input1[i] * 2);
  }
}

template <typename T>
void EltWiseGradCPUKernel<T>::TanhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  if constexpr (std::is_same_v<T, float>) {
    int ret = ::TanhGrad(input2 + start, input1 + start, end - start, out + start);
    if (ret == NNACL_ERR) {
      MS_LOG(EXCEPTION) << "TanhGrad failed.";
    }
  } else {
    MS_LOG(EXCEPTION) << "TanhGrad only support float";  // only float is supported
  }
}

// gradient of the tanh approximation of GeLU
template <typename T>
void EltWiseGradCPUKernel<T>::GeluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  for (size_t i = start; i < end; i++) {
    T x = input2[i];  // forward input
    auto double_x = static_cast<T>(x);
    T tanh_res = (T)std::tanh(0.7978845608 * (double_x + 0.044715 * double_x * double_x * double_x));
    T mul_right = (T)(0.7978845608 + 0.1070322244 * double_x * double_x);
    T y_res = (((T)1.0 + tanh_res) + x * ((T)1.0 - tanh_res * tanh_res) * mul_right) / (T)2.0;
    out[i] = input1[i] * y_res;  // scale by the incoming gradient
  }
}

// gradient of arcsine: dx = dy / sqrt(1 - x^2)
template <typename T>
void EltWiseGradCPUKernel<T>::AsinGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  for (size_t i = start; i < end; i++) {
    T dividend = input2[i];  // incoming gradient dy
    T divisor = sqrt(1 - input1[i] * input1[i]);
    if (divisor == 0) {  // divisor is zero
      if (dividend == 0) {  // 0 / 0: no meaningful value
        out[i] = std::numeric_limits<T>::quiet_NaN();
        continue;
      }
      if (std::numeric_limits<T>::has_infinity) {
        out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
      } else {
        out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
      }
      continue;
    }
    out[i] = dividend / divisor;  // dy divided by the derivative's denominator
  }
}

// gradient of arccosine: dx = -dy / sqrt(1 - x^2)
template <typename T>
void EltWiseGradCPUKernel<T>::ACosGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
  for (size_t i = start; i < end; i++) {
    T dividend = -input2[i];  // negated incoming gradient
    T divisor = sqrt(1 - input1[i] * input1[i]);
    if (divisor == 0) {
      if (dividend == 0) {
        out[i] = std::numeric_limits<T>::quiet_NaN();
        continue;
      }
      if (std::numeric_limits<T>::has_infinity) {
        out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
      } else {
        out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
      }
      continue;
    }
    out[i] = dividend / divisor;
  }
}
```
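GeluGrad differentiates the tanh approximation gelu(x) ~ 0.5 * x * (1 + tanh(c * (x + a * x^3))) with c = sqrt(2/pi) and a = 0.044715; the constant 0.7978845608 is c, and 0.1070322244 is 3 * a * c, the derivative of the tanh argument. The same expression in standalone form (a sketch, not the kernel's template):

```c++
#include <cmath>

// Gradient of the tanh-based GeLU approximation:
// gelu(x) ~ 0.5 * x * (1 + tanh(c * (x + a * x^3))), c = sqrt(2/pi), a = 0.044715.
double GeluGradOne(double x, double dy) {
  const double c = 0.7978845608;  // sqrt(2 / pi)
  const double k = 0.1070322244;  // 3 * a * c
  double t = std::tanh(c * (x + 0.044715 * x * x * x));
  double arg_grad = c + k * x * x;  // derivative of the tanh argument
  return dy * ((1.0 + t) + x * (1.0 - t * t) * arg_grad) / 2.0;
}
```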
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\cumsum_cpu_kernel.cc annotated (part 2)

```c++
// copy template: write the staged results back into the input buffer
template <typename T>
void CumSumCPUKernel::Copy(T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride, size_t stride2,
                           size_t start, size_t end) {
  for (size_t i = start; i < end; i++) {
    size_t k1 = i / dim2 % dim0;  // outer coordinate
    size_t k2 = i % dim2;         // inner coordinate
    size_t offset = k1 * stride + k2;
    for (size_t j = 0; j < dim1; ++j) {
      size_t read_index = j * stride2 + offset;
      input[read_index] = output[read_index];
    }
  }
}

// cumulative sum running from the back of the axis
template <typename T>
void CumSumCPUKernel::CumSumKernelReverse(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2,
                                          size_t stride, size_t stride2, size_t start, size_t end) {
  for (size_t i = start; i < end; i++) {
    size_t k1 = i / dim2 % dim0;
    size_t k2 = i % dim2;
    size_t offset = k1 * stride + k2;
    for (int j = SizeToInt(dim1 - 1); j >= 0; --j) {  // walk the axis backwards
      size_t read_index = j * stride2 + offset;
      if (j == SizeToInt(dim1 - 1)) {
        output[read_index] = input[read_index];  // the last element starts the running sum
      } else {
        size_t read_index2 = (j + 1) * stride2 + offset;  // previously summed element
        output[read_index] = output[read_index2] + input[read_index];
      }
    }
  }
}

// cumulative sum running from the front of the axis
template <typename T>
void CumSumCPUKernel::CumSumKernel(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride,
                                   size_t stride2, size_t start, size_t end) {
  for (size_t i = start; i < end; i++) {
    size_t k1 = i / dim2 % dim0;
    size_t k2 = i % dim2;
    size_t offset = k1 * stride + k2;
    for (size_t j = 0; j < dim1; ++j) {  // walk the axis forwards
      size_t read_index = j * stride2 + offset;
      if (j == 0) {
        output[read_index] = input[read_index];  // the first element starts the running sum
      } else {
        size_t read_index2 = (j - 1) * stride2 + offset;
        output[read_index] = output[read_index2] + input[read_index];
      }
    }
  }
}

// dispatch the cumulative sum according to the exclusive/reverse flags
template <typename T>
void CumSumCPUKernel::LaunchCumSum(const T *input, T *output, T *workspace, size_t start, size_t end) {
  start = start / dims_[1];  // convert element positions into (outer, inner) slice positions
  end = end / dims_[1];
  if (exclusive_) {
    if (reverse_) {
      RightMove(input, output, dims_[0], dims_[1], dims_[2], stride_, stride2_, start, end);
      Copy(workspace, output, dims_[0], dims_[1], dims_[2], stride_, stride2_, start, end);
      CumSumKernelReverse(workspace, output, dims_[0], dims_[1], dims_[2], stride_, stride2_, start, end);
    } else {
      LeftMove(input, output, dims_[0], dims_[1], dims_[2], stride_, stride2_, start, end);
      Copy(workspace, output, dims_[0], dims_[1], dims_[2], stride_, stride2_, start, end);
      CumSumKernel(workspace, output, dims_[0], dims_[1], dims_[2], stride_, stride2_, start, end);
    }
  } else {
    if (reverse_) {
      CumSumKernelReverse(input, output, dims_[0], dims_[1], dims_[2], stride_, stride2_, start, end);
    } else {
      CumSumKernel(input, output, dims_[0], dims_[1], dims_[2], stride_, stride2_, start, end);
    }
  }
  return;
}

// launch the typed kernel
template <typename T>
void CumSumCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
                                   const std::vector<kernel::AddressPtr> &workspace,
                                   const std::vector<kernel::AddressPtr> &outputs) {
  auto input = reinterpret_cast<T *>(inputs[0]->addr);    // input buffer
  auto ws = reinterpret_cast<T *>(workspace[0]->addr);    // workspace buffer
  auto output = reinterpret_cast<T *>(outputs[0]->addr);  // output buffer
  // multithreading: roughly 128 elements per thread, capped by the hardware concurrency
  size_t lens = inputs[0]->size > 0 ? static_cast<size_t>(inputs[0]->size / sizeof(T)) : 1;
  auto max_thread_num = std::thread::hardware_concurrency();
  size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num;
  MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num;
  std::vector<std::thread> threads;
  threads.reserve(thread_num);
  size_t start = 0;       // first element of the next chunk
  if (thread_num < 1) {   // a thread count below one is invalid
    MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num;
    return;
  }
  size_t once_compute_size = (lens + thread_num - 1) / thread_num;  // elements per thread
  if (once_compute_size < 1) {
    MS_LOG(ERROR) << "Invalid value: once_compute_size " << once_compute_size;
    return;
  }
  while (start < lens) {
    size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size);
    threads.emplace_back(std::thread(&CumSumCPUKernel::LaunchCumSum<T>, this, input, output, ws, start, end));
    start += once_compute_size;
  }
  for (size_t i = 0; i < threads.size(); ++i) {  // wait for every worker
    threads[i].join();
  }
  return;
}

// validate the arguments
void CumSumCPUKernel::CheckParam(const CNodePtr &kernel_node) {
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);  // number of input tensors
  if (input_num != 1) {
    MS_LOG(EXCEPTION) << "Argument number is " << input_num << ", but CumSumGpuKernel needs 1.";
  }
}
}  // namespace kernel
}  // namespace mindspore
```
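With the stride bookkeeping removed, CumSumKernel is the plain prefix-sum recurrence out[0] = in[0], out[j] = out[j-1] + in[j]; the reverse kernel runs it from the other end. For example:

```c++
#include <cstdio>

int main() {
  int in[5] = {1, 2, 3, 4, 5}, out[5];
  out[0] = in[0];
  for (int j = 1; j < 5; ++j) out[j] = out[j - 1] + in[j];
  for (int v : out) std::printf("%d ", v);  // 1 3 6 10 15
}
```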
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\cumsum_cpu_kernel.cc annotated (part 1)

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// standard headers
#include <thread>
// project headers
#include "backend/kernel_compiler/cpu/cumsum_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
void CumSumCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  CheckParam(kernel_node);
  shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);                 // input shape
  dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);              // input data type
  axis_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "axis"));  // cumsum axis
  dst_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
  exclusive_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "exclusive");
  reverse_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "reverse");
  int input_dim_length = SizeToInt(shape_.size());
  if (axis_ >= input_dim_length) {  // the axis must lie inside the input rank
    MS_LOG(EXCEPTION) << "Axis out of bounds.";
  }
  while (axis_ < 0) {  // wrap a negative axis
    axis_ += input_dim_length;
  }
}

template <typename T>
void CumSumCPUKernel::InitWorkspaceSize() {
  input_size_0_ = sizeof(T);  // workspace bytes = element size times element count
  for (size_t i = 0; i < shape_.size(); i++) {
    input_size_0_ *= shape_[i];
  }
  workspace_size_list_.emplace_back(input_size_0_);  // register the workspace requirement
}

// pick the workspace size that matches the data type
void CumSumCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
  CPUKernel::InitInputOutputSize(kernel_node);
  if (dtype_ == kNumberTypeFloat32) {
    InitWorkspaceSize<float>();
  } else if (dtype_ == kNumberTypeFloat16) {
    InitWorkspaceSize<float16>();
  } else if (dtype_ == kNumberTypeInt32) {
    InitWorkspaceSize<int>();
  } else if (dtype_ == kNumberTypeInt8) {
    InitWorkspaceSize<int8_t>();
  } else if (dtype_ == kNumberTypeUInt8) {
    InitWorkspaceSize<uint8_t>();
  }
}

// dispatch the launch on the data type
bool CumSumCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                             const std::vector<kernel::AddressPtr> &workspace,
                             const std::vector<kernel::AddressPtr> &outputs) {
  Reshape();
  if (dtype_ == kNumberTypeFloat32) {
    LaunchKernel<float>(inputs, workspace, outputs);
  } else if (dtype_ == kNumberTypeFloat16) {
    LaunchKernel<float16>(inputs, workspace, outputs);
  } else if (dtype_ == kNumberTypeInt32) {
    LaunchKernel<int>(inputs, workspace, outputs);
  } else if (dtype_ == kNumberTypeInt8) {
    LaunchKernel<int8_t>(inputs, workspace, outputs);
  } else if (dtype_ == kNumberTypeUInt8) {
    LaunchKernel<uint8_t>(inputs, workspace, outputs);
  }
  return true;
}

// collapse the input into (outer, axis, inner) logical dimensions
void CumSumCPUKernel::Reshape() {
  dims_[0] = 1;
  dims_[1] = shape_[IntToSize(axis_)];  // extent of the cumsum axis
  dims_[2] = 1;
  for (size_t i = 0; i < IntToSize(axis_); i++) {  // product of the dimensions before the axis
    dims_[0] *= shape_[i];
  }
  for (size_t i = IntToSize(axis_) + 1; i < shape_.size(); i++) {  // product of the dimensions after the axis
    dims_[2] *= shape_[i];
  }
  stride_ = dims_[1] * dims_[2];
  stride2_ = dims_[2];
  return;
}

// left-shift template (exclusive mode): out[0] = 0, out[j] = in[j - 1]
template <typename T>
void CumSumCPUKernel::LeftMove(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride,
                               size_t stride2, size_t start, size_t end) {
  for (size_t i = start; i < end; i++) {
    size_t k1 = i / dim2 % dim0;
    size_t k2 = i % dim2;
    size_t offset = k1 * stride + k2;
    for (size_t j = 0; j < dim1; ++j) {
      size_t read_index = j * stride2 + offset;
      if (j == 0) {
        output[read_index] = (T)0;
      } else {
        size_t read_index2 = (j - 1) * stride2 + offset;
        output[read_index] = input[read_index2];
      }
    }
  }
}

// right-shift template (reverse exclusive mode): out[last] = 0, out[j] = in[j + 1]
template <typename T>
void CumSumCPUKernel::RightMove(const T *input, T *output, size_t dim0, size_t dim1, size_t dim2, size_t stride,
                                size_t stride2, size_t start, size_t end) {
  for (size_t i = start; i < end; i++) {
    size_t k1 = i / dim2 % dim0;
    size_t k2 = i % dim2;
    size_t offset = k1 * stride + k2;
    for (int j = SizeToInt(dim1 - 1); j >= 0; --j) {
      size_t read_index = j * stride2 + offset;
      if (j == SizeToInt(dim1 - 1)) {
        output[read_index] = (T)0;
      } else {
        size_t read_index2 = (j + 1) * stride2 + offset;
        output[read_index] = input[read_index2];
      }
    }
  }
}
```
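Reshape collapses any tensor into three logical dimensions (outer, axis, inner); an element with outer index k1, axis position j, and inner index k2 then lives at flat offset k1*stride_ + j*stride2_ + k2, which is exactly the read_index formula used by the move and sum templates. A worked example for shape (2, 3, 4) and axis = 1:

```c++
#include <cstddef>
#include <cstdio>

int main() {
  size_t shape[3] = {2, 3, 4};          // dims_[0] = 2, dims_[1] = 3, dims_[2] = 4
  size_t stride = shape[1] * shape[2];  // 12: one step along the outer dimension
  size_t stride2 = shape[2];            // 4:  one step along the cumsum axis
  size_t k1 = 1, j = 2, k2 = 3;         // element [1][2][3]
  std::printf("flat index = %zu\n", k1 * stride + j * stride2 + k2);  // 23
}
```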
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\ctcloss_cpu_kernel.cc annotated (part 1)

```c++
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// project headers
#include "backend/kernel_compiler/cpu/ctcloss_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
void CTCLossCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  CheckParam(kernel_node);
  probs_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);  // shape of the probabilities
  indice_dims_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);  // shape of the label indices
  labels_dims_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);  // shape of the label values
  dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);     // data type of the probabilities
  // validate the ranks
  if (probs_shape_.size() != 3) {
    MS_LOG(EXCEPTION) << "Probs dims: " << probs_shape_.size() << " not support.";
  }
  if (labels_dims_.size() != 1) {
    MS_LOG(EXCEPTION) << "Labels dims: " << labels_dims_.size() << " not support.";
  }
  if (indice_dims_.size() != 2) {
    MS_LOG(EXCEPTION) << "Labels indice dims: " << indice_dims_.size() << " not support.";
  }
  preprocess_collapse_repeated_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "preprocess_collapse_repeated");
  ctc_merge_repeated_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "ctc_merge_repeated");
  ignore_longer_outputs_than_inputs_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "ignore_longer_outputs_than_inputs");
  max_time_ = probs_shape_[0];    // number of time steps
  batch_size_ = probs_shape_[1];  // batch size
  num_class_ = probs_shape_[2];   // number of classes
  blank_index_ = num_class_ - 1;  // the blank symbol is the last class
}

bool CTCLossCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                              const std::vector<kernel::AddressPtr> & /*workspace*/,
                              const std::vector<kernel::AddressPtr> &outputs) {
  // dispatch on the data type
  if (dtype_ == kNumberTypeFloat16) {
    LaunchKernel<float16>(inputs, outputs);
  } else if (dtype_ == kNumberTypeFloat32) {
    LaunchKernel<float>(inputs, outputs);
  }
  return true;
}

// numerically stable log(exp(logprob1) + exp(logprob2))
template <typename T>
inline T LogSumExp(T logprob1, T logprob2) {
  T kLogZero_ = -std::numeric_limits<T>::infinity();  // log(0)
  if (logprob1 == kLogZero_) {
    return logprob2;
  } else if (logprob2 == kLogZero_) {
    return logprob1;
  } else {
    return (logprob1 > logprob2) ? logprob1 + log1p(exp(logprob2 - logprob1))
                                 : logprob2 + log1p(exp(logprob1 - logprob2));
  }
}

// compute the CTC forward variables (log alpha)
template <typename TT>
void CTCLossCPUKernel::CalculateFwdVar(const std::vector<uint32_t> &label_with_blank,
                                       const std::vector<std::vector<TT>> &y,
                                       std::vector<std::vector<TT>> *log_alpha_b) {
  int U = label_with_blank.size();   // length of the blank-extended label
  int T = (*log_alpha_b)[0].size();  // number of time steps
  TT kLogZero_ = -std::numeric_limits<TT>::infinity();
  (*log_alpha_b)[0][0] = log(y[blank_index_][0]);
  auto label_0 = (label_with_blank.size() > 1) ? label_with_blank[1] : blank_index_;
  if (label_with_blank.size() > 1) {
    (*log_alpha_b)[1][0] = log(y[label_0][0]);
  }
  for (int t = 1; t < T; ++t) {
    // restrict u to the label positions reachable at time t
    int low = std::max(0, U - (2 * (T - t)));
    int high = std::min(U, 2 * (t + 1));
    for (int u = low; u < high; ++u) {
      auto sum_log_alpha_b = kLogZero_;
      if (ctc_merge_repeated_ || label_with_blank[u] == blank_index_) {
        sum_log_alpha_b = (*log_alpha_b)[u][t - 1];  // stay on the same position
      }
      if (u > 0) {
        sum_log_alpha_b = LogSumExp(sum_log_alpha_b, (*log_alpha_b)[u - 1][t - 1]);  // advance one position
      }
      if (u > 1) {  // skip over a blank, unless the neighbours are repeated and merged
        bool matching_labels_merge = ctc_merge_repeated_ && (label_with_blank[u] == label_with_blank[u - 2]);
        if (label_with_blank[u] != blank_index_ && !matching_labels_merge) {
          sum_log_alpha_b = LogSumExp(sum_log_alpha_b, (*log_alpha_b)[u - 2][t - 1]);
        }
      }
      (*log_alpha_b)[u][t] = log(y[label_with_blank[u]][t]) + sum_log_alpha_b;
    }
  }
}

// compute the CTC backward variables (log beta)
template <typename TT>
void CTCLossCPUKernel::CalculateBwdVar(const std::vector<uint32_t> &label_with_blank,
                                       const std::vector<std::vector<TT>> &y,
                                       std::vector<std::vector<TT>> *log_beta_b) {
  int T = (*log_beta_b)[0].size();
  int U = label_with_blank.size();
  if (U > 1) {
    for (int u = U - 2; u < U; ++u) {
      (*log_beta_b)[u][T - 1] = TT(0);
    }
  } else {
    (*log_beta_b)[0][T - 1] = TT(0);
    (*log_beta_b)[0][T - 2] = TT(0);
  }
  for (int t = T - 2; t >= 0; --t) {
    int low = std::max(0, U - (2 * (T - t)));
    int high = std::min(U, 2 * (t + 1));
    for (int u = low; u < high; ++u) {
      if (ctc_merge_repeated_ || label_with_blank[u] == blank_index_) {
        (*log_beta_b)[u][t] =
          LogSumExp((*log_beta_b)[u][t], (*log_beta_b)[u][t + 1] + TT(log(y[label_with_blank[u]][t + 1])));
      }
      if (u + 1 < U) {
        (*log_beta_b)[u][t] =
          LogSumExp((*log_beta_b)[u][t], (*log_beta_b)[u + 1][t + 1] + TT(log(y[label_with_blank[u + 1]][t + 1])));
      }
      if (u + 2 < U) {
        bool matching_labels_merge = ctc_merge_repeated_ && (label_with_blank[u] == label_with_blank[u + 2]);
        if (label_with_blank[u] != blank_index_ && !matching_labels_merge) {
          (*log_beta_b)[u][t] =
            LogSumExp((*log_beta_b)[u][t], (*log_beta_b)[u + 2][t + 1] + TT(log(y[label_with_blank[u + 2]][t + 1])));
        }
      }
    }
  }
}
```
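LogSumExp is the standard trick for adding probabilities stored in log space: log(e^a + e^b) = max + log1p(exp(min - max)), which never exponentiates a large positive number. A standalone check:

```c++
#include <cmath>
#include <cstdio>
#include <limits>

double LogSumExp(double a, double b) {
  const double kLogZero = -std::numeric_limits<double>::infinity();
  if (a == kLogZero) return b;
  if (b == kLogZero) return a;
  return (a > b) ? a + std::log1p(std::exp(b - a)) : b + std::log1p(std::exp(a - b));
}

int main() {
  // exp(-1000) underflows to 0 in double, yet the log-space sum survives:
  std::printf("%f\n", LogSumExp(-1000.0, -1001.0));  // about -999.686
}
```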
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\ctcloss_cpu_kernel.cc annotated (part 3)

```c++
// build a row x col matrix filled with init_value
template <typename T>
void MatrixfromVector(uint32_t row, uint32_t col, std::vector<std::vector<T>> *array2D, const T init_value) {
  array2D->resize(row);
  for (size_t i = 0; i < row; ++i) {
    (*array2D)[i].resize(col, init_value);
  }
}

// typed kernel launch
template <typename T>
void CTCLossCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
                                    const std::vector<kernel::AddressPtr> &outputs) {
  auto inputs_addr = reinterpret_cast<T *>(inputs[0]->addr);                  // probabilities
  auto labels_indices_addr = reinterpret_cast<uint64_t *>(inputs[1]->addr);   // sparse label indices
  auto labels_values_addr = reinterpret_cast<uint32_t *>(inputs[2]->addr);    // flat label values
  auto sequence_length_addr = reinterpret_cast<uint32_t *>(inputs[3]->addr);  // per-batch sequence lengths
  auto loss_addr = reinterpret_cast<T *>(outputs[0]->addr);                   // per-batch loss
  auto gradient_addr = reinterpret_cast<T *>(outputs[1]->addr);               // gradient w.r.t. the inputs

  std::vector<std::vector<uint32_t>> label_batch;
  std::vector<std::vector<uint32_t>> labels_with_blank;
  std::vector<uint64_t> each_label_length;
  label_batch.resize(batch_size_);
  labels_with_blank.resize(batch_size_);
  each_label_length.resize(batch_size_, 0);
  T kLogZero_ = -std::numeric_limits<T>::infinity();

  // validate the sequence lengths
  for (size_t b = 0; b < batch_size_; ++b) {
    if (sequence_length_addr[b] == uint32_t(0)) {
      MS_LOG(EXCEPTION) << "Sequence length should > 0, but gets " << sequence_length_addr[b];
    }
    if (sequence_length_addr[b] > max_time_) {  // max_time_ must bound every sequence length
      MS_LOG(EXCEPTION) << "Max time should be greater than sequence length, but gets " << max_time_ << " "
                        << sequence_length_addr[b];
    }
  }
  for (size_t i = 0; i < indice_dims_[0]; ++i) {  // count the labels that belong to each batch
    each_label_length[labels_indices_addr[i * 2]]++;
  }

  // convert the (label_indices, label_values) sparse form into per-batch label lists
  uint64_t cum_sum = 0;
  for (size_t b = 0; b < batch_size_; ++b) {
    std::vector<uint32_t> *b_value = &label_batch[b];
    for (size_t l = 0; l < each_label_length[b]; ++l) {
      b_value->push_back(labels_values_addr[cum_sum + l]);
    }
    cum_sum += each_label_length[b];
  }

  // interleave the blank symbol into every label sequence
  GenLableWithBlank(sequence_length_addr, label_batch, &labels_with_blank);

  for (size_t b = 0; b < batch_size_; ++b) {
    std::vector<uint32_t> label_with_blank = labels_with_blank[b];
    // y_b: [num_class, sequence_length]
    std::vector<std::vector<T>> y_b;
    std::vector<std::vector<T>> dy;
    std::vector<std::vector<T>> log_alpha_b;
    std::vector<std::vector<T>> log_beta_b;
    MatrixfromVector(num_class_, sequence_length_addr[b], &y_b, kLogZero_);
    MatrixfromVector(y_b.size(), y_b[0].size(), &dy, T(0));
    MatrixfromVector(label_with_blank.size(), sequence_length_addr[b], &log_alpha_b, kLogZero_);
    MatrixfromVector(label_with_blank.size(), sequence_length_addr[b], &log_beta_b, kLogZero_);
    InnerSoftMax(inputs_addr, &y_b, sequence_length_addr[b], num_class_, batch_size_, b);
    CalculateFwdVar(label_with_blank, y_b, &log_alpha_b);
    CalculateBwdVar(label_with_blank, y_b, &log_beta_b);

    // total log-probability of the label: sum over the alignment states
    T log_pzx = kLogZero_;
    for (size_t u = 0; u < label_with_blank.size(); ++u) {
      log_pzx = LogSumExp(log_pzx, log_alpha_b[u][0] + log_beta_b[u][0]);
    }
    loss_addr[b] = -log_pzx;
    CalculateGrad(label_with_blank, y_b, log_alpha_b, log_beta_b, log_pzx, &dy);

    for (size_t t = 0; t < sequence_length_addr[b]; ++t) {  // write the gradient back in [T, B, C] layout
      for (size_t c = 0; c < num_class_; ++c) {
        gradient_addr[t * batch_size_ * num_class_ + b * num_class_ + c] = dy[c][t];
      }
    }
  }
}

void CTCLossCPUKernel::CheckParam(const CNodePtr &kernel_node) {
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 4) {
    MS_LOG(EXCEPTION) << "CTCLossCPUKernel needs 4 inputs, but gets " << input_num;
  }
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  if (output_num != 2) {
    MS_LOG(EXCEPTION) << "CTCLossCPUKernel expects 2 outputs, but gets" << output_num;
  }
}
}  // namespace kernel
}  // namespace mindspore
```
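The labels arrive in sparse form: each row of labels_indices is a (batch, position) pair and labels_values is the flat value list, so the kernel first counts how many labels each batch owns and then slices the value array in order. The same two passes in miniature:

```c++
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  // labels_indices rows are (batch, position); labels_values is the flat label list.
  size_t indices[3][2] = {{0, 0}, {0, 1}, {1, 0}};
  unsigned values[3] = {5, 3, 7};
  std::vector<size_t> each_label_length(2, 0);
  for (auto &ip : indices) each_label_length[ip[0]]++;  // labels per batch: {2, 1}
  std::vector<std::vector<unsigned>> label_batch(2);
  size_t cum_sum = 0;
  for (size_t b = 0; b < 2; ++b) {
    for (size_t l = 0; l < each_label_length[b]; ++l) {
      label_batch[b].push_back(values[cum_sum + l]);
    }
    cum_sum += each_label_length[b];
  }
  std::printf("batch 0: %u %u | batch 1: %u\n", label_batch[0][0], label_batch[0][1], label_batch[1][0]);
}
```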
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\ctcloss_cpu_kernel.cc annotated (part 2)

```c++
// compute the final gradient: dy = y - exp(log_alpha + log_beta - log_pzx)
template <typename TT>
void CTCLossCPUKernel::CalculateGrad(const std::vector<uint32_t> &label_with_blank,
                                     const std::vector<std::vector<TT>> &y,
                                     const std::vector<std::vector<TT>> &log_alpha_b,
                                     const std::vector<std::vector<TT>> &log_beta_b, const TT log_pzx,
                                     std::vector<std::vector<TT>> *dy) {
  auto dy_b = dy;  // alias for the output matrix
  TT kLogZero_ = -std::numeric_limits<TT>::infinity();
  if (log_pzx == kLogZero_) {
    MS_LOG(INFO) << "No valid path found";  // the label cannot be aligned to the input
    return;
  }
  size_t L = y.size();                 // number of classes
  size_t T = y[0].size();              // number of time steps
  size_t U = label_with_blank.size();  // length of the blank-extended label
  for (size_t t = 0; t < T; ++t) {
    std::vector<TT> prob_sum(L, kLogZero_);
    for (size_t u = 0; u < U; ++u) {  // accumulate the posterior of each class at time t
      uint32_t l = label_with_blank[u];
      prob_sum[l] = LogSumExp(prob_sum[l], log_alpha_b[u][t] + log_beta_b[u][t]);
    }
    for (size_t l = 0; l < L; ++l) {
      (*dy_b)[l][t] = y[l][t] - exp(prob_sum[l] - log_pzx);
    }
  }
}

// interleave the blank symbol around every label
void CTCLossCPUKernel::GenLableWithBlank(uint32_t *seq_len, const std::vector<std::vector<uint32_t>> &batch_label,
                                         std::vector<std::vector<uint32_t>> *label_with_blank) {
  for (size_t b = 0; b < batch_size_; ++b) {
    std::vector<uint32_t> l;
    const std::vector<uint32_t> &label = batch_label[b];
    bool has_blank = false;
    for (size_t i = 0; i < label.size(); ++i) {
      if (i == 0 || !preprocess_collapse_repeated_ || label[i] != label[i - 1]) {
        if (label[i] >= num_class_ - 1) {
          has_blank = true;
        } else {
          if (has_blank) {  // a blank must not sit between two valid labels
            MS_LOG(EXCEPTION) << "Invalid labels(index >= num_class - 1) should not appear between two valid labels";
          }
          l.push_back(label[i]);  // keep the valid label
        }
      }
    }
    if (!ignore_longer_outputs_than_inputs_) {
      if (l.size() > seq_len[b]) {  // the input must be long enough to emit every label
        MS_LOG(EXCEPTION) << "Input time(sequence length) should greater than output size(label length), but gets "
                          << seq_len[b] << " " << l.size();
      }
    }
    (*label_with_blank)[b].reserve(2 * l.size() + 1);
    for (auto l_i : l) {
      (*label_with_blank)[b].push_back(blank_index_);
      (*label_with_blank)[b].push_back(l_i);
    }
    (*label_with_blank)[b].push_back(blank_index_);
  }
}

// softmax over the classes at each time step, computed in a numerically stable way
template <typename T>
void InnerSoftMax(T *inputs_addr, std::vector<std::vector<T>> *softmax_probs, const uint32_t sequence_length,
                  size_t num_class, size_t batch_size, size_t b) {
  for (size_t t = 0; t < sequence_length; ++t) {
    T maxCoeff(T(0));  // running maximum logit (for stability)
    T sumCoeff(T(0));  // normalizer
    for (size_t c = 0; c < num_class; ++c) {  // find the largest logit at this time step
      if (inputs_addr[t * batch_size * num_class + b * num_class + c] > maxCoeff) {
        maxCoeff = inputs_addr[t * batch_size * num_class + b * num_class + c];
      }
    }
    for (size_t c = 0; c < num_class; ++c) {  // exponentiate the shifted logits and accumulate the sum
      sumCoeff += exp(inputs_addr[t * batch_size * num_class + b * num_class + c] - maxCoeff);
      (*softmax_probs)[c][t] = exp(inputs_addr[t * batch_size * num_class + b * num_class + c] - maxCoeff);
    }
    for (size_t c = 0; c < num_class; ++c) {  // normalize
      (*softmax_probs)[c][t] /= sumCoeff;
    }
  }
}
```
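GenLableWithBlank interleaves the blank symbol around every remaining label, so a length-n label becomes length 2n+1; with num_class = 5 (blank index 4), the label {1, 2} becomes {4, 1, 4, 2, 4}:

```c++
#include <cstdio>
#include <vector>

int main() {
  std::vector<unsigned> label = {1, 2}, with_blank;
  unsigned blank = 4;  // blank_index_ = num_class - 1
  with_blank.reserve(2 * label.size() + 1);
  for (unsigned l : label) {
    with_blank.push_back(blank);
    with_blank.push_back(l);
  }
  with_blank.push_back(blank);
  for (unsigned v : with_blank) std::printf("%u ", v);  // 4 1 4 2 4
}
```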
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\cpu_kernel_factory.cc annotated

```c++
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
// standard headers
#include <memory>
#include <string>
// project headers
#include "runtime/device/kernel_info.h"

namespace mindspore {  // namespace declaration
namespace kernel {     // nested namespace
CPUKernelFactory &CPUKernelFactory::GetInstance() {  // singleton accessor
  static CPUKernelFactory instance;  // the one static instance
  return instance;
}

void CPUKernelFactory::Register(const std::string &kernel_name, const KernelAttr &kernel_attr,
                                CPUKernelCreator &&kernel_creator) {
  (void)name_to_attr_creator_[kernel_name].emplace_back(kernel_attr, kernel_creator);  // record the (attr, creator) pair
#if !defined(_WIN32) && !defined(_WIN64)
  MS_LOG(DEBUG) << "CPUKernelFactory register operator: " << kernel_name;  // log the registration
#endif
}

// create a kernel instance by name
std::shared_ptr<CPUKernel> CPUKernelFactory::Create(const std::string &kernel_name, const CNodePtr &apply_kernel) {
  auto kernel_info = dynamic_cast<device::KernelInfo *>(apply_kernel->kernel_info());  // kernel info of the node
  MS_EXCEPTION_IF_NULL(kernel_info);  // the kernel info must not be null
  const KernelBuildInfo *kernel_build_Info = kernel_info->select_kernel_build_info();  // selected build info
  MS_EXCEPTION_IF_NULL(kernel_build_Info);  // the build info must not be null
  std::pair<bool, size_t> ret_pair = CPUKernelAttrCheck(kernel_name, *kernel_build_Info);
  if (ret_pair.first) {
    return (name_to_attr_creator_.find(kernel_name)->second)[ret_pair.second].second();
  }
  return nullptr;  // no matching registration
}

std::pair<bool, size_t> CPUKernelFactory::CPUKernelAttrCheck(const std::string &kernel_name,
                                                             const KernelBuildInfo &kernel_info) {
  auto iter = name_to_attr_creator_.find(kernel_name);  // look up the kernel name
  if (iter == name_to_attr_creator_.end()) {
    MS_LOG(INFO) << "Not registered CPU kernel: op[" << kernel_name << "]!";  // unregistered kernel
    return std::make_pair(false, 0);
  }
  auto creators = iter->second;  // registered (attr, creator) pairs
  for (size_t index = 0; index < creators.size(); ++index) {  // look for an attribute that matches
    auto attr_creator = creators[index];
    if (CPUKernelSingleAttrCheck(attr_creator.first, kernel_info)) {
      return std::make_pair(true, index);  // found: return its position
    }
  }
  return std::make_pair(false, 0);
}

// check one registered attribute against the node's build info
bool CPUKernelFactory::CPUKernelSingleAttrCheck(const KernelAttr &kernel_attr, const KernelBuildInfo &kernel_info) {
  for (size_t i = 0; i < kernel_info.GetInputNum(); ++i) {  // compare every input device type
    auto dtype = kernel_attr.GetAllSame() ? kernel_attr.GetInputAttr(0).first : kernel_attr.GetInputAttr(i).first;
    if (kernel_info.GetInputDeviceType(i) != dtype) {  // mismatch: log and reject
      MS_LOG(DEBUG) << "input index:" << i << ", kernel info type:" << kernel_info.GetInputDeviceType(i)
                    << ", register type:" << dtype;
      return false;
    }
  }
  for (size_t i = 0; i < kernel_info.GetOutputNum(); ++i) {  // compare every output device type
    auto dtype = kernel_attr.GetAllSame() ? kernel_attr.GetOutputAttr(0).first : kernel_attr.GetOutputAttr(i).first;
    if (kernel_info.GetOutputDeviceType(i) != dtype) {  // mismatch: log and reject
      MS_LOG(DEBUG) << "output index:" << i << ", kernel info type:" << kernel_info.GetOutputDeviceType(i)
                    << ", register type:" << dtype;
      return false;
    }
  }
  return true;
}

// list every attribute registered for a kernel name
std::vector<KernelAttr> CPUKernelFactory::GetSupportedKernelAttrList(const std::string &kernel_name) {
  std::vector<KernelAttr> result;
  auto iter = name_to_attr_creator_.find(kernel_name);  // look up the kernel name
  if (iter == name_to_attr_creator_.end()) {  // warn about an unregistered kernel
    MS_LOG(WARNING) << "Not registered CPU kernel: op[" << kernel_name << "]!";
    return result;
  }
  auto creators = iter->second;
  for (size_t index = 0; index < creators.size(); ++index) {  // collect every registered attribute
    auto attr_creator = creators[index];
    result.push_back(attr_creator.first);
  }
  return result;
}
}  // namespace kernel
}  // namespace mindspore
```
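The factory is a name-to-list-of-(attribute, creator) registry; Create walks the list and instantiates the first entry whose device types match the node's build info. The shape of the pattern in miniature (toy types, not the MindSpore classes):

```c++
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct Kernel { virtual ~Kernel() = default; };
struct AddKernel : Kernel {};

class Factory {
 public:
  using Creator = std::function<std::shared_ptr<Kernel>()>;
  static Factory &GetInstance() {
    static Factory instance;  // one registry for the whole process
    return instance;
  }
  void Register(const std::string &name, Creator c) { creators_[name].push_back(std::move(c)); }
  std::shared_ptr<Kernel> Create(const std::string &name) {
    auto it = creators_.find(name);
    // A real attribute check would pick a matching entry; here we take the first.
    return (it == creators_.end() || it->second.empty()) ? nullptr : it->second.front()();
  }

 private:
  std::map<std::string, std::vector<Creator>> creators_;
};

int main() {
  Factory::GetInstance().Register("Add", [] { return std::make_shared<AddKernel>(); });
  auto k = Factory::GetInstance().Create("Add");  // non-null shared_ptr
}
```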
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\cpu_kernel.cc code annotations (2)

```c++
BroadcastIterator::BroadcastIterator(std::vector<size_t> input_shape_a, std::vector<size_t> input_shape_b,
                                     std::vector<size_t> output_shape)
    : input_shape_a_(std::move(input_shape_a)),
      input_shape_b_(std::move(input_shape_b)),
      output_shape_(std::move(output_shape)) {
  output_dimension_ = SizeToInt(output_shape_.size());  // store the output rank as int for the iterator
  BroadcastShape();
  // allocate the stride buffers
  input_strides_a_.resize(output_dimension_);
  input_strides_b_.resize(output_dimension_);
  input_back_strides_a_.resize(output_dimension_);
  input_back_strides_b_.resize(output_dimension_);
  coordinates_.resize(output_dimension_);
  InitStrides();
}

void BroadcastIterator::SetPos(size_t pos) {
  // decompose the flat position into per-axis coordinates, last axis first
  for (int i = output_dimension_ - 1; i >= 0 && pos != 0; --i) {
    coordinates_[i] = pos % output_shape_[i];
    input_pos_[0] += coordinates_[i] * input_strides_a_[i];
    input_pos_[1] += coordinates_[i] * input_strides_b_[i];
    pos /= output_shape_[i];
  }
}

void BroadcastIterator::GenNextPos() {
  // advance to the next output coordinate
  for (int i = output_dimension_ - 1; i >= 0; --i) {
    if (coordinates_[i] + 1 == output_shape_[i]) {
      // carry: reset this axis and step the input offsets back
      coordinates_[i] = 0;
      input_pos_[0] -= input_back_strides_a_[i];
      input_pos_[1] -= input_back_strides_b_[i];
    } else {
      ++coordinates_[i];
      input_pos_[0] += input_strides_a_[i];
      input_pos_[1] += input_strides_b_[i];
      break;
    }
  }
}

// pad both input shapes with leading 1s up to the broadcast (output) rank
void BroadcastIterator::BroadcastShape() {
  int input_dimension_a = input_shape_a_.size();
  if (input_dimension_a < output_dimension_) {
    input_shape_a_.insert(input_shape_a_.begin(), output_dimension_ - input_dimension_a, 1);
  }
  int input_dimension_b = input_shape_b_.size();
  if (input_dimension_b < output_dimension_) {
    input_shape_b_.insert(input_shape_b_.begin(), output_dimension_ - input_dimension_b, 1);
  }
}

void BroadcastIterator::InitStrides() {
  input_strides_a_[output_dimension_ - 1] = 1;
  input_strides_b_[output_dimension_ - 1] = 1;
  for (int i = output_dimension_ - 2; i >= 0; --i) {
    input_strides_a_[i] = input_shape_a_[i + 1] * input_strides_a_[i + 1];
    input_strides_b_[i] = input_shape_b_[i + 1] * input_strides_b_[i + 1];
    input_back_strides_a_[i + 1] = (input_shape_a_[i + 1] - 1) * input_strides_a_[i + 1];
    input_back_strides_b_[i + 1] = (input_shape_b_[i + 1] - 1) * input_strides_b_[i + 1];
  }
  // update the broadcast strides:
  // when an axis has extent 1, its stride becomes 0 so the same element is reused
  std::transform(input_strides_a_.begin(), input_strides_a_.end(), input_shape_a_.begin(),
                 input_strides_a_.begin(), [](const auto &a, const auto &b) { return b == 1 ? 0 : a; });
  std::transform(input_strides_b_.begin(), input_strides_b_.end(), input_shape_b_.begin(),
                 input_strides_b_.begin(), [](const auto &a, const auto &b) { return b == 1 ? 0 : a; });
}

TransposeIterator::TransposeIterator(std::vector<size_t> output_shape, std::vector<size_t> axes,
                                     const std::vector<size_t> &input_shape)
    : shape_(std::move(output_shape)), axes_(std::move(axes)) {
  // compute the input strides
  dimension_ = shape_.size();
  std::vector<size_t> strides(dimension_, 1);
  for (int i = dimension_ - 2; i >= 0; --i) {
    strides[i] = input_shape[i + 1] * strides[i + 1];
  }
  // permute shape and strides by the axes, then compute the back strides
  strides_.resize(dimension_);
  back_strides_.resize(dimension_);
  for (int i = dimension_ - 1; i >= 0; --i) {
    strides_[i] = strides[axes_[i]];
    back_strides_[i] = (shape_[i] - 1) * strides_[i];
  }
  // coordinates are derived from pos
  coordinates_.resize(dimension_);
}

void TransposeIterator::SetPos(size_t pos) {
  for (int i = dimension_ - 1; i >= 0 && pos != 0; --i) {
    coordinates_[i] = pos % shape_[i];
    pos_ += coordinates_[i] * strides_[i];
    pos /= shape_[i];
  }
}

void TransposeIterator::GenNextPos() {
  for (int i = dimension_ - 1; i >= 0; --i) {
    if (coordinates_[i] + 1 == shape_[i]) {
      coordinates_[i] = 0;
      pos_ -= back_strides_[i];
    } else {
      coordinates_[i]++;
      pos_ += strides_[i];
      break;
    }
  }
}
}  // namespace kernel
}  // namespace mindspore
```
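The key idea in `InitStrides` is the zero-stride trick: an axis of extent 1 gets stride 0, so advancing along that axis re-reads the same element, which is exactly what broadcasting requires. Below is a minimal standalone sketch of that trick under simple assumptions (plain row-major arrays; `BroadcastStrides` is a hypothetical helper, not a MindSpore function):

```C++
#include <cstdio>
#include <vector>

// Compute row-major strides for `shape`, then zero the stride of every
// axis with extent 1 so that axis is "broadcast" (its element is reused).
std::vector<size_t> BroadcastStrides(const std::vector<size_t> &shape) {
  std::vector<size_t> strides(shape.size(), 1);
  for (int i = static_cast<int>(shape.size()) - 2; i >= 0; --i) {
    strides[i] = shape[i + 1] * strides[i + 1];
  }
  for (size_t i = 0; i < shape.size(); ++i) {
    if (shape[i] == 1) strides[i] = 0;  // the zero-stride broadcast trick
  }
  return strides;
}

int main() {
  // a has shape (2, 1), b has shape (1, 3); the broadcast output is (2, 3)
  std::vector<size_t> shape_a{2, 1}, shape_b{1, 3}, out_shape{2, 3};
  double a[] = {10.0, 20.0};
  double b[] = {1.0, 2.0, 3.0};
  auto sa = BroadcastStrides(shape_a);  // {1, 0}
  auto sb = BroadcastStrides(shape_b);  // {0, 1}
  for (size_t i = 0; i < out_shape[0]; ++i) {
    for (size_t j = 0; j < out_shape[1]; ++j) {
      double v = a[i * sa[0] + j * sa[1]] + b[i * sb[0] + j * sb[1]];
      std::printf("%g ", v);  // prints: 11 12 13 / 21 22 23
    }
    std::printf("\n");
  }
  return 0;
}
```

`BroadcastIterator` layers back-strides on top of this so `GenNextPos` can advance in constant time per step instead of re-deriving both input offsets from the full coordinate vector.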
-
```C++
// include guard: #ifndef prevents double inclusion
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_MOD_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_MOD_H_
// standard library headers
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <vector>
// project header
#include "backend/kernel_compiler/ascend_kernel_mod.h"
// nested namespaces
namespace mindspore {
namespace kernel {
class HostKernelMod : public AscendKernelMod {
  // public members of HostKernelMod
 public:
  HostKernelMod() = default;
  ~HostKernelMod() override = default;
  const std::vector<size_t> &GetInputSizeList() const override;
  const std::vector<size_t> &GetOutputSizeList() const override;
  const std::vector<size_t> &GetWorkspaceSizeList() const override;
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
  std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
                                   const std::vector<AddressPtr> &, uint32_t) override;
  device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override = 0;
  bool Init(const AnfNodePtr &anf_node);

  // protected members of HostKernelMod
 protected:
  AnfNodePtr anf_node_;
  std::string op_name_;
  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
};
// type aliases
using HostKernelModPtr = std::shared_ptr<HostKernelMod>;
using HostKernelModPtrList = std::vector<HostKernelModPtr>;
using HostKernelCreater = std::function<std::shared_ptr<HostKernelMod>()>;
// the HostKernelFactory class
class HostKernelFactory {
  HostKernelFactory() = default;
  ~HostKernelFactory() = default;

  // public methods of HostKernelFactory
 public:
  static HostKernelFactory &Get();
  void Registe(const string &name, HostKernelCreater &&fun);
  static std::shared_ptr<HostKernelMod> Get(const string &name);

  // private members of HostKernelFactory
 private:
  std::map<string, HostKernelCreater> hostKernelMap_;
};
// the _HostKernelRegister class: registers a creator on construction
class _HostKernelRegister {
  // public methods of _HostKernelRegister
 public:
  _HostKernelRegister(const string &name, HostKernelCreater &&fun) {
    HostKernelFactory::Get().Registe(name, std::move(fun));
  }
  ~_HostKernelRegister() = default;
};
// the registration macro: defines a file-scope _HostKernelRegister whose
// constructor stores a creator lambda for the kernel class in the factory
#define _MS_HOST_REG_KERNEL_REG(KNAME, clazz)                                                     \
  static_assert(std::is_base_of<HostKernelMod, clazz>::value, " must be base of HostKernelMod"); \
  static const _HostKernelRegister g_##KNAME##_##_kernel_reg(#KNAME, []() {                      \
    std::shared_ptr<clazz> ptr = nullptr; /* smart pointer to the kernel */                       \
    ptr = std::make_shared<clazz>();                                                              \
    MS_EXCEPTION_IF_NULL(ptr);                                                                    \
    return ptr;                                                                                   \
  });

#define MS_HOST_REG_KERNEL(KNAME, clazz) _MS_HOST_REG_KERNEL_REG(KNAME, clazz)
}  // namespace kernel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_MOD_H_
```

host_kernel_mod.cc (detailed explanation)

```C++
// the matching header
#include "backend/kernel_compiler/host/host_kernel_mod.h"
// standard library headers
#include <memory>
#include <string>
#include <utility>
#include <vector>
// project headers
#include "runtime/mem.h"
#include "utils/ms_context.h"
#include "runtime/device/kernel_runtime.h"
#include "runtime/device/ascend/executor/host_dynamic_kernel.h"
// nested namespaces
namespace mindspore {
namespace kernel {
void HostKernelFactory::Registe(const std::string &name, HostKernelCreater &&fun) {
  // insert into the private hostKernelMap_.
  // std::move marks fun as movable so its resources can be transferred rather
  // than copied; it is equivalent to a static_cast to an rvalue reference type.
  hostKernelMap_.emplace(name, std::move(fun));
}

std::shared_ptr<HostKernelMod> HostKernelFactory::Get(const std::string &name) {
  const auto &map = Get().hostKernelMap_;  // the singleton's registry
  auto it = map.find(name);
  if (it != map.end() && it->second) {  // found a creator: invoke it
    return (it->second)();
  }
  return nullptr;
}

// singleton accessor
HostKernelFactory &HostKernelFactory::Get() {
  static HostKernelFactory instance;
  return instance;
}

// trivial accessors returning the cached size lists of HostKernelMod
const std::vector<size_t> &HostKernelMod::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &HostKernelMod::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &HostKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }

bool HostKernelMod::Init(const AnfNodePtr &anf_node) {
  MS_EXCEPTION_IF_NULL(anf_node);
  // compute the byte size of every input and output
  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
  for (size_t i = 0; i < input_num; i++) {  // walk the inputs
    std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
    TypePtr type_ptr = TypeIdToType(AnfAlgo::GetInputDeviceDataType(anf_node, i));
    MS_EXCEPTION_IF_NULL(type_ptr);
    int64_t size_i = 1;  // element count, accumulated with overflow checks
    for (size_t j = 0; j < shape_i.size(); j++) {
      size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
    }
    size_t type_byte = GetTypeByte(type_ptr);
    if (type_byte == 0) {  // unknown element size: fail
      return false;
    }
    size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
    input_size_list_.push_back(LongToSize(size_i));
  }
  // walk the outputs the same way as the inputs above
  for (size_t i = 0; i < output_num; i++) {
    std::vector<size_t> shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
    TypePtr type_ptr = TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, i));
    MS_EXCEPTION_IF_NULL(type_ptr);
    int64_t size_i = 1;
    for (size_t j = 0; j < shape_i.size(); j++) {
      size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
    }
    size_t type_byte = GetTypeByte(type_ptr);
    if (type_byte == 0) {
      return false;
    }
    size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
    output_size_list_.push_back(LongToSize(size_i));
  }
  return true;
}

// deliberately empty bodies: host kernels do their real work in
// GenDynamicKernel, so Launch and GenTask only need to exist and succeed
bool HostKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                           const std::vector<AddressPtr> &outputs, void *stream_ptr) {
  return true;
}
std::vector<TaskInfoPtr> HostKernelMod::GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
                                                const std::vector<AddressPtr> &, uint32_t) {
  return {};
}
}  // namespace kernel
}  // namespace mindspore
```
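`MS_HOST_REG_KERNEL` works through static registration: expanding the macro defines a file-scope `_HostKernelRegister` whose constructor runs before `main()` and stores a creator lambda in the factory map. The sketch below reproduces that idiom with invented names (`DemoKernel`, `DemoFactory`, `DEMO_REG_KERNEL`); it illustrates the mechanism and is not MindSpore code.

```C++
#include <cstdio>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>

// Illustrative stand-ins; none of these are MindSpore names.
struct DemoKernel {
  virtual ~DemoKernel() = default;
  virtual void Run() = 0;
};

class DemoFactory {
 public:
  using Creator = std::function<std::shared_ptr<DemoKernel>()>;
  static DemoFactory &Get() {
    static DemoFactory instance;  // function-local static singleton
    return instance;
  }
  void Register(const std::string &name, Creator &&fun) { registry_.emplace(name, std::move(fun)); }
  std::shared_ptr<DemoKernel> Create(const std::string &name) {
    auto it = registry_.find(name);
    return it != registry_.end() ? it->second() : nullptr;
  }

 private:
  std::map<std::string, Creator> registry_;
};

// The registrar: every static instance's constructor runs before main(),
// so each DEMO_REG_KERNEL use self-registers its class with the factory.
struct DemoRegister {
  DemoRegister(const std::string &name, DemoFactory::Creator &&fun) {
    DemoFactory::Get().Register(name, std::move(fun));
  }
};

#define DEMO_REG_KERNEL(KNAME, clazz)                                                          \
  static_assert(std::is_base_of<DemoKernel, clazz>::value, #clazz " must derive DemoKernel");  \
  static const DemoRegister g_##KNAME##_reg(#KNAME, [] { return std::make_shared<clazz>(); });

struct PrintKernel : DemoKernel {
  void Run() override { std::printf("PrintKernel::Run\n"); }
};
DEMO_REG_KERNEL(Print, PrintKernel)

int main() {
  auto k = DemoFactory::Get().Create("Print");
  if (k != nullptr) k->Run();  // prints "PrintKernel::Run"
  return 0;
}
```

The `static_assert` plays the same role as in `_MS_HOST_REG_KERNEL_REG`: it rejects registration of a class that does not derive from the kernel base at compile time, before any factory lookup can go wrong at runtime.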