-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\arithmetic_logic_cpu_kernel.cc代码标注1 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入系统自带的包(注:抓取文本中尖括号内的头文件名被转义丢失,此处按下文用到的 std::map、std::string 推断补回,需对照上游原文件确认) #include <map> #include <string> #include <thread> //导入自定义的包 #include "backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个命名空间 namespace kernel {//命名空间嵌套 template <typename T> void ArithmeticLogicCPUKernel<T>::Less(const T *input1, const T *input2, bool *out) { auto task = [&](size_t start, size_t end) {//定义按区间划分的并行子任务 for (size_t i = start; i < end; i++) {//循环遍历,逐元素判断input1是否小于input2 out[i] = input1[i] < input2[i]; } }; CPUKernelUtils::ParallelFor(task, output_size_); } template <typename T> //模板 void ArithmeticLogicCPUKernel<T>::Equal(const T *input1, const T *input2, bool *out) { auto task = [&](size_t start, size_t end) {//定义按区间划分的并行子任务 for (size_t i = start; i < end; i++) {//循环遍历数组判断两个输入值是否相等 out[i] = input1[i] == input2[i]; } }; CPUKernelUtils::ParallelFor(task, output_size_); } template <typename T> void ArithmeticLogicCPUKernel<T>::NotEqual(const T *input1, const T *input2, bool *out) { auto task = [&](size_t start, size_t end) {//定义按区间划分的并行子任务 for (size_t i = start; i < end; i++) {//循环遍历数组判断两个输入值是否不相等 out[i] = input1[i] != input2[i]; } }; CPUKernelUtils::ParallelFor(task, output_size_); } template <typename T> void ArithmeticLogicCPUKernel<T>::LogicalAnd(const T *input1, const T *input2, bool *out) { auto task = [&](size_t start, size_t end) {//定义按区间划分的并行子任务 for (size_t i = start; i < end; i++) 
{//循环遍历数组,逐元素做逻辑与运算 out[i] = input1[i] && input2[i]; } }; CPUKernelUtils::ParallelFor(task, output_size_); } template <typename T> void ArithmeticLogicCPUKernel<T>::LogicalOr(const T *input1, const T *input2, bool *out) { auto task = [&](size_t start, size_t end) {//定义按区间划分的并行子任务 for (size_t i = start; i < end; i++) {//循环遍历数组,逐元素做逻辑或运算 out[i] = input1[i] || input2[i]; } }; CPUKernelUtils::ParallelFor(task, output_size_); } template <typename T> void ArithmeticLogicCPUKernel<T>::Greater(const T *input1, const T *input2, bool *out) { auto task = [&](size_t start, size_t end) {//定义按区间划分的并行子任务 for (size_t i = start; i < end; i++) {//循环遍历数组,逐元素判断input1是否大于input2 out[i] = input1[i] > input2[i]; } }; CPUKernelUtils::ParallelFor(task, output_size_); } template <typename T> void ArithmeticLogicCPUKernel<T>::GreaterEqual(const T *input1, const T *input2, bool *out) { auto task = [&](size_t start, size_t end) {//定义按区间划分的并行子任务 for (size_t i = start; i < end; i++) {//循环遍历数组,逐元素判断input1是否大于等于input2 out[i] = input1[i] >= input2[i]; } }; CPUKernelUtils::ParallelFor(task, output_size_); } template <typename T> void ArithmeticLogicCPUKernel<T>::LessEqual(const T *input1, const T *input2, bool *out) { auto task = [&](size_t start, size_t end) {//定义按区间划分的并行子任务 for (size_t i = start; i < end; i++) {//循环遍历数组,逐元素判断input1是否小于等于input2 out[i] = input1[i] <= input2[i]; } }; CPUKernelUtils::ParallelFor(task, output_size_); } //声明一个静态映射表:算子名 -> 操作类型枚举 static const std::map<std::string, OperateType> kArithmeticBinOpTypeMap = { {prim::kPrimGreater->name(), GREATER}, {prim::kPrimGreaterEqual->name(), GREATEREQUAL}, {prim::kPrimLogicalAnd->name(), LOGICALAND}, {prim::kPrimLessEqual->name(), LESSEQUAL}, {prim::kPrimLogicalOr->name(), LOGICALOR}, {prim::kPrimLess->name(), LESS}, {prim::kPrimNotEqual->name(), NOTEQUAL}, {prim::kPrimEqual->name(), EQUAL}}; template <typename T> //模板,初始化内核并判断是否是支持的算子类型 void ArithmeticLogicCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); if (kArithmeticBinOpTypeMap.find(kernel_name) != kArithmeticBinOpTypeMap.end()) { operate_type_ = 
kArithmeticBinOpTypeMap.at(kernel_name); } else { MS_LOG(EXCEPTION) "Not support " kernel_name;//错误日志:不支持的形式 } input_shape1_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);//声明变量记录第一个内核节点的形状 input_shape2_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);//声明变量记录第二个内核节点的形状 output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);//声明变量记录输出内核节点的形状 if (output_shape_.size() == 0) { output_shape_.insert(output_shape_.begin(), 1);//如果输出节点大小为0,执行下面循环体 } output_size_ = 1;//初始化变量并赋值 for (size_t i = 0; i output_shape_.size(); ++i) {//循环遍历数组计算得输出值大小 output_size_ *= output_shape_[i]; } size_t l = input_shape1_.size();//获取输入shapel大小 for (size_t i = 0; i output_shape_.size() - l; ++i) { input_shape1_.insert(input_shape1_.begin(), 1); } l = input_shape2_.size();//获取输入shape2大小 for (size_t i = 0; i output_shape_.size() - l; ++i) { input_shape2_.insert(input_shape2_.begin(), 1); } CPUKernelUtils::GetElementNumEveryDim(input_shape1_, &input_element_num1_); CPUKernelUtils::GetElementNumEveryDim(input_shape2_, &input_element_num2_); CPUKernelUtils::GetElementNumEveryDim(output_shape_, &output_element_num_); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); if (dtype_ != AnfAlgo::GetInputDeviceDataType(kernel_node, 1)) { MS_LOG(EXCEPTION) "Input0 and input1 must has the same data type";//错误日志:nput0和input1必须具有相同的数据类型 } target_dtype_ = AnfAlgo::GetOutputInferDataType(kernel_node, 0);//节点数据 } ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\arithmetic_cpu_kernel.CC代码标注3 ```C++ //模板计算工作空间大小 template void ArithmeticCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { CPUKernel::InitInputOutputSize(kernel_node); workspace_size_list_.emplace_back(output_size_ * sizeof(T));//工作空间大小 workspace_size_list_.emplace_back(output_size_ * sizeof(T)); } template //返回值是布尔类型函数 bool ArithmeticCPUKernel::Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) { //声明变量并赋值 T *input1 = reinterpret_cast(inputs[0]->addr);//指针存储第一个数的地址 T *input2 = reinterpret_cast(inputs[1]->addr);//指针存储第二个数的地址 T *output = reinterpret_cast(outputs[0]->addr);//指针存储输出值的地址 T *broadcastedInput1 = reinterpret_cast(workspace[0]->addr);//广播输入第一个数 T *broadcastedInput2 = reinterpret_cast(workspace[1]->addr);//广播输入第二个数 Broadcast(broadcastedInput1, broadcastedInput2, input1, input2); //判断操作数据类型 if (operate_type_ == ADD) { Add(broadcastedInput1, broadcastedInput2, output); } else if (operate_type_ == SUB) { Sub(broadcastedInput1, broadcastedInput2, output); } else if (operate_type_ == MUL) { Mul(broadcastedInput1, broadcastedInput2, output); } else if (operate_type_ == REALDIV) { RealDiv(broadcastedInput1, broadcastedInput2, output); } else if (operate_type_ == DIV) { Div(broadcastedInput1, broadcastedInput2, output); } else if (operate_type_ == FLOORDIV) { FloorDiv(broadcastedInput1, broadcastedInput2, output); } else if (operate_type_ == MOD) { Mod(broadcastedInput1, broadcastedInput2, output); } else if (operate_type_ == FLOORMOD) { FloorMod(broadcastedInput1, broadcastedInput2, output); } else if (operate_type_ == POW) { Pow(broadcastedInput1, broadcastedInput2, output); } else if (operate_type_ == ASSIGNADD) { AssignAdd(input1, input2, output); } else if (operate_type_ == ATAN2) { Atan2(broadcastedInput1, broadcastedInput2, output); } else if (operate_type_ == SQUAREDDIFFERENCE) { SquaredDifference(broadcastedInput1, broadcastedInput2, output); } 
else { MS_LOG(EXCEPTION) "Not support " operate_type_; return false; } return true; } template void ArithmeticCPUKernel::Broadcast(T *broadcastedInput1, T *broadcastedInput2, T *input1, T *input2) { for (size_t i = 0; i output_size_; ++i) { size_t idx1 = 0;//初始化变量 size_t idx2 = 0;//初始化变量 GenIndex(i, &idx1, &idx2); broadcastedInput1[i] = input1[idx1]; broadcastedInput2[i] = input2[idx2]; } } template void ArithmeticCPUKernel::GenIndex(size_t num, size_t *idx1, size_t *idx2) { //广播索引 std::vector tmp; for (size_t i = 0; i output_shape_.size() - 1; ++i) {//循环遍历数组 if (output_element_num_[i] > num) {//如果输出值大于num,添加元素0在最后 tmp.push_back(0); } else { tmp.push_back(num / output_element_num_[i]); num %= output_element_num_[i]; } } tmp.push_back(num);//添加元素num //循环遍历数组,判断不同情况下的广播索引 for (size_t k = 0; k tmp.size() - 1; ++k) { if (input_shape1_[k] > 1) { *idx1 += tmp[k] * input_element_num1_[k];//广播索引1 } if (input_shape2_[k] > 1) { *idx2 += tmp[k] * input_element_num2_[k];//广播索引2 } } if (input_shape1_[tmp.size() - 1] > 1) { *idx1 += tmp[tmp.size() - 1]; } if (input_shape2_[tmp.size() - 1] > 1) { *idx2 += tmp[tmp.size() - 1]; } } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\arithmetic_cpu_kernel.CC代码标注2 ```c++ //求余(取模)模板 template <typename T> void ArithmeticCPUKernel<T>::Mod(const T *input1, const T *input2, T *out) { auto task = [&](size_t start, size_t end) { for (size_t i = start; i < end; i++) { auto x = static_cast<double>(input1[i]); auto y = static_cast<double>(input2[i]); auto data_div = x / y; auto data_div_min = data_div < 0.0 ? data_div : 0.0;//商小于0时取商,否则取0 auto data_div_max = data_div > 0.0 ? data_div : 0.0;//商大于0时取商,否则取0 auto data_div_max_floor = floor(data_div_max);//对正商向下取整 auto data_div_min_ceil = ceil(data_div_min);//对负商向上取整(即向零取整) auto data_div_res = data_div_max_floor + data_div_min_ceil; out[i] = static_cast<T>(x - data_div_res * y); } }; CPUKernelUtils::ParallelFor(task, output_size_); } //向下取整求余模板 template <typename T> void ArithmeticCPUKernel<T>::FloorMod(const T *input1, const T *input2, T *out) { auto task = [&](size_t start, size_t end) { for (size_t i = start; i < end; i++) { auto x = static_cast<double>(input1[i]); auto y = static_cast<double>(input2[i]); auto res = x - floor(x / y) * y; out[i] = static_cast<T>((std::abs(res) > 1e-9) && ((res < 0.0) != (y < 0.0)) ? 
res + y : res); } }; CPUKernelUtils::ParallelFor(task, output_size_); } //模板求x的y次方 template void ArithmeticCPUKernel::Pow(const T *input1, const T *input2, T *out) { auto task = [&](size_t start, size_t end) { for (size_t i = start; i end; i++) { auto x = static_cast(input1[i]);//底数 auto y = static_cast(input2[i]);//幂 out[i] = static_cast(std::pow(x, y));//计算得最后输出值 } }; CPUKernelUtils::ParallelFor(task, output_size_); } //模板,求差值的平方即离差 template void ArithmeticCPUKernel::SquaredDifference(const T *input1, const T *input2, T *out) { auto task = [&](size_t start, size_t end) { for (size_t i = start; i end; i++) { T diff = input1[i] - input2[i];//求差值 out[i] = diff * diff;//平方 } }; CPUKernelUtils::ParallelFor(task, output_size_); } template void ArithmeticCPUKernel::Atan2(const T *input1, const T *input2, T *out) { auto task = [&](size_t start, size_t end) { for (size_t i = start; i end; i++) { out[i] = (T)atan2(static_cast(input1[i]), static_cast(input2[i])); } }; CPUKernelUtils::ParallelFor(task, output_size_); } //类型图 static const std::map kArithmeticBinOpTypeMap = { {prim::kPrimAdd->name(), ADD}, {prim::kPrimSub->name(), SUB}, {prim::kPrimMul->name(), MUL}, {prim::kPrimDiv->name(), DIV}, {prim::kPrimMod->name(), MOD}, {prim::kPrimAssignAdd->name(), ASSIGNADD}, {prim::kPrimPow->name(), POW}, {prim::kPrimFloorDiv->name(), FLOORDIV}, {prim::kPrimAtan2->name(), ATAN2}, {prim::kPrimRealDiv->name(), REALDIV}, {prim::kPrimSquaredDifference->name(), SQUAREDDIFFERENCE}, {prim::kPrimFloorMod->name(), FLOORMOD}}; //判断支持数据 template void ArithmeticCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); if (kArithmeticBinOpTypeMap.find(kernel_name) != kArithmeticBinOpTypeMap.end()) { operate_type_ = kArithmeticBinOpTypeMap.at(kernel_name); } else { MS_LOG(EXCEPTION) "Not support " kernel_name; } input_shape1_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); input_shape2_ = 
AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); if (output_shape_.size() == 0) { output_shape_.insert(output_shape_.begin(), 1); } output_size_ = 1; for (size_t i = 0; i output_shape_.size(); ++i) {//循环遍历计算输出大小 output_size_ *= output_shape_[i]; } size_t l = input_shape1_.size(); for (size_t i = 0; i output_shape_.size() - l; ++i) {//循环遍历计算 input_shape1_.insert(input_shape1_.begin(), 1); } l = input_shape2_.size(); for (size_t i = 0; i output_shape_.size() - l; ++i) { input_shape2_.insert(input_shape2_.begin(), 1); } CPUKernelUtils::GetElementNumEveryDim(input_shape1_, &input_element_num1_); CPUKernelUtils::GetElementNumEveryDim(input_shape2_, &input_element_num2_); CPUKernelUtils::GetElementNumEveryDim(output_shape_, &output_element_num_); dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); if (dtype_ != AnfAlgo::GetInputDeviceDataType(kernel_node, 1)) { MS_LOG(EXCEPTION) "Input0 and input1 must has the same data type";//Input0和input1必须具有相同的数据类型 } target_dtype_ = AnfAlgo::GetOutputInferDataType(kernel_node, 0); } ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\arithmetic_cpu_kernel.CC代码标注1 ```C++ * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入系统自带的包 #include #include #include //导入自定义的包 #include "backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 template //模板 //求和,合并输入值,将最后结果赋给输出值重新存储 void ArithmeticCPUKernel::AssignAdd(T *input1, const T *input2, T *out) { auto task = [&](size_t start, size_t end) { for (size_t i = start; i end; i++) { out[i] = input1[i] + input2[i]; input1[i] = out[i]; } }; CPUKernelUtils::ParallelFor(task, output_size_); } //模板 求和合并输入值,将结果存储到一个新的输出数组 template void ArithmeticCPUKernel::Add(const T *input1, const T *input2, T *out) { auto task = [&](size_t start, size_t end) { for (size_t i = start; i end; i++) { out[i] = input1[i] + input2[i]; } }; CPUKernelUtils::ParallelFor(task, output_size_); } //两数求差模板 template void ArithmeticCPUKernel::Sub(const T *input1, const T *input2, T *out) { auto task = [&](size_t start, size_t end) { for (size_t i = start; i end; i++) { out[i] = input1[i] - input2[i]; } }; CPUKernelUtils::ParallelFor(task, output_size_); } //两数相乘模板 template void ArithmeticCPUKernel::Mul(const T *input1, const T *input2, T *out) { auto task = [&](size_t start, size_t end) { for (size_t i = start; i end; i++) { out[i] = input1[i] * input2[i]; } }; CPUKernelUtils::ParallelFor(task, output_size_); } //两数相除模板 template void ArithmeticCPUKernel::RealDiv(const T *input1, const T *input2, T *out) { auto task = 
[&](size_t start, size_t end) { for (size_t i = start; i end; i++) { auto dividend = input1[i];//声明变量被除数 auto divisor = input2[i];//声明变量除数 auto zero = (T)0; if (divisor == zero) {//除数为0 if (dividend == zero) {//除数和被除数都为0 out[i] = std::numeric_limits::quiet_NaN();//输出值为NaN continue;//函数继续计算 } if (std::numeric_limits::has_infinity) { out[i] = dividend > zero ? std::numeric_limits::infinity() : -std::numeric_limits::infinity(); } else { out[i] = dividend > zero ? std::numeric_limits::max() : std::numeric_limits::min(); } continue; } out[i] = dividend / divisor; } }; CPUKernelUtils::ParallelFor(task, output_size_); } //两数相除模板 template void ArithmeticCPUKernel::Div(const T *input1, const T *input2, T *out) { auto task = [&](size_t start, size_t end) { for (size_t i = start; i end; i++) { auto dividend = input1[i];//声明变量被除数 auto divisor = input2[i];//声明变量除数 auto zero = (T)0; if (divisor == zero) {//除数为0 if (dividend == zero) {//除数和被除数都为0 out[i] = std::numeric_limits::quiet_NaN(); continue;//函数继续计算 } if (std::numeric_limits::has_infinity) { out[i] = dividend > zero ? std::numeric_limits::infinity() : -std::numeric_limits::infinity(); } else { out[i] = dividend > zero ? std::numeric_limits::max() : std::numeric_limits::min(); } continue; } out[i] = dividend / divisor; } }; CPUKernelUtils::ParallelFor(task, output_size_); } //两数相除模板 template void ArithmeticCPUKernel::FloorDiv(const T *input1, const T *input2, T *out) { auto task = [&](size_t start, size_t end) { for (size_t i = start; i end; i++) { auto dividend = input1[i];//声明变量被除数 auto divisor = input2[i];//声明变量除数 auto zero = (T)0; if (divisor == zero) {// 特殊情况:除数为0 if (dividend == zero) {//除数和被除数都为0 out[i] = std::numeric_limits::quiet_NaN(); continue; } if (std::numeric_limits::has_infinity) { out[i] = dividend > zero ? std::numeric_limits::infinity() : -std::numeric_limits::infinity(); } else { out[i] = dividend > zero ? 
std::numeric_limits::max() : std::numeric_limits::min(); } continue; } out[i] = (T)floor(static_cast(dividend) / static_cast(divisor)); } }; CPUKernelUtils::ParallelFor(task, output_size_); } ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\argmin_with_value_cpu_kernel.CC代码标注 ```C++ /** * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的包 #include "backend/kernel_compiler/cpu/argmin_with_value_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 namespace {//空间嵌套 size_t get_element_num(const std::vector &shape) {//获取元素数值大小 size_t size = 1; for (size_t i = 0; i shape.size(); i++) {//遍历循环计算元素数值大小 size *= shape[i]; } return size;//返回值 } template //模板 //检查形状地址值,数值大小,返回一个布尔值 bool check_validation(const std::vector &shape, const size_t num_before_axis, const size_t num_after_axis, const std::vector &inputs, const std::vector &outputs) { if (inputs.size() != 1 || outputs.size() != 2) {//如果输入输出大小不为1和2,那么记录错误日志:输入输出错误 MS_LOG(EXCEPTION) "Wrong number of inputs or outputs!"; return false;//返回布尔值false } //声明变量并赋值 size_t data_size = sizeof(T);//数据大小 size_t input_size = get_element_num(shape) * data_size;//输入值大小 size_t output_num = num_before_axis * num_after_axis;//输出值大小 size_t out0_size = output_num * sizeof(int); size_t out1_size = output_num * data_size; if (inputs[0]->size != input_size || outputs[0]->size != out0_size || outputs[1]->size != out1_size) { MS_LOG(EXCEPTION) "invalid input or output data size!";//输入或输出数据大小无效 return false; } return true; } } // namespace template //模板 void ArgMinWithValueCPUKernel::InitKernel(const CNodePtr &kernel_node) { 
MS_EXCEPTION_IF_NULL(kernel_node);//判断算子内核是否为空 shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);//获取算子内核形状 size_t shape_len = shape_.size();//获取一行算子内核长度 int64_t axis = AnfAlgo::GetNodeAttr(kernel_node, AXIS);//获取行数 axis += shape_len;//计算得总共算子内核长度 if (axis 0) {//如果函数小于0,记录错误日志 MS_LOG(EXCEPTION) "Invalid axis:" axis ", should in range [-1, " shape_len - 1 "]"; } axis = axis % static_cast(shape_len);//重新获取不满一行算子内核个数 num_before_axis_ = 1;//声明变量并附初值 num_after_axis_ = 1;//声明变量并附初值 for (size_t i = 0; i shape_len; i++) {//循环遍历数组,计算之前和之后总共行数 if (static_cast(i) axis) { num_before_axis_ *= shape_[i]; } else if (static_cast(i) > axis) { num_after_axis_ *= shape_[i]; } } dim_axis_ = shape_[axis]; } template //模板 bool ArgMinWithValueCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspaces*/, const std::vector &outputs) { if (!check_validation(shape_, num_before_axis_, num_after_axis_, inputs, outputs)) { return false; } //起别名。初始化变量并赋值 auto input = reinterpret_cast(inputs[0]->addr); auto output0 = reinterpret_cast(outputs[0]->addr); auto output1 = reinterpret_cast(outputs[1]->addr); //循环遍历数组,计算列数总和 for (size_t i = 0; i num_before_axis_; i++) { size_t src_index_i = i * dim_axis_ * num_after_axis_; for (size_t j = 0; j num_after_axis_; j++) { std::vector array_axis; size_t src_index_j = src_index_i + j; for (size_t k = 0; k dim_axis_; k++) { size_t src_index_k = k * num_after_axis_ + src_index_j; array_axis.push_back(static_cast(input[src_index_k])); } //起别名 auto min_ops = std::min_element(array_axis.begin(), array_axis.end()); auto min_index = static_cast(std::distance(array_axis.begin(), min_ops));//获取算子内核最少的一行xu'l auto dst_index = i * num_after_axis_ + j; output0[dst_index] = min_index; auto src_index = IntToSize(min_index) * num_after_axis_ + src_index_j; output1[dst_index] = input[src_index]; } } return true;//返回值 } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\argmax_cpu_kernel.cc代码标注 ```C++ /** * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入系统自定义的包 #include "backend/kernel_compiler/cpu/argmax_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 namespace {//空间嵌套 size_t get_element_num(const std::vector &shape) { size_t size = 1; for (size_t i = 0; i shape.size(); i++) {//遍历数组获取大小 size *= shape[i]; } return size; } template //模板 //检查常量大小 bool check_validation(const std::vector &shape, const size_t num_before_axis, const size_t num_after_axis, const std::vector &inputs, const std::vector &outputs) { if (inputs.size() != 1 || outputs.size() != 1) { MS_LOG(EXCEPTION) "Wrong number of inputs or outputs!";//错误日志 return false; } //声明变量 size_t data_size = sizeof(T);//数据类型大小 size_t input_size = get_element_num(shape) * data_size;//输入大小 size_t output_num = num_before_axis * num_after_axis;//输出值 size_t output_size = output_num * sizeof(int);//输出数据类型 if (inputs[0]->size != input_size || outputs[0]->size != output_size) { MS_LOG(EXCEPTION) "invalid input or output data size!";//错误日志:输入或输出数据大小无效 return false; } return true; } } // namespace template //模板 void ArgmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node);//判断数据类型是否为空 shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);// size_t shape_len = shape_.size();//声明变量并赋值、获取长度大小 int64_t 
axis = AnfAlgo::GetNodeAttr(kernel_node, AXIS); axis += shape_len;//计算数据形状大小 if (axis 0) { MS_LOG(EXCEPTION) "Invalid axis:" axis ", should in range [-1, " shape_len - 1 "]"; } axis = axis % static_cast(shape_len); num_before_axis_ = 1; num_after_axis_ = 1; for (size_t i = 0; i shape_len; i++) {//遍历数组,判断数据类型 if (static_cast(i) axis) { num_before_axis_ *= shape_[i]; } else if (static_cast(i) > axis) { num_after_axis_ *= shape_[i]; } } dim_axis_ = shape_[axis]; } template //模板 bool ArgmaxCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspaces*/, const std::vector &outputs) { if (!check_validation(shape_, num_before_axis_, num_after_axis_, inputs, outputs)) { return false; } auto input = reinterpret_cast(inputs[0]->addr);//获取输入值 auto output = reinterpret_cast(outputs[0]->addr);//获取输出值 for (size_t i = 0; i num_before_axis_; i++) {//遍历数组 size_t src_index_i = i * dim_axis_ * num_after_axis_; for (size_t j = 0; j num_after_axis_; j++) { std::vector array_axis; size_t src_index_j = src_index_i + j; for (size_t k = 0; k dim_axis_; k++) { size_t src_index_k = k * num_after_axis_ + src_index_j; array_axis.push_back(static_cast(input[src_index_k]));//添加元素 } auto max_ops = std::max_element(array_axis.begin(), array_axis.end()); auto max_index = static_cast(std::distance(array_axis.begin(), max_ops));//最大值下标 auto dst_index = i * num_after_axis_ + j; output[dst_index] = max_index; } } return true;//返回值为真 } } // namespace kernel } // namespace mindspore ```
-
## mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.cc代码标注 ```C++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的包 #include "backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.h" //导入系统自带的包 #include <thread> #include <vector> namespace mindspore {//声明一个命名空间 namespace kernel {//命名空间嵌套 namespace {//匿名命名空间 //定义各种数据类型字节大小及输入输出个数常量 constexpr size_t kSizeFloat16 = 2; constexpr size_t kSizeFloat32 = 4; constexpr size_t kInputSize = 4; constexpr size_t kOutputSize = 2; } // namespace void ApplyAdagradCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node);//判断节点是否为空 update_slots_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "update_slots");//读取update_slots属性,决定是否更新accum update_slots_ = AnfAlgo::GetNodeAttr 处读取的属性 dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);//获取数据类型 } bool ApplyAdagradCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/, const std::vector<AddressPtr> &outputs) { CheckParam(inputs, outputs);//检查输入输出参数 //判断数据类型 if (dtype_ == kNumberTypeFloat16) {//如果数据类型是Float16,发起相同数据类型输入输出 LaunchKernel<float16>(inputs, outputs); } else if (dtype_ == kNumberTypeFloat32) {//如果数据类型是Float32,发起相同数据类型输入输出 LaunchKernel<float>(inputs, outputs); } return true;//返回值为真 } void ApplyAdagradCPUKernel::CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) { // 输入: var, accum, lr, gradient if (inputs.size() != kInputSize) { MS_LOG(EXCEPTION) << "Input number 
is " << inputs.size() << ", but ApplyAdagrad needs 4 inputs."; } // 输出: var, accum if (outputs.size() != kOutputSize) {//错误日志,输出不够 MS_LOG(EXCEPTION) << "Output number is " << outputs.size() << ", but ApplyAdagrad needs 2 outputs."; } if (inputs[0]->size != inputs[1]->size || inputs[0]->size != inputs[3]->size) { MS_LOG(EXCEPTION) << "Error input data size!";//错位的输出类型 } if (inputs[2]->size != kSizeFloat16 && inputs[2]->size != kSizeFloat32) { //属性lr和grad必须是float16或float32 MS_LOG(EXCEPTION) << "The attribute lr and grad must be float16 or float32!"; } } template <typename T>//模板 void ApplyAdagradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) { //获取var, accum, lr, gradient的地址 auto var = reinterpret_cast<T *>(inputs[0]->addr); auto accum = reinterpret_cast<T *>(inputs[1]->addr); auto lr = reinterpret_cast<T *>(inputs[2]->addr); auto gradient = reinterpret_cast<T *>(inputs[3]->addr); // 多线程 size_t length = inputs[0]->size / sizeof(T); size_t max_thread_num = std::thread::hardware_concurrency(); size_t use_thread_num = length < 128 * max_thread_num ? std::ceil(length / 128.0) : max_thread_num; std::vector<std::thread> threads; threads.reserve(use_thread_num); size_t start = 0; const size_t batch_size = (length + use_thread_num - 1) / use_thread_num; if (batch_size == 0) { MS_LOG(EXCEPTION) << "Error occur in launch kernel";//错误日志:启动内核时发生错误 return; } while (start < length) { size_t end = (start + batch_size) > length ? 
length : (start + batch_size); threads.emplace_back( std::thread(&ApplyAdagradCPUKernel::LaunchApplyAdagrad<T *>, this, var, accum, lr, gradient, start, end)); start += batch_size; } for (auto &it : threads) { it.join(); } // 复制结果到输出张量 auto output_var = reinterpret_cast<T *>(outputs[0]->addr); auto output_accum = reinterpret_cast<T *>(outputs[1]->addr); if (memcpy_s(output_var, outputs[0]->size, var, inputs[0]->size) != EOK) { MS_LOG(EXCEPTION) << "Launch kernel error: memcpy failed."; } if (memcpy_s(output_accum, outputs[1]->size, accum, inputs[1]->size) != EOK) { MS_LOG(EXCEPTION) << "Launch kernel error: memcpy failed."; } } template <typename T> void ApplyAdagradCPUKernel::LaunchApplyAdagrad(T const var, T const accum, const T lr, const T gradient, size_t start, size_t end) { // DataType只能是float32或float16,因此eps不会为零。 using DataType = typename std::iterator_traits<T>::value_type; const DataType one = DataType(1); const DataType eps = DataType(1e-6); for (size_t i = start; i < end; ++i) { // 更新accum: accum += grad * grad if (update_slots_) { accum[i] += gradient[i] * gradient[i]; } // 更新var: var -= lr * grad * \frac{1}{\sqrt{accum}} var[i] -= lr[0] * gradient[i] * (one / sqrt(accum[i] + eps)); } } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\akg\ascend\akg_ascend_kernel_mod.h代码标注 ```C++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的包 #include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h" #include #include #include #include #include "runtime/rt.h" #include "utils/log_adapter.h" #include "utils/convert_utils.h" namespace mindspore {//定义一个空间 namespace kernel {//空间嵌套 using std::fstream;//sheng using std::map; using std::mutex; using std::string; using TbeTaskInfoPtr = std::shared_ptr; using tbe::KernelManager; constexpr uint32_t DEFAULT_BLOCK_DIM = 1; /** * @brief infotable contain func_stub\blockdim\kernel file buffer */ AkgKernelMod::AkgKernelMod(const KernelPackPtr &kernel_pack) : kernel_pack_(kernel_pack) {} //设置输入大小列表 void AkgKernelMod::SetInputSizeList(const std::vector &size_list) { input_size_list_ = size_list; } //设置输出大小列表 void AkgKernelMod::SetOutputSizeList(const std::vector &size_list) { output_size_list_ = size_list; } //设置工作空间大小列表 void AkgKernelMod::SetWorkspaceSizeList(const std::vector &size_list) { workspace_size_list_ = size_list; } const std::vector &AkgKernelMod::GetInputSizeList() const { return input_size_list_; } const std::vector &AkgKernelMod::GetOutputSizeList() const { return output_size_list_; } const std::vector &AkgKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; } bool AkgKernelMod::Launch(const std::vector &inputs, const std::vector &, const 
std::vector &outputs, void *stream_ptr) { if (stream_ptr == nullptr) {//判断ptr数据为空 MS_LOG(ERROR) "stream_ptr should not be nullptr."; return false; } if (kernel_pack_ == nullptr) {//内核包是否为空 MS_LOG(ERROR) "kernel pack should not be nullptr."; return false; } uint32_t block_dim = DEFAULT_BLOCK_DIM; //默认blockdim等于1 auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim); if (func_stub == 0) {//如果func_stub为0,记录错误信息 MS_LOG(ERROR) "GenFuncStub failed."; return false; } // 将所有地址打包到一个向量中。 std::vector runtime_args; (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtime_args), [](const AddressPtr &input) -> void * { return input->addr; }); (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtime_args), [](const AddressPtr &output) -> void * { return output->addr; }); rtL2Ctrl_t *l2ctrl = nullptr; auto stream = static_cast(stream_ptr);//获取数据流并存储 if (RT_ERROR_NONE != rtKernelLaunch(reinterpret_cast(func_stub), block_dim, runtime_args.data(), SizeToUint(sizeof(void *) * runtime_args.size()), l2ctrl, stream)) { MS_LOG(ERROR) "Call runtime rtKernelLaunch error."; return false;//返回值为假 } return true;//返回值为真 } std::vector AkgKernelMod::GenTask(const std::vector &inputs, const std::vector &, const std::vector &outputs, uint32_t stream_id) { if (kernel_pack_ == nullptr) { MS_LOG(EXCEPTION) "kernel pack should not be nullptr."; } //变量 std::vector args; const uint32_t args_size = 0;//初始化变量 std::vector sm_desc; void *binary = nullptr;//初始化变量 const uint32_t binary_size = 0;//初始化变量 std::vector meta_data; std::vector input_data_addrs; std::vector output_data_addrs; std::vector workspace_addrs; // 将所有地址打包到一个向量中。 (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs), [](const AddressPtr &input) -> void * { return input->addr; }); (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs), [](const AddressPtr &output) -> void * { 
return output->addr; });
  uint32_t block_dim = DEFAULT_BLOCK_DIM;  // default blockdim equal to 1.
  auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim);
  if (func_stub == 0) {  // 如果func_stub值为0,记录错误日志
    MS_LOG(EXCEPTION) << "GenFuncStub failed.";
  }
  std::string stub_func = KernelManager::GetStubFuncName(kernel_pack_);
  MS_LOG(DEBUG) << "The block_dim is:" << block_dim;
  TbeTaskInfoPtr task_info_ptr = std::make_shared<ge::model_runner::TbeTaskInfo>(
    kernel_name_, stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data,
    input_data_addrs, output_data_addrs, workspace_addrs, NeedDump());
  return {task_info_ptr};
}
}  // namespace kernel
}  // namespace mindspore
```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\mkldnn\addn_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入系统自带的包 #include "backend/kernel_compiler/cpu/mkldnn/addn_cpu_kernel.h" #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" #include "runtime/device/cpu/cpu_device_address.h" #include "utils/ms_utils.h" //声明双重空间 namespace mindspore { namespace kernel { //初始化内核 void AddNCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node);//判断内核是否为空 input_num_ = AnfAlgo::GetInputTensorNum(kernel_node);//获取输入值大小 CheckParam(kernel_node);//检查参数 std::vector src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); std::vector src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); std::vector dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); dnnl::memory::desc src0_mem_desc = GetDefaultMemDesc(src0_shape); dnnl::memory::desc src1_mem_desc = GetDefaultMemDesc(src1_shape); dnnl::memory::desc dst_mem_desc = GetDefaultMemDesc(dst_shape); dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_add, src0_mem_desc, src1_mem_desc, dst_mem_desc); auto prim_desc = dnnl::binary::primitive_desc(desc, MKLKernelEngine::Get().engine()); primitive_ = std::make_shared(prim_desc); AddArgument(DNNL_ARG_SRC_0, src0_mem_desc); AddArgument(DNNL_ARG_SRC_1, src1_mem_desc); AddArgument(DNNL_ARG_DST, dst_mem_desc); } bool AddNCPUKernel::Launch(const std::vector 
&inputs, const std::vector<AddressPtr> & /*workspace*/, const std::vector<AddressPtr> &outputs) {
  // 设置参数句柄
  SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr);
  SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr);
  SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
  ExecutePrimitive();
  for (size_t index = 2; index < input_num_; ++index) {
    SetArgumentHandle(DNNL_ARG_SRC_0, outputs[0]->addr);
    SetArgumentHandle(DNNL_ARG_SRC_1, inputs[index]->addr);
    SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
    ExecutePrimitive();
  }
  return true;
}

// 检查参数
void AddNCPUKernel::CheckParam(const CNodePtr &kernel_node) {
  auto src0_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  auto dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
  if (src0_shape != dst_shape) {  // AddN内核输出形状必须等于输入形状
    MS_LOG(EXCEPTION) << "AddN output shape must be equal to input shape.";
  }
  for (size_t index = 1; index < input_num_; ++index) {
    auto src_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, index);
    if (src0_shape != src_shape) {  // AddN输入形状必须相等
      MS_LOG(EXCEPTION) << "AddN input shapes must be equal.";
    }
  }
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  if (output_num != 1) {  // 如果输出值个数不等于1,记录错误日志:输出值个数是...
    MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but AddNCPUKernel needs 1 output.";
  }
}
}  // namespace kernel
}  // namespace mindspore
```
-
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_INCLUDE_API_DELEGATE_H
#define MINDSPORE_INCLUDE_API_DELEGATE_H

#include <map>
#include <vector>
#include <memory>
#include "schema/model_generated.h"
#include "include/api/kernel.h"
#include "include/api/status.h"

namespace mindspore {
// Version of the flatbuffer schema the loaded ms model file was serialized with.
typedef enum {
  SCHEMA_INVALID = -1, /**< invalid version */
  SCHEMA_CUR,          /**< current version for ms model defined in model.fbs*/
  SCHEMA_V0,           /**< previous version for ms model defined in model_v0.fbs*/
} SchemaVersion;

// Iterator over the model's kernel list; used by BeginKernelIterator/EndKernelIterator/Replace.
using KernelIter = std::vector<kernel::Kernel *>::iterator;

class MS_API DelegateModel {
 public:
  /// \brief Constructor of MindSpore Lite DelegateModel.
  DelegateModel(std::vector<kernel::Kernel *> *kernels, const std::vector<MSTensor> &inputs,
                const std::vector<MSTensor> &outputs,
                const std::map<kernel::Kernel *, const schema::Primitive *> &primitives, SchemaVersion version)
      : kernels_(kernels), inputs_(inputs), outputs_(outputs), primitives_(primitives), version_(version) {}

  /// \brief Destructor of MindSpore Lite DelegateModel.
  ~DelegateModel() = default;

  /// \brief Get Primitive of kernel::Kernel.
  ///
  /// \param[in] a kernel in DelegateModel kernels vector.
  ///
  /// \return The schema::Primitive of The kernel.
  const schema::Primitive *GetPrimitive(kernel::Kernel *kernel) const;

  /// \brief Get the begin iterator of the DelegateModel kernels vector.
  ///
  /// \return The begin iterator of the DelegateModel kernels vector.
  KernelIter BeginKernelIterator();

  /// \brief Get the end iterator of the DelegateModel kernels vector.
  ///
  /// \return The end iterator of the DelegateModel kernels vector.
  KernelIter EndKernelIterator();

  /// \brief Replace the continuous kernel supported by the delegate with a delegate graph kernel.
  ///
  /// \param[in] from Define the begin iterator of continuous kernel supported by the delegate.
  /// \param[in] end Define the end iterator of continuous kernel supported by the delegate.
  ///
  /// \return The next iterator after graph_kernel, point to the next kernel that is not visited.
  KernelIter Replace(KernelIter from, KernelIter end, kernel::Kernel *graph_kernel);

  /// \brief Get the input tensors of DelegateModel.
  ///
  /// \return The input tensor vector of DelegateModel.
  const std::vector<mindspore::MSTensor> &inputs() { return this->inputs_; }

  /// \brief Get the output tensors of DelegateModel.
  ///
  /// \return The output tensor vector of DelegateModel.
  const std::vector<mindspore::MSTensor> &outputs() { return this->outputs_; }

  /// \brief Get the ms model version.
  ///
  /// \return The schema version for the primitives map.
  const SchemaVersion GetVersion() { return version_; }

 protected:
  std::vector<kernel::Kernel *> *kernels_;  // raw pointer to the model's kernel list (presumably non-owning — verify with caller)
  const std::vector<mindspore::MSTensor> &inputs_;   // references: the caller must keep these vectors alive
  const std::vector<mindspore::MSTensor> &outputs_;  // for the lifetime of this DelegateModel
  const std::map<kernel::Kernel *, const schema::Primitive *> &primitives_;  // kernel -> flatbuffer primitive lookup
  SchemaVersion version_;  // schema version of the model being inferred
};

class MS_API Delegate {
 public:
  /// \brief Constructor of MindSpore Lite Delegate.
  Delegate() = default;

  /// \brief Destructor of MindSpore Lite Delegate.
  virtual ~Delegate() = default;

  /// \brief Init delegate.
  ///
  /// \note Init will be called in Model::Build.
  ///
  /// \return Status. If Status is kLiteNotSupport, the program will return to the MindSpore Lite inner inference.
  virtual Status Init() = 0;

  /// \brief Build delegate graph for MindSpore Lite model.
  ///
  /// \note Build will be called in Model::Build.
  ///
  /// \param[in] model Define the delegate model to be built.
  virtual Status Build(DelegateModel *model) = 0;
};
}  // namespace mindspore
#endif  // MINDSPORE_INCLUDE_API_DELEGATE_H
-
Q:数据dump是能dump出来什么数据呀? Ans:通过配置可以dump出任何存在在网络中的数据,每一个算子的输入输出,每一个parameter的值和梯度都可以被dump下来。C++中const用法总结C++中的auto、auto &、const auto、const auto &// 获取需要转储内核列表 void DataDumper::GetNeedDumpKernelList(NotNull<std::map<std::string, CNodePtr> *> kernel_map) const { // 当只想读取range中元素时,使用const auto&,如:for(const auto&x:range),它不会进行拷贝,也不会修改range // const KernelGraphPtr &kernel_graph_; // 读取执行次序 for (const auto &kernel : kernel_graph_->execution_order()) { // 如果Kernel类型为HCCL_KERNEL,且需要转储的dump_mode_ == 0,则为true // fullname_with_scope():如果设置了全名,则立即返回全名 if (AnfAlgo::GetKernelType(kernel) == HCCL_KERNEL && DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope())) { // 获取输入的tensor数量,存在input_size变量中 auto input_size = AnfAlgo::GetInputTensorNum(kernel); for (size_t i = 0; i < input_size; ++i) { // 获取上一个节点的输出 auto input_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i); // input保存第一个存储的值 auto input = input_with_index.first; if (input->isa<CNode>()) { // 输出log信息:[asyncump]匹配Hccl节点 MS_LOG(INFO) << "[AsyncDump] Match Hccl Node:" << kernel->fullname_with_scope() << " Input:" << input->fullname_with_scope(); // 试图安放:input值 kernel_map->try_emplace(input->fullname_with_scope(), input->cast<CNodePtr>()); } } } else if (KernelNeedDump(kernel)) { // 否则 log信息:[AsyncDump]匹配节点:节点名称 MS_LOG(INFO) << "[AsyncDump] Match Node:" << kernel->fullname_with_scope(); // 试图安放:kernel值 kernel_map->try_emplace(kernel->fullname_with_scope(), kernel); } } }
-
【功能模块】Atlas200【操作步骤&问题现象】1、在根文件系统/usr/src目录下,没有kernel-headers内核头文件,然后安装头文件会失败 ,截图如下。2、请问如何生成内核头文件?【截图信息】安装kernel-headers失败【日志信息】(可选,上传日志内容或者附件)
-
【功能模块】在rc模式,下无法安装内核驱动开发包。【操作步骤&问题现象】1、在atlas200 下,新建sd卡,配置环境 Ascend-cann-5.0.2.alpha005_linux-aarch64 制卡 和驱动都可以,网口识别,typec正常2、可以识别pcie采集卡,但是采集卡驱动无法安装,安装时提示 如下,咨询了采集卡厂家,是因为atlas200缺少内核开发包【截图信息】【日志信息】(可选,上传日志内容或者附件)
-
【操作步骤&问题现象】Unravel_Index的tf算子在运行时会出现索引超出范围的错误,但是我在json文件中已经特意设置成不超出范围。【截图信息】【日志信息】(可选,上传日志内容或者附件)
-
出现的问题:[ERROR] RUNTIME(39659)kernel task happen error, retCode=0x28, [aicpu timeout].[ERROR] RUNTIME(39659)aicpu kernel execute failed, device_id=0, stream_id=610, task_id=14069, fault so_name=libdvpp_kernels.so, fault kernel_name=DvppGetVdecFrameV2, extend_info=.W0912 18:56:29.227083 39653 MxsmStream.cpp:1083] Exceed the time limit(10000).E0912 18:56:29.227200 39653 MxStreamManager.cpp:640] streamInstance GetResult return nullptr. stream(airportparking)E0912 18:56:29.227308 39653 main.cpp:103] Failed to get pipeline output0, ret = 1002I0912 18:56:29.227411 39653 main.cpp:108] [TimeCosts] 10001ms / 417spipeline:rtsp->videodecoder->resize->modelinfo->mmr->自定义ods->videoencoder->自定义推流pushstream上述问题是在运行时间30分钟以后,多则50分钟,就会出现上述的问题 aicpu kernel execute failed。log中有这个函数DvppGetVdecFrameV2,跟videodecoder或dvpp有关系吗?请帮忙分析可能的原因。
上滑加载中
推荐直播
-
OpenHarmony应用开发之网络数据请求与数据解析
2025/01/16 周四 19:00-20:30
华为开发者布道师、南京师范大学泰州学院副教授,硕士研究生导师,开放原子教育银牌认证讲师
科技浪潮中,鸿蒙生态强势崛起,OpenHarmony开启智能终端无限可能。当下,其原生应用开发适配潜力巨大,终端设备已广泛融入生活各场景,从家居到办公、穿戴至车载。 现在,机会敲门!我们的直播聚焦OpenHarmony关键的网络数据请求与解析,抛开晦涩理论,用真实案例带你掌握数据访问接口,轻松应对复杂网络请求、精准解析Json与Xml数据。参与直播,为开发鸿蒙App夯实基础,抢占科技新高地,别错过!
回顾中 -
Ascend C高层API设计原理与实现系列
2025/01/17 周五 15:30-17:00
Ascend C 技术专家
以LayerNorm算子开发为例,讲解开箱即用的Ascend C高层API
回顾中
热门标签